Index: trunk/apertium/apertium/apertium_tagger.cc =================================================================== --- trunk/apertium/apertium/apertium_tagger.cc (revision 70248) +++ trunk/apertium/apertium/apertium_tagger.cc (revision 70251) @@ -688,7 +688,7 @@ FILE *Corpus = try_open_file_utf8("CORPUS", argv[optind + 1], "r"); FILE_Tagger_.train(Corpus, TheFunctionTypeOptionArgument); - try_close_file("CORPUS", argv[optind + 1], UntaggedCorpus); + try_close_file("CORPUS", argv[optind + 1], Corpus); FILE *Serialised_FILE_Tagger = try_open_file("SERIALISED_TAGGER", argv[optind + 3], "wb"); Index: branches/apertium-tagger/experiments/add_to_wikitable.py =================================================================== --- branches/apertium-tagger/experiments/add_to_wikitable.py (revision 70248) +++ branches/apertium-tagger/experiments/add_to_wikitable.py (revision 70251) @@ -190,8 +190,13 @@ print(table) sys.exit() -input_data = eval(open(sys.argv[1]).read()) +input_data = {} +for arg in sys.argv[1:]: + i = eval(open(arg).read()) + for k in i: + input_data[k] = i[k] + for lang, data in input_data.items(): lang_idx = lang_order.index(lang) col_idx = lang_idx + 1 Index: branches/apertium-tagger/experiments/run_experiment.py =================================================================== --- branches/apertium-tagger/experiments/run_experiment.py (revision 70248) +++ branches/apertium-tagger/experiments/run_experiment.py (revision 70251) @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import argparse import datetime import functools @@ -28,18 +29,16 @@ 'texts/dva-samoubijstva.ana.txt' ], 'kaz': [ - 'cg:texts/puupankki/story.tagged.txt', - 'cg:texts/puupankki/udhr.tagged.txt', - 'cg:texts/puupankki/wikipedia.tagged.txt', - 'cg:texts/puupankki/wikitravel.tagged.txt', - 'cg:texts/puupankki/Ер_Төстік.tagged.txt', - 'cg:texts/puupankki/Жиырма_Бесінші_Сөз.tagged.txt', - 'cg:texts/puupankki/Иран.tagged.txt', - 'cg:texts/puupankki/Махамбет_Өтемісұлы.tagged.txt', - 'cg:texts/puupankki/Өлген_қазан.tagged.txt', - 'cg:texts/puupankki/Радиан.tagged.txt', - 'cg:texts/puupankki/Футболдан_әлем_чемпионаты_2014.tagged.txt', - 'cg:texts/puupankki/Шымкент.tagged.txt', + 'cg:texts/texts1/Eurovision_ән_конкурсы_2010.tagged.vislcg.txt', + 'cg:texts/texts1/Абай_Құнанбайұлы.tagged.vislcg.txt', + 'cg:texts/texts1/Біріккен_Ұлттар_Ұйымы.tagged.vislcg.txt', + 'cg:texts/texts1/Жапония.tagged.vislcg.txt', + 'cg:texts/texts1/Жапония_Ұлттық_футбол_құрама_командасы.tagged.vislcg.txt', + 'cg:texts/texts1/Жасуша.tagged.vislcg.txt', + 'cg:texts/texts1/Иран.tagged.vislcg.txt', + 'cg:texts/texts1/Радиан.tagged.vislcg.txt', + 'cg:texts/texts1/Футболдан_әлем_чемпионаты_2014.tagged.vislcg.txt', + 'cg:texts/texts1/Шоқан_Шыңғысұлы_Уәлиханов.tagged.vislcg.txt', ], 'por': [ 'cg:texts/água.tagged.txt', @@ -117,6 +116,10 @@ help="Reuse preprocesed dictionary from previous run", action='store_true') parser.add_argument( + '--dry', + help="Just list the names of experiments which would be run", + action='store_true') + parser.add_argument( '--output', help="Output file for the results of the experiment") parser.add_argument( @@ -157,9 +160,16 @@ def invalidate_hbs(line): return line.startswith('+') +def invalidate_kaz(line): + # Odd... + if '/' not in line: + return True + left, right = line.split('/', 1) + return '<' in left or '<' not in right + LANGUAGE_INVALIDATOR_MAP = { 'por': invalidate_por, - 'hbs': invalidate_hbs, + 'kaz': invalidate_kaz, } @@ -311,6 +321,9 @@ def run_tagger(tagger): experiment = experiments[tagger] if lab.can_run_experiment(experiment): + if args.dry: + print("Running {}/{}".format(lang, tagger)) + else: languages_tagger_accuracies[lang][tagger] = experiment(lab) else: print("Skipping {}/{} since it needs a tsx" Index: branches/apertium-tagger/experiments/experiments.py =================================================================== --- branches/apertium-tagger/experiments/experiments.py (revision 70248) +++ branches/apertium-tagger/experiments/experiments.py (revision 70251) @@ -11,7 +11,7 @@ experiment_groups = {} -def exp_name(n): +def exp_name(name): def reg(func): func.name = name return func