Index: branches/apertium-tagger/experiments/experiments.py =================================================================== --- branches/apertium-tagger/experiments/experiments.py (revision 69991) +++ branches/apertium-tagger/experiments/experiments.py (revision 69992) @@ -8,35 +8,33 @@ # Experiment registry experiments = {} +experiment_groups = {} -def reg_experiment(name): +def exp_name(n): def reg(func): - experiments[name] = func + func.name = name return func return reg +def reg(func): + experiments[func.name] = func + + def needs_tsx(func): func.needs_tsx = True return func -def meta(func): - func.meta = True +def group(name): + def reg(func): + experiment_groups.setdefault(name, {}) + experiment_groups[name][func.name] = func return func + return reg -def default(func): - func.default = True - return func - - -def cg_extra(func): - func.cg_extra = True - return func - - # Statistical helpers def aggregates(data): return (min(data), max(data), @@ -43,15 +41,14 @@ mean(data), stdev(data)) -def xval_experiment(name): - def dec(func=None): +def xval_experiment(func): @functools.wraps(func) def wrapper(lab, *args, **kwargs): recall = [] recall_available = [] for i, xval_fns in enumerate(lab.xval_fns): - xval_fns['test'] = xval_fns['prefix'] + 'test.' + name - xval_fns['model'] = xval_fns['prefix'] + 'model.' + name + xval_fns['test'] = xval_fns['prefix'] + 'test.' + func.name + xval_fns['model'] = xval_fns['prefix'] + 'model.' + func.name func(lab, xval_fns) evaluator = TaggerEvaluator( xval_fns['src'], xval_fns['ref'], xval_fns['test']) @@ -60,7 +57,6 @@ recall_available.append(evaluator.recall_available) return (aggregates(recall), aggregates(recall_available)) return wrapper - return dec def get_single_analysis(lab, test_fn): @@ -74,9 +70,10 @@ unigram_model = 'unigram' + str(unigram_type) name = ('cg' if do_cg else '') + unigram_model - @reg_experiment(name) - @xval_experiment(name) - @default + @reg + @group('default') + @xval_experiment + @exp_name(name) def unigram_experiment(lab, xval_fns, do_cg=do_cg, unigram_model=unigram_model): @@ -94,8 +91,9 @@ for do_cg in [False, True]: name = ('cg' if do_cg else '') + '1st' - @reg_experiment(name) - @default + @reg + @group('default') + @exp_name(name) def pick_first_experiment(lab, do_cg=do_cg): first_fn = pjoin(lab.work_dir, 'test.' + name) if do_cg: @@ -175,16 +173,17 @@ cg_aug=tagger_cg_aug, input=tagger_input, output=xval_fns['test'] ).check_returncode() + model_experiment = exp_name(name)(model_experiment) if not cg_aug and do_cg in [None, 'in']: - model_experiment = default(model_experiment) + model_experiment = group('default')(model_experiment) else: - model_experiment = cg_extra(model_experiment) - model_experiment = xval_experiment(name)(model_experiment) - model_experiment = reg_experiment(name)(model_experiment) + model_experiment = group('cg-extra')(model_experiment) + reg(xval_experiment(model_experiment)) -@reg_experiment('word_count') -@default +@reg +@group('default') +@exp_name('word_count') def word_count(lab): count = 0 for line in open(lab.src_fn): @@ -193,9 +192,10 @@ return count -@reg_experiment('new_cg_ambg') +@reg +@group('meta') +@exp_name('new_cg_ambg') @needs_tsx -@meta def new_cg_ambg(lab): model_fn = pjoin(lab.work_dir, 'new_cg_ambg.model') tagger_train_sup('bigram', model_fn, Index: branches/apertium-tagger/experiments/run_experiment.py =================================================================== --- branches/apertium-tagger/experiments/run_experiment.py (revision 69991) +++ branches/apertium-tagger/experiments/run_experiment.py (revision 69992) @@ -10,7 +10,7 @@ from pprint import pformat import asyncio -from experiments import experiments +from experiments import experiments, experiment_groups from shell_utils import cd, check_run from shell_wrappers import (cg_proc, extract_src, fix_dix, run_cg_conv_clean, split_n_r, strip_blanks, strip_unknown_sent, @@ -78,8 +78,13 @@ parser.add_argument( '--taggers', help="Only run experiments with these taggers, comma separated", - default=DEFAULT_TAGGERS, type=comma_list) + default=[], type=comma_list) parser.add_argument( + '--tagger-groups', + help="Only run these experiment groups. " + "Available: default, cg-extra, meta.", + default=['default'], type=comma_list) + parser.add_argument( '--language-texts', help="Use different texts per language, coma seperated colon pairs", default=DEFAULT_TEXTS, type=comma_colon_dict) @@ -96,10 +101,6 @@ help="Reuse preprocesed dictionary from previous run", action='store_true') parser.add_argument( - '--cg-extra', - help="Run extra CG assisted tagging tests", - action='store_true') - parser.add_argument( '--output', help="Output file for the results of the experiment") parser.add_argument( @@ -258,17 +259,12 @@ split_n_r(self.cgtag_blanks_fn, xval_fn['traincgtag'], xval_fn['cgtag'], self.folds, i) - def get_tagger(self, tagger): - tagger_func = experiments[tagger] - if self.tsx_fn is None and getattr(tagger_func, 'needs_tsx', False): - return None - return functools.partial(experiments[tagger], self) + def can_run_experiment(self, experiment_func): + if self.tsx_fn is None and getattr(experiment_func, 'needs_tsx', False): + return False + return True -DEFAULT_TAGGERS = [k for k, v in experiments.items() - if not getattr(v, 'non_default', False)] - - def main(): args = parse_args() if not isdir(WORK_DIR): @@ -294,16 +290,21 @@ check_run(['./autogen.sh']) check_run(['make']) lab = mk_lab() + def run_tagger(tagger): + experiment = experiments[tagger] + if lab.can_run_experiment(experiment): + languages_tagger_accuracies[lang][tagger] = experiment(lab) + else: + print("Skipping {}/{} since it needs a tsx" + .format(lang, tagger)) languages_tagger_accuracies[lang] = {} + if args.taggers: for tagger in args.taggers: - experiment = lab.get_tagger(tagger) - if experiment is None: - print("Skipping {}/{} since it needs a tsx" - .format(lang, tagger)) - elif getattr(experiment, 'default', False) and - not (args.cg_extra and getattr(experiment, 'cg_extra', False)): + run_tagger(tagger) else: - languages_tagger_accuracies[lang][tagger] = experiment() + for tagger_group in args.tagger_groups: + for tagger in experiment_groups[tagger_group]: + run_tagger(tagger) finally: result_pretty = pformat(languages_tagger_accuracies) print(result_pretty)