Index: branches/apertium-tagger/experiments/run_experiment.py =================================================================== --- branches/apertium-tagger/experiments/run_experiment.py (revision 70246) +++ branches/apertium-tagger/experiments/run_experiment.py (revision 70247) @@ -177,10 +177,14 @@ pair_name = 'apertium-{0}.{0}'.format(lang) self.morphology_fn = pjoin(lang_root, lang + '.automorf.bin') self.cg_fn = pjoin(lang_root, lang + '.rlx.bin') + self.no_tsx = lang in NO_TSX_LANGUAGES + if self.no_tsx: + self.dix_fn = None + else: self.dix_fn = pjoin(lang_root, pair_name + '.dix') if not pexists(self.dix_fn): self.dix_fn = pjoin(lang_root, '.deps', pair_name + '.dix') - if lang in NO_TSX_LANGUAGES: + if self.no_tsx: self.tsx_fn = None elif lang in TSX_MAP: self.tsx_fn = pjoin(lang_root, TSX_MAP[lang]) @@ -208,22 +212,13 @@ for i in range(folds): xval_prefix = pjoin(self.work_dir, 'xval.{}.'.format(i)) - xval_ref_fn = xval_prefix + 'ref' - xval_train_fn = xval_prefix + 'train' - xval_src_fn = xval_prefix + 'src' - xval_trainsrc_fn = xval_prefix + 'trainsrc' - xval_cgtag_fn = xval_prefix + 'cgtag' - xval_traincg_fn = xval_prefix + 'traincgtag' - - self.xval_fns.append({ + xval_fn_dict = { 'prefix': xval_prefix, - 'ref': xval_ref_fn, - 'train': xval_train_fn, - 'src': xval_src_fn, - 'trainsrc': xval_trainsrc_fn, - 'cgtag': xval_cgtag_fn, - 'traincgtag': xval_traincg_fn, - }) + } + for ext in ['ref', 'train', 'src', 'trainsrc', + 'cgtag', 'traincgtag']: + xval_fn_dict[ext] = xval_prefix + ext + self.xval_fns.append(xval_fn_dict) self.validate() @@ -231,12 +226,15 @@ self.do_preprocessing(reuse_dic=reuse_dic) def validate(self): - for fn in [self.morphology_fn, self.cg_fn, self.tsx_fn, self.dix_fn]: + required = [self.morphology_fn, self.cg_fn] + if not self.no_tsx: + required.extend([self.tsx_fn, self.dix_fn]) + for fn in required: if fn is not None and not pexists(fn): raise MissingLanguageDataException(fn=fn) + if not self.no_tsx: check_run(["apertium-validate-dictionary", self.dix_fn]) - if self.tsx_fn is not None: check_run(["apertium-validate-tagger", self.tsx_fn]) def do_preprocessing(self, reuse_dic=False): @@ -266,7 +264,7 @@ copy_blanks(self.joined_fn, self.src_fn, self.src_blanks_fn) cg_proc(self.cg_fn, input=self.src_fn, output=self.cgtag_fn) copy_blanks(self.joined_fn, self.cgtag_fn, self.cgtag_blanks_fn) - if not reuse_dic: + if not reuse_dic and not self.no_tsx: loop.run_until_complete( fix_dix(self.morphology_fn, self.tsx_fn, self.dix_fn, output_fn=self.dic_fn)) @@ -280,7 +278,7 @@ xval_fn['cgtag'], self.folds, i) def can_run_experiment(self, experiment_func): - if self.tsx_fn is None and getattr(experiment_func, 'needs_tsx', False): + if self.no_tsx and getattr(experiment_func, 'needs_tsx', False): return False return True