Index: incubator/apertium-pol-rus/dev/from_z.py =================================================================== --- incubator/apertium-pol-rus/dev/from_z.py (revision 72226) +++ incubator/apertium-pol-rus/dev/from_z.py (revision 72227) @@ -17,6 +17,7 @@ def paradigm_collector(gram_d, secondary = True): '''returns a dictionary, where keys are lemmas and values is a tuple of stem and frozenset of tuples of flections and frozensets of grammar tags''' morph_d = {lexeme : [el[0] for el in gram_d[lexeme]] for lexeme in gram_d} + # gram_d = change_tags(gram_d) paradigms = {} for lemma in morph_d: new_lemma = choose_lemma(gram_d[lemma]) @@ -31,9 +32,11 @@ return paradigms def change_tags(grammar_tags, secondary = True): + '''''' grammar_tags = grammar_tags.replace('pstpss pstpss ', 'pstpss ') if secondary: grammar_tags = grammar_tags.replace('v impf ', '').replace('v perf ', '').replace('tv ', '').replace('iv ', '').replace(' prb', '') + return grammar_tags def choose_lemma(lexeme): @@ -78,7 +81,8 @@ def final_tags(frozen_info): '''replaces tags''' - replacer = {'msc' : 'm', 'anin': 'an', 'fem' : 'f', 'inan' : 'nn', 'anim' : 'aa', 'neu' : 'nt', 'pred' : 'short', 'v' : 'vblex'} + replacer = {'msc' : 'm', 'anin': 'an', 'fem' : 'f', 'inan' : 'nn', 'anim' : 'aa', 'neu' : 'nt', 'pred' : 'short', 'v' : 'vblex', + 'sg1' : 'p1 sg', 'sg2' : 'p2 sg', 'sg3' : 'p3 sg', 'pl1' : 'p1 pl', 'pl2' : 'p2 pl', 'pl3' : 'p3 pl'} new_info = [] for wordform in frozen_info: for replacement in replacer: @@ -277,16 +281,15 @@ if verb in labels[label]: st_and_fl = paradigms[label][0] ending = re.sub('[1234¹²]', '', st_and_fl[1]) + verb = re.sub('[1234¹²]', '', verb) text += ' ' + verb.split(ending)[0] + '\n' thereis = True break if not thereis: print('Something is wrong with entries_maker: ' + verb) - text += ' ' + verb + '\n' return text - def find_ptcp_base(info, lexeme, par, ending): for wordform in info[lexeme]: if par in wordform[1] and 'msc anin sg nom' in wordform[1] and 'pass' not in wordform[1]: @@ -297,7 +300,6 @@ print('something is rong with find_ptcp_base, lexeme ' + lexeme) - def prtcp_affixes(line, prtcp_base): base = line.split('#')[1] if base: