Index: trunk/apertium/apertium/apertium_tagger.cc =================================================================== --- trunk/apertium/apertium/apertium_tagger.cc (revision 69564) +++ trunk/apertium/apertium/apertium_tagger.cc (revision 69566) @@ -425,7 +425,7 @@ {"mark", no_argument, 0, 'm'}, {"show-superficial", no_argument, 0, 'p'}, {"null-flush", no_argument, 0, 'z'}, - {"unigram", no_argument, 0, 'u'}, + {"unigram", required_argument, 0, 'u'}, {"sliding-window", no_argument, 0, 'w'}, {"tagger", no_argument, 0, 'g'}, {"retrain", required_argument, 0, 'r'}, Index: branches/apertium-tagger/experiments/add_to_wikitable.py =================================================================== --- branches/apertium-tagger/experiments/add_to_wikitable.py (revision 69564) +++ branches/apertium-tagger/experiments/add_to_wikitable.py (revision 69566) @@ -4,12 +4,14 @@ import locale import mwparserfromhell from mwparserfromhell.nodes.tag import Tag -from mwparserfromhell.nodes.text import Text from mwparserfromhell.wikicode import Wikicode TAGGER_ORDER = ['1st', 'unigram1', 'unigram2', 'unigram3', 'bigram', 'lwsw'] -rdict = lambda d: {v: k for k, v in d.items()} + +def rdict(d): + return {v: k for k, v in d.items()} + LANG_CODE_NAME_MAP = { 'cat': 'Catalan', 'spa': 'Spanish', @@ -21,6 +23,7 @@ } LANG_NAME_CODE_MAP = rdict(LANG_CODE_NAME_MAP) + def name_to_attrs(name): attrs = {} for tagger in TAGGER_ORDER: @@ -48,10 +51,13 @@ return attrs + def attrs_to_sort_tuple(attrs): # tagger; unsup, sup; nocg, cg; iters - return (TAGGER_ORDER.index(attrs['tagger']), attrs['sup'], attrs['cg'], attrs['iters']) + return (TAGGER_ORDER.index(attrs['tagger']), + attrs['sup'], attrs['cg'], attrs['iters']) + def attrs_to_str(attrs): if attrs['tagger'].startswith('unigram'): out = 'Unigram model ' + attrs['tagger'][len('unigram'):] @@ -73,6 +79,7 @@ return out + def value_to_str(value): if hasattr(value, "__getitem__"): return "{2:.2f}±{3:.2f}".format(*(v * 100 for v in value)) @@ -81,7 +88,7 @@ def result_to_str(result): - return '{}, {}'.format(result[0], result[1]) + return '{}, {}'.format(value_to_str(result[0]), value_to_str(result[1])) def mk_title_td(title): @@ -91,6 +98,7 @@ contents=" '''{}''' ".format(title), closing_wiki_markup='') + def mk_val_td(val, is_last=False): return Tag( 'td', @@ -100,6 +108,7 @@ wiki_style_separator='|', closing_wiki_markup='') + def mk_empty_td(is_last=False): return Tag( 'td', @@ -107,6 +116,7 @@ contents="\n" if is_last else "", closing_wiki_markup='') + def mk_wc_td(val, is_first=False, is_last=False): return Tag( 'td', @@ -114,6 +124,7 @@ contents=" {}{}".format(val, "\n" if is_last else " "), closing_wiki_markup='') + def mk_initial_tr(title): return Tag( 'tr', @@ -136,6 +147,7 @@ continue lang_order.append(LANG_NAME_CODE_MAP[title]) + def insert_into_tr(tr, col_idx, val_str): if len(tr.contents.nodes) <= col_idx: last_td = tr.contents.get(-1) @@ -149,6 +161,7 @@ val_td = mk_val_td(val_str, is_last=has_newline) tr.contents.set(col_idx, val_td) + def insert_into_wc(tr, col_idx, val_str): target_cell = tr.contents.get(col_idx) has_newline = target_cell.contents.endswith('\n') @@ -156,6 +169,7 @@ val_td = mk_wc_td(val_str, is_first=is_first, is_last=has_newline) tr.contents.set(col_idx, val_td) + def format_word_count(word_count): locale.setlocale(locale.LC_ALL, 'en_US') number = locale.format("%d", word_count, grouping=True) @@ -185,7 +199,8 @@ if word_count is not None: word_count_tr = table_inner.get(3) insert_into_wc(word_count_tr, col_idx, format_word_count(word_count)) - data = [(name_to_attrs(name), result_to_str(value)) for name, value in data.items()] + data = [(name_to_attrs(name), result_to_str(value)) + for name, value in data.items()] data = sorted(data, key=lambda pair: attrs_to_sort_tuple(pair[0])) table_idx = 4 for attrs, val_str in data: