commit 161206455661b697b239d68d72eac48e18e682d5 Author: Daniel Swanson Date: Thu Jul 29 14:00:59 2021 -0500 transducer mmap sort by input symbol diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am index e943b39..b2875a7 100644 --- a/lttoolbox/Makefile.am +++ b/lttoolbox/Makefile.am @@ -2,7 +2,7 @@ h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \ deserialiser.h entry_token.h expander.h fst_processor.h input_file.h lt_locale.h \ match_exe.h match_node.h match_state.h my_stdio.h node.h \ - pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h string_writer.h \ + pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h string_view.h string_writer.h \ transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \ ustring.h cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \ diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc index b281646..7391318 100644 --- a/lttoolbox/transducer.cc +++ b/lttoolbox/transducer.cc @@ -741,12 +741,23 @@ Transducer::write_mmap(FILE* out, const Alphabet& alpha) } for (auto& it : transitions) { + // we want to make sure the transitions are sorted by input symbol + map> symbols; for (auto& it2 : it.second) { - auto sym = alpha.decode(it2.first); - write_le(out, sym.first); // input symbol - write_le(out, sym.second); // output symbol - write_le(out, it2.second.first); // destination - write_le(out, *reinterpret_cast(&it2.second.second)); // weight + symbols[alpha.decode(it2.first).first].insert(it2.first); + } + for (auto& s_in : symbols) { + for (auto& s : s_in.second) { + auto range = it.second.equal_range(s); + for (auto tr = range.first; tr != range.second; ++tr) { + auto sym = alpha.decode(tr->first); + write_le(out, sym.first); // input symbol + write_le(out, sym.second); // output symbol + write_le(out, tr->second.first); // destination + uint64_t w = 
*reinterpret_cast<uint64_t*>(&tr->second.second); + write_le(out, w); // weight + } + } } } }