commit 0624071831697c3c894c98ff86091f6f7e473672 Author: Daniel Swanson Date: Wed Jun 2 13:48:20 2021 -0500 lt-proc (unweighted) working diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am index 7874da3..dfb8a61 100644 --- a/lttoolbox/Makefile.am +++ b/lttoolbox/Makefile.am @@ -1,12 +1,12 @@ h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \ - deserialiser.h entry_token.h expander.h fst_processor.h lt_locale.h \ + deserialiser.h entry_token.h expander.h fst_processor.h input_file.h lt_locale.h \ match_exe.h match_node.h match_state.h my_stdio.h node.h \ pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h \ transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h \ string_to_wostream.h ustring.h cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \ - expander.cc fst_processor.cc lt_locale.cc match_exe.cc \ + expander.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \ match_node.cc match_state.cc node.cc pattern_list.cc \ regexp_compiler.cc sorted_vector.cc state.cc transducer.cc \ trans_exe.cc xml_parse_util.cc tmx_compiler.cc ustring.cc diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc index 7d5ce10..ee01b42 100644 --- a/lttoolbox/att_compiler.cc +++ b/lttoolbox/att_compiler.cc @@ -24,6 +24,7 @@ #include #include #include +#include using namespace std; using namespace icu; @@ -55,19 +56,20 @@ AttCompiler::clear() void AttCompiler::convert_hfst(UString& symbol) { - if (symbol == (const UChar*)"@0@" || symbol == (const UChar*)"ε") + if (symbol == "@0@"_u || symbol == "ε"_u) { - symbol = (const UChar*)""; + symbol.clear(); } - else if (symbol == (const UChar*)"@_SPACE_@") + else if (symbol == "@_SPACE_@"_u) { - symbol = (const UChar*)" "; + symbol = " "_u; } } bool AttCompiler::is_word_punct(UChar symbol) { + return u_charType(symbol) & (U_NON_SPACING_MARK | U_ENCLOSING_MARK | U_COMBINING_SPACING_MARK); // https://en.wikipedia.org/wiki/Combining_character#Unicode_ranges if((symbol >= 0x0300 && symbol <= 0x036F) // Combining Diacritics || (symbol >= 0x1AB0 && symbol <= 0x1AFF) // ... Extended @@ -98,17 +100,17 @@ AttCompiler::symbol_code(const UString& symbol) return alphabet(symbol); } else if (symbol.empty()) { return 0; - } else if ((iswpunct(symbol[0]) || iswspace(symbol[0])) && !is_word_punct(symbol[0])) { + } else if ((u_ispunct(symbol[0]) || u_isspace(symbol[0])) && !is_word_punct(symbol[0])) { return symbol[0]; } else { letters.insert(symbol[0]); - if(iswlower(symbol[0])) + if(u_islower(symbol[0])) { - letters.insert(towupper(symbol[0])); + letters.insert(u_toupper(symbol[0])); } - else if(iswupper(symbol[0])) + else if(u_isupper(symbol[0])) { - letters.insert(towlower(symbol[0])); + letters.insert(u_tolower(symbol[0])); } return symbol[0]; } @@ -134,13 +136,13 @@ AttCompiler::parse(string const &file_name, bool read_rl) { line_number++; tokens.clear(); - tokens.push_back((UChar*)""); + tokens.push_back(""_u); do { UChar32 c = u_fgetcx(infile); if (c == '\n') { break; } else if (c == '\t') { - tokens.push_back((UChar*)""); + tokens.push_back(""_u); } else { tokens.back() += c; } @@ -191,8 +193,7 @@ AttCompiler::parse(string const &file_name, bool read_rl) // Add an Epsilon transition from the new starting state starting_node->transductions.push_back( - Transduction(from, (const UChar*)"", (const UChar*)"", - 0, default_weight)); + Transduction(from, ""_u, ""_u, 0, default_weight)); first_line_in_fst = false; } @@ -358,7 +359,7 @@ AttCompiler::classify_single_transition(Transduction& t) if (letters.find(t.upper[0]) != letters.end()) { t.type |= WORD; } - if (iswpunct(t.upper[0])) { + if (u_ispunct(t.upper[0])) { t.type |= PUNCT; } } @@ -381,10 +382,10 @@ AttCompiler::classify_forwards() for(auto& t1 : n1->transductions) { AttNode* n2 = get_node(t1.to); for(auto& t2 : n2->transductions) { - t2.type |= t1.type; + t2.type |= t1.type; } if(done.find(t1.to) == done.end()) { - todo.push(t1.to); + todo.push(t1.to); } } done.insert(next); @@ -449,12 +450,12 @@ AttCompiler::write(FILE *output) { Compression::multibyte_write(2, output); } - Compression::string_write((const UChar*)"main@standard", output); + Compression::string_write("main@standard"_u, output); Transducer word_fst = extract_transducer(WORD); word_fst.write(output); wcout << L"main@standard" << " " << word_fst.size(); wcout << " " << word_fst.numberOfTransitions() << endl; - Compression::string_write((const UChar*)"final@inconditional", output); + Compression::string_write("final@inconditional"_u, output); if(punct_fst.numberOfTransitions() != 0) { punct_fst.write(output); diff --git a/lttoolbox/compression.cc b/lttoolbox/compression.cc index 455a95e..367b2b2 100644 --- a/lttoolbox/compression.cc +++ b/lttoolbox/compression.cc @@ -21,6 +21,8 @@ #include #include #include +#include +#include void Compression::writeByte(unsigned char byte, FILE *output) @@ -256,8 +258,12 @@ Compression::multibyte_read(istream &input) void Compression::string_write(UString const &str, FILE *output) { - Compression::multibyte_write(str.size(), output); - for(auto c : str) + vector vec; + string temp; + utf8::utf16to8(str.begin(), str.end(), std::back_inserter(temp)); + utf8::utf8to32(temp.begin(), temp.end(), std::back_inserter(vec)); + Compression::multibyte_write(vec.size(), output); + for(auto c : vec) { Compression::multibyte_write(static_cast(c), output); } @@ -267,12 +273,17 @@ UString Compression::string_read(FILE *input) { UString retval; + std::vector vec; for(unsigned int i = 0, limit = Compression::multibyte_read(input); i != limit; i++) { - retval += static_cast(Compression::multibyte_read(input)); + vec.push_back(static_cast(Compression::multibyte_read(input))); + //retval += static_cast(Compression::multibyte_read(input)); } + string temp; + utf8::utf32to8(vec.begin(), vec.end(), std::back_inserter(temp)); + utf8::utf8to16(temp.begin(), temp.end(), std::back_inserter(retval)); return retval; } diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index 30c76a0..77e3d5d 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -190,16 +190,16 @@ FSTProcessor::procNodeRCX() } UChar -FSTProcessor::readEscaped(UFILE *input) +FSTProcessor::readEscaped(InputFile& input) { - if(u_feof(input)) + if(input.eof()) { streamError(); } - UChar val = static_cast(u_fgetc(input)); + UChar val = static_cast(input.get()); - if(u_feof(input)) + if(input.eof()) { streamError(); } @@ -208,15 +208,15 @@ FSTProcessor::readEscaped(UFILE *input) } UString -FSTProcessor::readFullBlock(UFILE *input, UChar const delim1, UChar const delim2) +FSTProcessor::readFullBlock(InputFile& input, UChar const delim1, UChar const delim2) { UString result; result += delim1; UChar c = delim1; - while(!u_feof(input) && c != delim2) + while(!input.eof() && c != delim2) { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); result += c; if(c != '\\') { @@ -237,15 +237,15 @@ FSTProcessor::readFullBlock(UFILE *input, UChar const delim1, UChar const delim2 } UString -FSTProcessor::readWblank(UFILE *input) +FSTProcessor::readWblank(InputFile& input) { UString result; result += "[["_u; UChar c = 0; - while(!u_feof(input)) + while(!input.eof()) { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); result += c; if(c == '\\') @@ -254,7 +254,7 @@ FSTProcessor::readWblank(UFILE *input) } else if(c == ']') { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); result += c; if(c == ']') @@ -273,15 +273,15 @@ FSTProcessor::readWblank(UFILE *input) } bool -FSTProcessor::wblankPostGen(UFILE *input, UFILE *output) +FSTProcessor::wblankPostGen(InputFile& input, UFILE *output) { UString result; result += "[["_u; UChar c = 0; - while(!u_feof(input)) + while(!input.eof()) { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); result += c; if(c == '\\') @@ -290,7 +290,7 @@ FSTProcessor::wblankPostGen(UFILE *input, UFILE *output) } else if(c == ']') { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); result += c; if(c == ']') @@ -298,12 +298,12 @@ FSTProcessor::wblankPostGen(UFILE *input, UFILE *output) int resultlen = result.size(); if(result[resultlen-5] == '[' && result[resultlen-4] == '[' && result[resultlen-3] == '/') //ending blank [[/]] { - u_fputs(result.c_str(), output); + write(result, output); break; } else { - c = static_cast(u_fgetc(input)); + c = static_cast(input.get()); if(c == '~') { wblankqueue.push(result); @@ -327,25 +327,27 @@ FSTProcessor::wblankPostGen(UFILE *input, UFILE *output) } int -FSTProcessor::readAnalysis(UFILE *input) +FSTProcessor::readAnalysis(InputFile& input) { if(!input_buffer.isEmpty()) { return input_buffer.next(); } - UChar val = static_cast(u_fgetc(input)); + UChar val = input.get(); int altval = 0; - if(u_feof(input)) + if(input.eof()) { input_buffer.add(0); // so it's treated like the NUL byte return 0; + } else if(val == U_EOF) { + val = 0; } if((useIgnoredChars || useDefaultIgnoredChars) && ignored_chars.find(val) != ignored_chars.end()) { input_buffer.add(val); - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); } if(escaped_chars.find(val) != escaped_chars.end()) @@ -358,7 +360,7 @@ FSTProcessor::readAnalysis(UFILE *input) return altval; case '[': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); if(val == '[') { @@ -366,7 +368,7 @@ FSTProcessor::readAnalysis(UFILE *input) } else { - u_fungetc(val, input); + input.unget(val); blankqueue.push(readFullBlock(input, '[', ']')); } @@ -374,7 +376,7 @@ FSTProcessor::readAnalysis(UFILE *input) return static_cast(' '); case '\\': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); input_buffer.add(static_cast(val)); return val; @@ -391,7 +393,7 @@ FSTProcessor::readAnalysis(UFILE *input) } int -FSTProcessor::readTMAnalysis(UFILE *input) +FSTProcessor::readTMAnalysis(InputFile& input) { isLastBlankTM = false; if(!input_buffer.isEmpty()) @@ -399,9 +401,9 @@ FSTProcessor::readTMAnalysis(UFILE *input) return input_buffer.next(); } - UChar val = static_cast(u_fgetc(input)); + UChar val = static_cast(input.get()); int altval = 0; - if(u_feof(input)) + if(input.eof()) { return 0; } @@ -416,7 +418,7 @@ FSTProcessor::readTMAnalysis(UFILE *input) return altval; case '[': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); if(val == '[') { @@ -424,7 +426,7 @@ FSTProcessor::readTMAnalysis(UFILE *input) } else { - u_fungetc(val, input); + input.unget(val); blankqueue.push(readFullBlock(input, '[', ']')); } @@ -433,7 +435,7 @@ FSTProcessor::readTMAnalysis(UFILE *input) return static_cast(' '); case '\\': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); input_buffer.add(static_cast(val)); return val; case '0': @@ -451,9 +453,9 @@ FSTProcessor::readTMAnalysis(UFILE *input) do { ws += val; - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); } while(iswdigit(val)); - u_fungetc(val, input); + input.unget(val); input_buffer.add(alphabet(""_u)); numbers.push_back(ws); return alphabet(""_u); @@ -470,17 +472,17 @@ FSTProcessor::readTMAnalysis(UFILE *input) } int -FSTProcessor::readPostgeneration(UFILE *input, UFILE *output) +FSTProcessor::readPostgeneration(InputFile& input, UFILE *output) { if(!input_buffer.isEmpty()) { return input_buffer.next(); } - UChar val = static_cast(u_fgetc(input)); + UChar val = static_cast(input.get()); int altval = 0; is_wblank = false; - if(u_feof(input)) + if(input.eof()) { return 0; } @@ -493,7 +495,7 @@ FSTProcessor::readPostgeneration(UFILE *input, UFILE *output) return altval; case '[': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); if(val == '[') { @@ -515,7 +517,7 @@ FSTProcessor::readPostgeneration(UFILE *input, UFILE *output) } else { - u_fungetc(val, input); + input.unget(val); blankqueue.push(readFullBlock(input, '[', ']')); input_buffer.add(static_cast(' ')); @@ -523,7 +525,7 @@ FSTProcessor::readPostgeneration(UFILE *input, UFILE *output) } case '\\': - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); input_buffer.add(static_cast(val)); return val; @@ -534,12 +536,12 @@ FSTProcessor::readPostgeneration(UFILE *input, UFILE *output) } void -FSTProcessor::skipUntil(UFILE *input, UFILE *output, wint_t const character) +FSTProcessor::skipUntil(InputFile& input, UFILE *output, wint_t const character) { while(true) { - wint_t val = u_fgetc(input); - if(u_feof(input)) + wint_t val = input.get(); + if(input.eof()) { return; } @@ -547,8 +549,8 @@ FSTProcessor::skipUntil(UFILE *input, UFILE *output, wint_t const character) switch(val) { case '\\': - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return; } @@ -579,11 +581,11 @@ FSTProcessor::skipUntil(UFILE *input, UFILE *output, wint_t const character) } int -FSTProcessor::readGeneration(UFILE *input, UFILE *output) +FSTProcessor::readGeneration(InputFile& input, UFILE *output) { - wint_t val = u_fgetc(input); + wint_t val = input.get(); - if(u_feof(input)) + if(input.eof()) { return 0x7fffffff; } @@ -592,8 +594,8 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) { if(val == '^') { - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return 0x7fffffff; } @@ -601,15 +603,15 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) else if(val == '\\') { u_fputc(val, output); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return 0x7fffffff; } u_fputc(val,output); skipUntil(input, output, '^'); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return 0x7fffffff; } @@ -618,8 +620,8 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) { u_fputc(val, output); skipUntil(input, output, '^'); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return 0x7fffffff; } @@ -629,7 +631,7 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) if(val == '\\') { - val = u_fgetc(input); + val = input.get(); return static_cast(val); } else if(val == '$') @@ -642,9 +644,9 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) UString cad; cad += static_cast(val); - while((val = u_fgetc(input)) != '>') + while((val = input.get()) != '>') { - if(u_feof(input)) + if(input.eof()) { streamError(); } @@ -656,15 +658,15 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) } else if(val == '[') { - val = u_fgetc(input); + val = input.get(); if(val == '[') { - u_fputs(readWblank(input).c_str(), output); + write(readWblank(input), output); } else { - u_fungetc(val, input); - u_fputs(readFullBlock(input, '[', ']').c_str(), output); + input.unget(val); + write(readFullBlock(input, '[', ']'), output); } return readGeneration(input, output); @@ -678,12 +680,12 @@ FSTProcessor::readGeneration(UFILE *input, UFILE *output) } pair -FSTProcessor::readBilingual(UFILE *input, UFILE *output) +FSTProcessor::readBilingual(InputFile& input, UFILE *output) { - wint_t val = u_fgetc(input); + wint_t val = input.get(); UString symbol; - if(u_feof(input)) + if(input.eof()) { return pair(symbol, 0x7fffffff); } @@ -692,8 +694,8 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) { if(val == '^') { - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return pair(symbol, 0x7fffffff); } @@ -701,15 +703,15 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) else if(val == '\\') { u_fputc(val, output); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return pair(symbol, 0x7fffffff); } u_fputc(val,output); skipUntil(input, output, '^'); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return pair(symbol, 0x7fffffff); } @@ -718,8 +720,8 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) { u_fputc(val, output); skipUntil(input, output, '^'); - val = u_fgetc(input); - if(u_feof(input)) + val = input.get(); + if(input.eof()) { return pair(symbol, 0x7fffffff); } @@ -729,7 +731,7 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) if(val == '\\') { - val = u_fgetc(input); + val = input.get(); return pair(symbol, val); } else if(val == '$') @@ -741,9 +743,9 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) { UString cad; cad += static_cast(val); - while((val = u_fgetc(input)) != '>') + while((val = input.get()) != '>') { - if(u_feof(input)) + if(input.eof()) { streamError(); } @@ -761,15 +763,15 @@ FSTProcessor::readBilingual(UFILE *input, UFILE *output) } else if(val == '[') { - val = u_fgetc(input); + val = input.get(); if(val == '[') { - u_fputs(readWblank(input).c_str(), output); + write(readWblank(input), output); } else { - u_fungetc(val, input); - u_fputs(readFullBlock(input, '[', ']').c_str(), output); + input.unget(val); + write(readFullBlock(input, '[', ']'), output); } return readBilingual(input, output); @@ -783,7 +785,7 @@ FSTProcessor::flushBlanks(UFILE *output) { for(size_t i = blankqueue.size(); i > 0; i--) { - u_fputs(blankqueue.front().c_str(), output); + write(blankqueue.front(), output); blankqueue.pop(); } } @@ -793,7 +795,7 @@ FSTProcessor::flushWblanks(UFILE *output) { while(wblankqueue.size() > 0) { - u_fputs(wblankqueue.front().c_str(), output); + write(wblankqueue.front(), output); wblankqueue.pop(); } } @@ -938,7 +940,7 @@ FSTProcessor::writeEscapedWithTags(UString const &str, UFILE *output) { if(str[i] == '<' && i >=1 && str[i-1] != '\\') { - u_fputs(str.substr(i).c_str(), output); + write(str.substr(i), output); return; } @@ -957,7 +959,7 @@ FSTProcessor::printWord(UString const &sf, UString const &lf, UFILE *output) { u_fputc('^', output); writeEscaped(sf, output); - u_fputs(lf.c_str(), output); + write(lf, output); u_fputc('$', output); } @@ -966,11 +968,10 @@ FSTProcessor::printWordPopBlank(UString const &sf, UString const &lf, UFILE *out { u_fputc('^', output); size_t postpop = writeEscapedPopBlanks(sf, output); - u_fputs(lf.c_str(), output); - u_fputc('$', output); + u_fprintf(output, "%S$", lf.c_str()); while (postpop-- && blankqueue.size() > 0) { - u_fputs(blankqueue.front().c_str(), output); + write(blankqueue.front(), output); blankqueue.pop(); } } @@ -978,10 +979,7 @@ FSTProcessor::printWordPopBlank(UString const &sf, UString const &lf, UFILE *out void FSTProcessor::printWordBilingual(UString const &sf, UString const &lf, UFILE *output) { - u_fputc('^', output); - u_fputs(sf.c_str(), output); - u_fputs(lf.c_str(), output); - u_fputc('$', output); + u_fprintf(output, "^%S%S$", sf.c_str(), lf.c_str()); } void @@ -1208,7 +1206,7 @@ FSTProcessor::initDecomposition() } void -FSTProcessor::analysis(UFILE *input, UFILE *output) +FSTProcessor::analysis(InputFile& input, UFILE *output) { if(getNullFlush()) { @@ -1229,6 +1227,7 @@ FSTProcessor::analysis(UFILE *input, UFILE *output) do { val = readAnalysis(input); + cerr << "val is " << val << endl; // test for final states if(current_state.isFinal(all_finals)) { @@ -1365,11 +1364,11 @@ FSTProcessor::analysis(UFILE *input, UFILE *output) { if(!isAlphabetic(val) && sf.empty()) { - if(iswspace(val)) + if(u_isspace(val)) { if (blankqueue.size() > 0) { - u_fputs(blankqueue.front().c_str(), output); + write(blankqueue.front(), output); blankqueue.pop(); } else @@ -1527,10 +1526,10 @@ FSTProcessor::analysis(UFILE *input, UFILE *output) } void -FSTProcessor::analysis_wrapper_null_flush(UFILE *input, UFILE *output) +FSTProcessor::analysis_wrapper_null_flush(InputFile& input, UFILE *output) { setNullFlush(false); - while(!u_feof(input)) + while(!input.eof()) { analysis(input, output); u_fputc('\0', output); @@ -1539,13 +1538,13 @@ FSTProcessor::analysis_wrapper_null_flush(UFILE *input, UFILE *output) } void -FSTProcessor::generation_wrapper_null_flush(UFILE *input, UFILE *output, +FSTProcessor::generation_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode) { setNullFlush(false); nullFlushGeneration = true; - while(!u_feof(input)) + while(!input.eof()) { generation(input, output, mode); u_fputc('\0', output); @@ -1554,10 +1553,10 @@ FSTProcessor::generation_wrapper_null_flush(UFILE *input, UFILE *output, } void -FSTProcessor::postgeneration_wrapper_null_flush(UFILE *input, UFILE *output) +FSTProcessor::postgeneration_wrapper_null_flush(InputFile& input, UFILE *output) { setNullFlush(false); - while(!u_feof(input)) + while(!input.eof()) { postgeneration(input, output); u_fputc('\0', output); @@ -1566,10 +1565,10 @@ FSTProcessor::postgeneration_wrapper_null_flush(UFILE *input, UFILE *output) } void -FSTProcessor::intergeneration_wrapper_null_flush(UFILE *input, UFILE *output) +FSTProcessor::intergeneration_wrapper_null_flush(InputFile& input, UFILE *output) { setNullFlush(false); - while (!u_feof(input)) + while (!input.eof()) { intergeneration(input, output); u_fputc('\0', output); @@ -1578,10 +1577,10 @@ FSTProcessor::intergeneration_wrapper_null_flush(UFILE *input, UFILE *output) } void -FSTProcessor::transliteration_wrapper_null_flush(UFILE *input, UFILE *output) +FSTProcessor::transliteration_wrapper_null_flush(InputFile& input, UFILE *output) { setNullFlush(false); - while(!u_feof(input)) + while(!input.eof()) { transliteration(input, output); u_fputc('\0', output); @@ -1590,7 +1589,7 @@ FSTProcessor::transliteration_wrapper_null_flush(UFILE *input, UFILE *output) } void -FSTProcessor::tm_analysis(UFILE *input, UFILE *output) +FSTProcessor::tm_analysis(InputFile& input, UFILE *output) { State current_state = initial_state; UString lf; //lexical form @@ -1682,12 +1681,12 @@ FSTProcessor::tm_analysis(UFILE *input, UFILE *output) if(val == 0) { - u_fputs(sf.c_str(), output); + write(sf, output); return; } input_buffer.back(1); - u_fputs(sf.c_str(), output); + write(sf, output); while(blankqueue.size() > 0) { @@ -1703,7 +1702,7 @@ FSTProcessor::tm_analysis(UFILE *input, UFILE *output) unsigned int size = sf.size(); limit = (limit == static_cast(UString::npos)?size:limit); input_buffer.back(1+(size-limit)); - u_fputs(sf.substr(0, limit).c_str(), output); + write(sf.substr(0, limit), output); */ } else if(lf.empty()) { @@ -1711,10 +1710,10 @@ FSTProcessor::tm_analysis(UFILE *input, UFILE *output) unsigned int size = sf.size(); limit = (limit == static_cast(UString::npos)?size:limit); input_buffer.back(1+(size-limit)); - u_fputs(sf.substr(0, limit).c_str(), output); + write(sf.substr(0, limit), output); */ input_buffer.back(1); - u_fputs(sf.c_str(), output); + write(sf, output); while(blankqueue.size() > 0) { @@ -1728,9 +1727,7 @@ FSTProcessor::tm_analysis(UFILE *input, UFILE *output) } else { - u_fputc('[', output); - u_fputs(lf.c_str(), output); - u_fputc(']', output); + u_fprintf(output, "[%S]", lf.c_str()); input_buffer.setPos(last); input_buffer.back(1); } @@ -1747,7 +1744,7 @@ FSTProcessor::tm_analysis(UFILE *input, UFILE *output) void -FSTProcessor::generation(UFILE *input, UFILE *output, GenerationMode mode) +FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode) { if(getNullFlush()) { @@ -1832,10 +1829,10 @@ FSTProcessor::generation(UFILE *input, UFILE *output, GenerationMode mode) u_fputc('^', output); } - u_fputs(current_state.filterFinals(all_finals, alphabet, - escaped_chars, - displayWeightsMode, maxAnalyses, maxWeightClasses, - uppercase, firstupper).substr(1).c_str(), output); + write(current_state.filterFinals(all_finals, alphabet, + escaped_chars, + displayWeightsMode, maxAnalyses, maxWeightClasses, + uppercase, firstupper).substr(1), output); if(mode == gm_tagged || mode == gm_tagged_nm) { u_fputc('/', output); @@ -1916,7 +1913,7 @@ FSTProcessor::generation(UFILE *input, UFILE *output, GenerationMode mode) } void -FSTProcessor::postgeneration(UFILE *input, UFILE *output) +FSTProcessor::postgeneration(InputFile& input, UFILE *output) { if(getNullFlush()) { @@ -1950,7 +1947,7 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) { if(need_end_wblank) { - u_fputs("[[/]]"_u, output); + write("[[/]]"_u, output); need_end_wblank = false; } @@ -1971,7 +1968,7 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) if(need_end_wblank) { - u_fputs("[[/]]"_u, output); + write("[[/]]"_u, output); need_end_wblank = false; } } @@ -2060,7 +2057,7 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) else { UString final_wblank = combineWblanks(); - u_fputs(final_wblank.c_str(), output); + write(final_wblank, output); if(lf.empty()) { @@ -2082,11 +2079,11 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) if(space_index != sf.size()) { - u_fputs(sf.substr(1, space_index-1).c_str(), output); + write(sf.substr(1, space_index-1), output); if(need_end_wblank) { - u_fputs("[[/]]"_u, output); + write("[[/]]"_u, output); need_end_wblank = false; u_fputc(sf[space_index], output); flushWblanks(output); @@ -2096,12 +2093,12 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) u_fputc(sf[space_index], output); } - u_fputs(sf.substr(space_index+1, mark-space_index-1).c_str(), output); + write(sf.substr(space_index+1, mark-space_index-1), output); } else { flushWblanks(output); - u_fputs(sf.substr(1, mark-1).c_str(), output); + write(sf.substr(1, mark-1), output); } if(mark == sf.size()) @@ -2115,7 +2112,7 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) } else { - u_fputs(lf.substr(1,lf.size()-3).c_str(), output); + write(lf.substr(1,lf.size()-3), output); input_buffer.setPos(last); input_buffer.back(2); val = lf[lf.size()-2]; @@ -2147,7 +2144,7 @@ FSTProcessor::postgeneration(UFILE *input, UFILE *output) } void -FSTProcessor::intergeneration(UFILE *input, UFILE *output) +FSTProcessor::intergeneration(InputFile& input, UFILE *output) { if (getNullFlush()) { @@ -2226,7 +2223,7 @@ FSTProcessor::intergeneration(UFILE *input, UFILE *output) if (val == '\0') { // flush source - u_fputs(source.c_str(), output); + write(source, output); } else { @@ -2296,7 +2293,7 @@ FSTProcessor::intergeneration(UFILE *input, UFILE *output) } void -FSTProcessor::transliteration(UFILE *input, UFILE *output) +FSTProcessor::transliteration(InputFile& input, UFILE *output) { if(getNullFlush()) { @@ -2319,7 +2316,7 @@ FSTProcessor::transliteration(UFILE *input, UFILE *output) uppercase, firstupper, 0); if(!lf.empty()) { - u_fputs(lf.substr(1).c_str(), output); + write(lf.substr(1), output); current_state = initial_state; lf.clear(); sf.clear(); @@ -2358,7 +2355,7 @@ FSTProcessor::transliteration(UFILE *input, UFILE *output) { if(!lf.empty()) { - u_fputs(lf.substr(1).c_str(), output); + write(lf.substr(1), output); input_buffer.setPos(last); input_buffer.back(1); val = lf[lf.size()-1]; @@ -2719,12 +2716,12 @@ FSTProcessor::biltrans(UString const &input_word, bool with_delim) } void -FSTProcessor::bilingual_wrapper_null_flush(UFILE *input, UFILE *output, GenerationMode mode) +FSTProcessor::bilingual_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode) { setNullFlush(false); nullFlushGeneration = true; - while(!u_feof(input)) + while(!input.eof()) { bilingual(input, output, mode); u_fputc('\0', output); @@ -2755,7 +2752,7 @@ FSTProcessor::compose(UString const &lexforms, UString const &queue) const } void -FSTProcessor::bilingual(UFILE *input, UFILE *output, GenerationMode mode) +FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode) { if(getNullFlush()) { @@ -3260,15 +3257,15 @@ FSTProcessor::valid() const } int -FSTProcessor::readSAO(UFILE *input) +FSTProcessor::readSAO(InputFile& input) { if(!input_buffer.isEmpty()) { return input_buffer.next(); } - UChar val = static_cast(u_fgetc(input)); - if(u_feof(input)) + UChar val = static_cast(input.get()); + if(input.eof()) { return 0; } @@ -3294,7 +3291,7 @@ FSTProcessor::readSAO(UFILE *input) } } else if (val == '\\') { - val = static_cast(u_fgetc(input)); + val = static_cast(input.get()); if(isEscaped(val)) { input_buffer.add(val); @@ -3327,7 +3324,7 @@ FSTProcessor::printSAOWord(UString const &lf, UFILE *output) } void -FSTProcessor::SAO(UFILE *input, UFILE *output) +FSTProcessor::SAO(InputFile& input, UFILE *output) { bool last_incond = false; bool last_postblank = false; diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h index 5580b61..f4bd51e 100644 --- a/lttoolbox/fst_processor.h +++ b/lttoolbox/fst_processor.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -262,7 +263,7 @@ private: * @param input the stream to read from * @return code of the character */ - UChar readEscaped(UFILE *input); + UChar readEscaped(InputFile& input); /** * Reads a block from the stream input, enclosed by delim1 and delim2 @@ -270,13 +271,13 @@ private: * @param delim1 the delimiter of the beginning of the sequence * @param delim1 the delimiter of the end of the sequence */ - UString readFullBlock(UFILE *input, UChar const delim1, UChar const delim2); + UString readFullBlock(InputFile& input, UChar const delim1, UChar const delim2); /** * Reads a wordbound blank from the stream input * @param input the stream being read */ - UString readWblank(UFILE *input); + UString readWblank(InputFile& input); /** * Reads a wordbound blank (opening blank to closing blank) from the stream input -> [[...]]xyz[[/]] @@ -284,7 +285,7 @@ private: * @param output the stream to write on * @return true if the word enclosed by the wordbound blank has a ~ for postgeneration activation */ - bool wblankPostGen(UFILE *input, UFILE *output); + bool wblankPostGen(InputFile& input, UFILE *output); /** * Returns true if the character code is identified as alphabetic @@ -305,7 +306,7 @@ private: * @param input the stream to read * @return the next symbol in the stream */ - int readAnalysis(UFILE *input); + int readAnalysis(InputFile& input); /** * Read text from stream (decomposition version) @@ -313,7 +314,7 @@ private: * @param output the stream to write on * @return the next symbol in the stream */ - int readDecomposition(UFILE *input, UFILE *output); + int readDecomposition(InputFile& input, UFILE *output); /** * Read text from stream (postgeneration version) @@ -321,7 +322,7 @@ private: * @param output the stream to write on * @return the next symbol in the stream */ - int readPostgeneration(UFILE *input, UFILE *output); + int readPostgeneration(InputFile& input, UFILE *output); /** * Read text from stream (generation version) @@ -329,7 +330,7 @@ private: * @param output the stream being written to * @return the next symbol in the stream */ - int readGeneration(UFILE *input, UFILE *output); + int readGeneration(InputFile& input, UFILE *output); /** * Read text from stream (biltrans version) @@ -337,14 +338,14 @@ private: * @param output the stream to write on * @return the queue of 0-symbols, and the next symbol in the stream */ - pair readBilingual(UFILE *input, UFILE *output); + pair readBilingual(InputFile& input, UFILE *output); /** * Read text from stream (SAO version) * @param input the stream to read * @return the next symbol in the stream */ - int readSAO(UFILE *input); + int readSAO(InputFile& input); /** * Flush all the blanks remaining in the current process @@ -453,7 +454,7 @@ private: void initDecompositionSymbols(); vector numbers; - int readTMAnalysis(UFILE *input); + int readTMAnalysis(InputFile& input); unsigned int lastBlank(UString const &str); @@ -465,18 +466,18 @@ private: */ void printSpace(UChar const val, UFILE *output); - void skipUntil(UFILE *input, UFILE *output, wint_t const character); + void skipUntil(InputFile& input, UFILE *output, wint_t const character); static UString removeTags(UString const &str); UString compoundAnalysis(UString str, bool uppercase, bool firstupper); size_t firstNotAlpha(UString const &sf); - void analysis_wrapper_null_flush(UFILE *input, UFILE *output); - void bilingual_wrapper_null_flush(UFILE *input, UFILE *output, GenerationMode mode = gm_unknown); - void generation_wrapper_null_flush(UFILE *input, UFILE *output, + void analysis_wrapper_null_flush(InputFile& input, UFILE *output); + void bilingual_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown); + void generation_wrapper_null_flush(InputFile& input, UFILE *output, GenerationMode mode); - void postgeneration_wrapper_null_flush(UFILE *input, UFILE *output); - void intergeneration_wrapper_null_flush(UFILE *input, UFILE *output); - void transliteration_wrapper_null_flush(UFILE *input, UFILE *output); + void postgeneration_wrapper_null_flush(InputFile& input, UFILE *output); + void intergeneration_wrapper_null_flush(InputFile& input, UFILE *output); + void transliteration_wrapper_null_flush(InputFile& input, UFILE *output); UString compose(UString const &lexforms, UString const &queue) const; @@ -498,18 +499,18 @@ public: void initBiltrans(); void initDecomposition(); - void analysis(UFILE *input, UFILE *output); - void tm_analysis(UFILE *input, UFILE *output); - void generation(UFILE *input, UFILE *output, GenerationMode mode = gm_unknown); - void postgeneration(UFILE *input, UFILE *output); - void intergeneration(UFILE *input, UFILE *output); - void transliteration(UFILE *input, UFILE *output); + void analysis(InputFile& input, UFILE *output); + void tm_analysis(InputFile& input, UFILE *output); + void generation(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown); + void postgeneration(InputFile& input, UFILE *output); + void intergeneration(InputFile& input, UFILE *output); + void transliteration(InputFile& input, UFILE *output); UString biltrans(UString const &input_word, bool with_delim = true); UString biltransfull(UString const &input_word, bool with_delim = true); - void bilingual(UFILE *input, UFILE *output, GenerationMode mode = gm_unknown); + void bilingual(InputFile& input, UFILE *output, GenerationMode mode = gm_unknown); pair biltransWithQueue(UString const &input_word, bool with_delim = true); UString biltransWithoutQueue(UString const &input_word, bool with_delim = true); - void SAO(UFILE *input, UFILE *output); + void SAO(InputFile& input, UFILE *output); void parseICX(string const &file); void parseRCX(string const &file); diff --git a/lttoolbox/input_file.cc b/lttoolbox/input_file.cc new file mode 100644 index 0000000..81ded8e --- /dev/null +++ b/lttoolbox/input_file.cc @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include + +InputFile::InputFile() + : infile(stdin), buffer_size(0) +{} + +InputFile::~InputFile() +{ + close(); +} + +bool +InputFile::open(char* fname) +{ + close(); + if (fname == NULL) { + infile = stdin; + } else { + infile = fopen(fname, "r"); + } + return (infile != NULL); +} + +void +InputFile::close() +{ + if (infile != NULL) { + if (infile != stdin) { + fclose(infile); + delete infile; + } + infile = NULL; + } +} + +void +InputFile::internal_read() +{ + if (buffer_size) { + return; + } + if (feof(infile)) { + ubuffer[buffer_size++] = U_EOF; + return; + } + int i = 1; + cbuffer[0] = fgetc(infile); + if (cbuffer[0] == EOF) { + ubuffer[buffer_size++] = U_EOF; + return; + } else if (cbuffer[0] == '\0') { + ubuffer[buffer_size++] = '\0'; + return; + } + switch (cbuffer[0] & 0xF0) { + case 0xF0: + i += 3; + if (fread(cbuffer+1, 1, 3, infile) != 3) { + throw std::runtime_error("Could not read 3 expected bytes from stream"); + } + break; + case 0xE0: + i += 2; + if (fread(cbuffer+1, 1, 2, infile) != 2) { + throw std::runtime_error("Could not read 2 expected bytes from stream"); + } + break; + case 0xC0: + i += 1; + if (fread(cbuffer+1, 1, 1, infile) != 1) { + throw std::runtime_error("Could not read 1 expected byte from stream"); + } + break; + default: + break; + } + memset(ubuffer, 0, 3*sizeof(UChar)); + utf8::utf8to16(cbuffer, cbuffer+i, ubuffer+1); + if (ubuffer[2]) { + ubuffer[0] = ubuffer[2]; + buffer_size = 2; + } else { + ubuffer[0] = ubuffer[1]; + buffer_size = 1; + } +} + +UChar +InputFile::get() +{ + if (!buffer_size) { + internal_read(); + } + return ubuffer[--buffer_size]; +} + +UChar +InputFile::peek() +{ + if (!buffer_size) { + internal_read(); + } + return ubuffer[buffer_size-1]; +} + +void +InputFile::unget(UChar c) +{ + // this will probably segfault if called multiple times + ubuffer[buffer_size++] = c; +} + +bool +InputFile::eof() +{ + return (infile == NULL) || feof(infile); +} diff --git a/lttoolbox/input_file.h b/lttoolbox/input_file.h new file mode 100644 index 0000000..c2d7c35 --- /dev/null +++ b/lttoolbox/input_file.h @@ -0,0 +1,26 @@ +#ifndef _LT_INPUT_FILE_H_ +#define _LT_INPUT_FILE_H_ + +#include +#include + +class InputFile +{ +private: + FILE* infile; + UChar ubuffer[3]; + char cbuffer[4]; + int buffer_size; + void internal_read(); +public: + InputFile(); + ~InputFile(); + bool open(char* fname); + void close(); + UChar get(); + UChar peek(); + void unget(UChar c); + bool eof(); +}; + +#endif diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc index d722416..09bbb62 100644 --- a/lttoolbox/lt_proc.cc +++ b/lttoolbox/lt_proc.cc @@ -183,7 +183,7 @@ int main(int argc, char *argv[]) maxAnalyses = atoi(optarg); if (maxAnalyses < 1) { - wcerr << "Invalid or no argument for analyses count" << endl; + cerr << "Invalid or no argument for analyses count" << endl; exit(EXIT_FAILURE); } fstp.setMaxAnalysesValue(maxAnalyses); @@ -193,7 +193,7 @@ int main(int argc, char *argv[]) maxWeightClasses = atoi(optarg); if (maxWeightClasses < 1) { - wcerr << "Invalid or no argument for weight class count" << endl; + cerr << "Invalid or no argument for weight class count" << endl; exit(EXIT_FAILURE); } fstp.setMaxWeightClassesValue(maxWeightClasses); @@ -252,7 +252,7 @@ int main(int argc, char *argv[]) } } - UFILE* input = u_finit(stdin, NULL, NULL); + InputFile input; UFILE* output = u_finit(stdout, NULL, NULL); LtLocale::tryToSetLocale(); @@ -261,21 +261,19 @@ int main(int argc, char *argv[]) FILE *in = fopen(argv[optind], "rb"); if(in == NULL || ferror(in)) { - wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; + cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; exit(EXIT_FAILURE); } - input = u_fopen(argv[optind+1], "rb", NULL, NULL); - if(input == NULL) - { - wcerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl; + if (!input.open(argv[optind+1])) { + cerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl; exit(EXIT_FAILURE); } output = u_fopen(argv[optind+2], "wb", NULL, NULL); if(output == NULL) { - wcerr << "Error: Cannot open file '" << argv[optind+2] << "'." << endl << endl; + cerr << "Error: Cannot open file '" << argv[optind+2] << "'." << endl << endl; exit(EXIT_FAILURE); } @@ -287,14 +285,12 @@ int main(int argc, char *argv[]) FILE *in = fopen(argv[optind], "rb"); if(in == NULL || ferror(in)) { - wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; + cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; exit(EXIT_FAILURE); } - input = u_fopen(argv[optind+1], "rb", NULL, NULL); - if(input == NULL) - { - wcerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl; + if (!input.open(argv[optind+1])) { + cerr << "Error: Cannot open file '" << argv[optind+1] << "'." << endl << endl; exit(EXIT_FAILURE); } @@ -306,7 +302,7 @@ int main(int argc, char *argv[]) FILE *in = fopen(argv[optind], "rb"); if(in == NULL || ferror(in)) { - wcerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; + cerr << "Error: Cannot open file '" << argv[optind] << "'." << endl << endl; exit(EXIT_FAILURE); } fstp.load(in); @@ -415,7 +411,7 @@ int main(int argc, char *argv[]) } catch (exception& e) { - wcerr << e.what(); + cerr << e.what(); if (fstp.getNullFlush()) { u_fputc('\0', output); } @@ -423,7 +419,6 @@ int main(int argc, char *argv[]) exit(1); } - u_fclose(input); u_fclose(output); return EXIT_SUCCESS; } diff --git a/lttoolbox/lt_tmxproc.cc b/lttoolbox/lt_tmxproc.cc index 580b988..0abee7f 100644 --- a/lttoolbox/lt_tmxproc.cc +++ b/lttoolbox/lt_tmxproc.cc @@ -43,7 +43,7 @@ void checkValidity(FSTProcessor const &fstp) int main(int argc, char *argv[]) { - UFILE* input = u_finit(stdin, NULL, NULL); + InputFile input; UFILE* output = u_finit(stdout, NULL, NULL); LtLocale::tryToSetLocale(); FSTProcessor fstp; @@ -59,9 +59,7 @@ int main(int argc, char *argv[]) } // follow case 3: - input = u_fopen(argv[2], "rb", NULL, NULL); - if(!input) - { + if (!input.open(argv[2])) { endProgram(argv[0]); } // follow @@ -83,7 +81,6 @@ int main(int argc, char *argv[]) checkValidity(fstp); fstp.tm_analysis(input, output); - u_fclose(input); u_fclose(output); return EXIT_SUCCESS; } diff --git a/lttoolbox/ustring.cc b/lttoolbox/ustring.cc index 156b755..dd6130a 100644 --- a/lttoolbox/ustring.cc +++ b/lttoolbox/ustring.cc @@ -13,6 +13,13 @@ u_fputs(const UString& str, UFILE* output) u_fputs(str.c_str(), output); } +void +write(const UString& str, UFILE* output) +{ + // u_fputs() inserts a newline + u_fprintf(output, "%S", str.c_str()); +} + int stoi(const UString& str) { @@ -28,7 +35,7 @@ double stod(const UString& str) { double ret; - int c = u_sscanf(str.c_str(), "%f", &ret); + int c = u_sscanf(str.c_str(), "%lf", &ret); if (c != 1) { throw std::invalid_argument("unable to parse float"); } diff --git a/lttoolbox/ustring.h b/lttoolbox/ustring.h index 52cbadb..435bb0a 100644 --- a/lttoolbox/ustring.h +++ b/lttoolbox/ustring.h @@ -9,6 +9,8 @@ typedef std::basic_string UString; void u_fputs(const UString& str, UFILE* output); +void write(const UString& str, UFILE* output); + // like std::stoi, throws invalid_argument if unable to parse int stoi(const UString& str); diff --git a/lttoolbox/xml_parse_util.cc b/lttoolbox/xml_parse_util.cc index 4b507a6..d60f62b 100644 --- a/lttoolbox/xml_parse_util.cc +++ b/lttoolbox/xml_parse_util.cc @@ -29,14 +29,13 @@ XMLParseUtil::attrib(xmlTextReaderPtr reader, UString const &name) } UString -XMLParseUtil::attrib(xmlTextReaderPtr reader, UString const &name, const UString fallback) +XMLParseUtil::attrib(xmlTextReaderPtr reader, UString const& name, const UString& fallback) { std::string temp; temp.reserve(name.size()); utf8::utf16to8(name.begin(), name.end(), std::back_inserter(temp)); - xmlChar *attrname = xmlCharStrdup(temp.c_str()); + const xmlChar *attrname = reinterpret_cast(temp.c_str()); xmlChar *myattr = xmlTextReaderGetAttribute(reader, attrname); - xmlFree(attrname); if(myattr == NULL) { xmlFree(myattr); return fallback; diff --git a/lttoolbox/xml_parse_util.h b/lttoolbox/xml_parse_util.h index c96af30..2dfdd0f 100644 --- a/lttoolbox/xml_parse_util.h +++ b/lttoolbox/xml_parse_util.h @@ -31,7 +31,7 @@ public: static UString attrib(xmlTextReaderPtr reader, UString const &name); /* If attrib does not exist (or other error), returns fallback: */ - static UString attrib(xmlTextReaderPtr reader, UString const &name, const UString fallback); + static UString attrib(xmlTextReaderPtr reader, UString const &name, const UString& fallback); static UString readName(xmlTextReaderPtr reader); static UString readValue(xmlTextReaderPtr reader);