commit 79fbb97522448d92bae3f583f7322f4c26b81d5c Author: Daniel Swanson Date: Thu Jun 17 13:15:19 2021 -0500 final elimination of wide strings diff --git a/apertium/apertium-prelatex.l b/apertium/apertium-prelatex.l index b93315b..13faec1 100644 --- a/apertium/apertium-prelatex.l +++ b/apertium/apertium-prelatex.l @@ -408,9 +408,9 @@ bool ngermanbabel = false; void usage(string const &progname) { - wcerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl; + cerr << "USAGE: " << progname << " [input_file [output_file]" << ']' << endl; - wcerr << "LaTeX format preprocessor " << endl; + cerr << "LaTeX format preprocessor " << endl; exit(EXIT_SUCCESS); } diff --git a/apertium/file_morpho_stream.cc b/apertium/file_morpho_stream.cc index e76c29b..82d264f 100644 --- a/apertium/file_morpho_stream.cc +++ b/apertium/file_morpho_stream.cc @@ -132,7 +132,7 @@ FileMorphoStream::get_next_word() vwords[ivwords]->add_tag(ca_tag_keof, ""_u, td->getPreferRules()); return get_next_word(); } - str += static_cast(symbol); + str += symbol; symbol = '\\'; } else if(symbol == '^') { if(str.size() > 0) { @@ -141,7 +141,7 @@ FileMorphoStream::get_next_word() readRestOfWord(ivwords); return get_next_word(); } else { - str += static_cast(symbol); + str += symbol; } } } @@ -320,7 +320,7 @@ FileMorphoStream::readRestOfWord(int &ivwords) } else { - str += static_cast(symbol); + str += symbol; } } @@ -360,13 +360,13 @@ FileMorphoStream::readRestOfWord(int &ivwords) { if(str[0] != '*')// do nothing with unknown words { - lrlmClassify(str, ivwords); + lrlmClassify(str, ivwords); } return; } else { - str += static_cast(symbol); + str += symbol; } } } diff --git a/apertium/latex_accentsmap.cc b/apertium/latex_accentsmap.cc index 2fe995d..b3d6c8f 100644 --- a/apertium/latex_accentsmap.cc +++ b/apertium/latex_accentsmap.cc @@ -112,7 +112,7 @@ UString AccentsMap::get(UString input){ void AccentsMap::init_locale(){ char *locale = setlocale(LC_ALL, ""); std::locale lollocale(locale); - wcout.imbue(lollocale); + cout.imbue(lollocale); } @@ -181,7 +181,7 @@ void AccentsMap::init_locale(){ map["ß"_u] = "{\\ss}"_u; map["¡"_u] = "{!`}"_u; map["¿"_u] = "{?`}"_u; - map[L"\\"] = "\\\\"_u; # Characters that should be quoted + map["\\"_u] = "\\\\"_u; # Characters that should be quoted map["~"_u] = "\\~"_u; map["&"_u] = "\\&"_u; map["$"_u] = "\\$"_u; diff --git a/apertium/mtx_reader.cc b/apertium/mtx_reader.cc index 4a03312..ea7e597 100644 --- a/apertium/mtx_reader.cc +++ b/apertium/mtx_reader.cc @@ -874,22 +874,22 @@ MTXReader::printStackValueType(VM::StackValueType svt) { switch (svt) { case VM::INTVAL: - std::cerr << "INT"_u; + std::cerr << "INT"; break; case VM::BVAL: - std::cerr << "BOOL"_u; + std::cerr << "BOOL"; break; case VM::STRVAL: - std::cerr << "STR"_u; + std::cerr << "STR"; break; case VM::STRARRVAL: - std::cerr << "STRARR"_u; + std::cerr << "STRARR"; break; case VM::WRDVAL: - std::cerr << "WRD"_u; + std::cerr << "WRD"; break; case VM::WRDARRVAL: - std::cerr << "WRDARR"_u; + std::cerr << "WRDARR"; break; default: throw 1; @@ -901,29 +901,29 @@ MTXReader::printTypeExpr(ExprType expr_type) { switch (expr_type) { case VOIDEXPR: - std::cerr << "VOID"_u; + std::cerr << "VOID"; break; case INTEXPR: - std::cerr << "INT"_u; + std::cerr << "INT"; break; case BEXPR: - std::cerr << "BOOL"_u; + std::cerr << "BOOL"; break; case STREXPR: - std::cerr << "STR"_u; + std::cerr << "STR"; procStrExpr(); break; case STRARREXPR: - std::cerr << "STRARR"_u; + std::cerr << "STRARR"; break; case WRDEXPR: - std::cerr << "WRD"_u; + std::cerr << "WRD"; break; case WRDARREXPR: - std::cerr << "WRDARR"_u; + std::cerr << "WRDARR"; break; case ADDREXPR: - std::cerr << "ADDR"_u; + std::cerr << "ADDR"; break; default: throw 1; @@ -1138,7 +1138,7 @@ MTXReader::printTmplDefns() { std::vector::const_iterator it = template_defns.begin(); for (; it != template_defns.end(); it++) { - std::cerr << " Macro "_u << it - template_defns.begin() << "\n"_u; + std::cerr << " Macro " << it - template_defns.begin() << "\n"; printTmplDefn(*it); } } diff --git a/apertium/postchunk.cc b/apertium/postchunk.cc index 42240da..b1b2c50 100644 --- a/apertium/postchunk.cc +++ b/apertium/postchunk.cc @@ -522,7 +522,7 @@ Postchunk::readToken(InputFile& in) } else { - content += wchar_t(val); + content += val; } } } diff --git a/apertium/pretransfer.cc b/apertium/pretransfer.cc index 7322980..6c8c778 100644 --- a/apertium/pretransfer.cc +++ b/apertium/pretransfer.cc @@ -21,10 +21,10 @@ UString storeAndWriteWblank(InputFile& input, UFILE* output) cerr << "ERROR: Unexpected EOF" << endl; exit(EXIT_FAILURE); } - + content += mychar; u_fputc(mychar, output); - + if(mychar == '\\') { mychar = input.get(); @@ -34,7 +34,7 @@ UString storeAndWriteWblank(InputFile& input, UFILE* output) else if(mychar == ']') { mychar = input.get(); - + if(mychar == ']') { content += mychar; @@ -43,7 +43,7 @@ UString storeAndWriteWblank(InputFile& input, UFILE* output) } } } - + return content; } @@ -117,7 +117,7 @@ void procWord(InputFile& input, UFILE* output, bool surface_forms, bool compound if((mychar != '+' || (mychar == '+' && in_tag == true)) && (mychar != '~' || (mychar == '~' && in_tag == true))) { - buffer += static_cast(mychar); + buffer += mychar; } else if(in_tag == false && mychar == '+') { @@ -165,13 +165,13 @@ void processStream(InputFile& input, UFILE* output, bool null_flush, bool surfac case '[': u_fputc('[', output); mychar = input.get(); - + if(mychar == '[') { u_fputc('[', output); UString wblank = storeAndWriteWblank(input, output); mychar = input.get(); - + if(mychar == '^') { u_fputc(mychar, output); diff --git a/apertium/tmx_builder.cc b/apertium/tmx_builder.cc index 9e94ffe..c62204d 100644 --- a/apertium/tmx_builder.cc +++ b/apertium/tmx_builder.cc @@ -62,38 +62,12 @@ TMXBuilder::~TMXBuilder() UString TMXBuilder::restOfBlank(InputFile& input) { - UString result = "["_u; - - while(true) - { - UChar32 val = input.get(); - if(input.eof()) - { - return ""_u; - } - switch(val) - { - case '\\': - result += '\\'; - val = input.get(); - if(input.eof()) - { - return ""_u; - } - result += static_cast(val); - break; - - case ']': - result += ']'; - return result; - - default: - result += static_cast(val); - break; - } + UString result = input.readBlock('[', ']'); + if (result[result.size()-1] == ']') { + return result; + } else { + return ""_u; } - - return ""_u; } UString @@ -244,7 +218,7 @@ TMXBuilder::nextTU(InputFile& input) case '?': case '!': - current_tu += static_cast(symbol); + current_tu += symbol; return current_tu; } } @@ -914,7 +888,7 @@ TMXBuilder::setEditDistancePercent(double e) } bool -TMXBuilder::isRemovablePunct(wchar_t const &c) +TMXBuilder::isRemovablePunct(UChar32 const &c) { return c == '.'; } diff --git a/apertium/tmx_builder.h b/apertium/tmx_builder.h index 6fea3b8..2aca2de 100644 --- a/apertium/tmx_builder.h +++ b/apertium/tmx_builder.h @@ -61,7 +61,7 @@ private: static vector extractFragment(vector const &text, unsigned int base, unsigned int width); - static bool isRemovablePunct(wchar_t const &c); + static bool isRemovablePunct(UChar32 const &c); bool similar(UString const &s1, UString const &s2); void splitAndMove(InputFile& file, string const &filename); diff --git a/apertium/transfer.cc b/apertium/transfer.cc index 572ec56..1c8f449 100644 --- a/apertium/transfer.cc +++ b/apertium/transfer.cc @@ -827,7 +827,7 @@ Transfer::readToken(InputFile& in) } else { - content += wchar_t(val3); + content += val3; } } } @@ -861,7 +861,7 @@ Transfer::readToken(InputFile& in) } else { - content += wchar_t(val2); + content += val2; } } } @@ -880,7 +880,7 @@ Transfer::readToken(InputFile& in) } else { - content += wchar_t(val); + content += val; } } } diff --git a/apertium/transfer_mult.cc b/apertium/transfer_mult.cc index e577e73..b28c4f0 100644 --- a/apertium/transfer_mult.cc +++ b/apertium/transfer_mult.cc @@ -183,7 +183,7 @@ TransferMult::readToken(InputFile& in) } else { - content += wchar_t(val2); + content += val2; } } } @@ -197,7 +197,7 @@ TransferMult::readToken(InputFile& in) } else { - content += wchar_t(val); + content += val; } } }