commit 257d33c36686532f53cb18bc11051161ed8f4b5a Author: Daniel Swanson Date: Thu Jun 3 15:01:59 2021 -0500 finish eliminating wchar and make more use of helper functions diff --git a/lttoolbox/alphabet.cc b/lttoolbox/alphabet.cc index 79f4817..a313814 100644 --- a/lttoolbox/alphabet.cc +++ b/lttoolbox/alphabet.cc @@ -23,8 +23,6 @@ #include #include #include -#include -#include #include diff --git a/lttoolbox/alphabet.h b/lttoolbox/alphabet.h index fc571a2..8c6dec2 100644 --- a/lttoolbox/alphabet.h +++ b/lttoolbox/alphabet.h @@ -50,7 +50,7 @@ private: /** * Map from symbol-pairs to symbols; tags get negative numbers, - * other characters are wchar_t's casted to ints. + * other characters are UChar32's casted to ints. * @see spairinv */ map, int32_t> spair; diff --git a/lttoolbox/deserialiser.h b/lttoolbox/deserialiser.h index 3551023..b5ae2f2 100644 --- a/lttoolbox/deserialiser.h +++ b/lttoolbox/deserialiser.h @@ -78,11 +78,6 @@ public: inline static uint32_t deserialise(std::istream &Stream_); }; -template <> class Deserialiser { -public: - inline static wchar_t deserialise(std::istream &Stream_); -}; - template <> class Deserialiser { public: inline static char deserialise(std::istream &Stream_); @@ -175,10 +170,6 @@ uint32_t Deserialiser::deserialise(std::istream &Stream_) { return int_deserialise(Stream_); } -wchar_t Deserialiser::deserialise(std::istream &Stream_) { - return int_deserialise(Stream_); -} - char Deserialiser::deserialise(std::istream &Stream_) { return int_deserialise(Stream_); } diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index f08ce27..65480f0 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -22,7 +22,6 @@ #include #include #include -#include #if defined(_WIN32) && !defined(_MSC_VER) #include @@ -1321,7 +1320,7 @@ FSTProcessor::analysis(InputFile& input, UFILE *output) { current_state.step(val, tmpset); } - else if(rcx_map.find(towlower(val)) != rcx_map.end()) + else if(rcx_map.find(u_tolower(val)) != rcx_map.end()) { rcx_map_ptr = rcx_map.find(tolower(val)); tmpset.insert(tolower(val)); @@ -1336,14 +1335,7 @@ FSTProcessor::analysis(InputFile& input, UFILE *output) } else { - if(!u_isupper(val) || caseSensitive) - { - current_state.step(val); - } - else - { - current_state.step(val, towlower(val)); - } + current_state.step_case(val, caseSensitive); } if(current_state.size() != 0) @@ -1609,14 +1601,7 @@ FSTProcessor::tm_analysis(InputFile& input, UFILE *output) last = input_buffer.getPos(); } - if(!u_isupper(val)) - { - current_state.step(val); - } - else - { - current_state.step(val, towlower(val)); - } + current_state.step_case(val, false); if(current_state.size() != 0) { @@ -1889,11 +1874,11 @@ FSTProcessor::generation(InputFile& input, UFILE *output, GenerationMode mode) { if(mode == gm_carefulcase) { - current_state.step_careful(val, towlower(val)); + current_state.step_careful(val, u_tolower(val)); } else { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } } else @@ -2009,11 +1994,11 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output) { if(myfirstupper && i != lf.size()) { - lf[i] = towupper(lf[i]); + lf[i] = u_toupper(lf[i]); } else { - lf[i] = towlower(lf[i]); + lf[i] = u_tolower(lf[i]); } break; } @@ -2021,11 +2006,11 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output) { if(myuppercase) { - lf[i-1] = towupper(lf[i-1]); + lf[i-1] = u_toupper(lf[i-1]); } else { - lf[i-1] = towlower(lf[i-1]); + lf[i-1] = u_tolower(lf[i-1]); } } } @@ -2034,14 +2019,7 @@ FSTProcessor::postgeneration(InputFile& input, UFILE *output) last = input_buffer.getPos(); } - if(!u_isupper(val) || caseSensitive) - { - current_state.step(val); - } - else - { - current_state.step(val, towlower(val)); - } + current_state.step_case(val, caseSensitive); if(current_state.size() != 0) { @@ -2195,14 +2173,7 @@ FSTProcessor::intergeneration(InputFile& input, UFILE *output) if (val != '\0') { - if (!u_isupper(val) || caseSensitive) - { - current_state.step(val); - } - else - { - current_state.step(val, towlower(val)); - } + current_state.step_case(val, caseSensitive); } if (val != '\0' && current_state.size() != 0) @@ -2440,7 +2411,7 @@ FSTProcessor::biltransfull(UString const &input_word, bool with_delim) { if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive) { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } else { @@ -2608,7 +2579,7 @@ FSTProcessor::biltrans(UString const &input_word, bool with_delim) { if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive) { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } else { @@ -2877,7 +2848,7 @@ FSTProcessor::bilingual(InputFile& input, UFILE *output, GenerationMode mode) { if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive) { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } else { @@ -2983,7 +2954,7 @@ FSTProcessor::biltransWithQueue(UString const &input_word, bool with_delim) { if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive) { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } else { @@ -3164,7 +3135,7 @@ FSTProcessor::biltransWithoutQueue(UString const &input_word, bool with_delim) { if(!alphabet.isTag(val) && u_isupper(val) && !caseSensitive) { - current_state.step(val, towlower(val)); + current_state.step(val, u_tolower(val)); } else { @@ -3380,14 +3351,7 @@ FSTProcessor::SAO(InputFile& input, UFILE *output) last = input_buffer.getPos(); } - if(!u_isupper(val) || caseSensitive) - { - current_state.step(val); - } - else - { - current_state.step(val, towlower(val)); - } + current_state.step_case(val, caseSensitive); if(current_state.size() != 0) { diff --git a/lttoolbox/fst_processor.h b/lttoolbox/fst_processor.h index 3785810..8be5eb0 100644 --- a/lttoolbox/fst_processor.h +++ b/lttoolbox/fst_processor.h @@ -27,7 +27,6 @@ #include #include -#include #include #include #include diff --git a/lttoolbox/lt_print.cc b/lttoolbox/lt_print.cc index 241ee10..bc92108 100644 --- a/lttoolbox/lt_print.cc +++ b/lttoolbox/lt_print.cc @@ -152,7 +152,7 @@ int main(int argc, char *argv[]) int len = Compression::multibyte_read(input); while(len > 0) { - alphabetic_chars.insert(static_cast(Compression::multibyte_read(input))); + alphabetic_chars.insert(static_cast(Compression::multibyte_read(input))); len--; } @@ -180,7 +180,6 @@ int main(int argc, char *argv[]) if(it != penum) { u_fputs("--\n"_u, output); - //fwprintf(output, L"--\n", it->first.c_str()); // ToDo: Was %ls meant to go somewhere here? } } diff --git a/lttoolbox/pattern_list.cc b/lttoolbox/pattern_list.cc index e5dd5fc..d8aa56b 100644 --- a/lttoolbox/pattern_list.cc +++ b/lttoolbox/pattern_list.cc @@ -124,7 +124,7 @@ PatternList::insertOutOfSequence(UString const &lemma, UString const &tags, } else { - result.push_back(int((wchar_t) lemma[i])); + result.push_back(static_cast(lemma[i])); } } } diff --git a/lttoolbox/serialiser.h b/lttoolbox/serialiser.h index 833b06b..bc04f2c 100644 --- a/lttoolbox/serialiser.h +++ b/lttoolbox/serialiser.h @@ -97,12 +97,6 @@ public: std::ostream &Output); }; -template <> class Serialiser { -public: - inline static void serialise(const wchar_t &SerialisedType_, - std::ostream &Output); -}; - template <> class Serialiser { public: inline static void serialise(const char &SerialisedType_, @@ -220,11 +214,6 @@ void Serialiser::serialise(const uint32_t &SerialisedType_, int_serialise((uint64_t)SerialisedType_, Output); } -void Serialiser::serialise(const wchar_t &SerialisedType_, - std::ostream &Output) { - int_serialise((uint32_t)SerialisedType_, Output); -} - void Serialiser::serialise(const char &SerialisedType_, std::ostream &Output) { int_serialise((uint8_t)SerialisedType_, Output); diff --git a/lttoolbox/state.cc b/lttoolbox/state.cc index 0d2cdb9..d476efc 100644 --- a/lttoolbox/state.cc +++ b/lttoolbox/state.cc @@ -17,7 +17,6 @@ #include #include -#include #include #include @@ -403,12 +402,12 @@ State::step(int const input, set const alts) } void -State::step_case(UChar val, UChar val2, bool caseSensitive) +State::step_case(UChar32 val, UChar32 val2, bool caseSensitive) { - if (!iswupper(val) || caseSensitive) { + if (!u_isupper(val) || caseSensitive) { step(val, val2); - } else if(val != towlower(val)) { - step(val, towlower(val), val2); + } else if(val != u_tolower(val)) { + step(val, u_tolower(val), val2); } else { step(val, val2); } @@ -416,12 +415,12 @@ State::step_case(UChar val, UChar val2, bool caseSensitive) void -State::step_case(UChar val, bool caseSensitive) +State::step_case(UChar32 val, bool caseSensitive) { - if (!iswupper(val) || caseSensitive) { + if (!u_isupper(val) || caseSensitive) { step(val); } else { - step(val, towlower(val)); + step(val, u_tolower(val)); } } @@ -501,11 +500,11 @@ State::filterFinals(map const &finals, if(result[first_char] == '~') { // skip post-generation mark - result[first_char+1] = towupper(result[first_char+1]); + result[first_char+1] = u_toupper(result[first_char+1]); } else { - result[first_char] = towupper(result[first_char]); + result[first_char] = u_toupper(result[first_char]); } } } @@ -638,11 +637,11 @@ State::filterFinalsSAO(map const &finals, if(result[first_char] == '~') { // skip post-generation mark - result[first_char+1] = towupper(result[first_char+1]); + result[first_char+1] = u_toupper(result[first_char+1]); } else { - result[first_char] = towupper(result[first_char]); + result[first_char] = u_toupper(result[first_char]); } } } @@ -721,7 +720,7 @@ State::filterFinalsTM(map const &finals, for(unsigned int k = (unsigned int) j+3, limit2 = fragment[i].size(); k != limit2; k++) { - if(iswdigit(fragment[i][k])) + if(u_isdigit(fragment[i][k])) { num = num * 10; num += (int) fragment[i][k] - 48; diff --git a/lttoolbox/state.h b/lttoolbox/state.h index 8bb5fc3..31f0e42 100644 --- a/lttoolbox/state.h +++ b/lttoolbox/state.h @@ -190,9 +190,9 @@ public: */ void step(int const input, set const alts); - void step_case(UChar val, bool caseSensitive); + void step_case(UChar32 val, bool caseSensitive); - void step_case(UChar val, UChar val2, bool caseSensitive); + void step_case(UChar32 val, UChar32 val2, bool caseSensitive); void step_careful(int const input, int const alt); diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc index 3afe14a..ed92987 100644 --- a/lttoolbox/tmx_compiler.cc +++ b/lttoolbox/tmx_compiler.cc @@ -101,7 +101,7 @@ TMXCompiler::allBlanks() for(auto c : text) { - flag = flag && iswspace(c); + flag = flag && u_isspace(c); } return flag; @@ -467,7 +467,7 @@ TMXCompiler::trim(vector &v) const { while(v.size() > 0) { - if(iswspace(v[v.size()-1])) + if(u_isspace(v[v.size()-1])) { v.pop_back(); } @@ -481,7 +481,7 @@ TMXCompiler::trim(vector &v) const vector aux; for(auto c : v) { - if(!iswspace(c) || !principio) + if(!u_isspace(c) || !principio) { principio = false; aux.push_back(c); @@ -575,7 +575,7 @@ TMXCompiler::numberLength(vector &v, unsigned int const position) const { for(unsigned int i = position, limit = v.size(); i < limit; i++) { - if(!iswdigit(v[i]) && (v[i] != '.' || i == position) && (v[i] != ',' || i == position)) + if(!u_isdigit(v[i]) && (v[i] != '.' || i == position) && (v[i] != ',' || i == position)) { if(i == position) { @@ -586,7 +586,7 @@ TMXCompiler::numberLength(vector &v, unsigned int const position) const while(i != position) { i--; - if(iswdigit(v[i])) + if(u_isdigit(v[i])) { return i - position + 1; } @@ -600,7 +600,7 @@ TMXCompiler::numberLength(vector &v, unsigned int const position) const while(i != position) { i--; - if(iswdigit(v[i])) + if(u_isdigit(v[i])) { return i - position + 1; }