commit c224569ec0619bfe91490fdddb8d9e86326ee593 Author: Daniel Swanson Date: Thu Jun 17 12:57:34 2021 -0500 final elimination of wide strings diff --git a/src/biltrans-without-queue.cpp b/src/biltrans-without-queue.cpp index 9dc5d55..d394a8c 100644 --- a/src/biltrans-without-queue.cpp +++ b/src/biltrans-without-queue.cpp @@ -3,8 +3,8 @@ int main(int argc, char** argv) { if (argc != 2 && argc != 3) { - wcout << "Usage: " << argv[0]; - wcout << " [--trimmed | -t]" << endl; + cout << "Usage: " << argv[0]; + cout << " [--trimmed | -t]" << endl; exit(1); } string path(argv[1]); diff --git a/src/irstlm_ranker.cpp b/src/irstlm_ranker.cpp index b50c31b..9a047a2 100644 --- a/src/irstlm_ranker.cpp +++ b/src/irstlm_ranker.cpp @@ -19,7 +19,6 @@ IrstlmRanker::IrstlmRanker(const string &filePath, exit(-1); } cout.precision(10); - wcout.precision(10); lineno = 0; sublineno = 0; @@ -387,7 +386,7 @@ int main(int argc, char ** argv) { // I don't know :) if(setlocale(LC_CTYPE, "") == NULL) { - wcerr << L"Warning: unsupported locale, fallback to \"C\"" << endl; + cerr << "Warning: unsupported locale, fallback to \"C\"" << endl; setlocale(LC_ALL, "C"); } @@ -410,4 +409,3 @@ int main(int argc, char ** argv) { return 0; } - diff --git a/src/ldx_proc.cc b/src/ldx_proc.cc index 60f4158..b3fbd01 100644 --- a/src/ldx_proc.cc +++ b/src/ldx_proc.cc @@ -26,112 +26,52 @@ #include #include #include +#include +#include +#include using namespace std; -int readGeneration(FILE *input, FILE *output); -void skipUntil(FILE *input, FILE *output, wint_t const character); -wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2); -wchar_t readEscaped(FILE *input); -void streamError(); +int32_t readGeneration(InputFile& input, UFILE *output); +void skipUntil(InputFile& input, UFILE *output, UChar32 const character); FSTProcessor fstp; bool outOfWord = true; -set escaped_chars; +set escaped_chars; void -streamError() -{ - throw Exception("Error: Malformed input stream."); -} - -wchar_t 
-readEscaped(FILE *input) -{ - if(feof(input)) - { - streamError(); - } - - wchar_t val = static_cast(fgetwc_unlocked(input)); - - if(feof(input) || escaped_chars.find(val) == escaped_chars.end()) - { - streamError(); - } - - return val; -} - - -wstring -readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2) -{ - wstring result = L""; - result += delim1; - wchar_t c = delim1; - - while(!feof(input) && c != delim2) - { - c = static_cast(fgetwc_unlocked(input)); - result += c; - if(c != L'\\') - { - continue; - } - else - { - result += static_cast(readEscaped(input)); - } - } - - if(c != delim2) - { - streamError(); - } - - return result; -} - - -void -skipUntil(FILE *input, FILE *output, wint_t const character) +skipUntil(InputFile& input, UFILE* output, UChar32 const character) { while(true) { - wint_t val = fgetwc_unlocked(input); - if(feof(input)) - { + UChar32 val = input.get(); + if (input.eof()) { return; } switch(val) { - case L'\\': - val = fgetwc_unlocked(input); - if(feof(input)) - { + case '\\': + val = input.get(); + if (input.eof()) { return; } - fputwc_unlocked(L'\\', output); - fputwc_unlocked(val, output); + u_fputc('\\', output); + u_fputc(val, output); break; - case L'\0': - fputwc_unlocked(val, output); + case '\0': + u_fputc(val, output); break; default: - if(val == character) - { + if (val == character) { return; - } - else - { - fputwc_unlocked(val, output); + } else { + u_fputc(val, output); } break; } @@ -139,48 +79,47 @@ skipUntil(FILE *input, FILE *output, wint_t const character) } -int -readGeneration(FILE *input, FILE *output) +int32_t +readGeneration(InputFile& input, UFILE* output) { - wint_t val = fgetwc_unlocked(input); + UChar32 val = input.get(); - if(feof(input)) - { + if (input.eof()) { return 0x7fffffff; } if(outOfWord) { - if(val == L'^') + if(val == '^') { - val = fgetwc_unlocked(input); - if(feof(input)) + val = input.get(); + if(input.eof()) { return 0x7fffffff; } } - else if(val == 
'\\') { - fputwc_unlocked(val, output); - val = fgetwc_unlocked(input); - if(feof(input)) + u_fputc(val, output); + val = input.get(); + if(input.eof()) { return 0x7fffffff; } - fputwc_unlocked(val,output); - skipUntil(input, output, L'^'); - val = fgetwc_unlocked(input); - if(feof(input)) + u_fputc(val,output); + skipUntil(input, output, '^'); + val = input.get(); + if(input.eof()) { return 0x7fffffff; } } else { - fputwc_unlocked(val, output); - skipUntil(input, output, L'^'); - val = fgetwc_unlocked(input); - if(feof(input)) + u_fputc(val, output); + skipUntil(input, output, '^'); + val = input.get(); + if(input.eof()) { return 0x7fffffff; } @@ -188,24 +127,24 @@ readGeneration(FILE *input, FILE *output) outOfWord = false; } - if(val == L'\\') + if(val == '\\') { - val = fgetwc_unlocked(input); - return static_cast(val); + val = input.get(); + return static_cast(val); } - else if(val == L'$') + else if(val == '$') { outOfWord = true; - return static_cast(L'$'); + return static_cast('$'); } - else if(val == L'[') + else if(val == '[') { - fputws_unlocked(readFullBlock(input, L'[', L']').c_str(), output); + write(input.readBlock('[', ']'), output); return readGeneration(input, output); } else { - return static_cast(val); + return static_cast(val); } return 0x7fffffff; @@ -214,7 +153,8 @@ readGeneration(FILE *input, FILE *output) int main(int argc, char **argv) { - FILE *input = stdin, *output = stdout; + InputFile input; + UFILE* output = u_finit(stdout, NULL, NULL); LtLocale::tryToSetLocale(); @@ -225,17 +165,17 @@ int main(int argc, char **argv) exit(-1); } - escaped_chars.insert(L'['); - escaped_chars.insert(L']'); - escaped_chars.insert(L'{'); - escaped_chars.insert(L'}'); - escaped_chars.insert(L'^'); - escaped_chars.insert(L'$'); - escaped_chars.insert(L'/'); - escaped_chars.insert(L'\\'); - escaped_chars.insert(L'@'); - escaped_chars.insert(L'<'); - escaped_chars.insert(L'>'); + escaped_chars.insert('['); + escaped_chars.insert(']'); + 
escaped_chars.insert('{'); + escaped_chars.insert('}'); + escaped_chars.insert('^'); + escaped_chars.insert('$'); + escaped_chars.insert('/'); + escaped_chars.insert('\\'); + escaped_chars.insert('@'); + escaped_chars.insert('<'); + escaped_chars.insert('>'); FILE *t_rl = fopen(argv[1], "rb"); @@ -252,25 +192,25 @@ int main(int argc, char **argv) // read until '/', then read each from '/' adding to a map, then look up first in transducer, and if the result // is found in the map, then output it, otherwise error. - int val = 0, i = 0; + int32_t val = 0, i = 0; bool seenFirst = false; - wstring sl = L""; - wstring tl = L""; - set tllu; - set tllu_defaults; + UString sl; + UString tl; + set tllu; + set tllu_defaults; - skipUntil(input, output, L'^'); + skipUntil(input, output, '^'); outOfWord = false; while((val = readGeneration(input, output)) != 0x7fffffff) { switch(val) { - case L'^': + case '^': outOfWord = false; - val = readGeneration(input, output); + val = readGeneration(input, output); break; - case L'/': + case '/': if(!seenFirst) { seenFirst = true; @@ -280,13 +220,13 @@ int main(int argc, char **argv) tllu.insert(tl); } i++; - tl = L""; - val = readGeneration(input, output); - if(val != L'$') + tl.clear(); + val = readGeneration(input, output); + if(val != '$') { break; } - case L'$': + case '$': outOfWord = true; if(!seenFirst) { @@ -298,23 +238,28 @@ int main(int argc, char **argv) } seenFirst = false; - fputws_unlocked(L"^", output); - fputws_unlocked(sl.c_str(), output); + u_fputc('^', output); + write(sl, output); if(tllu.size() > 1) { - tl = L""; - wstring in = L"^" + sl + L"$"; - wstring trad = fstp.biltrans(in); + tl.clear(); + UString in; + in += '^'; + in.append(sl); + in += '$'; + UString trad = fstp.biltrans(in); int j = 0; bool tlout = false; for(auto& it : tllu) { - wstring t = L"^" + it + L"$"; + UString t; + t += '^'; + t.append(it); + t += '$'; if(t == trad) { - fputws_unlocked(L"/", output); - wstring to = t.substr(1, 
wcslen(t.c_str())-2); - fputws_unlocked(to.c_str(), output); + u_fputc('/', output); + write(it, output); tlout = true; break; } @@ -327,36 +272,35 @@ int main(int argc, char **argv) { if(it != tllu.end()) { - fputws_unlocked(L"/", output); + u_fputc('/', output); } - fputws_unlocked(it->c_str(), output); + write(*it, output); } } } else { - fputws_unlocked(L"/", output); - fputws_unlocked(tl.c_str(), output); + u_fputc('/', output); + write(tl, output); } - fputws_unlocked(L"$", output); + u_fputc('$', output); - sl = L""; tl = L""; + sl.clear(); + tl.clear(); tllu.clear(); i = 0; break; } if(!seenFirst && !outOfWord) { - sl.append(1, static_cast(val)); + sl += static_cast(val); } else if(!outOfWord) { - tl.append(1, static_cast(val)); + tl += static_cast(val); } } return 0; } - - diff --git a/src/lrx_proc.cc b/src/lrx_proc.cc index 32ac345..db713ed 100644 --- a/src/lrx_proc.cc +++ b/src/lrx_proc.cc @@ -20,11 +20,7 @@ #include #include #include - -#ifdef _MSC_VER -#include -#include -#endif +#include using namespace std; diff --git a/src/lrx_processor.cc b/src/lrx_processor.cc index f64c8b5..8715097 100644 --- a/src/lrx_processor.cc +++ b/src/lrx_processor.cc @@ -17,7 +17,9 @@ #include #include -#include +#include +#include + using namespace std; UString const LRXProcessor::LRX_PROCESSOR_TAG_SELECT = "