commit 3919e261dbc6dc8b9fe3168d207094637452f039 Author: Daniel Swanson Date: Mon Jun 14 17:06:32 2021 -0500 assorted cleanup diff --git a/src/multi_translator.cc b/src/multi_translator.cc index 4f2e7c3..ea98145 100644 --- a/src/multi_translator.cc +++ b/src/multi_translator.cc @@ -1,4 +1,5 @@ #include "multi_translator.h" +#include MultiTranslator::MultiTranslator(string path, string mode, bool trimmed, bool filter, bool number_lines) { this->trimmed = trimmed; diff --git a/src/multi_translator.h b/src/multi_translator.h index b2574e5..30ec426 100644 --- a/src/multi_translator.h +++ b/src/multi_translator.h @@ -4,15 +4,16 @@ #define BILTRANS_WITHOUT_QUEUE #include "tagger_output_processor.h" +#include class BiltransToken { public: TaggerToken sourceToken; vector targetTokens; UString blanks; - + bool isEOF; - + BiltransToken() { isEOF = false; } @@ -74,4 +75,3 @@ public: }; #endif - diff --git a/src/multitrans.cc b/src/multitrans.cc index a4643bc..e308019 100644 --- a/src/multitrans.cc +++ b/src/multitrans.cc @@ -1,4 +1,5 @@ #include "multi_translator.h" +#include bool trim = false; bool filter = false; @@ -59,6 +60,7 @@ void parseArguments(int argc, char **argv) { } int main(int argc, char** argv) { + LtLocale::tryToSetLocale(); parseArguments(argc, argv); MultiTranslator mt(path, mode, trim, filter, number_lines); diff --git a/src/tagger_output_processor.cc b/src/tagger_output_processor.cc index 52d5c7c..859aae3 100644 --- a/src/tagger_output_processor.cc +++ b/src/tagger_output_processor.cc @@ -1,13 +1,6 @@ #include "tagger_output_processor.h" - -TaggerOutputProcessor::TaggerOutputProcessor() { - sn = 0; - LtLocale::tryToSetLocale(); -} - -TaggerOutputProcessor::~TaggerOutputProcessor() { - -} +#include +#include int TaggerOutputProcessor::find(vector xs, UString x) { for (size_t i = 0; i < xs.size(); ++i) { @@ -22,16 +15,16 @@ TaggerToken TaggerOutputProcessor::parseTaggerToken(UString str) { int state = 0; // lemma; UString buffer; for (auto& c : str) { - if(c == L'<' && state == 0) { + if(c == '<' && state == 0) { state = 1; token.lemma = buffer; buffer.clear(); } - if (c == L'>') { + if (c == '>') { token.tags.push_back(buffer); buffer.clear(); - } else if (c != L'<') { + } else if (c != '<') { buffer += c; } } @@ -63,12 +56,12 @@ vector TaggerOutputProcessor::parseTags(UString token) { return tags; } -vector TaggerOutputProcessor::wsplit(UString wstr, wchar_t delim) { +vector TaggerOutputProcessor::wsplit(UString wstr, UChar delim) { vector tokens; UString buffer; for(size_t i = 0; i < wstr.size(); ++i) { - if(wstr[i] == delim && (i == 0 || wstr[i-1] != L'\\')) { + if(wstr[i] == delim && (i == 0 || wstr[i-1] != '\\')) { tokens.push_back(buffer); buffer.clear(); } else { @@ -94,47 +87,19 @@ UString TaggerOutputProcessor::getLemma(UString token) { } void TaggerOutputProcessor::processTaggerOutput(bool nullFlush) { - UString buffer; vector sentence; - bool escaped = false; - int state = 0; // outside - wchar_t c; - while((c = fgetwc(stdin))) { - if (c == -1) { - break; - } + UChar32 c; + InputFile in; + while (!in.eof()) { + c = in.get(); - if (nullFlush && c == L'\0') { + if ((c == '\n') || (nullFlush && c == '\0')) { processSentence(sentence); sentence.clear(); - buffer.clear(); - } - - if(c == L'\n') { - processSentence(sentence); - sentence.clear(); - buffer.clear(); - } - if (state == 0) { - if (c == '^' && !escaped) { - state = 1; // inside - } else if (c == '\\' && !escaped) { - escaped = true; - } else { - escaped = false; - } - } else if (state == 1) { - if(c == L'$' && !escaped) { - sentence.push_back(parseTaggerToken(buffer)); - buffer.clear(); - state = 0; - } else if (c == '\\' && !escaped) { - escaped = true; - buffer += c; - } else { - buffer += c; - escaped = false; - } + } else if (c == '\\') { + in.get(); + } else if (c == '^') { + sentence.push_back(parseTaggerToken(in.readBlock('^', '$'))); } } } diff --git a/src/tagger_output_processor.h b/src/tagger_output_processor.h index b9abb8a..0219ccf 100644 --- a/src/tagger_output_processor.h +++ b/src/tagger_output_processor.h @@ -2,16 +2,7 @@ #define TAGGER_OUTPUT_PROCESSOR #include -#include -#include - -#include -#include - -#include -#include -#include -#include +#include using namespace std; @@ -39,10 +30,10 @@ public: class TaggerOutputProcessor { protected: - int sn; + int sn = 0; vector parseTags(UString token); - vector wsplit(UString wstr, wchar_t delim); + vector wsplit(UString wstr, UChar delim); TaggerToken parseTaggerToken(UString buffer); int find(vector xs, UString x); @@ -50,11 +41,7 @@ protected: virtual void processSentence(vector) =0; public: - TaggerOutputProcessor(); - ~TaggerOutputProcessor(); - void processTaggerOutput(bool nullFlush=false); - }; #endif