commit db5936603e22a7d50ff3b82b2b12837f5f1365e0 Author: Daniel Swanson Date: Fri Jun 4 09:19:23 2021 -0500 eliminate use of wide streams diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am index dfb8a61..fa05947 100644 --- a/lttoolbox/Makefile.am +++ b/lttoolbox/Makefile.am @@ -4,7 +4,7 @@ h_sources = alphabet.h att_compiler.h buffer.h compiler.h compression.h \ match_exe.h match_node.h match_state.h my_stdio.h node.h \ pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h \ transducer.h trans_exe.h xml_parse_util.h exception.h tmx_compiler.h \ - string_to_wostream.h ustring.h + ustring.h cc_sources = alphabet.cc att_compiler.cc compiler.cc compression.cc entry_token.cc \ expander.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \ match_node.cc match_state.cc node.cc pattern_list.cc \ diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc index 0585a33..12509f5 100644 --- a/lttoolbox/att_compiler.cc +++ b/lttoolbox/att_compiler.cc @@ -19,11 +19,8 @@ #include #include #include -#include #include #include -#include -#include #include #include #include @@ -280,27 +277,27 @@ AttCompiler::extract_transducer(TransducerType type) _extract_transducer(type, starting_state, transducer, corr, visited); /* The final states. */ - bool noFinals = true; + //bool noFinals = true; for (auto& f : finals) { if (corr.find(f.first) != corr.end()) { transducer.setFinal(corr[f.first], f.second); - noFinals = false; + //noFinals = false; } } /* if(noFinals) { - wcerr << L"No final states (" << type << ")" << endl; - wcerr << L" were:" << endl; - wcerr << L"\t" ; + cerr << "No final states (" << type << ")" << endl; + cerr << " were:" << endl; + cerr << "\t" ; for (auto& f : finals) { - wcerr << f.first << L" "; + cerr << f.first << " "; } - wcerr << endl; + cerr << endl; } */ return transducer; @@ -413,7 +410,7 @@ TransducerType AttCompiler::classify_backwards(int state, set& path) { if(finals.find(state) != finals.end()) { - wcerr << L"ERROR: Transducer contains epsilon transition to a final state. Aborting." << endl; + cerr << "ERROR: Transducer contains epsilon transition to a final state. Aborting." << endl; exit(EXIT_FAILURE); } AttNode* node = get_node(state); @@ -422,7 +419,7 @@ AttCompiler::classify_backwards(int state, set& path) if(t1.type != UNDECIDED) { type |= t1.type; } else if(path.find(t1.to) != path.end()) { - wcerr << L"ERROR: Transducer contains initial epsilon loop. Aborting." << endl; + cerr << "ERROR: Transducer contains initial epsilon loop. Aborting." << endl; exit(EXIT_FAILURE); } else { path.insert(t1.to); diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h index 9a1397f..5d19417 100644 --- a/lttoolbox/buffer.h +++ b/lttoolbox/buffer.h @@ -75,8 +75,8 @@ public: { if(buf_size == 0) { - wcerr << "Error: Cannot create empty buffer." << endl; - exit(EXIT_FAILURE); + cerr << "Error: Cannot create empty buffer." << endl; + exit(EXIT_FAILURE); } buf = new T[buf_size]; size = buf_size; @@ -115,8 +115,8 @@ public: { if(&b != this) { - destroy(); - copy(b); + destroy(); + copy(b); } return *this; } @@ -130,7 +130,7 @@ public: { if(lastpos == size) { - lastpos = 0; + lastpos = 0; } buf[lastpos++] = value; currentpos = lastpos; @@ -147,7 +147,7 @@ public: { if(lastpos == size) { - lastpos = 0; + lastpos = 0; } currentpos = lastpos; return buf[lastpos -1]; @@ -162,15 +162,15 @@ public: { if(currentpos != lastpos) { - if(currentpos == size) - { - currentpos = 0; - } - return buf[currentpos++]; + if(currentpos == size) + { + currentpos = 0; + } + return buf[currentpos++]; } else { - return last(); + return last(); } } @@ -182,11 +182,11 @@ public: { if(lastpos != 0) { - return buf[lastpos-1]; + return buf[lastpos-1]; } else { - return buf[size-1]; + return buf[size-1]; } } @@ -218,11 +218,11 @@ public: { if(prevpos <= currentpos) { - return currentpos - prevpos; + return currentpos - prevpos; } else { - return currentpos + size - prevpos; + return currentpos + size - prevpos; } } @@ -236,11 +236,11 @@ public: { if(postpos >= currentpos) { - return postpos - currentpos; + return postpos - currentpos; } else { - return postpos + size - currentpos; + return postpos + size - currentpos; } } diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc index f4f00fc..b016d84 100644 --- a/lttoolbox/compiler.cc +++ b/lttoolbox/compiler.cc @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/lttoolbox/compression.cc b/lttoolbox/compression.cc index 367b2b2..8b66be7 100644 --- a/lttoolbox/compression.cc +++ b/lttoolbox/compression.cc @@ -29,7 +29,7 @@ Compression::writeByte(unsigned char byte, FILE *output) { if(fwrite_unlocked(&byte, 1, 1, output) != 1) { - wcerr << L"I/O Error writing" << endl; + cerr << "I/O Error writing" << endl; exit(EXIT_FAILURE); } } @@ -41,7 +41,7 @@ Compression::readByte(FILE *input) if(fread_unlocked(&value, 1, 1, input) != 1) { // Not uncomment this code since -// wcerr << L"I/O Error reading" << endl; +// cerr << "I/O Error reading" << endl; // exit(EXIT_FAILURE); } @@ -88,7 +88,7 @@ Compression::multibyte_write(unsigned int value, FILE *output) } else { - wcerr << L"Out of range: " << value << endl; + cerr << "Out of range: " << value << endl; exit(EXIT_FAILURE); } } @@ -135,7 +135,7 @@ Compression::multibyte_write(unsigned int value, ostream &output) } else { - wcerr << "Out of range: " << value << endl; + cerr << "Out of range: " << value << endl; exit(EXIT_FAILURE); } } diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index 0c67050..90a1212 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -637,20 +637,7 @@ FSTProcessor::readGeneration(InputFile& input, UFILE *output) } else if(val == '<') { - UString cad; - cad += val; - - while((val = input.get()) != '>') - { - if(input.eof()) - { - streamError(); - } - cad += val; - } - cad += val; - - return alphabet(cad); + return alphabet(readFullBlock(input, '<', '>')); } else if(val == '[') { @@ -737,17 +724,7 @@ FSTProcessor::readBilingual(InputFile& input, UFILE *output) } else if(val == '<') { - UString cad; - cad += val; - while((val = input.get()) != '>') - { - if(input.eof()) - { - streamError(); - } - cad += val; - } - cad += val; + UString cad = readFullBlock(input, '<', '>'); int res = alphabet(cad); @@ -815,7 +792,7 @@ FSTProcessor::combineWblanks() final_wblank += "; "_u; } - final_wblank += last_wblank.substr(2,last_wblank.size()-4); //add wblank without brackets [[..]] + final_wblank.append(last_wblank, 2, last_wblank.size()-4); //add wblank without brackets [[..]] last_wblank.clear(); } else diff --git a/lttoolbox/lt_comp.cc b/lttoolbox/lt_comp.cc index ae31314..0202343 100644 --- a/lttoolbox/lt_comp.cc +++ b/lttoolbox/lt_comp.cc @@ -17,7 +17,6 @@ #include #include #include -#include #include #include @@ -176,7 +175,7 @@ int main(int argc, char *argv[]) } else { - wcerr << "Error: Cannot not open file '" << infile << "'." << endl << endl; + cerr << "Error: Cannot not open file '" << infile << "'." << endl << endl; exit(EXIT_FAILURE); } initGenericErrorDefaultFunc(NULL); @@ -230,7 +229,7 @@ int main(int argc, char *argv[]) FILE *output = fopen(outfile.c_str(), "wb"); if(!output) { - wcerr << "Error: Cannot open file '" << outfile << "'." << endl; + cerr << "Error: Cannot open file '" << outfile << "'." << endl; exit(EXIT_FAILURE); } if(ttype == 'a') diff --git a/lttoolbox/lt_locale.cc b/lttoolbox/lt_locale.cc index 64cb71e..10378b3 100644 --- a/lttoolbox/lt_locale.cc +++ b/lttoolbox/lt_locale.cc @@ -41,7 +41,7 @@ LtLocale::tryToSetLocale() return; } - wcerr << "Warning: unsupported locale, fallback to \"C\"" << endl; + cerr << "Warning: unsupported locale, fallback to \"C\"" << endl; setlocale(LC_ALL, "C"); #endif diff --git a/lttoolbox/lt_tmxcomp.cc b/lttoolbox/lt_tmxcomp.cc index b506b0c..32ab99f 100644 --- a/lttoolbox/lt_tmxcomp.cc +++ b/lttoolbox/lt_tmxcomp.cc @@ -108,7 +108,7 @@ int main(int argc, char *argv[]) FILE *output = fopen(argv[argc-1], "wb"); if(!output) { - wcerr << "Error: Cannot open file '" << argv[2] << "'." << endl; + cerr << "Error: Cannot open file '" << argv[2] << "'." << endl; exit(EXIT_FAILURE); } c.write(output); diff --git a/lttoolbox/string_to_wostream.h b/lttoolbox/string_to_wostream.h deleted file mode 100644 index 4ffbb4b..0000000 --- a/lttoolbox/string_to_wostream.h +++ /dev/null @@ -1,13 +0,0 @@ -// Include string_utils.h instead if you're linking against apertium - -#ifndef __STRING_TO_WOSTREAM_H_ -#define __STRING_TO_WOSTREAM_H_ - -#include - -static std::wostream & operator<<(std::wostream & ostr, std::string const & str) { - ostr << str.c_str(); - return ostr; -} - -#endif diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc index ed92987..9b7e332 100644 --- a/lttoolbox/tmx_compiler.cc +++ b/lttoolbox/tmx_compiler.cc @@ -381,18 +381,9 @@ TMXCompiler::procTU() trim(origin); trim(meta); -// cout << "DESPUES DE TRIM\n"; -// printvector(origin); -// printvector(meta); align(origin, meta); -// cout << "DESPUES DE ALIGN\n"; -// printvector(origin); -// printvector(meta); align_blanks(origin, meta); -// cout << "DESPUES DE ALIGNBLANKS\n"; -// printvector(origin); -// printvector(meta); insertTU(origin, meta); } @@ -626,27 +617,6 @@ TMXCompiler::vectorcmp(vector const &orig, unsigned int const begin_orig, return true; } -void -TMXCompiler::printvector(vector const &v, wostream &os) -{ - for(unsigned int i = 0, limit = v.size(); i != limit; i++) - { - if(i != 0) - { - os << " "; - } - if(v[i] > 31) - { - os << v[i] << " ('" << UChar(v[i]) << "')"; - } - else - { - os << v[i]; - } - } - os << endl; -} - void TMXCompiler::setOriginLanguageCode(UString const &code) { diff --git a/lttoolbox/tmx_compiler.h b/lttoolbox/tmx_compiler.h index 0fa179b..7d0633a 100644 --- a/lttoolbox/tmx_compiler.h +++ b/lttoolbox/tmx_compiler.h @@ -146,8 +146,6 @@ private: void align_blanks(vector &o, vector &m); vector join(vector > const &v, int const s) const; - static void printvector(vector const &v, wostream &wos = std::wcout); //eliminar este mΓ©todo - public: /* diff --git a/tests/lt_proc/__init__.py b/tests/lt_proc/__init__.py index 44c7e13..f975387 100644 --- a/tests/lt_proc/__init__.py +++ b/tests/lt_proc/__init__.py @@ -231,5 +231,13 @@ class NonBMPATTTest(ProcTest): inputs = ['𐅁𐅃𐅅', '𐅂𐅄𐅆'] expectedOutputs = ['^𐅁𐅃𐅅/𐅁𐅃𐅅$', '^𐅂𐅄𐅆/𐅂𐅄𐅆$'] + +class NonBMPGeneratorTest(ProcTest): + procdix = "data/non-bmp.att" + inputs = ['^𐅁𐅃𐅅$', '^𐅂𐅄𐅆$'] + expectedOutputs = ['𐅁𐅃𐅅', '𐅂𐅄𐅆'] + procflags = ['-z', '-g'] + procdir = "rl" + # These fail on some systems: #from null_flush_invalid_stream_format import *