commit 7800a00f8cb7d55d4e92d935f70c035fd3bd5a36 Author: vaydheesh Date: Thu Jul 4 18:35:42 2019 +0530 Split: apertium-pretransfer.cc to pretransfer.h & pretransfer.cc diff --git a/apertium/Makefile.am b/apertium/Makefile.am index d2d4ba3..80ee5db 100644 --- a/apertium/Makefile.am +++ b/apertium/Makefile.am @@ -40,6 +40,7 @@ h_sources = a.h \ optional.h \ perceptron_spec.h \ perceptron_tagger.h \ + pretransfer.h \ postchunk.h \ sentence_stream.h \ serialiser.h \ @@ -131,6 +132,7 @@ cc_sources = a.cc \ file_morpho_stream.cc \ perceptron_spec.cc \ perceptron_tagger.cc \ + pretransfer.cc \ postchunk.cc \ sentence_stream.cc \ stream.cc \ @@ -260,6 +262,7 @@ apertium_DATA = deformat.xsl reformat.xsl new2old.xsl lexchoice.xsl \ apertium-createmodes.awk apertium_pretransfer_SOURCES = apertium_pretransfer.cc +apertium_pretransfer_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION) $(lib_LTLIBRARIES) apertium_multiple_translations_SOURCES = apertium-multiple-translations.cc apertium_multiple_translations_LDADD = $(APERTIUM_LIBS) -lapertium$(GENERIC_MAJOR_VERSION) $(lib_LTLIBRARIES) apertium_destxt_SOURCES = apertium_destxt.cc diff --git a/apertium/apertium_pretransfer.cc b/apertium/apertium_pretransfer.cc index abdf33e..c46ab54 100644 --- a/apertium/apertium_pretransfer.cc +++ b/apertium/apertium_pretransfer.cc @@ -21,163 +21,16 @@ #include #include "getopt_long.h" -#include -#include "apertium_config.h" -#include - #ifdef _MSC_VER #include #include #endif +#include #include using namespace Apertium; using namespace std; -bool compound_sep = false; - -void readAndWriteUntil(FILE *input, FILE *output, int const charcode) -{ - int mychar; - - while((mychar = fgetwc_unlocked(input)) != charcode) - { - if(feof(input)) - { - wcerr << L"ERROR: Unexpected EOF" << endl; - exit(EXIT_FAILURE); - } - fputwc_unlocked(mychar, output); - if(mychar == L'\\') - { - mychar = fgetwc(input); - fputwc(mychar, output); - } - } -} - -void procWord(FILE *input, FILE *output, bool surface_forms) -{ - int mychar; - wstring buffer = L""; - - bool buffer_mode = false; - bool in_tag = false; - bool queuing = false; - - if(surface_forms) - { - while((mychar = fgetwc_unlocked(input)) != L'/') ; - } - - while((mychar = fgetwc_unlocked(input)) != L'$') - { - if(feof(input)) - { - wcerr << L"ERROR: Unexpected EOF" << endl; - exit(EXIT_FAILURE); - } - - switch(mychar) - { - case L'<': - in_tag = true; - if(!buffer_mode) - { - buffer_mode = true; - } - break; - - case L'>': - in_tag = false; - break; - - case L'#': - if(buffer_mode) - { - buffer_mode = false; - queuing = true; - } - break; - } - - if(buffer_mode) - { - if((mychar != L'+' || (mychar == L'+' && in_tag == true)) && - (mychar != L'~' || (mychar == L'~' && in_tag == true))) - { - buffer += static_cast(mychar); - } - else if(in_tag == false && mychar == L'+') - { - buffer.append(L"$ ^"); - } - else if(in_tag == false && mychar == L'~' and compound_sep == true) - { - buffer.append(L"$^"); - } - } - else - { - if(mychar == L'+' && queuing == true) - { - buffer.append(L"$ ^"); - buffer_mode = true; - } - else - { - fputwc_unlocked(mychar, output); - } - } - - } - fputws_unlocked(buffer.c_str(), output); -} - -void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms) -{ - while(true) - { - int mychar = fgetwc_unlocked(input); - if(feof(input)) - { - break; - } - switch(mychar) - { - case L'[': - fputwc_unlocked(L'[', output); - readAndWriteUntil(input, output, L']'); - fputwc_unlocked(L']', output); - break; - - case L'\\': - fputwc_unlocked(mychar, output); - fputwc_unlocked(fgetwc_unlocked(input), output); - break; - - case L'^': - fputwc_unlocked(mychar, output); - procWord(input, output, surface_forms); - fputwc_unlocked(L'$', output); - break; - - case L'\0': - fputwc_unlocked(mychar, output); - - if(null_flush) - { - fflush(output); - } - break; - - default: - fputwc_unlocked(mychar, output); - break; - } - } -} - void usage(char *progname) { wcerr << L"USAGE: " << basename(progname) << L" [input_file [output_file]]" << endl; @@ -188,12 +41,10 @@ void usage(char *progname) exit(EXIT_FAILURE); } - - - int main(int argc, char *argv[]) { LtLocale::tryToSetLocale(); + bool compound_sep = false; bool null_flush = false; bool surface_forms = false; @@ -277,5 +128,5 @@ int main(int argc, char *argv[]) _setmode(_fileno(output), _O_U8TEXT); #endif - processStream(input, output, null_flush, surface_forms); + processStream(input, output, null_flush, surface_forms, compound_sep); } diff --git a/apertium/pretransfer.cc b/apertium/pretransfer.cc new file mode 100644 index 0000000..765c6c4 --- /dev/null +++ b/apertium/pretransfer.cc @@ -0,0 +1,143 @@ +#include + +void readAndWriteUntil(FILE *input, FILE *output, int const charcode) +{ + int mychar; + + while((mychar = fgetwc_unlocked(input)) != charcode) + { + if(feof(input)) + { + wcerr << L"ERROR: Unexpected EOF" << endl; + exit(EXIT_FAILURE); + } + fputwc_unlocked(mychar, output); + if(mychar == L'\\') + { + mychar = fgetwc(input); + fputwc(mychar, output); + } + } +} + +void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep) +{ + int mychar; + wstring buffer = L""; + + bool buffer_mode = false; + bool in_tag = false; + bool queuing = false; + + if(surface_forms) + { + while((mychar = fgetwc_unlocked(input)) != L'/') ; + } + + while((mychar = fgetwc_unlocked(input)) != L'$') + { + if(feof(input)) + { + wcerr << L"ERROR: Unexpected EOF" << endl; + exit(EXIT_FAILURE); + } + + switch(mychar) + { + case L'<': + in_tag = true; + if(!buffer_mode) + { + buffer_mode = true; + } + break; + + case L'>': + in_tag = false; + break; + + case L'#': + if(buffer_mode) + { + buffer_mode = false; + queuing = true; + } + break; + } + + if(buffer_mode) + { + if((mychar != L'+' || (mychar == L'+' && in_tag == true)) && + (mychar != L'~' || (mychar == L'~' && in_tag == true))) + { + buffer += static_cast(mychar); + } + else if(in_tag == false && mychar == L'+') + { + buffer.append(L"$ ^"); + } + else if(in_tag == false && mychar == L'~' and compound_sep == true) + { + buffer.append(L"$^"); + } + } + else + { + if(mychar == L'+' && queuing == true) + { + buffer.append(L"$ ^"); + buffer_mode = true; + } + else + { + fputwc_unlocked(mychar, output); + } + } + + } + fputws_unlocked(buffer.c_str(), output); +} + +void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms, bool compound_sep) +{ + while(true) + { + int mychar = fgetwc_unlocked(input); + if(feof(input)) + { + break; + } + switch(mychar) + { + case L'[': + fputwc_unlocked(L'[', output); + readAndWriteUntil(input, output, L']'); + fputwc_unlocked(L']', output); + break; + + case L'\\': + fputwc_unlocked(mychar, output); + fputwc_unlocked(fgetwc_unlocked(input), output); + break; + + case L'^': + fputwc_unlocked(mychar, output); + procWord(input, output, surface_forms, compound_sep); + fputwc_unlocked(L'$', output); + break; + + case L'\0': + fputwc_unlocked(mychar, output); + + if(null_flush) + { + fflush(output); + } + break; + + default: + fputwc_unlocked(mychar, output); + break; + } + } +} \ No newline at end of file diff --git a/apertium/pretransfer.h b/apertium/pretransfer.h new file mode 100644 index 0000000..6253962 --- /dev/null +++ b/apertium/pretransfer.h @@ -0,0 +1,16 @@ +#ifndef _PRETRANSFER_ +#define _PRETRANSFER_ + +#include +#include + +#include +#include +#include +#include + +void readAndWriteUntil(FILE *input, FILE *output, int const charcode); +void procWord(FILE *input, FILE *output, bool surface_forms, bool compound_sep); +void processStream(FILE *input, FILE *output, bool null_flush, bool surface_forms, bool compound_sep); + +#endif \ No newline at end of file