Index: branches/apertium-separable/examples/new-example.dix
===================================================================
--- branches/apertium-separable/examples/new-example.dix (revision 80644)
+++ branches/apertium-separable/examples/new-example.dix (revision 80645)
@@ -20,7 +20,7 @@
-
+
Index: branches/apertium-separable/src/lsx_compiler.cc
===================================================================
--- branches/apertium-separable/src/lsx_compiler.cc (revision 80644)
+++ branches/apertium-separable/src/lsx_compiler.cc (revision 80645)
@@ -1,50 +1,20 @@
-#include
#include
#include
-#include
#include
-#include
-#include
#include
#include
-#include
-
-#include
#include
-#include
-#include
-#include
-#include
-#include
-wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2);
-
-wstring
-readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2)
-{
- wstring result = L"";
- result += delim1;
- wchar_t c = delim1;
-
- while(!feof(input) && c != delim2)
- {
- c = static_cast(fgetwc(input));
- result += c;
- }
-
- return result;
-}
-
int main (int argc, char** argv)
{
if(argc != 3)
{
- wcout << L"lsx-comp " << endl;
+ wcout << L"./lsx-comp " << endl;
exit(0);
}
- /* compile */
+ LtLocale::tryToSetLocale();
Compiler c;
c.parse(argv[1], L"lr");
@@ -58,154 +28,5 @@
c.write(fst);
fclose(fst);
- /* process */
-
- Alphabet alphabet;
- TransExe transducer;
-
- LtLocale::tryToSetLocale();
-
- fst = fopen(argv[2], "r");
-
- set alphabetic_chars;
- int len = Compression::multibyte_read(fst);
- while(len > 0)
- {
- alphabetic_chars.insert(static_cast(Compression::multibyte_read(fst)));
- len--;
- }
-
- alphabet.read(fst);
- wcerr << L"alphabet_size: " << alphabet.size() << endl;
-
- len = Compression::multibyte_read(fst);
- len = Compression::multibyte_read(fst);
-
- wcerr << len << endl;
- wstring name = L"";
- while(len > 0)
- {
- name += static_cast(Compression::multibyte_read(fst));
- len--;
- }
- wcerr << name << endl;
-
- transducer.read(fst, alphabet);
-
- FILE *input = stdin;
- FILE *output = stdout;
-
- set anfinals;
- set escaped_chars;
-
- escaped_chars.insert(L'[');
- escaped_chars.insert(L']');
- escaped_chars.insert(L'{');
- escaped_chars.insert(L'}');
- escaped_chars.insert(L'^');
- escaped_chars.insert(L'$');
- escaped_chars.insert(L'/');
- escaped_chars.insert(L'\\');
- escaped_chars.insert(L'@');
- escaped_chars.insert(L'<');
- escaped_chars.insert(L'>');
-
- State *initial_state;
- initial_state = new State();
- initial_state->init(transducer.getInitial());
- anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end());
-
-
- vector new_states;
- vector alive_states;
-
- alive_states.push_back(*initial_state);
-
- bool outOfWord = true;
- bool isEscaped = false;
-
- while(!feof(input))
- {
- int val = fgetwc(input); // read 1 wide char
-
- wcerr << L"| " << (wchar_t)val << L" | val: " << val << L" || s.size(): " << alive_states.size() << L" || " << outOfWord << endl;
-
- if(/*val == L'^' && */ !isEscaped && outOfWord)
- {
- outOfWord = false;
- continue;
- }
-
- if((feof(input) || val == L'$') && !isEscaped && !outOfWord)
- {
- new_states.clear();
- for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
- {
- State s = *it;
- s.step(alphabet(L"<$>"));
- if(s.size() > 0)
- {
- new_states.push_back(s);
- }
-
- if(s.isFinal(anfinals))
- {
- wstring out = s.filterFinals(anfinals, alphabet, escaped_chars);
- wcerr << "FINAL: " << out << endl;
- new_states.push_back(*initial_state);
- }
- }
- alive_states.swap(new_states);
-
- outOfWord = true;
- continue;
- }
-
- if(val == L'<' && !outOfWord) // if in tag, get the whole tag and modify if necessary
- {
- wstring tag = L"";
- tag = readFullBlock(input, L'<', L'>');
- if(!alphabet.isSymbolDefined(tag))
- {
- alphabet.includeSymbol(tag);
- }
- val = static_cast(alphabet(tag));
-
- fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val);
- }
-
- if(!outOfWord)
- {
- new_states.clear();
- wstring res = L"";
- for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
- {
- res = L"";
- State s = *it;
- if(val < 0)
- {
- s.step_override(val, alphabet(L""), val);
- }
- else if(val > 0)
- {
- s.step_override(val, alphabet(L""), val); // deal with cases!
- }
- if(s.size() > 0)
- {
- new_states.push_back(s);
- }
- wcerr << L"| | " << (wchar_t) val << L" " << L"size: " << s.size() << L" final: " << s.isFinal(anfinals) << endl;
- wcerr << L"| | cur: " << s.getReadableString(alphabet) << endl;
- }
- alive_states.swap(new_states);
- }
-
- if(outOfWord)
- {
- continue;
- }
-
- }
-
return 0;
}