Index: branches/apertium-separable/src/lsx_FSTProcessor.cc
===================================================================
--- branches/apertium-separable/src/lsx_FSTProcessor.cc (revision 81566)
+++ branches/apertium-separable/src/lsx_FSTProcessor.cc (revision 81569)
@@ -1198,16 +1198,12 @@
// wcout << L"SPACE";
// }
-
if((val == L'^' && isEscaped(val) && outOfWord) || feof(input))
{
- outOfWord = false;
blankqueue.push(blank);
if(alive_states.size() == 0)
{
- if(!finalFound)
- {
if(blankqueue.size() > 0)
{
fputws(blankqueue.front().c_str(), output);
@@ -1216,7 +1212,6 @@
}
alive_states.push_back(*initial_state);
-// wcerr << endl << L"" << in << L"" << endl;
for(int i=0; i < (int) in.size(); i++)
{
if(in[i] == L'$' && in[i+1] == L'^' && blankqueue.size() > 0)
@@ -1225,15 +1220,19 @@
blankqueue.pop();
}
}
+ finalFound = false;
fputws(in.c_str(), output);
fflush(output);
+ in = L""; //******
}
+ else if(finalFound && alive_states.size() == 1)
+ {
finalFound = false;
- in = L"";
}
blank = L"";
in += val;
+ outOfWord = false;
continue;
}
@@ -1257,7 +1256,7 @@
if(s.size() > 0)
{
new_states.push_back(s);
- }
+ }
/* if(s.isFinal(all_finals))
{
@@ -1265,17 +1264,15 @@
new_states.push_back(*initial_state);
}*/
- if(s.isFinal(all_finals))
+ if(s.isFinal(all_finals))
{
new_states.clear();
out = s.filterFinals(all_finals, alphabet, escaped_chars);
-// wcerr << endl << L"" << out << L"" << endl;
-// wcerr << endl << L"" << s.size() << L"" << endl;
new_states.push_back(*initial_state);
- finalFound = true;
- for (int i=0; i < (int) out.size(); i++)
+ int out_size = out.size();
+ for (int i=0; i < out_size; i++)
{
wchar_t c = out[i];
if(c == L'/')
@@ -1282,11 +1279,12 @@
{
out[i] = L'^';
}
- else if(c == L'$' && out[i-1] == L'<' && out[i+1] == L'>')
+ else if(c == L'$' && out[i-1] == L'<' && out[i+1] == L'>') // indexing
{
- // out.erase(i+1, 1);
out[i+1] = L'^';
out.erase(i-1, 1);
+ out_size--;
+ i--;
}
}
if(out[out.length()-1] == L'^')
@@ -1293,9 +1291,9 @@
{
out = out.substr(0, out.length()-1); // extra ^ at the end
}
- else
+ else // take# out ... of
{
- for(int i=out.length()-1; i>=0; i--)
+ for(int i=out.length()-1; i>=0; i--) // indexing
{
if(out[i] == L'$')
{
@@ -1305,28 +1303,45 @@
}
out += L'$';
}
+
if(blankqueue.size() > 0)
{
fputws(blankqueue.front().c_str(), output);
blankqueue.pop();
}
- for(int i=0; i < (int) out.length(); i++)
+
+ out_size = out.size();
+ for(int i=0; i < out_size; i++) // indexing
{
if((out[i] == L'$' || out[i] == L'#') && blankqueue.size() > 0)
{
out.insert(i+1, blankqueue.front().c_str());
+ out_size += blankqueue.front().size();
blankqueue.pop();
}
+ else if(out[i] == L' ' && blankqueue.size() > 0)
+ {
+ out.insert(i+1, blankqueue.front().c_str());
+ out.erase(i,1);
+ out_size += (blankqueue.front().size() - 1);
+ blankqueue.pop();
}
+ }
fputws(out.c_str(), output);
flushBlanks(output);
+ finalFound = true;
+ out = L"";
+ in = L"";
}
}
alive_states.swap(new_states);
outOfWord = true;
- in += val;
+ if(!finalFound)
+ {
+ in += val; //do not remove
+ }
continue;
}
@@ -1335,12 +1350,12 @@
if(val == L'<') // tag
{
wstring tag = readFullBlock(input, L'<', L'>');
+ in += tag;
if(!alphabet.isSymbolDefined(tag))
{
alphabet.includeSymbol(tag);
}
val = static_cast(alphabet(tag));
- in += tag;
}
else
{
@@ -1348,21 +1363,17 @@
}
new_states.clear();
- wstring res = L"";
for(vector::const_iterator it = alive_states.begin(); it != alive_states.end(); it++)
{
- res = L"";
State s = *it;
if(val < 0)
{
- fflush(output);
s.step_override(val, alphabet(L""), val);
}
else if(val > 0)
{
- fflush(output);
int val_lowercase = towlower(val);
- s.step_override(val_lowercase, alphabet(L""), val); // FIXME deal with cases!
+ s.step_override(val_lowercase, alphabet(L""), val); // FIXME deal with cases! in step_override
}
if(s.size() > 0)
@@ -1378,7 +1389,6 @@
}
-
void
FSTProcessor::tm_analysis(FILE *input, FILE *output)
{