commit 06f58c6f02ee056de5077aa8ff160343b8a91e1e Author: Daniel Swanson Date: Tue Jun 8 22:00:40 2021 -0500 add reject-current-rule in t2x and t3x because why not (closes #34) diff --git a/apertium/interchunk.cc b/apertium/interchunk.cc index dc44a4f..4fa927c 100644 --- a/apertium/interchunk.cc +++ b/apertium/interchunk.cc @@ -460,6 +460,9 @@ Interchunk::interchunk(InputFile& in, UFILE* out) } unsigned int last = input_buffer.getPos(); + unsigned int prev_last = last; + int lastrule_id = -1; + set<int> banned_rules; output = out; ms.init(me->getInitial()); @@ -470,8 +473,30 @@ Interchunk::interchunk(InputFile& in, UFILE* out) { if(lastrule != NULL) { - applyRule(); - input_buffer.setPos(last); + int words_to_consume = applyRule(); + if (words_to_consume == -1) { + banned_rules.clear(); + input_buffer.setPos(last); + } else if (words_to_consume == 1) { + banned_rules.clear(); + if (prev_last >= input_buffer.getSize()) { + input_buffer.setPos(0); + } else { + input_buffer.setPos(prev_last+1); + } + while (true) { + TransferToken& tt = input_buffer.next(); + if (tt.getType() == tt_word) { + break; + } + } + } else { + banned_rules.insert(lastrule_id); + input_buffer.setPos(prev_last); + input_buffer.next(); + last = input_buffer.getPos(); + } + lastrule_id = -1; } else { @@ -480,23 +505,27 @@ Interchunk::interchunk(InputFile& in, UFILE* out) tmpword.clear(); input_buffer.setPos(last); input_buffer.next(); + prev_last = last; + banned_rules.clear(); last = input_buffer.getPos(); ms.init(me->getInitial()); } else if(tmpblank.size() != 0) { write(*tmpblank[0], output); tmpblank.clear(); + prev_last = last; last = input_buffer.getPos(); ms.init(me->getInitial()); } } } - int val = ms.classifyFinals(me->getFinals()); + int val = ms.classifyFinals(me->getFinals(), banned_rules); if(val != -1) { size_t lastrule_line = rule_lines[val-1]; lastrule = rule_map[val-1]; last = input_buffer.getPos(); + lastrule_id = val; last_lword = tmpword.size(); @@ -514,38 +543,36 @@ 
Interchunk::interchunk(InputFile& in, UFILE* out) switch(current.getType()) { - case tt_word: - applyWord(current.getContent()); - tmpword.push_back(&current.getContent()); - break; + case tt_word: + applyWord(current.getContent()); + tmpword.push_back(&current.getContent()); + break; - case tt_blank: - ms.step(' '); - tmpblank.push_back(&current.getContent()); - break; + case tt_blank: + ms.step(' '); + tmpblank.push_back(&current.getContent()); + break; - case tt_eof: - if(tmpword.size() != 0) - { - tmpblank.push_back(&current.getContent()); - ms.clear(); - } - else - { - write(current.getContent(), output); - tmpblank.clear(); - return; - } - break; + case tt_eof: + if(tmpword.size() != 0) { + tmpblank.push_back(&current.getContent()); + ms.clear(); + } + else { + write(current.getContent(), output); + tmpblank.clear(); + return; + } + break; - default: - cerr << "Error: Unknown input token." << endl; - return; + default: + cerr << "Error: Unknown input token." << endl; + return; } } } -void +int Interchunk::applyRule() { unsigned int limit = tmpword.size(); @@ -569,7 +596,7 @@ Interchunk::applyRule() word[i] = new InterchunkWord(*tmpword[i]); } - processRule(lastrule); + int words_to_consume = processRule(lastrule); lastrule = NULL; if(word) @@ -585,6 +612,7 @@ Interchunk::applyRule() tmpword.clear(); tmpblank.clear(); ms.init(me->getInitial()); + return words_to_consume; } void diff --git a/apertium/interchunk.dtd b/apertium/interchunk.dtd index 25e2f66..91af7ec 100644 --- a/apertium/interchunk.dtd +++ b/apertium/interchunk.dtd @@ -23,7 +23,7 @@ - + @@ -425,6 +425,16 @@ get-case-from --> + + + + + + + + diff --git a/apertium/postchunk.h b/apertium/postchunk.h index 43ad7b1..70044e0 100644 --- a/apertium/postchunk.h +++ b/apertium/postchunk.h @@ -29,7 +29,7 @@ class Postchunk : public TransferBase private: InterchunkWord **word; - + bool in_lu; bool in_wblank; UString out_wblank; @@ -44,9 +44,9 @@ private: void processCaseOf(xmlNode* element); UString processLu(xmlNode* element); UString 
processMlu(xmlNode* element); - + UString processChunk(xmlNode* element); - + void processLet(xmlNode *localroot); void processOut(xmlNode *localroot); void processCallMacro(xmlNode *localroot); @@ -56,7 +56,7 @@ private: UString readBlank(InputFile& in); UString readUntil(InputFile& in, int const symbol) const; void applyWord(UString const &word_str); - void applyRule(); + int applyRule(); TransferToken & readToken(InputFile& in); static void unchunk(UString const &chunk, UFILE *output); static vector getVecTags(UString const &chunk);