commit 66ea1399c6a97936b84c725e027a8c5671894b80
Author: Tanmai Khanna <khanna.tanmai@gmail.com>
Date:   Sat Aug 29 02:34:01 2020 +0530

    Interchunk tests

diff --git a/tests/data/apertium-nno-nob.nno-nob.t2x b/tests/data/apertium-nno-nob.nno-nob.t2x
new file mode 100644
index 0000000..1778747
--- /dev/null
+++ b/tests/data/apertium-nno-nob.nno-nob.t2x
@@ -0,0 +1,224 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- -*- nxml -*- -->
+<interchunk>
+  <section-def-cats> <!-- chunk categories for rule patterns; test4, test5 and SENT are not used by any pattern in this file -->
+    <def-cat n="SN">
+      <cat-item tags="SN"/>
+      <cat-item tags="SN.*"/>
+    </def-cat>
+    <def-cat n="prn">
+      <cat-item tags="prn"/>
+      <cat-item tags="prn.*"/>
+    </def-cat>
+    <def-cat n="prn2">
+      <cat-item tags="prn2"/>
+      <cat-item tags="prn2.*"/>
+    </def-cat>
+    <def-cat n="test1">
+      <cat-item tags="test1"/>
+      <cat-item tags="test1.*"/>
+    </def-cat>
+    <def-cat n="test2">
+      <cat-item tags="test2"/>
+      <cat-item tags="test2.*"/>
+    </def-cat>
+    <def-cat n="test3">
+      <cat-item tags="test3"/>
+      <cat-item tags="test3.*"/>
+    </def-cat>
+    <def-cat n="test4">
+      <cat-item tags="test4"/>
+      <cat-item tags="test4.*"/>
+    </def-cat>
+    <def-cat n="test5">
+      <cat-item tags="test5"/>
+      <cat-item tags="test5.*"/>
+    </def-cat>
+    <def-cat n="SENT">
+      <cat-item tags="SENT"/>
+    </def-cat>
+  </section-def-cats>
+
+  <section-def-attrs> <!-- a_nbr and a_cas are declared, but no rule in this file clips them (rules only clip lem, tags and chcontent) -->
+    <def-attr n="a_nbr">
+      <attr-item tags="sg"/>
+      <attr-item tags="pl"/>
+      <attr-item tags="sp"/>
+      <attr-item tags="ND"/>
+    </def-attr>
+    <def-attr n="a_cas">
+      <attr-item tags="nom"/>
+      <attr-item tags="acc"/>
+      <attr-item tags="dat"/>
+      <attr-item tags="gen"/>
+      <attr-item tags="ins"/>
+      <attr-item tags="loc"/>
+      <attr-item tags="abl"/>
+    </def-attr>
+  </section-def-attrs>
+
+  <section-rules>
+    <rule comment="REGLA: SN"> <!-- a single SN chunk is re-emitted as lem + tags + chcontent -->
+      <pattern>
+        <pattern-item n="SN"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+    <rule comment="prntest"> <!-- prn followed by prn2: output chunk 2 then chunk 1, with no <b/> between them -->
+      <pattern>
+        <pattern-item n="prn"/>
+        <pattern-item n="prn2"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+    <rule comment="superblankrule1"> <!-- test1 test2 test3: output order 2, 1, 3 with a <b/> between each pair of chunks -->
+      <pattern>
+        <pattern-item n="test1"/>
+        <pattern-item n="test2"/>
+        <pattern-item n="test3"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="3" part="lem"/>
+            <clip pos="3" part="tags"/>
+            <clip pos="3" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+    <rule comment="superblankrule2"> <!-- test2 test2 test3: five output chunks (2, 1, 2, 1, 3) separated by four <b/>, more than the two blanks inside the match -->
+      <pattern>
+        <pattern-item n="test2"/>
+        <pattern-item n="test2"/>
+        <pattern-item n="test3"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="3" part="lem"/>
+            <clip pos="3" part="tags"/>
+            <clip pos="3" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+    <rule comment="superblankrule3"> <!-- test3 test2 test1: output order 2, 1, 3 with no <b/> at all -->
+      <pattern>
+        <pattern-item n="test3"/>
+        <pattern-item n="test2"/>
+        <pattern-item n="test1"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+          <chunk>
+            <clip pos="3" part="lem"/>
+            <clip pos="3" part="tags"/>
+            <clip pos="3" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+    <rule comment="superblankrule4"> <!-- test1 test3 test2: output order 2, 1, 3 with a single <b/> before the last chunk -->
+      <pattern>
+        <pattern-item n="test1"/>
+        <pattern-item n="test3"/>
+        <pattern-item n="test2"/>
+      </pattern>
+      <action>
+        <out>
+          <chunk>
+            <clip pos="2" part="lem"/>
+            <clip pos="2" part="tags"/>
+            <clip pos="2" part="chcontent"/>
+          </chunk>
+          <chunk>
+            <clip pos="1" part="lem"/>
+            <clip pos="1" part="tags"/>
+            <clip pos="1" part="chcontent"/>
+          </chunk>
+          <b/>
+          <chunk>
+            <clip pos="3" part="lem"/>
+            <clip pos="3" part="tags"/>
+            <clip pos="3" part="chcontent"/>
+          </chunk>
+        </out>
+      </action>
+    </rule>
+
+  </section-rules>
+</interchunk>
+
diff --git a/tests/data/bincompat.t2x.bin b/tests/data/bincompat.t2x.bin
new file mode 100644
index 0000000..5cde7a7
Binary files /dev/null and b/tests/data/bincompat.t2x.bin differ
diff --git a/tests/data/nno-nob.t2x.bin b/tests/data/nno-nob.t2x.bin
new file mode 100644
index 0000000..1411e9a
Binary files /dev/null and b/tests/data/nno-nob.t2x.bin differ
diff --git a/tests/interchunk/__init__.py b/tests/interchunk/__init__.py
new file mode 100644
index 0000000..a498147
--- /dev/null
+++ b/tests/interchunk/__init__.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import unittest
+
+from subprocess import Popen, PIPE, call
+
+import signal
+
+
+class Alarm(Exception):  # raised by InterchunkTest.alarmHandler when SIGALRM fires
+    pass
+
+
+class InterchunkTest(unittest.TestCase):
+    """Subclass and override inputs/expectedOutputs (and possibly other
+stuff) to create new interchunk tests."""
+
+    bindata = "data/nno-nob.t2x.bin"
+    t2xdata = "data/apertium-nno-nob.nno-nob.t2x"
+    flags = ["-z"]
+    inputs = [""]
+    expectedOutputs = [""]
+    expectedRetCodeFail = False
+
+    def alarmHandler(self, signum, frame):
+        raise Alarm
+
+    def withTimeout(self, seconds, cmd, *args, **kwds):
+        signal.signal(signal.SIGALRM, self.alarmHandler)
+        signal.alarm(seconds)
+        ret = cmd(*args, **kwds)
+        signal.alarm(0)         # reset the alarm
+        return ret
+
+    def communicateFlush(self, string):
+        self.proc.stdin.write(string.encode('utf-8'))
+        self.proc.stdin.write(b'\0')
+        self.proc.stdin.flush()
+
+        output = []
+        char = None
+        try:
+            char = self.withTimeout(2, self.proc.stdout.read, 1)
+        except Alarm:
+            pass
+        while char and char != b'\0':
+            output.append(char)
+            try:
+                char = self.withTimeout(2, self.proc.stdout.read, 1)
+            except Alarm:
+                break           # send what we got up till now
+
+        return b"".join(output).decode('utf-8')
+
+    def compile(self):
+        compileCmd = ["../apertium/apertium-preprocess-transfer",
+                      self.t2xdata,
+                      self.bindata]
+        self.assertEqual(call(compileCmd),
+                         0)
+
+    def runTest(self):
+        self.compile()
+        try:
+            cmd = ["../apertium/apertium-interchunk"] \
+                + self.flags                         \
+                + [self.t2xdata, self.bindata]
+            self.proc = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
+
+            for inp, exp in zip(self.inputs, self.expectedOutputs):
+                self.assertEqual(self.communicateFlush(inp+"[][\n]"),
+                                 exp+"[][\n]")
+
+            self.proc.communicate()  # let it terminate
+            self.proc.stdin.close()
+            self.proc.stdout.close()
+            self.proc.stderr.close()
+            retCode = self.proc.poll()
+            if self.expectedRetCodeFail:
+                self.assertNotEqual(retCode, 0)
+            else:
+                self.assertEqual(retCode, 0)
+
+        finally:
+            pass
+
+
+class SimpleInterchunkTest(InterchunkTest):
+    inputs =          ["^prn<prn><f>{^ho<prn><f>$}$ ^prn2<prn2><f>{^ho<prn2><f>$}$",  # prn + prn2: the "prntest" pattern in t2xdata
+                       "^vblex<vblex><imp>{^gå<vblex><imp>$}$^default<default>{^.<sent><clb>$}$"]  # no pattern in t2xdata covers these chunks
+    expectedOutputs = ["^prn2<prn2><f>{^ho<prn2><f>$}$^prn<prn><f>{^ho<prn><f>$}$",  # chunks swapped, no blank between them
+                       "^vblex<vblex><imp>{^gå<vblex><imp>$}$^default<default>{^.<sent><clb>$}$"]  # identical to the input
+
+class WordboundBlankTest(InterchunkTest):
+    inputs =          ["^n_n<SN><sg>{[[t:b:123456]]^worda<n><ND><m>$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz<n><ND><f>$}$",  # one SN chunk with [[...]] blanks inside
+                        "^prn<prn><f>{[[t:b:abc823]]^ho<prn><f>$}$ ^prn2<prn2><f>{[[t:i:poa023; t:span:12xas23]]^ho<prn2><f>$}$"]  # prn + prn2, each containing a [[...]] blank
+    expectedOutputs = ["^n_n<SN><sg>{[[t:b:123456]]^worda<n><ND><m>$ ;[testblank] [[t:s:xyzab12]]^wordb# xyz<n><ND><f>$}$",  # identical to the input
+                        "^prn2<prn2><f>{[[t:i:poa023; t:span:12xas23]]^ho<prn2><f>$}$^prn<prn><f>{[[t:b:abc823]]^ho<prn><f>$}$"]  # chunks swapped; each [[...]] stays inside its own chunk
+
+class SuperblankTest(InterchunkTest):
+    inputs =          [ "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #superblankrule1
+                        "[blank1];; ^test1<test1x>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3x>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #Blanks when no rules match
+                        "[blank1];; ^test2<test2>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #superblankrule2 -> When output rule has more <b/> than input blanks, print all then spaces
+                        "[blank1];; ^test3<test3>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test1<test1>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #superblankrule3 -> Output rule has no <b/>, flush all blanks after rule output
+                        "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test3<test3>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test2<test2>{^wordc# xyz<n><ND><f>$}$ [blank4];;", #superblankrule4 -> Output rule has one <b/>, print one blank, then flush all after rule output
+                        "[blank1];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test1<test1>{^worda<n><ND><m>$}$ ;[blank5] ^test3<test3>{^wordb# xyz<n><ND><f>$}$ ;[blank6]; ^test2<test2>{^wordc# xyz<n><ND><f>$}$ [blank7];;"] #Multiple matching rules -> superblankrule1 & superblankrule4
+    
+    expectedOutputs = [ "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;",  # chunks in order 2, 1, 3; blanks keep their positions
+                        "[blank1];; ^test1<test1x>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2x>{^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^test3<test3x>{^wordc# xyz<n><ND><f>$}$ [blank4];;",  # identical to the input
+                        "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test2<test2>{^worda<n><ND><m>$}$ ;[blank3]; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ^test2<test2>{^worda<n><ND><m>$}$ ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];;",  # blank2 and blank3 used first, then single spaces
+                        "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$^test3<test3>{^worda<n><ND><m>$}$^test1<test1>{^wordc# xyz<n><ND><f>$}$ ;[blank2]  ;[blank3];  [blank4];;",  # chunks adjacent; blank2 and blank3 follow the rule output
+                        "[blank1];; ^test3<test3>{^wordb# xyz<n><ND><f>$}$^test1<test1>{^worda<n><ND><m>$}$ ;[blank2] ^test2<test2>{^wordc# xyz<n><ND><f>$}$ ;[blank3];  [blank4];;",  # blank2 at the <b/>, blank3 after the rule output
+                        "[blank1];; ^test2<test2>{^wordb# xyz<n><ND><f>$}$ ;[blank2] ^test1<test1>{^worda<n><ND><m>$}$ ;[blank3]; ^test3<test3>{^wordc# xyz<n><ND><f>$}$ [blank4];; ^test3<test3>{^wordb# xyz<n><ND><f>$}$^test1<test1>{^worda<n><ND><m>$}$ ;[blank5] ^test2<test2>{^wordc# xyz<n><ND><f>$}$ ;[blank6];  [blank7];;"]  # superblankrule1 output followed by superblankrule4 output
+
+
+class BincompatTest(SimpleInterchunkTest):
+    bindata = "data/bincompat.t2x.bin"  # used as-is: compile() below never regenerates it
+
+    def compile(self):
+        pass  # overrides InterchunkTest.compile so that nothing is compiled
diff --git a/tests/postchunk/__init__.py b/tests/postchunk/__init__.py
index 69547cc..1a25be7 100644
--- a/tests/postchunk/__init__.py
+++ b/tests/postchunk/__init__.py
@@ -118,9 +118,9 @@ class SingleLUWordboundBlankTest(PostchunkTest):
 class SuperblankTest(PostchunkTest):
     inputs =          [ "[blank1];; ^n_n<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$}$ ;[blank3]; ",
                         "[blank1];; ^n_k<SN><sgn>{^worda<nn><NDn><mn>$ ;[blank2] ^wordb# xyz<nn><NDn><fn>$}$ ;[blank3]; ", #Blanks when no rules match
-                        "[blank1];; ^n_n2<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #When output rule has more <b/> than input blanks
-                        "[blank1];; ^n_n3<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #Output rule has no <b/>
-                        "[blank1];; ^n_n4<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #Output rule has one <b/>
+                        "[blank1];; ^n_n2<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #superblank rule 1 -> When output rule has more <b/> than input blanks, print all then spaces
+                        "[blank1];; ^n_n3<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #superblank rule 2 -> Output rule has no <b/>, flush all blanks after rule output
+                        "[blank1];; ^n_n4<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$ ;[blank3]; ^wordc<n>$}$ ;[blank4]; ", #superblank rule 3 -> Output rule has one <b/>, print one blank, then flush all after rule output
                         "[blank1];; ^n_n<SN><sg>{^worda<n><ND><m>$ ;[blank2] ^wordb# xyz<n><ND><f>$}$ ;[blank3]; ^n_n4<SN><sg>{^worda<n><ND><m>$ ;[blank4] ^wordb# xyz<n><ND><f>$ ;[blank5]; ^wordc<n>$}$ ;[blank6]; "] #Multiple matching rules
     
     expectedOutputs = [ "[blank1];; ^wordb# xyz<n><ND><f>$ ;[blank2] ^worda<n><ND><m>$ ^worda+wordb# xyz$ ;[blank3]; ",
diff --git a/tests/run_tests.py b/tests/run_tests.py
index 96fc24d..5930e63 100755
--- a/tests/run_tests.py
+++ b/tests/run_tests.py
@@ -8,6 +8,7 @@ import unittest
 import tagger
 import pretransfer
 import transfer
+import interchunk
 import postchunk
 import adaptdocx
 
@@ -17,6 +18,7 @@ if __name__ == "__main__":
     for module in [tagger,
                    pretransfer,
                    transfer,
+                   interchunk,
                    postchunk,
                    adaptdocx]:
         suite = unittest.TestLoader().loadTestsFromModule(module)