commit 950ad03cd99e75b24b5e9ebb1396ed9bc35a2905
Author: Daniel Swanson <popcorn.tomato.dude@gmail.com>
Date:   Fri Jun 11 14:29:47 2021 -0500

    unlocked I/O functions and old src files are unused - remove

diff --git a/configure.ac b/configure.ac
index b200861..ff39669 100644
--- a/configure.ac
+++ b/configure.ac
@@ -46,7 +46,6 @@ AC_SUBST(ICU_LIBS)
 # Checks for libraries.
 AC_CHECK_LIB(xml2, xmlReaderForFile)
 
-AC_CHECK_DECLS([fread_unlocked, fwrite_unlocked, fgetc_unlocked, fputc_unlocked, fputs_unlocked])
 AC_CHECK_HEADER([utf8.h], [], [AC_MSG_ERROR([You don't have utfcpp installed.])])
 
 CPPFLAGS="$CPPFLAGS $CFLAGS $LTTOOLBOX_CFLAGS $APERTIUM_CFLAGS $LIBXML_CFLAGS $ICU_CFLAGS"
diff --git a/src/processor.cc b/src/processor.cc
deleted file mode 100644
index aab265e..0000000
--- a/src/processor.cc
+++ /dev/null
@@ -1,173 +0,0 @@
-#include <cwchar>
-#include <cstdio>
-#include <cerrno>
-#include <string>
-#include <iostream>
-#include <list>
-#include <set>
-
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/lt_locale.h>
-#include <lttoolbox/transducer.h>
-#include <lttoolbox/compression.h>
-#include <lttoolbox/alphabet.h>
-#include <lttoolbox/state.h>
-#include <lttoolbox/trans_exe.h>
-
-wstring readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2);
-
-
-/* get the text between delim1 and delim2 */
-/* next_token() */
-wstring
-readFullBlock(FILE *input, wchar_t const delim1, wchar_t const delim2)
-{
-    wstring result = L"";
-    result += delim1;
-    wchar_t c = delim1;
-
-    while(!feof(input) && c != delim2)
-    {
-        c = static_cast<wchar_t>(fgetwc(input)); //fget_unlocked
-        result += c;
-    }
-
-    return result;
-}
-
-/***
-main
-***/
-int main (int argc, char** argv)
-{
-    Alphabet alphabet;
-    TransExe transducer;
-
-    LtLocale::tryToSetLocale();
-    FILE *fst = fopen(argv[1], "r");
-
-    set<wchar_t> alphabetic_chars;
-    int len = Compression::multibyte_read(fst);
-    while(len > 0)
-    {
-        alphabetic_chars.insert(static_cast<wchar_t>(Compression::multibyte_read(fst)));
-        len--;
-    }
-
-    alphabet.read(fst);
-    wcout << L"alphabet_size: " << alphabet.size() << endl;
-
-    len = Compression::multibyte_read(fst);
-    len = Compression::multibyte_read(fst);
-    wcout << len << endl;
-    wstring name = L"";
-    while(len > 0)
-    {
-        name += static_cast<wchar_t>(Compression::multibyte_read(fst));
-        len--;
-    }
-    wcout << name << endl;
-
-    transducer.read(fst, alphabet);
-
-    FILE *input = stdin;
-    FILE *output = stdout;
-
-    /* preparing for processing */
-    vector<State> alive_states; //A set of alive states is maintained to compute all the possible ways to
-    set<Node *> anfinals; //alive node finals ?
-    set<wchar_t> escaped_chars;
-
-    State* initial_state = new State();
-    initial_state->init(transducer.getInitial()); // getInitial() returns an int
-    anfinals.insert(transducer.getFinals().begin(), transducer.getFinals().end());
-
-    set<int> final_states = transducer.getFinals();
-    for(auto final_state : final_states) {
-        final_state.init(transducer.getInitial()); //initialize
-    }
-
-
-    /* processing */
-
-    vector<State> new_states;
-    alive_states.push_back(*initial_state);
-    // TODO: insert the other states
-    // TODO: insert the final state
-
-    int line_number = 0;
-    bool accepted = true;
-    while(!feof(input)) // while true
-    {
-        //initialize conditions
-        int tag_count = 0;
-        State* current_state = initial_state;
-        bool in_lemma = false;
-        bool in_take = false;
-        bool in_out = false;
-
-        while (alive_states.size() > 1 and !isFinal(current_state)) {
-            //get the next token
-            int val = fgetwc(input); // read 1 wide char
-            bool is_tag = false;
-            if(val == L'<') // if in tag, get the whole tag
-            {
-                in_lemma = false;
-                is_tag = true;
-                wstring tag = L"";
-                tag = readFullBlock(input, L'<', L'>');
-                val = static_cast<int>(alphabet(tag));
-
-                tag_count++;
-
-                cout << "val before: " << val << endl;
-                cout << "tag_count: " << tag_count << endl;
-
-                if(val == 0 && tag_count > 2) //TODO: val==0?
-                {
-                    val = static_cast<int>(alphabet(L"<ANY_TAG>"));
-                }
-
-                cout << "val after: " << val << endl;
-                fwprintf(stderr, L"tag %S: %d\n", tag.c_str(), val);
-
-                if (tag == '<sent>') {
-                    accepted = true;
-                }
-            }
-            else if(in_lemma && !in_take && !in_out) {
-                val == static_cast<int>(alphabet(L"&"));
-            }
-
-            // if (current_state == initial_state && not eof) {
-                //successfully reached eof
-                //exit()
-
-            if (current_state == initial_state && val != '\n') {
-                accepted = true;
-                break;
-            } else if (val == '\n') { //or sent
-                accepted = true;
-            }
-
-            //step into the next state
-            for(vector<State>::const_iterator it = alive_states.begin(); it != alive_states.end(); it++) { //step //for every state in alive_states
-                State s = *it;
-
-                if (tag_count > 2) {
-                    s.step(val, alphabet(L"<ANY_TAG>"));
-                } else {
-                    s.step(val)
-                }
-
-                if(s.size() > 0)
-                {
-                    new_states.push_back(s);
-                }
-                wcout << (wchar_t) val << L" " << L"size: " << s.size() << L" final: " << s.isFinal(anfinals) << endl;
-            }
-
-            alive_states.swap(new_states);
-        }
-        return 0;
-    }
diff --git a/src/transducer.py b/src/transducer.py
deleted file mode 100644
index 77fc1b4..0000000
--- a/src/transducer.py
+++ /dev/null
@@ -1,189 +0,0 @@
-#usage: python transducer.py testfile.txt
-
-import sys
-
-transitions = {
-    (-1,'^') : 0,
-    (0,'t') : 1,
-    (1,'a') : 2,
-    (2,'k') : 3,
-    (3,'e') : 4,
-    (4,'<vblex>') : 5,
-    (5,'<ANY_TAG>') : 6,
-    (6,'<ANY_TAG>') : 7,
-    (6,'$') : 8,
-    (7,'<ANY_TAG>') : 7,
-    (7,'$'): 8,
-    (8,' ') : 9,
-    (9,'^') : 10,
-    (10,'&') : 11,
-    (11,'&') : 11,
-    (11,'<n>') : 12,
-    (11,'<adj>') : 13,
-    (11,'<det>') : 14,
-    (11,'<prn>') : 15,
-    (11,'<np>'): 16,
-    (12,'<ANY_TAG>') : 200,
-    (200,'<ANY_TAG>') : 201,
-    (200,'$') : 17,
-    (201,'<ANY_TAG>') : 201,
-    (201,'$') : 17,
-    (13,'<ANY_TAG>') : 225,
-    (13,'$') : 250,
-    (225,'<ANY_TAG>') : 225,
-    (225,'$') : 250,
-    (250,' '):251,
-    (251,'^'):252,
-    (252,'&'):253,
-    (253,'&'):253,
-    (253,'<n>'):12,
-    (253,'<adj>'):13,
-    (14,'<ANY_TAG>') : 275,
-    (275,'<ANY_TAG>') : 276,
-    (275,'$') : 250,
-    (276,'<ANY_TAG>') : 276,
-    (276,'$') : 250,
-    (15,'<ANY_TAG>') : 200,
-    (16,'<ANY_TAG>'): 200,
-    (100,'<ANY_TAG>') : 100,
-    (100,'$') : 17,
-    (17,' ') : 18, #do not go to state 17 unless you are expecting 'out' to be the next word
-    (18,'^') : 19,
-    (19,'o') : 20,
-    (20,'u') : 21,
-    (21,'t') : 22,
-    (22,'<adv>') : 23,
-    (22,'<pr>') : 24,
-    (23,'$') : 25,
-    (24,'$') : 25,
-    (25,'') : 26,
-    (25,' ') : 26,
-    (25,'\n') : 26,
-    (25,'^') : 27,
-    (27,'.') : 28,
-    (28,'<sent>') : 29,
-    (29,'$') : 25
-}
-
-#<ANY_TAG_A> is required
-#<ANY_TAG_B> is optional
-states = {
-    -1 : '',
-    0 : '^',
-    1 : 't',
-    2 : 'a',
-    3 : 'k',
-    4 : 'e',
-    5 : '<vblex>',
-    6 : '<ANY_TAG_A>', #secondary tag is necessary
-    7 : '<ANY_TAG_B>', #third, fourth, fifth...tags are optional
-    8 : '$',
-    9 : ' ',
-    10 : '^',
-    11 : '&', #represents any character 'ANY_CHAR
-    12 : '<n>',
-    13 : '<adj>',
-    14 : '<det>',
-    15 : '<prn>',
-    16 : '<np>',
-    100: '<ANY_TAG_B>',
-    200: '<ANY_TAG_A>',
-    201: '<ANY_TAG_B>',
-    225: '<ANY_TAG_B>',
-    250: '$',
-    251: ' ',
-    252: '^',
-    253: '&',
-    275: '<ANY_TAG_A>',
-    276: '<ANY_TAG_B>',
-    17 : '$',
-    18 : ' ',
-    19 : '^',
-    20 : 'o',
-    21 : 'u',
-    22 : 't',
-    23 : '<adv>',
-    24 : '<pr>',
-    25 : '$',
-    26 : '\n',
-    27 : '^',
-    28 : '.',
-    29 : '<sent>',
-
-}
-
-def next_token(file, subsequent_tag, in_lemma, in_take, in_out):
-    original_token = file.read(1)
-    modified_token = original_token
-    if original_token == '<': #if in tag
-        in_lemma = False
-        c = ''
-        while c != '>':
-            c = file.read(1)
-            original_token += c
-            modified_token += c
-        if subsequent_tag:
-            modified_token = '<ANY_TAG>'
-    if in_lemma and not in_take and not in_out:
-        modified_token = '&' #ANY_CHAR
-    return original_token, modified_token
-
-def step(state, token): #token is at the next state
-    next_state = transitions.get((state,token))
-    output_token = states.get(next_state)
-    return next_state, output_token #return the next state, or None if it doesn't exist
-
-def main():
-    f = open(sys.argv[1])
-    line_number = 0
-    accepted = True
-    while True:
-        line = ''
-        if accepted:
-            line_number += 1
-        current_state = -1
-
-        subsequent_tag = False
-        in_lemma = False
-        in_take = False
-        in_out = False
-
-        while states.get(current_state) != None and current_state != 26:
-            original_token, modified_token = next_token(f, subsequent_tag, in_lemma, in_take, in_out)
-            if current_state == -1 and modified_token == '':
-                print('successfully reached end of file')
-                exit(0)
-            elif current_state == -1 and modified_token == '\n':
-                accepted = True
-                break
-            elif modified_token == '\n':
-                accepted = True
-
-            current_state, output_token = step(current_state, modified_token)
-            if output_token == None:
-                break
-
-            line += original_token
-
-            subsequent_tag = current_state in [5, 6, 7, 12, 13, 14, 15, 16, 100, 200, 201, 225, 275, 276]
-            in_lemma = current_state in [1, 2, 3, 10, 11, 252, 253, 19, 20, 21, 22]
-            in_take = current_state in [1, 2, 3, 4]
-            if current_state == 19:
-                pos = f.tell() #store the current buffer position
-                peek = f.read(4) #read in the next 4 chars
-                f.seek(pos) #return to the original position
-                if peek == 'out<':
-                    in_out = True
-
-        if current_state == 26:
-            print str(line_number) + '   ' + line
-            accepted = True
-        else:
-            if accepted:
-                print str(line_number) + '   string not accepted \n'
-                accepted = False
-                current_state = -1
-                line_number += 1
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/src/transducer2.cc b/src/transducer2.cc
deleted file mode 100644
index 7042095..0000000
--- a/src/transducer2.cc
+++ /dev/null
@@ -1,196 +0,0 @@
-#include <cwchar>
-#include <cstdio>
-#include <cerrno>
-#include <string>
-#include <iostream>
-#include <list>
-#include <set>
-#include <regex>
-
-#include <lttoolbox/ltstr.h>
-#include <lttoolbox/lt_locale.h>
-#include <lttoolbox/transducer.h>
-#include <lttoolbox/compression.h>
-#include <lttoolbox/alphabet.h>
-#include <lttoolbox/state.h>
-#include <lttoolbox/trans_exe.h>
-
-using namespace std;
-
-int main (int argc, char** argv) {
-    Alphabet alphabet;
-
-    LtLocale::tryToSetLocale();
-
-    alphabet.includeSymbol(L"<vblex>");
-    alphabet.includeSymbol(L"<n>");
-    alphabet.includeSymbol(L"<adj>");
-    alphabet.includeSymbol(L"<det>");
-    alphabet.includeSymbol(L"<prn>");
-    alphabet.includeSymbol(L"<np>");
-
-    alphabet.includeSymbol(L"<ANY_TAG>");
-    alphabet.includeSymbol(L"<ANY_CHAR>");
-    alphabet.includeSymbol(L"<$>");
-
-    int vblex_sym = alphabet(L"<vblex>");
-    int n_sym = alphabet(L"<n>");
-    int adj_sym = alphabet(L"<adj>");
-    int det_sym = alphabet(L"<det>");
-    int prn_sym = alphabet(L"<prn>");
-    int np_sym = alphabet(L"<np>");
-
-    int any_tag = alphabet(L"<ANY_TAG>");
-    int any_char = alphabet(L"<ANY_CHAR>");
-    int wb_sym = alphabet(L"<$>");
-
-    /* reap from input file */
-    for (string line; getline(cin, line);) {
-        Transducer t;
-        string first_token = line.substr(0, line.find(' '));
-        string second_token = line.substr(line.find(' ') + 1);
-
-        /* noun phrase acceptor: see README */
-
-        int initial = t.getInitial();
-        int take_out = initial;
-        for (wchar_t c : first_token) {
-            take_out = t.insertSingleTransduction(alphabet(c,c), take_out);
-        }
-        take_out = t.insertSingleTransduction(alphabet(0,L'#'), take_out);
-        take_out = t.insertSingleTransduction(alphabet(0,L' '), take_out);
-        for (wchar_t c : second_token) {
-            take_out = t.insertSingleTransduction(alphabet(0,c), take_out);
-        }
-        take_out = t.insertSingleTransduction(alphabet(vblex_sym,vblex_sym), take_out);
-        int loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        int after_takeout = take_out;
-
-        /* no det */
-        int from_nodet = after_takeout;
-
-        /* first lemma */
-        loop = after_takeout;
-        take_out = t.insertSingleTransduction(alphabet(any_char,any_char), loop);
-        t.linkStates(take_out, loop, 0);
-
-        int first_lm = take_out;
-
-        /* prn */
-        take_out = t.insertSingleTransduction(alphabet(prn_sym,prn_sym), first_lm);
-
-        loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        int after_prn = take_out;
-
-        /* np */
-        take_out = t.insertSingleTransduction(alphabet(np_sym,np_sym), first_lm);
-
-        loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        int after_np = take_out;
-
-        /* det */
-        take_out = t.insertSingleTransduction(alphabet(det_sym,det_sym), first_lm);
-
-        loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        int after_det = take_out;
-
-        /* no adj */
-        int from_noadj = take_out; //same as after_det
-
-        /* lemma for the adj */
-        loop = after_det;
-        take_out = t.insertSingleTransduction(alphabet(any_char,any_char), loop);
-        t.linkStates(take_out, loop, 0);
-
-        int lm_adj = take_out;
-
-        /* adj */
-        take_out = t.insertSingleTransduction(alphabet(adj_sym,adj_sym), lm_adj);
-
-        int optional_adj = take_out;
-
-        loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-
-        //may not have a second tag
-        t.linkStates(optional_adj, take_out, 0);
-
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        int after_adj = take_out;
-
-        /* lemma for the noun */
-        loop = after_adj;
-        take_out = t.insertSingleTransduction(alphabet(any_char,any_char), loop);
-        t.linkStates(take_out, loop, 0);
-
-        int lm_noun = take_out;
-
-        /* possible subsequent adj */
-        t.linkStates(lm_noun, lm_adj, alphabet(adj_sym,adj_sym));
-
-        /* n */
-        take_out = t.insertSingleTransduction(alphabet(n_sym,n_sym), lm_noun);
-
-        loop = take_out;
-        take_out = t.insertSingleTransduction(alphabet(any_tag,any_tag), loop);
-        t.linkStates(take_out, loop, 0);
-
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,wb_sym), take_out);
-
-        /* out */
-        int before_out = take_out;
-
-        for (wchar_t c : second_token) {
-            take_out = t.insertSingleTransduction(alphabet(c,0), take_out);
-        }
-        take_out = t.insertSingleTransduction(alphabet(any_tag, 0), take_out);
-        take_out = t.insertSingleTransduction(alphabet(wb_sym,0), take_out);
-
-        t.setFinal(take_out);
-
-        /* final link states */
-        t.linkStates(after_takeout, before_out, 0);
-        t.linkStates(after_prn, before_out, 0);
-        t.linkStates(after_np, before_out, 0);
-        t.linkStates(from_nodet, after_det, 0);
-        t.linkStates(from_noadj, after_adj, 0);
-
-        string filename = regex_replace(line,std::regex("\\s+"), "") + ".fst";
-        FILE* fst = fopen(filename.c_str(), "w+");
-        // First write the letter symbols of the alphabet
-        Compression::wstring_write(L"abcdefghijklmnopqrstuvwxyz", fst);
-        // Then write the multicharacter symbols
-        alphabet.write(fst);
-        // Then write then number of transducers
-        Compression::multibyte_write(1, fst);
-        // Then write the name of the transducer
-        Compression::wstring_write(L"main@standard", fst);
-        // Then write the transducer
-        t.write(fst);
-        cout << line << " t.size(): " << t.size() << endl ;
-        fclose(fst);
-    }
-
-    return 0;
-}
\ No newline at end of file