Index: branches/apertium-separable/testing/Makefile =================================================================== --- branches/apertium-separable/testing/Makefile (nonexistent) +++ branches/apertium-separable/testing/Makefile (revision 80201) @@ -0,0 +1,3 @@ +transducer.cpp: + g++ -I/usr/local/include/lttoolbox-3.3 -Wall transducer.cpp -o transducer -llttoolbox3 + ./transducer Index: branches/apertium-separable/testing/transducer.cpp =================================================================== --- branches/apertium-separable/testing/transducer.cpp (nonexistent) +++ branches/apertium-separable/testing/transducer.cpp (revision 80201) @@ -0,0 +1,82 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int main (int argc, char** argv) { + Alphabet alphabet; + Transducer t; + + LtLocale::tryToSetLocale(); + + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L"&"); + alphabet.includeSymbol(L"$"); + + int vblex_sym = alphabet(L""); + int n_sym = alphabet(L""); + int adj_sym = alphabet(L""); + int det_sym = alphabet(L""); + int prn_sym = alphabet(L""); + int np_sym = alphabet(L""); + int adv_sym = alphabet(L""); + int pr_sym = alphabet(L""); + + int initial = t.getInitial(); + // int current_state = initial; + + int take_out = initial; + take_out = t.insertSingleTransduction(alphabet(L't',L't'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'a',L'a'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'k',L'k'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'e',L'e'), take_out); + + take_out = t.insertSingleTransduction(alphabet(L' ',L' '), take_out); + + take_out = t.insertSingleTransduction(alphabet(L'o',L'o'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'u',L'u'), take_out); + take_out = t.insertSingleTransduction(alphabet(L't',L't'), take_out); + + t.setFinal(take_out); + // take_out = t.insertSingleTransduction(alphabet(L'^',L'^'), take_out); + // take_out = t.insertSingleTransduction(alphabet(L'&',L'&'), take_out); + // take_out = t.insertSingleTransduction(alphabet(L'$',L'$'), take_out); + + FILE* fst = fopen("takeout.fst", "w"); + t.write(fst); + fclose(fst); + + // fst = fopen("takeout.fst", "r"); + // + // TransExe te; + // te.read(fst, alphabet); + // fclose(fst); + // + // State *initial_state = new State(); + // initial_state->init(te.getInitial()); + // State current_state = *initial_state; + + // cout << initial << endl; + // cout << "running" << endl; + return 0; +} \ No newline at end of file Index: branches/apertium-separable/testing/transducer.py =================================================================== --- branches/apertium-separable/testing/transducer.py (revision 80197) +++ branches/apertium-separable/testing/transducer.py (revision 80201) @@ -15,9 +15,6 @@ """ transitions = { - #if current_state is -1 and the next token (the next thing to print) - #is '^', then next_state() will print the next token, - #which is located at state[0], and set current_state to 0 (-1,'^') : 0, (0,'t') : 1, (1,'a') : 2, @@ -33,22 +30,20 @@ (9,'^') : 10, (10,'&') : 11, (11,'&') : 11, - (11,'') : 12, #if noun, there should be one or more add'l tags but no following words - (11,'') : 13, #if adj, add'l tags are optional and should be followed by an n + (11,'') : 12, + (11,'') : 13, (11,'') : 14, (11,'') : 15, - (11,''): 15.5, - # (12,'') : 16, #### case: n* + (11,''): 16, (12,'') : 200, (200,'') : 201, (200,'$') : 17, (201,'') : 201, (201,'$') : 17, - # (13,'') : 13, #### case: adj(*) n* (13,'') : 225, (13,'$') : 250, (225,'') : 225, - (225,'$') : 250, #followed by noun + (225,'$') : 250, (250,' '):251, (251,'^'):252, (252,'&'):253, @@ -55,23 +50,17 @@ (253,'&'):253, (253,''):12, (253,''):13, - # (14,'') : 16, (14,'') : 275, (275,'') : 276, (275,'$') : 250, (276,'') : 276, (276,'$') : 250, - # (15,'') : 16, #prn.pers same as n (15,'') : 200, - #15.5 same as 15 and n - (15.5,''): 200, - (16,'') : 100, - (16, '$') : 17, + (16,''): 200, (100,'') : 100, (100,'$') : 17, (17,' ') : 18, #do not go to state 17 unless you are expecting 'out' to be the next word - (18,'^') : 19, #? - # (19,'&') : 11, + (18,'^') : 19, (19,'o') : 20, (20,'u') : 21, (21,'t') : 22, @@ -104,8 +93,7 @@ 13 : '', 14 : '', 15 : '', - 15.5 : '', - 16 : '', + 16 : '', 100: '', 200: '', 201: '', @@ -151,12 +139,9 @@ def main(): f = open(sys.argv[1]) - # print('input a string:') - # eol = True line_number = 0 accepted = True - while True: #while eol: - # eol = False + while True: line = '' if accepted: line_number += 1 @@ -168,48 +153,42 @@ in_out = False while states.get(current_state) != None and current_state != 26: - # print states.get(current_state) + str(current_state) original_token, modified_token = next_token(f, subsequent_tag, in_lemma, in_take, in_out) - if current_state == -1 and modified_token == '': + i + f current_state == -1 and modified_token == '': print('successfully reached end of file') exit(0) elif current_state == -1 and modified_token == '\n': accepted = True break - elif modified_token == '\n': #not accepted and token == '\n': + elif modified_token == '\n': accepted = True - next_state, output_token = step(current_state, modified_token) + + current_state, output_token = step(current_state, modified_token) if output_token == None: break - line += original_token #line += output_token + line += original_token - subsequent_tag = next_state in [5, 6, 7, 12, 13, 14, 15, 15.5, 16, 100, 200, 201, 225, 275, 276] #every state that is a tag. secondary tags for 'out' not included because it only ever has one tag - in_lemma = next_state in [1, 2, 3, 10, 11, 252, 253, 19, 20, 21, 22] #include 4? do not include 22? - in_take = next_state in [1, 2, 3, 4] - # print 'position: ' + str(f.tell()) - if next_state == 19: - #in c: there is an istream::peek() function + subsequent_tag = current_state in [5, 6, 7, 12, 13, 14, 15, 16, 100, 200, 201, 225, 275, 276] + in_lemma = current_state in [1, 2, 3, 10, 11, 252, 253, 19, 20, 21, 22] + in_take = current_state in [1, 2, 3, 4] + if current_state == 19: pos = f.tell() #store the current buffer position peek = f.read(4) #read in the next 4 chars - f.seek(pos) #go back to the original position + f.seek(pos) #return to the original position if peek == 'out<': in_out = True - #TODO: when transitions are finalized, check indices - current_state = next_state #can't set this earlier, or else the following print statement doesn't work if current_state == 26: print str(line_number) + ' ' + line accepted = True else: - # print('error: current_state ' + str(current_state) + ' not found in states') - # exit(1) if accepted: print str(line_number) + ' string not accepted \n' accepted = False current_state = -1 line_number += 1 - # eol = True if __name__ == '__main__': main() \ No newline at end of file Index: branches/apertium-separable/testing/transducer.c =================================================================== --- branches/apertium-separable/testing/transducer.c (nonexistent) +++ branches/apertium-separable/testing/transducer.c (revision 80201) @@ -0,0 +1,81 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +int main (int argc, char** argv) { + Alphabet alphabet; + Transducer t; + + LtLocale::tryToSetLocale(); + + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L""); + + alphabet.includeSymbol(L""); + alphabet.includeSymbol(L"&"); + alphabet.includeSymbol(L"$"); + + int vblex_sym = alphabet(L""); + int n_sym = alphabet(L""); + int adj_sym = alphabet(L""); + int det_sym = alphabet(L""); + int prn_sym = alphabet(L""); + int np_sym = alphabet(L""); + int adv_sym = alphabet(L""); + int pr_sym = alphabet(L""); + + int initial = t.getInitial(); + // int current_state = initial; + + int take_out = initial; + take_out = t.insertSingleTransduction(alphabet(L't',L't'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'a',L'a'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'k',L'k'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'e',L'e'), take_out); + + take_out = t.insertSingleTransduction(alphabet(L' ',L' '), take_out); + + take_out = t.insertSingleTransduction(alphabet(L'o',L'o'), take_out); + take_out = t.insertSingleTransduction(alphabet(L'u',L'u'), take_out); + take_out = t.insertSingleTransduction(alphabet(L't',L't'), take_out); + + // take_out = t.insertSingleTransduction(alphabet(L'^',L'^'), take_out); + // take_out = t.insertSingleTransduction(alphabet(L'&',L'&'), take_out); + // take_out = t.insertSingleTransduction(alphabet(L'$',L'$'), take_out); + + FILE* fst = fopen("takeout.fst", "w"); + t.write(fst); + fclose(fst); + + // fst = fopen("takeout.fst", "r"); + // + // TransExe te; + // te.read(fst, alphabet); + // fclose(fst); + // + // State *initial_state = new State(); + // initial_state->init(te.getInitial()); + // State current_state = *initial_state; + + // cout << initial << endl; + // cout << "running" << endl; + return 0; +} \ No newline at end of file