Index: incubator/apertium-pol-rus/apertium-pol-rus.pol-rus.t2x
===================================================================
--- incubator/apertium-pol-rus/apertium-pol-rus.pol-rus.t2x	(revision 71826)
+++ incubator/apertium-pol-rus/apertium-pol-rus.pol-rus.t2x	(revision 71827)
@@ -1,4 +1,4 @@
-
+
Index: incubator/apertium-pol-rus/dev/cleaner.py
===================================================================
--- incubator/apertium-pol-rus/dev/cleaner.py	(revision 71826)
+++ incubator/apertium-pol-rus/dev/cleaner.py	(revision 71827)
@@ -41,5 +41,5 @@
 
 info = forms_collector('../../stuffs2') #../../stuffs # someverbs.txt
-with codecs.open('../../verbs_z_experiment.json', 'w', 'utf-8')as f:
+with codecs.open('../../verbs_z.json', 'w', 'utf-8') as f:
     json.dump(info, f, ensure_ascii=False, indent=2)
Index: incubator/apertium-pol-rus/dev/from_morpheus.py
===================================================================
--- incubator/apertium-pol-rus/dev/from_morpheus.py	(revision 71826)
+++ incubator/apertium-pol-rus/dev/from_morpheus.py	(revision 71827)
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+ # -*- coding: utf-8 -*-
 
 import codecs
 import re
Index: incubator/apertium-pol-rus/dev/from_z.py
===================================================================
--- incubator/apertium-pol-rus/dev/from_z.py	(revision 71826)
+++ incubator/apertium-pol-rus/dev/from_z.py	(revision 71827)
@@ -1,5 +1,9 @@
 # -*- coding: utf-8 -*-
+
+### NB: prb is replaced with an empty string in change_tags; such forms should be marked somehow before that happens.
+### why on earth does this happen: pstpss+pstpss
+
 import codecs
 import re
 import json
@@ -9,14 +13,38 @@
 
 def paradigm_collector(gram_d): # working at this
     '''returns a dictionary, where keys are lemmas and values is a tuple of stem and frozenset of tuples of flections and frozensets of grammar tags'''
     morph_d = {lexeme : [el[0] for el in gram_d[lexeme]] for lexeme in gram_d}
+    # print('example of morph_d: ' + str(morph_d[list(morph_d.keys())[0]]))
     paradigms = {}
     for lemma in morph_d:
-        stem_len = stem_finder(morph_d[lemma], lemma)
+        new_lemma = choose_lemma(gram_d[lemma])
+        stem_len = stem_finder(morph_d[lemma], new_lemma)
         stem = lemma[:stem_len]
-        flections = frozenset([pair[0][stem_len:] + ' ' + pair[1] for pair in gram_d[lemma]])
+        flections = frozenset([pair[0][stem_len:] + ' ' + change_tags(pair[1]) for pair in gram_d[lemma]])
         paradigms[lemma] = (stem, flections)
+    # print('example of paradigms: ' + str(paradigms[list(paradigms.keys())[0]]))
     return paradigms
 
+def change_tags(grammar_tags, secondary = True):
+    grammar_tags = grammar_tags.replace(' use/ant', '').replace(' fac', '').replace('pstpss pstpss ', 'pstpss ')
+    if secondary:
+        grammar_tags = grammar_tags.replace('v impf ', '').replace('v perf ', '').replace('tv ', '').replace('iv ', '').replace(' prb', '')
+    return grammar_tags
+
+def choose_lemma(lexeme):
+    '''takes a list of forms and grammar tags and returns a lemma'''
+    if 'pstact' in lexeme[0][1] or 'pstpss' in lexeme[0][1]:
+        for arr in lexeme:
+            if 'nom' in arr[1] and 'sg' in arr[1] and 'msc' in arr[1]:
+                return arr[0]
+    elif 'imp' in lexeme[0][1]:
+        for arr in lexeme:
+            if 'sg' in arr[1]:
+                return arr[0]
+    else:
+        for arr in lexeme:
+            if 'inf' in arr[1]:
+                return arr[0]
+
 def stem_finder(forms, lemma):
     '''finds length of the stem, returns an integer. called in paradigm_collector'''
     min_len = len(min(forms, key = len))
@@ -30,24 +58,11 @@
             break
     return stems_len
 
-def stem_finder_mod(forms, lemma):
-    '''finds length of the stem, returns an integer. called in paradigm_collector'''
-    min_len = len(min(forms, key = len))
-    stems_len = min_len
-    for form in forms:
-        for i in range(min_len):
-            if lemma[i:i+1] != form[i:i+1]:
-                # print(form[i:], end = ', ')
-                if i < stems_len:
-                    stems_len = i
-                break
-    return stems_len
-
 def find_similar(paradigms):
     '''returns dictionary where keys are flections and grammar tags and values are lists of lexemes'''
     similar = {}
     for lemma in paradigms:
-        flecs = frozenset(paradigms[lemma][1])
+        flecs = final_tags(paradigms[lemma][1])
         if flecs not in similar:
             similar[flecs] = [lemma]
         else:
@@ -59,20 +74,29 @@
     print('number of paradigms: ' + str(len(similar)))
     return similar
 
+def final_tags(frozen_info):
+    '''replaces tags'''
+    replacer = {'msc' : 'm', 'anin': 'an', 'fem' : 'f', 'inan' : 'nn', 'anim' : 'aa', 'neu' : 'nt'}
+    new_info = []
+    for wordform in frozen_info:
+        for replacement in replacer:
+            wordform = wordform.replace(replacement, replacer[replacement])
+        new_info.append(wordform)
+    return frozenset(new_info)
+
 # info = forms_collector('../../stuffs') #../../stuffs # someverbs.txt
 # with codecs.open('../../verbs_z_experiment.json', 'w', 'utf-8')as f:
 #     json.dump(info, f, ensure_ascii=False, indent=2)
 
-
 def par_splitter(info):
     imp = {k:[] for k in info.keys()}
     pstact, pstpss, other = copy.deepcopy(imp), copy.deepcopy(imp), copy.deepcopy(imp)
     for lexeme in info:
         for wordform in info[lexeme]:
-            if 'pstpss' in wordform[1]:
+            if 'pstpss' in wordform[1] and 'pred' not in wordform[1]:
                 pstpss[lexeme].append(wordform)
-            elif 'pstact' in wordform[1]:
+            elif 'pstact' in wordform[1] and 'adv' not in wordform[1]:
                 pstact[lexeme].append(wordform)
             elif 'imp' in wordform[1]:
                 imp[lexeme].append(wordform)
@@ -84,8 +108,18 @@
             d.pop(l)
     return pstpss, pstact, imp, other
 
-def pstpss_par_maker(classes):
-    pass
+def pstpss_par_maker(similar):
+    text = '<pardefs>\n\n'
+    for infl_class in similar:
+        text += '<pardef>\n'
+        for item in infl_class:
+            item = item.split()
+            text += '<e><p><l></l><r>' + item[0]
+            for tag in item[2:]:
+                text += '<s n="' + tag + '"/>'
+            text += '</r></p></e>\n'
+        text += '</pardef>\n\n'
+    print(text)
 
 def pstact_par_maker(classes):
     pass
@@ -96,6 +130,16 @@
 def whole_par(classes, pstpss_par, pstact_par, imp_par):
     pass
 
+def lexeme_spliter(info):
+    for lexeme in info:
+        infinitive = 0
+        for wordform in info[lexeme]:
+            if 'inf' in wordform[1]:
+                infinitive += 1
+        if infinitive == 2:
+            print(lexeme)
+
+
 def find_paradigm(word, inventories, similar):
     for inventory in inventories:
@@ -107,22 +151,47 @@
             if similar[key] == wordclass:
                 print(key)
 
+def fun_debugging_time(similar):
+    inventories = similar.values()
+    # greatest = sorted(inventories, key=len)[-1]
+    # print('length of the greatest class: ' + str(len(greatest)))
+    # print('three words from the greatest wordclass: ' + greatest[0] + ', ' + greatest[1] + ', ' + greatest[2])
+    # find_paradigm(greatest[0], inventories, similar)
+    # print('----------------')
+    # second = sorted(inventories, key=len)[-2]
+    # print('length of the second greatest class: ' + str(len(second)))
+    # print('three words from the second greatest wordclass: ' + second[0] + ', ' + second[1] + ', ' + second[2])
+    # find_paradigm(second[0], inventories, similar)
+    # print('----------------')
+    # third = sorted(inventories, key=len)[-3]
+    # print('length of the third greatest class: ' + str(len(third)))
+    # print('two words from the third greatest wordclass: ' + third[0] + ', ' + third[1]) # + ', ' + third[2])
+    # find_paradigm(third[0], inventories, similar)
+    # print('----------------')
+    fourth = sorted(inventories, key=len)[-4]
+    print('length of the fourth greatest class: ' + str(len(fourth)))
+    print('a word from the fourth greatest wordclass: ' + fourth[0]) # + ', ' + third[2])
+    print(fourth)
+    find_paradigm(fourth[0], inventories, similar)
+
 def main():
     with codecs.open('../../verbs_z.json', 'r', 'utf-8')as f:
         info = json.load(f)
+
+    # lexeme_spliter(info) # debug this
     pstpss, pstact, imp, other = par_splitter(info)
     similar_pstact = find_similar(paradigm_collector(pstact))
+    similar_pstpss = find_similar(paradigm_collector(pstpss))
+
+    pstpss_par_maker(similar_pstpss)
 
     # import pickle
     # pickle.dump(similar_pstact, open( "save.p", "wb" ) )
-    inventories = similar_pstact.values()
-    greatest = sorted(inventories, key=len)[2]
-    print('length of the greatest class: ' + str(len(greatest)))
-    print('the greatest class: ')
-    find_paradigm(greatest[0], inventories, similar_pstact)
-    print(greatest)
+    fun_debugging_time(similar_pstpss)
+
+
 main()
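
A note for readers tracing the revision: paradigm_collector delegates the stem/flection split to stem_finder, which measures the longest prefix shared by the (re-chosen) lemma and every form. A minimal standalone sketch of that step, using an invented passive-participle paradigm rather than real verbs_z.json data:

    # Toy run of stem_finder as committed above; the forms are invented.
    def stem_finder(forms, lemma):
        '''finds length of the stem, returns an integer'''
        min_len = len(min(forms, key=len))
        stems_len = min_len
        for form in forms:
            for i in range(min_len):
                if lemma[i:i+1] != form[i:i+1]:
                    if i < stems_len:
                        stems_len = i
                    break
        return stems_len

    forms = ['сделанный', 'сделанного', 'сделанная']  # invented pstpss forms
    lemma = 'сделанный'                               # msc nom sg, as choose_lemma would pick
    n = stem_finder(forms, lemma)
    print(lemma[:n])               # сделанн -- the stem
    print([f[n:] for f in forms])  # ['ый', 'ого', 'ая'] -- the flections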
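
The two tag-rewriting passes can likewise be traced on a single tag string. The sketch below inlines change_tags and a per-string version of what final_tags does to each wordform; the input string is invented for illustration:

    def change_tags(grammar_tags, secondary=True):
        # first pass: drop the use/ant and fac labels, collapse doubled pstpss
        grammar_tags = grammar_tags.replace(' use/ant', '').replace(' fac', '').replace('pstpss pstpss ', 'pstpss ')
        if secondary:
            # second pass: strip v impf/v perf, tv/iv and prb
            grammar_tags = grammar_tags.replace('v impf ', '').replace('v perf ', '').replace('tv ', '').replace('iv ', '').replace(' prb', '')
        return grammar_tags

    replacer = {'msc': 'm', 'anin': 'an', 'fem': 'f', 'inan': 'nn', 'anim': 'aa', 'neu': 'nt'}

    tags = 'v perf tv pstpss msc anin nom sg prb'  # invented example
    tags = change_tags(tags)                       # -> 'pstpss msc anin nom sg'
    for old, new in replacer.items():              # what final_tags does per wordform
        tags = tags.replace(old, new)
    print(tags)                                    # -> 'pstpss m an nom sg'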
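
Finally, a sketch of the kind of output pstpss_par_maker prints for one class member, assuming the standard Apertium monodix pardef markup the function targets; the flection ый and the tag string are the invented example above. Note that the item[2:] slice skips item[1] (here pstpss), so that tag never becomes an <s/> element:

    <pardef>
    <e><p><l></l><r>ый<s n="m"/><s n="an"/><s n="nom"/><s n="sg"/></r></p></e>
    </pardef>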