commit 97c4997b84d90484e06b8a33dc30c4806f4d9f9f
Author: aboelhamd
Date:   Fri Aug 16 23:56:30 2019 +0200

    change sklearn OrdinalEncoder with a custom one.

diff --git a/sklearn-train.py b/sklearn-train.py
index 17b7351..df10b35 100644
--- a/sklearn-train.py
+++ b/sklearn-train.py
@@ -45,10 +45,18 @@ for file in files:
     if data.shape[0] == data.iloc[:,0].nunique():
         data = data.append(data)
 
-    # words (features) encoding
-    from sklearn.preprocessing import OrdinalEncoder
-    enc = OrdinalEncoder(dtype=np.int32)
-    features = enc.fit_transform(data.iloc[:,2:])
+    # words(features) encoding
+    features = data.iloc[:,2:].values
+
+    enc = {}
+    c = 0
+    for i in range (len(features)) :
+        for j in range (len(features[i])) :
+            w = features[i][j]
+            if (w not in enc) :
+                enc[w]=c
+                c=c+1
+            features[i][j]=enc[w]
 
     # save the encoder
     enc_name = os.path.join(models_path, 'encoder'+'-'+file_no_ext)[:256]