commit 789cbb8df752470cbe81f2e08261d8904e189dc0
Author: aboelhamd
Date:   Sun Aug 4 23:30:33 2019 +0200

    Fix the error caused by over-long file names. Cap training records at
    200k so that training takes hours rather than days.

diff --git a/sklearn-train.py b/sklearn-train.py
index 66fcba4..17b7351 100644
--- a/sklearn-train.py
+++ b/sklearn-train.py
@@ -34,19 +34,12 @@ for file in files:
     file_no_ext = file_no_ext[:file_no_ext.find('.')]
 
     # These are the classifiers that permit training data with sample weights!
-    models_names = ["NaiveBayes", "LinearSVM", "RBFSVM", "DecisionTree",
-                    "RandomForest", "AdaBoost"]
+    models_names = ["LinearSVM"]
 
-    classifiers = [
-        GaussianNB(),
-        SVC(kernel="linear", C=0.025),
-        SVC(gamma=2, C=1),
-        DecisionTreeClassifier(max_depth=5),
-        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
-        AdaBoostClassifier()]
+    classifiers = [SVC(kernel="linear", C=0.025)]
 
     print("file name :", file)
-    data = pd.read_csv(files[file], delimiter=r"\s+").dropna()
+    data = pd.read_csv(files[file], delimiter=r"\s+").dropna().iloc[:200000]
 
     # if records equals to classes number, duplicates the data
     if data.shape[0] == data.iloc[:,0].nunique():
@@ -58,7 +51,7 @@ for file in files:
     features = enc.fit_transform(data.iloc[:,2:])
 
     # save the encoder
-    enc_name = os.path.join(models_path, 'encoder'+'-'+file_no_ext)
+    enc_name = os.path.join(models_path, 'encoder'+'-'+file_no_ext)[:256]
     joblib.dump(enc, enc_name)
 
     # target and weights
@@ -82,6 +75,6 @@ for file in files:
         print(" score =", score)
 
         # save models
-        model_name = os.path.join(models_path, name+'-'+file_no_ext)
+        model_name = os.path.join(models_path, name+'-'+file_no_ext)[:256]
         joblib.dump(model, model_name)
         print("----------------------------------------------\n")
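
Note on the long-file-name fix: truncating the joined path to 256 characters
keeps the saved basename under the typical per-component filesystem limit
(255 bytes on most Linux filesystems), provided models_path itself is short.
However, two long stems that share the same 256-character prefix would
silently overwrite each other's saved encoder or model. A minimal sketch of
a collision-free alternative, using a hypothetical helper safe_model_path
that is not part of this repo:

    # Hypothetical helper (not in this repo): bound the basename length
    # while keeping names unique. A short, readable prefix of the stem is
    # kept and a hash of the full stem is appended, so two long stems that
    # share a prefix no longer collide after truncation.
    import hashlib
    import os

    def safe_model_path(models_path, prefix, file_no_ext, max_stem=100):
        stem = file_no_ext
        if len(stem) > max_stem:
            digest = hashlib.sha1(stem.encode('utf-8')).hexdigest()[:12]
            stem = stem[:max_stem] + '-' + digest
        return os.path.join(models_path, prefix + '-' + stem)

    # usage: model_name = safe_model_path(models_path, name, file_no_ext)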
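
On the classifier list: the script's comment notes that all six original
models accept per-record sample weights, and this commit keeps only
LinearSVM for speed. A minimal sketch of weighted training with the
surviving classifier, assuming (as the "# target and weights" section
suggests) that weights reach the model through scikit-learn's
sample_weight parameter; the toy arrays below are illustrative, not the
repo's data:

    import numpy as np
    from sklearn.svm import SVC

    X = np.array([[0., 0.], [1., 1.], [1., 0.], [0., 1.]])  # encoded features
    y = np.array([0, 1, 1, 0])                              # target classes
    w = np.array([1.0, 2.0, 0.5, 1.0])                      # per-record weights

    # SVC.fit accepts a sample_weight array aligned with the rows of X, so
    # heavier records pull the decision boundary harder than lighter ones.
    model = SVC(kernel="linear", C=0.025)
    model.fit(X, y, sample_weight=w)
    print("score =", model.score(X, y))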