commit c7cd079f9dc140068a03d0cdaf16687a85d91dcc Author: aboelhamd Date: Tue Aug 6 22:42:42 2019 +0200 script sklearn-predict is tested and works well. diff --git a/sklearn-predict.py b/sklearn-predict.py index e09c4d5..1488d70 100644 --- a/sklearn-predict.py +++ b/sklearn-predict.py @@ -29,8 +29,9 @@ output = open(output_path, 'w+') # ["NaiveBayes", "LinearSVM", "RBFSVM", "DecisionTree", "RandomForest", "AdaBoost"] for line in data : - file_name = line.split(' ')[0] - features = line.split(' ')[1:] + split = line.split(' ') + file_name = split[0] + features = split[1:len(split)-1] file_no_ext = file_name if (file_no_ext.find('.') != -1) : @@ -43,24 +44,27 @@ for line in data : # see if features are seen before or not seen = True - for i in range (features) : + for i in range (len(features)) : + #print(i, features, enc.categories_) if features[i] not in enc.categories_[i] : seen = False break if seen : # encode words - features = enc.transform(features) + features = enc.transform([features]) # load the model - model_name = os.path.join(models_path, name+'-'+file_no_ext)[:256] - loaded_model = joblib.load(model_name) + name = os.path.join(models_path, model_name+'-'+file_no_ext)[:256] + loaded_model = joblib.load(name) # predict and write in file - output.write(loaded_model.predict([features])+'\n') + #print('prediction = ', loaded_model.predict(features)[0]) + output.write(str(loaded_model.predict(features)[0])) + output.write('\n') else : - print("Words : "+features+", are not found in "+file_name) + print("Words : "+str(features)+", are not found in "+file_name) output.write('0\n') else :