Remove flanker and replace PyML with scikit-learn
I was never able to install PyML successfully, and its SourceForge distribution and lack of Python 3 support convinced me that scikit-learn would be a fine substitute. Flanker was also difficult for me to install and seemed to be used only in the tests, so I removed it as well in order to get to a point where I could run the tests. As of this commit, only one test is not passing (test_standard_replies with android.eml), though I'm not yet familiar with the `email` library.
This commit is contained in:
		
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								talon/signature/data/classifier_01.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								talon/signature/data/classifier_01.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								talon/signature/data/classifier_02.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								talon/signature/data/classifier_02.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								talon/signature/data/classifier_03.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								talon/signature/data/classifier_03.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								talon/signature/data/classifier_04.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								talon/signature/data/classifier_04.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								talon/signature/data/classifier_05.npy
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								talon/signature/data/classifier_05.npy
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| @@ -3,7 +3,7 @@ | ||||
| import logging | ||||
|  | ||||
| import regex as re | ||||
| from PyML import SparseDataSet | ||||
| import numpy | ||||
|  | ||||
| from talon.signature.learning.featurespace import features, build_pattern | ||||
| from talon.utils import get_delimiter | ||||
| @@ -32,8 +32,8 @@ RE_REVERSE_SIGNATURE = re.compile(r''' | ||||
|  | ||||
| def is_signature_line(line, sender, classifier): | ||||
|     '''Checks if the line belongs to signature. Returns True or False.''' | ||||
|     data = SparseDataSet([build_pattern(line, features(sender))]) | ||||
|     return classifier.decisionFunc(data, 0) > 0 | ||||
|     data = numpy.array(build_pattern(line, features(sender))) | ||||
|     return classifier.predict(data) > 0 | ||||
|  | ||||
|  | ||||
| def extract(body, sender): | ||||
|   | ||||
| @@ -5,32 +5,27 @@ The classifier could be used to detect if a certain line of the message | ||||
| body belongs to the signature. | ||||
| """ | ||||
|  | ||||
| import os | ||||
| import sys | ||||
|  | ||||
| from PyML import SparseDataSet, SVM | ||||
| from numpy import genfromtxt | ||||
| from sklearn.svm import LinearSVC | ||||
| from sklearn.externals import joblib | ||||
|  | ||||
|  | ||||
| def init(): | ||||
|     '''Inits classifier with optimal options.''' | ||||
|     return SVM(C=10, optimization='liblinear') | ||||
|     """Inits classifier with optimal options.""" | ||||
|     return LinearSVC(C=10.0) | ||||
|  | ||||
|  | ||||
| def train(classifier, train_data_filename, save_classifier_filename=None): | ||||
|     '''Trains and saves classifier so that it could be easily loaded later.''' | ||||
|     data = SparseDataSet(train_data_filename, labelsColumn=-1) | ||||
|     classifier.train(data) | ||||
|     """Trains and saves classifier so that it could be easily loaded later.""" | ||||
|     file_data = genfromtxt(train_data_filename, delimiter=",") | ||||
|     train_data, labels = file_data[:, :-1], file_data[:, -1] | ||||
|     classifier.fit(train_data, labels) | ||||
|  | ||||
|     if save_classifier_filename: | ||||
|         classifier.save(save_classifier_filename) | ||||
|         joblib.dump(classifier, save_classifier_filename) | ||||
|     return classifier | ||||
|  | ||||
|  | ||||
| def load(saved_classifier_filename, train_data_filename): | ||||
|     """Loads saved classifier. | ||||
|  | ||||
|     Classifier should be loaded with the same data it was trained against | ||||
|     """ | ||||
|     train_data = SparseDataSet(train_data_filename, labelsColumn=-1) | ||||
|     classifier = init() | ||||
|     classifier.load(saved_classifier_filename, train_data) | ||||
|     return classifier | ||||
|     """Loads saved classifier. """ | ||||
|     return joblib.load(saved_classifier_filename) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user