Files
talon/talon/signature/learning/classifier.py
2016-07-12 17:25:46 +05:00

33 lines
1015 B
Python

# -*- coding: utf-8 -*-
"""Functions to init, train, save and load a classifier.

The classifier can be used to detect whether a given line of a message
body belongs to the signature.
"""
from __future__ import absolute_import
from numpy import genfromtxt
from sklearn.svm import LinearSVC
from sklearn.externals import joblib
def init():
    """Build a fresh, untrained classifier with the module's chosen options.

    Returns a ``LinearSVC`` constructed with ``C=10.0``.
    """
    svc_options = {"C": 10.0}
    return LinearSVC(**svc_options)
def train(classifier, train_data_filename, save_classifier_filename=None):
    """Fit the classifier on comma-separated data and optionally persist it.

    The data is read with ``genfromtxt`` using ``,`` as the delimiter. The
    last column of every row is passed to ``fit`` as the labels, all the
    preceding columns as the training samples.

    If ``save_classifier_filename`` is truthy, the fitted classifier is
    dumped there with ``joblib`` so that it can be loaded later.

    Returns the same classifier object that was passed in.
    """
    table = genfromtxt(train_data_filename, delimiter=",")

    # Split column-wise: everything but the last column vs. the last column.
    samples = table[:, :-1]
    targets = table[:, -1]
    classifier.fit(samples, targets)

    if not save_classifier_filename:
        return classifier

    joblib.dump(classifier, save_classifier_filename)
    return classifier
def load(saved_classifier_filename, train_data_filename=None):
    """Load a previously saved classifier.

    ``saved_classifier_filename`` is handed to ``joblib.load`` and the
    deserialized object is returned as is.

    ``train_data_filename`` is not used by this function. It is kept in the
    signature so that existing two-argument callers keep working, and it
    defaults to ``None`` so that new callers may omit it.
    """
    # NOTE(review): joblib persistence is pickle-based, so this should only be
    # pointed at classifier files that come from a trusted source.
    return joblib.load(saved_classifier_filename)