Python 2.7 backward compatibility

This commit is contained in:
Yacine Filali
2017-05-23 16:10:13 -07:00
parent 086f5ba43b
commit dd0a0f5c4d
5 changed files with 52 additions and 32 deletions

Binary file not shown.

View File

@@ -10,8 +10,8 @@ from __future__ import absolute_import
import pickle
from numpy import genfromtxt
from sklearn.svm import LinearSVC
from sklearn.externals import joblib
from sklearn.svm import LinearSVC
def init():
@@ -35,7 +35,11 @@ def load(saved_classifier_filename, train_data_filename):
try:
return joblib.load(saved_classifier_filename)
except ValueError:
loaded = pickle.load(open(saved_classifier_filename, 'rb'), encoding='latin1')
import sys
pickle_options = {}
if sys.version_info > (3, 0):
pickle_options["encoding"] = "bytes"
loaded = pickle.load(open(saved_classifier_filename, 'rb'), **pickle_options)
joblib.dump(loaded, saved_classifier_filename, compress=True)
return loaded

View File

@@ -58,9 +58,14 @@ def parse_msg_sender(filename, sender_known=True):
algorithm:
>>> parse_msg_sender(filename, False)
"""
import sys
kwargs = {}
if sys.version_info > (3, 0):
kwargs["encoding"] = "bytes"
sender, msg = None, None
if os.path.isfile(filename) and not is_sender_filename(filename):
with open(filename, encoding='utf-8') as f:
with open(filename, **kwargs) as f:
msg = f.read()
sender = u''
if sender_known:

View File

@@ -2,7 +2,8 @@
from __future__ import absolute_import
import regex as re
# noinspection PyUnresolvedReferences
import re
from talon import quotations, utils as u
from . import *
@@ -302,7 +303,12 @@ Reply
def extract_reply_and_check(filename):
f = open(filename, encoding='utf8')
import sys
kwargs = {}
if sys.version_info > (3, 0):
kwargs["encoding"] = "bytes"
f = open(filename, **kwargs)
msg_body = f.read()
reply = quotations.extract_from_html(msg_body)

View File

@@ -1,16 +1,16 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from .. import *
import os
from talon.signature.learning import dataset
from talon import signature
from talon.signature import extraction as e
from talon.signature import bruteforce
from six.moves import range
from talon.signature import bruteforce, extraction, extract
from talon.signature import extraction as e
from talon.signature.learning import dataset
from .. import *
def test_message_shorter_SIGNATURE_MAX_LINES():
sender = "bob@foo.bar"
@@ -18,23 +18,28 @@ def test_message_shorter_SIGNATURE_MAX_LINES():
Thanks in advance,
Bob"""
text, extracted_signature = signature.extract(body, sender)
text, extracted_signature = extract(body, sender)
eq_('\n'.join(body.splitlines()[:2]), text)
eq_('\n'.join(body.splitlines()[-2:]), extracted_signature)
def test_messages_longer_SIGNATURE_MAX_LINES():
import sys
kwargs = {}
if sys.version_info > (3, 0):
kwargs["encoding"] = "bytes"
for filename in os.listdir(STRIPPED):
filename = os.path.join(STRIPPED, filename)
if not filename.endswith('_body'):
continue
sender, body = dataset.parse_msg_sender(filename)
text, extracted_signature = signature.extract(body, sender)
text, extracted_signature = extract(body, sender)
extracted_signature = extracted_signature or ''
with open(filename[:-len('body')] + 'signature', encoding='utf8') as ms:
with open(filename[:-len('body')] + 'signature', **kwargs) as ms:
msg_signature = ms.read()
eq_(msg_signature.strip(), extracted_signature.strip())
stripped_msg = body.strip()[:len(body.strip())-len(msg_signature)]
stripped_msg = body.strip()[:len(body.strip()) - len(msg_signature)]
eq_(stripped_msg.strip(), text.strip())
@@ -47,7 +52,7 @@ Thanks in advance,
some text which doesn't seem to be a signature at all
Bob"""
text, extracted_signature = signature.extract(body, sender)
text, extracted_signature = extract(body, sender)
eq_('\n'.join(body.splitlines()[:2]), text)
eq_('\n'.join(body.splitlines()[-3:]), extracted_signature)
@@ -60,7 +65,7 @@ Thanks in advance,
some long text here which doesn't seem to be a signature at all
Bob"""
text, extracted_signature = signature.extract(body, sender)
text, extracted_signature = extract(body, sender)
eq_('\n'.join(body.splitlines()[:-1]), text)
eq_('Bob', extracted_signature)
@@ -68,13 +73,13 @@ Bob"""
some *long* text here which doesn't seem to be a signature at all
"""
((body, None), signature.extract(body, "david@example.com"))
((body, None), extract(body, "david@example.com"))
def test_basic():
msg_body = 'Blah\r\n--\r\n\r\nSergey Obukhov'
eq_(('Blah', '--\r\n\r\nSergey Obukhov'),
signature.extract(msg_body, 'Sergey'))
extract(msg_body, 'Sergey'))
def test_capitalized():
@@ -99,7 +104,7 @@ Doe Inc
Doe Inc
555-531-7967"""
eq_(sig, signature.extract(msg_body, 'Doe')[1])
eq_(sig, extract(msg_body, 'Doe')[1])
def test_over_2_text_lines_after_signature():
@@ -110,25 +115,25 @@ def test_over_2_text_lines_after_signature():
2 non signature lines in the end
It's not signature
"""
text, extracted_signature = signature.extract(body, "Bob")
text, extracted_signature = extract(body, "Bob")
eq_(extracted_signature, None)
def test_no_signature():
sender, body = "bob@foo.bar", "Hello"
eq_((body, None), signature.extract(body, sender))
eq_((body, None), extract(body, sender))
def test_handles_unicode():
sender, body = dataset.parse_msg_sender(UNICODE_MSG)
text, extracted_signature = signature.extract(body, sender)
text, extracted_signature = extract(body, sender)
@patch.object(signature.extraction, 'has_signature')
@patch.object(extraction, 'has_signature')
def test_signature_extract_crash(has_signature):
has_signature.side_effect = Exception('Bam!')
msg_body = u'Blah\r\n--\r\n\r\nСергей'
eq_((msg_body, None), signature.extract(msg_body, 'Сергей'))
eq_((msg_body, None), extract(msg_body, 'Сергей'))
def test_mark_lines():