From 15e61768f2b7e0ba746b1d8c188933f414eaf2fe Mon Sep 17 00:00:00 2001 From: Yacine Filali Date: Tue, 23 May 2017 16:17:39 -0700 Subject: [PATCH] Encoding fixes --- talon/signature/learning/classifier.py | 8 ++++---- talon/signature/learning/dataset.py | 2 +- tests/html_quotations_test.py | 2 +- tests/signature/extraction_test.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/talon/signature/learning/classifier.py b/talon/signature/learning/classifier.py index 2ca1a21..f775413 100644 --- a/talon/signature/learning/classifier.py +++ b/talon/signature/learning/classifier.py @@ -36,10 +36,10 @@ def load(saved_classifier_filename, train_data_filename): return joblib.load(saved_classifier_filename) except ValueError: import sys - pickle_options = {} + kwargs = {} if sys.version_info > (3, 0): - pickle_options["encoding"] = "bytes" + kwargs["encoding"] = "latin1" - loaded = pickle.load(open(saved_classifier_filename, 'rb'), **pickle_options) + loaded = pickle.load(open(saved_classifier_filename, 'rb'), **kwargs) joblib.dump(loaded, saved_classifier_filename, compress=True) - return loaded + return joblib.load(saved_classifier_filename) diff --git a/talon/signature/learning/dataset.py b/talon/signature/learning/dataset.py index c7a88d4..5026c0b 100644 --- a/talon/signature/learning/dataset.py +++ b/talon/signature/learning/dataset.py @@ -61,7 +61,7 @@ def parse_msg_sender(filename, sender_known=True): import sys kwargs = {} if sys.version_info > (3, 0): - kwargs["encoding"] = "bytes" + kwargs["encoding"] = "utf8" sender, msg = None, None if os.path.isfile(filename) and not is_sender_filename(filename): diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py index f26148c..2c5c2e5 100644 --- a/tests/html_quotations_test.py +++ b/tests/html_quotations_test.py @@ -306,7 +306,7 @@ def extract_reply_and_check(filename): import sys kwargs = {} if sys.version_info > (3, 0): - kwargs["encoding"] = "bytes" + kwargs["encoding"] = "utf8" f = open(filename, **kwargs) diff --git a/tests/signature/extraction_test.py b/tests/signature/extraction_test.py index 86b8705..b942674 100644 --- a/tests/signature/extraction_test.py +++ b/tests/signature/extraction_test.py @@ -27,7 +27,7 @@ def test_messages_longer_SIGNATURE_MAX_LINES(): import sys kwargs = {} if sys.version_info > (3, 0): - kwargs["encoding"] = "bytes" + kwargs["encoding"] = "utf8" for filename in os.listdir(STRIPPED): filename = os.path.join(STRIPPED, filename)