From da998ddb6039f4ee660787b6f50b821eee332831 Mon Sep 17 00:00:00 2001
From: Umair Khan <umair.waheed@gmail.com>
Date: Tue, 12 Jul 2016 17:25:46 +0500
Subject: [PATCH] Run python-modernize on the code for Python 2/3 compatibility.

---
 setup.py                                      |  1 +
 talon/__init__.py                             |  1 +
 talon/constants.py                            |  1 +
 talon/html_quotations.py                      |  1 +
 talon/quotations.py                           |  6 ++++--
 talon/signature/__init__.py                   |  1 +
 talon/signature/bruteforce.py                 |  3 ++-
 talon/signature/extraction.py                 |  1 +
 talon/signature/learning/classifier.py        |  1 +
 talon/signature/learning/dataset.py           |  4 +++-
 talon/signature/learning/featurespace.py      |  3 +++
 talon/signature/learning/helpers.py           |  1 +
 talon/utils.py                                | 14 +++++++------
 tests/__init__.py                             |  1 +
 tests/html_quotations_test.py                 |  1 +
 tests/quotations_test.py                      |  1 +
 tests/signature/bruteforce_test.py            |  1 +
 tests/signature/extraction_test.py            | 20 ++++++++++---------
 tests/signature/learning/dataset_test.py      |  1 +
 tests/signature/learning/featurespace_test.py |  1 +
 tests/signature/learning/helpers_test.py      |  6 ++++--
 tests/text_quotations_test.py                 |  7 +++++--
 tests/utils_test.py                           | 10 ++++++----
 train.py                                      |  1 +
 24 files changed, 61 insertions(+), 27 deletions(-)

diff --git a/setup.py b/setup.py
index 8253212..79e3f20 100755
--- a/setup.py
+++ b/setup.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from setuptools import setup, find_packages
 from setuptools.command.install import install
 
diff --git a/talon/__init__.py b/talon/__init__.py
index de27ae6..7060f5b 100644
--- a/talon/__init__.py
+++ b/talon/__init__.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from talon.quotations import register_xpath_extensions
 try:
     from talon import signature
diff --git a/talon/constants.py b/talon/constants.py
index 68fa04c..0e7276d 100644
--- a/talon/constants.py
+++ b/talon/constants.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import regex as re
 
 
diff --git a/talon/html_quotations.py b/talon/html_quotations.py
index 1af78ac..4aa7e74 100644
--- a/talon/html_quotations.py
+++ b/talon/html_quotations.py
@@ -3,6 +3,7 @@ The module's functions operate on message bodies trying to extract original
 messages (without quoted messages) from html
 """
 
+from __future__ import absolute_import
 import regex as re
 
 
diff --git a/talon/quotations.py b/talon/quotations.py
index ff23daa..b294de5 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -5,6 +5,7 @@ The module's functions operate on message bodies trying to extract
 original messages (without quoted messages)
 """
 
+from __future__ import absolute_import
 import regex as re
 import logging
 from copy import deepcopy
@@ -13,6 +14,7 @@ from lxml import html, etree
 
 from talon.utils import get_delimiter, html_to_text
 from talon import html_quotations
+from six.moves import range
 
 
 log = logging.getLogger(__name__)
@@ -207,7 +209,7 @@ def mark_message_lines(lines):
             if splitter:
                 # append as many splitter markers as lines in splitter
                 splitter_lines = splitter.group().splitlines()
-                for j in xrange(len(splitter_lines)):
+                for j in range(len(splitter_lines)):
                     markers[i + j] = 's'
 
                 # skip splitter lines
@@ -388,7 +390,7 @@ def extract_from_html(msg_body):
     lines_were_deleted, first_deleted, last_deleted = return_flags
     if lines_were_deleted:
         #collect checkpoints from deleted lines
-        for i in xrange(first_deleted, last_deleted):
+        for i in range(first_deleted, last_deleted):
             for checkpoint in line_checkpoints[i]:
                 quotation_checkpoints[checkpoint] = True
     else:
diff --git a/talon/signature/__init__.py b/talon/signature/__init__.py
index a871447..fc60e1d 100644
--- a/talon/signature/__init__.py
+++ b/talon/signature/__init__.py
@@ -20,6 +20,7 @@ trained against, don't forget to regenerate:
 * signature/data/classifier
 """
 
+from __future__ import absolute_import
 import os
 
 from . import extraction
diff --git a/talon/signature/bruteforce.py b/talon/signature/bruteforce.py
index d3493bb..7f666bd 100644
--- a/talon/signature/bruteforce.py
+++ b/talon/signature/bruteforce.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 import logging
 
 import regex as re
@@ -111,7 +112,7 @@ def extract_signature(msg_body):
 
             return (stripped_body.strip(),
                     signature.strip())
-    except Exception, e:
+    except Exception as e:
         log.exception('ERROR extracting signature')
         return (msg_body, None)
 
diff --git a/talon/signature/extraction.py b/talon/signature/extraction.py
index 995ad27..3259171 100644
--- a/talon/signature/extraction.py
+++ b/talon/signature/extraction.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 import logging
 
 import regex as re
diff --git a/talon/signature/learning/classifier.py b/talon/signature/learning/classifier.py
index 9ce5e75..8ec3228 100644
--- a/talon/signature/learning/classifier.py
+++ b/talon/signature/learning/classifier.py
@@ -5,6 +5,7 @@ The classifier could be used to detect if a certain line of the message
 body belongs to the signature.
 """
 
+from __future__ import absolute_import
 from numpy import genfromtxt
 from sklearn.svm import LinearSVC
 from sklearn.externals import joblib
diff --git a/talon/signature/learning/dataset.py b/talon/signature/learning/dataset.py
index b0a9f45..308995b 100644
--- a/talon/signature/learning/dataset.py
+++ b/talon/signature/learning/dataset.py
@@ -16,11 +16,13 @@ suffix and the corresponding sender file has the same name except for the
 suffix which should be `_sender`.
 """
 
+from __future__ import absolute_import
 import os
 import regex as re
 
 from talon.signature.constants import SIGNATURE_MAX_LINES
 from talon.signature.learning.featurespace import build_pattern, features
+from six.moves import range
 
 
 SENDER_SUFFIX = '_sender'
@@ -144,7 +146,7 @@ def build_extraction_dataset(folder, dataset_filename,
             if not sender or not msg:
                 continue
             lines = msg.splitlines()
-            for i in xrange(1, min(SIGNATURE_MAX_LINES,
+            for i in range(1, min(SIGNATURE_MAX_LINES,
                                    len(lines)) + 1):
                 line = lines[-i]
                 label = -1
diff --git a/talon/signature/learning/featurespace.py b/talon/signature/learning/featurespace.py
index 60676f9..649e859 100644
--- a/talon/signature/learning/featurespace.py
+++ b/talon/signature/learning/featurespace.py
@@ -7,9 +7,12 @@ The body and the message sender string are converted into unicode before
 applying features to them.
 """
 
+from __future__ import absolute_import
 from talon.signature.constants import (SIGNATURE_MAX_LINES,
                                        TOO_LONG_SIGNATURE_LINE)
 from talon.signature.learning.helpers import *
+from six.moves import zip
+from functools import reduce
 
 
 def features(sender=''):
diff --git a/talon/signature/learning/helpers.py b/talon/signature/learning/helpers.py
index 7085a74..f94c688 100644
--- a/talon/signature/learning/helpers.py
+++ b/talon/signature/learning/helpers.py
@@ -6,6 +6,7 @@
 
 """
 
+from __future__ import absolute_import
 import unicodedata
 import regex as re
 
diff --git a/talon/utils.py b/talon/utils.py
index dc47622..e4bd19b 100644
--- a/talon/utils.py
+++ b/talon/utils.py
@@ -1,5 +1,6 @@
 # coding:utf-8
 
+from __future__ import absolute_import
 import logging
 from random import shuffle
 import chardet
@@ -10,6 +11,7 @@ from lxml import html
 from lxml.cssselect import CSSSelector
 
 from talon.constants import RE_DELIMITER
+import six
 
 
 def safe_format(format_string, *args, **kwargs):
@@ -28,7 +30,7 @@ def safe_format(format_string, *args, **kwargs):
     except (UnicodeEncodeError, UnicodeDecodeError):
         format_string = to_utf8(format_string)
         args = [to_utf8(p) for p in args]
-        kwargs = {k: to_utf8(v) for k, v in kwargs.iteritems()}
+        kwargs = {k: to_utf8(v) for k, v in six.iteritems(kwargs)}
         return format_string.format(*args, **kwargs)
 
     # ignore other errors
@@ -47,7 +49,7 @@ def to_unicode(str_or_unicode, precise=False):
     """
     encoding = quick_detect_encoding(str_or_unicode) if precise else 'utf-8'
     if isinstance(str_or_unicode, str):
-        return unicode(str_or_unicode, encoding, 'replace')
+        return six.text_type(str_or_unicode, encoding, 'replace')
     return str_or_unicode
 
 
@@ -61,7 +63,7 @@ def detect_encoding(string):
         detected = chardet.detect(string)
         if detected:
             return detected.get('encoding') or 'utf-8'
-    except Exception, e:
+    except Exception as e:
         pass
     return 'utf-8'
 
@@ -76,7 +78,7 @@ def quick_detect_encoding(string):
         detected = cchardet.detect(string)
         if detected:
             return detected.get('encoding') or detect_encoding(string)
-    except Exception, e:
+    except Exception as e:
         pass
     return detect_encoding(string)
 
@@ -87,7 +89,7 @@ def to_utf8(str_or_unicode):
     >>> utils.to_utf8(u'hi')
         'hi'
     """
-    if isinstance(str_or_unicode, unicode):
+    if isinstance(str_or_unicode, six.text_type):
         return str_or_unicode.encode("utf-8", "ignore")
     return str(str_or_unicode)
 
@@ -173,7 +175,7 @@ def _rm_excessive_newlines(s):
 def _encode_utf8(s):
     """Encode in 'utf-8' if unicode
     """
-    return s.encode('utf-8') if isinstance(s, unicode) else s
+    return s.encode('utf-8') if isinstance(s, six.text_type) else s
 
 
 _UTF8_DECLARATION = ('<meta http-equiv="Content-Type" content="text/html;'
diff --git a/tests/__init__.py b/tests/__init__.py
index 8bd86cb..8fdebd6 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from nose.tools import *
 from mock import *
 
diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py
index 4002d0f..5794545 100644
--- a/tests/html_quotations_test.py
+++ b/tests/html_quotations_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from . import *
 from . fixtures import *
 
diff --git a/tests/quotations_test.py b/tests/quotations_test.py
index 7184368..e5ed041 100644
--- a/tests/quotations_test.py
+++ b/tests/quotations_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from . import *
 from . fixtures import *
 
diff --git a/tests/signature/bruteforce_test.py b/tests/signature/bruteforce_test.py
index 09665fe..382615b 100644
--- a/tests/signature/bruteforce_test.py
+++ b/tests/signature/bruteforce_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from .. import *
 
 from talon.signature import bruteforce
diff --git a/tests/signature/extraction_test.py b/tests/signature/extraction_test.py
index a055064..72e962f 100644
--- a/tests/signature/extraction_test.py
+++ b/tests/signature/extraction_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from .. import *
 
 import os
@@ -8,6 +9,7 @@ from talon.signature.learning import dataset
 from talon import signature
 from talon.signature import extraction as e
 from talon.signature import bruteforce
+from six.moves import range
 
 
 def test_message_shorter_SIGNATURE_MAX_LINES():
@@ -127,20 +129,20 @@ def test_mark_lines():
 
 def test_process_marked_lines():
     # no signature found
-    eq_((range(5), None), e._process_marked_lines(range(5), 'telt'))
+    eq_((list(range(5)), None), e._process_marked_lines(list(range(5)), 'telt'))
 
     # signature in the middle of the text
-    eq_((range(9), None), e._process_marked_lines(range(9), 'tesestelt'))
+    eq_((list(range(9)), None), e._process_marked_lines(list(range(9)), 'tesestelt'))
 
     # long line splits signature
-    eq_((range(7), [7, 8]),
-        e._process_marked_lines(range(9), 'tsslsless'))
+    eq_((list(range(7)), [7, 8]),
+        e._process_marked_lines(list(range(9)), 'tsslsless'))
 
-    eq_((range(20), [20]),
-        e._process_marked_lines(range(21), 'ttttttstttesllelelets'))
+    eq_((list(range(20)), [20]),
+        e._process_marked_lines(list(range(21)), 'ttttttstttesllelelets'))
 
     # some signature lines could be identified as text
-    eq_(([0], range(1, 9)), e._process_marked_lines(range(9), 'tsetetest'))
+    eq_(([0], list(range(1, 9))), e._process_marked_lines(list(range(9)), 'tsetetest'))
 
-    eq_(([], range(5)),
-        e._process_marked_lines(range(5), "ststt"))
+    eq_(([], list(range(5))),
+        e._process_marked_lines(list(range(5)), "ststt"))
diff --git a/tests/signature/learning/dataset_test.py b/tests/signature/learning/dataset_test.py
index 42d8ae6..8e15275 100644
--- a/tests/signature/learning/dataset_test.py
+++ b/tests/signature/learning/dataset_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from ... import *
 import os
 
diff --git a/tests/signature/learning/featurespace_test.py b/tests/signature/learning/featurespace_test.py
index 70df62b..dd9f110 100644
--- a/tests/signature/learning/featurespace_test.py
+++ b/tests/signature/learning/featurespace_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from ... import *
 
 from talon.signature.learning import featurespace as fs
diff --git a/tests/signature/learning/helpers_test.py b/tests/signature/learning/helpers_test.py
index 704db4e..94f186f 100644
--- a/tests/signature/learning/helpers_test.py
+++ b/tests/signature/learning/helpers_test.py
@@ -1,11 +1,13 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from ... import *
 
 import regex as re
 
 from talon.signature.learning import helpers as h
 from talon.signature.learning.helpers import *
+from six.moves import range
 
 # First testing regex constants.
 VALID = '''
@@ -154,7 +156,7 @@ def test_extract_names():
         # check that extracted names could be compiled
         try:
             re.compile("|".join(extracted_names))
-        except Exception, e:
+        except Exception as e:
             ok_(False, ("Failed to compile extracted names {}"
                         "\n\nReason: {}").format(extracted_names, e))
         if expected_names:
@@ -204,7 +206,7 @@ def test_has_signature():
                         'sender@example.com'))
     assert_false(h.has_signature('http://www.example.com/555-555-5555',
                                  'sender@example.com'))
-    long_line = ''.join(['q' for e in xrange(28)])
+    long_line = ''.join(['q' for e in range(28)])
     assert_false(h.has_signature(long_line + ' sender', 'sender@example.com'))
     # wont crash on an empty string
     assert_false(h.has_signature('', ''))
diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py
index 70e1bfa..bfefb3a 100644
--- a/tests/text_quotations_test.py
+++ b/tests/text_quotations_test.py
@@ -1,5 +1,6 @@
 # -*- coding: utf-8 -*-
 
+from __future__ import absolute_import
 from . import *
 from . fixtures import *
 
@@ -7,6 +8,8 @@ import os
 
 import email.iterators
 from talon import quotations
+import six
+from six.moves import range
 
 
 @patch.object(quotations, 'MAX_LINES_COUNT', 1)
@@ -138,7 +141,7 @@ def _check_pattern_original_message(original_message_indicator):
 -----{}-----
 
 Test"""
-    eq_('Test reply', quotations.extract_from_plain(msg_body.format(unicode(original_message_indicator))))
+    eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator))))
 
 def test_english_original_message():
     _check_pattern_original_message('Original Message')
@@ -669,7 +672,7 @@ def test_standard_replies():
             continue
         with open(filename) as f:
             message = email.message_from_file(f)
-            body = email.iterators.typed_subpart_iterator(message, subtype='plain').next()
+            body = next(email.iterators.typed_subpart_iterator(message, subtype='plain'))
             text = ''.join(email.iterators.body_line_iterator(body, True))
 
             stripped_text = quotations.extract_from_plain(text)
diff --git a/tests/utils_test.py b/tests/utils_test.py
index c77f0a6..472e498 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -1,9 +1,11 @@
 # coding:utf-8
 
+from __future__ import absolute_import
 from . import *
 
 from talon import utils as u
 import cchardet
+import six
 
 
 def test_get_delimiter():
@@ -14,10 +16,10 @@ def test_get_delimiter():
 
 def test_unicode():
     eq_ (u'hi', u.to_unicode('hi'))
-    eq_ (type(u.to_unicode('hi')), unicode )
-    eq_ (type(u.to_unicode(u'hi')), unicode )
-    eq_ (type(u.to_unicode('привет')), unicode )
-    eq_ (type(u.to_unicode(u'привет')), unicode )
+    eq_ (type(u.to_unicode('hi')), six.text_type )
+    eq_ (type(u.to_unicode(u'hi')), six.text_type )
+    eq_ (type(u.to_unicode('привет')), six.text_type )
+    eq_ (type(u.to_unicode(u'привет')), six.text_type )
     eq_ (u"привет", u.to_unicode('привет'))
     eq_ (u"привет", u.to_unicode(u'привет'))
     # some latin1 stuff
diff --git a/train.py b/train.py
index 54d04b5..63ac7fa 100644
--- a/train.py
+++ b/train.py
@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from talon.signature import EXTRACTOR_FILENAME, EXTRACTOR_DATA
 from talon.signature.learning.classifier import train, init