Updated talon for Python 3

2017-05-23 15:39:50 -07:00
parent f16ae5110b
commit 086f5ba43b
10 changed files with 75 additions and 61 deletions
@@ -1,13 +1,12 @@
 # -*- coding: utf-8 -*-

 from __future__ import absolute_import
-from . import *
-from . fixtures import *

 import regex as re

 from talon import quotations, utils as u
-
+from . import *
+from .fixtures import *

 RE_WHITESPACE = re.compile("\s")
 RE_DOUBLE_WHITESPACE = re.compile("\s")
@@ -303,7 +302,7 @@ Reply


 def extract_reply_and_check(filename):
-    f = open(filename)
+    f = open(filename, encoding='utf8')

    msg_body = f.read()
    reply = quotations.extract_from_html(msg_body)
@@ -373,7 +372,7 @@ reply
 </blockquote>"""
    msg_body = msg_body.replace('\n', '\r\n')
    extracted = quotations.extract_from_html(msg_body)
-    assert_false(symbol in extracted)    
+    assert_false(symbol in extracted)
    # Keep new lines otherwise "My reply" becomes one word - "Myreply" 
    eq_("<html><head></head><body>My\nreply\n</body></html>", extracted)

@@ -31,7 +31,7 @@ def test_messages_longer_SIGNATURE_MAX_LINES():
        sender, body = dataset.parse_msg_sender(filename)
        text, extracted_signature = signature.extract(body, sender)
        extracted_signature = extracted_signature or ''
-        with open(filename[:-len('body')] + 'signature') as ms:
+        with open(filename[:-len('body')] + 'signature', encoding='utf8') as ms:
            msg_signature = ms.read()
            eq_(msg_signature.strip(), extracted_signature.strip())
            stripped_msg = body.strip()[:len(body.strip())-len(msg_signature)]
@@ -1,12 +1,12 @@
 # coding:utf-8

 from __future__ import absolute_import
-from . import *

-from talon import utils as u
 import cchardet
 import six
-from lxml import html
+
+from talon import utils as u
+from . import *


 def test_get_delimiter():
@@ -16,35 +16,35 @@ def test_get_delimiter():


 def test_unicode():
-    eq_ (u'hi', u.to_unicode('hi'))
-    eq_ (type(u.to_unicode('hi')), six.text_type )
-    eq_ (type(u.to_unicode(u'hi')), six.text_type )
-    eq_ (type(u.to_unicode('привет')), six.text_type )
-    eq_ (type(u.to_unicode(u'привет')), six.text_type )
-    eq_ (u"привет", u.to_unicode('привет'))
-    eq_ (u"привет", u.to_unicode(u'привет'))
+    eq_(u'hi', u.to_unicode('hi'))
+    eq_(type(u.to_unicode('hi')), six.text_type)
+    eq_(type(u.to_unicode(u'hi')), six.text_type)
+    eq_(type(u.to_unicode('привет')), six.text_type)
+    eq_(type(u.to_unicode(u'привет')), six.text_type)
+    eq_(u"привет", u.to_unicode('привет'))
+    eq_(u"привет", u.to_unicode(u'привет'))
    # some latin1 stuff
-    eq_ (u"Versión", u.to_unicode(u'Versi\xf3n'.encode('iso-8859-2'), precise=True))
+    eq_(u"Versión", u.to_unicode(u'Versi\xf3n'.encode('iso-8859-2'), precise=True))


 def test_detect_encoding():
-    eq_ ('ascii', u.detect_encoding(b'qwe').lower())
-    ok_ (u.detect_encoding(
+    eq_('ascii', u.detect_encoding(b'qwe').lower())
+    ok_(u.detect_encoding(
        u'Versi\xf3n'.encode('iso-8859-2')).lower() in [
            'iso-8859-1', 'iso-8859-2'])
-    eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
+    eq_('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
    # fallback to utf-8
    with patch.object(u.chardet, 'detect') as detect:
        detect.side_effect = Exception
-        eq_ ('utf-8', u.detect_encoding('qwe'.encode('utf8')).lower())
+        eq_('utf-8', u.detect_encoding('qwe'.encode('utf8')).lower())


 def test_quick_detect_encoding():
-    eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
-    ok_ (u.quick_detect_encoding(
+    eq_('ascii', u.quick_detect_encoding(b'qwe').lower())
+    ok_(u.quick_detect_encoding(
        u'Versi\xf3n'.encode('windows-1252')).lower() in [
            'windows-1252', 'windows-1250'])
-    eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
+    eq_('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())


@patch.object(cchardet, 'detect')
@@ -84,7 +84,7 @@ Haha
    eq_(u"привет!", u.html_to_text("<b>привет!</b>").decode('utf8'))

    html = '<body><br/><br/>Hi</body>'
-    eq_ (b'Hi', u.html_to_text(html))
+    eq_(b'Hi', u.html_to_text(html))

    html = """Hi
 <style type="text/css">
@@ -104,7 +104,7 @@ font: 13px 'Lucida Grande', Arial, sans-serif;

 }
 </style>"""
-    eq_ (b'Hi', u.html_to_text(html))
+    eq_(b'Hi', u.html_to_text(html))

    html = """<div>
 <!-- COMMENT 1 -->
@@ -115,15 +115,16 @@ font: 13px 'Lucida Grande', Arial, sans-serif;


 def test_comment_no_parent():
-    s = "<!-- COMMENT 1 --> no comment"
+    s = b'<!-- COMMENT 1 --> no comment'
    d = u.html_document_fromstring(s)
-    eq_("no comment", u.html_tree_to_text(d))
+    eq_(b"no comment", u.html_tree_to_text(d))


@patch.object(u.html5parser, 'fromstring', Mock(side_effect=Exception()))
 def test_html_fromstring_exception():
    eq_(None, u.html_fromstring("<html></html>"))

+
@patch.object(u, 'html_too_big', Mock())
@patch.object(u.html5parser, 'fromstring')
 def test_html_fromstring_too_big(fromstring):
@@ -158,5 +159,5 @@ def test_html_too_big():

@patch.object(u, '_MAX_TAGS_COUNT', 3)
 def test_html_to_text():
-    eq_("Hello", u.html_to_text("<div>Hello</div>"))
+    eq_(b"Hello", u.html_to_text("<div>Hello</div>"))
    eq_(None, u.html_to_text("<div><span>Hi</span></div>"))