Operate on Unicode data exclusively

2022-02-04 16:08:41 +03:00
parent a8c7e6a972
commit 14f106ee76
6 changed files with 72 additions and 266 deletions
--- a/tests/html_quotations_test.py
+++ b/tests/html_quotations_test.py
@@ -4,14 +4,17 @@ from __future__ import absolute_import

 # noinspection PyUnresolvedReferences
 import re
+from unittest.mock import Mock, patch

+from nose.tools import assert_false, assert_true, eq_, ok_
+
+from tests.fixtures import (OLK_SRC_BODY_SECTION,
+                            REPLY_QUOTATIONS_SHARE_BLOCK,
+                            REPLY_SEPARATED_BY_HR)
 from talon import quotations, utils as u
-from . import *
-from .fixtures import *
-from lxml import html

-RE_WHITESPACE = re.compile("\s")
-RE_DOUBLE_WHITESPACE = re.compile("\s")
+RE_WHITESPACE = re.compile(r"\s")
+RE_DOUBLE_WHITESPACE = re.compile(r"\s")


 def test_quotation_splitter_inside_blockquote():
@@ -166,7 +169,7 @@ def test_unicode_in_reply():

 <blockquote>
  Quote
-</blockquote>""".encode("utf-8")
+</blockquote>"""

    eq_("<html><head></head><body>Reply&#160;&#160;Text<br><div><br></div>"
        "</body></html>",
@@ -314,7 +317,6 @@ def extract_reply_and_check(filename):
    msg_body = f.read()
    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)
-    plain_reply = plain_reply.decode('utf8')

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))