# -*- coding: utf-8 -*-
from __future__ import absolute_import
# noinspection PyUnresolvedReferences
import re
from talon import quotations, utils as u
from . import *
from .fixtures import *
from lxml import html
# Matches one whitespace character; the tests substitute it with '' to strip
# all whitespace before comparing strings.
# Raw literals are used so that ``\s`` reaches the regex engine untouched instead
# of being treated as an (invalid) string escape sequence.
RE_WHITESPACE = re.compile(r"\s")
# NOTE(review): identical pattern to RE_WHITESPACE despite the name — confirm
# whether a different pattern was intended.
RE_DOUBLE_WHITESPACE = re.compile(r"\s")
def test_quotation_splitter_inside_blockquote():
    """Only "Reply" should remain after extract_from_html, ignoring whitespace."""
    body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    extracted = quotations.extract_from_html(body)
    # Strip every whitespace character so only the visible text is compared.
    eq_("Reply", RE_WHITESPACE.sub('', extracted))
def test_quotation_splitter_outside_blockquote():
    """Only "Reply" should remain after extract_from_html, ignoring whitespace."""
    body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    extracted = quotations.extract_from_html(body)
    # Strip every whitespace character so only the visible text is compared.
    compact = RE_WHITESPACE.sub('', extracted)
    eq_("Reply", compact)
# Passes a fixed message body to quotations.extract_from_html and compares the
# result, with all whitespace removed via RE_WHITESPACE, to an expected string.
# NOTE(review): the expected string passed to eq_ is an ordinary double-quoted
# literal that continues onto a second physical line, which Python cannot
# parse. The literal looks incomplete (possibly lost markup) — restore it from
# the authoritative copy of this test before relying on the check.
def test_regular_blockquote():
msg_body = """Reply
Regular
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
"""
eq_("ReplyRegular
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_no_blockquote():
    """The extracted output must equal the expected reply once whitespace is dropped."""
    msg_body = """
Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    reply = """
Reply
"""
    # Both sides are normalised the same way before comparison.
    expected = RE_WHITESPACE.sub('', reply)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
def test_empty_body():
    """An empty input string must come back as an empty string."""
    extracted = quotations.extract_from_html('')
    eq_('', extracted)
def test_validate_output_html():
    """Run two substring checks against the output of extract_from_html."""
    msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    out = quotations.extract_from_html(msg_body)

    # NOTE(review): both checks search for the empty string, so for a str
    # result the first always holds and the second never does. The searched-for
    # substrings look like they were lost — confirm against the authoritative
    # copy of this test.
    required_present = '' in out and '' in out
    ok_(required_present, 'Invalid HTML - / tag not present')

    forbidden_present = '' in out
    ok_(not forbidden_present, 'Invalid HTML output - element is not valid')
def test_gmail_quote():
    """Only "Reply" should remain after extract_from_html, ignoring whitespace."""
    body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    without_quote = quotations.extract_from_html(body)
    # Strip every whitespace character so only the visible text is compared.
    eq_("Reply", RE_WHITESPACE.sub('', without_quote))
# Builds msg_body from several adjacent string fragments joined with line
# continuations, passes it to quotations.extract_from_html and expects "Reply"
# once all whitespace has been removed via RE_WHITESPACE.
# NOTE(review): several of the single-quoted fragments continue across
# physical lines, which Python cannot parse, and one fragment is empty. The
# fragments look like they have lost content (possibly markup) — restore them
# from the authoritative copy of this test.
def test_gmail_quote_compact():
msg_body = 'Reply' \
'' \
'
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \
'
Test
' \
'
' \
'
'
eq_("Reply",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_gmail_quote_blockquote():
    """The body must come back unchanged when whitespace is ignored."""
    msg_body = """Message
My name is William Shakespeare.
"""
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
# Encodes a unicode body containing \xa0 characters to UTF-8, passes the
# encoded value to quotations.extract_from_html and compares the
# whitespace-stripped result to an expected string.
# NOTE(review): the expected value passed to eq_ is a double-quoted literal
# that continues onto a second physical line, followed by an empty literal.
# Python cannot parse this, and the text looks incomplete (possibly lost
# markup) — restore it from the authoritative copy of this test.
def test_unicode_in_reply():
msg_body = u"""Reply \xa0 \xa0 Text
Quote
""".encode("utf-8")
eq_("Reply Text
"
"",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_blockquote_disclaimer():
    """The extracted output must match stripped_html when whitespace is ignored."""
    msg_body = """
disclaimer
"""
    stripped_html = """
disclaimer
"""
    expected = RE_WHITESPACE.sub('', stripped_html)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
# Passes msg_body to quotations.extract_from_html and compares the
# whitespace-stripped result to an expected string via eq_.
# NOTE(review): msg_body contains only a newline, and the expected value is a
# single-quoted literal that continues onto a second physical line, which
# Python cannot parse. Both look like they lost content — restore them from the
# authoritative copy of this test.
def test_date_block():
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
# Passes msg_body to quotations.extract_from_html and compares the
# whitespace-stripped result to an expected string via eq_.
# NOTE(review): msg_body contains only a newline, and the expected value is a
# single-quoted literal that continues onto a second physical line, which
# Python cannot parse. Both look like they lost content — restore them from the
# authoritative copy of this test.
def test_from_block():
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
# Passes a body made of "Blah" followed by Date/To/From/Subject lines to
# quotations.extract_from_html and compares the whitespace-stripped result to
# an expected string via eq_.
# NOTE(review): the expected value is a single-quoted literal that continues
# onto a second physical line, which Python cannot parse. It looks incomplete
# (possibly lost markup) — restore it from the authoritative copy of this test.
def test_reply_shares_div_with_from_block():
msg_body = '''
Blah
Date: Tue, 22 May 2012 18:29:16 -0600
To: xx@hotmail.ca
From: quickemail@ashleymadison.com
Subject: You Have New Mail From x!
'''
eq_('Blah
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_reply_quotations_share_block():
    """extract_from_plain must return a truthy value that does not contain 'From'."""
    remaining = quotations.extract_from_plain(REPLY_QUOTATIONS_SHARE_BLOCK)
    ok_(remaining)
    contains_from = 'From' in remaining
    ok_(not contains_from)
# Passes OLK_SRC_BODY_SECTION (defined outside this view) to
# quotations.extract_from_html and compares the whitespace-stripped result to
# an expected string via eq_.
# NOTE(review): the expected value is a single-quoted literal that continues
# onto a second physical line, which Python cannot parse. It looks incomplete
# (possibly lost markup) — restore it from the authoritative copy of this test.
def test_OLK_SRC_BODY_SECTION_stripped():
eq_('Reply
',
RE_WHITESPACE.sub(
'', quotations.extract_from_html(OLK_SRC_BODY_SECTION)))
def test_reply_separated_by_hr():
    """Compare the whitespace-stripped extraction of REPLY_SEPARATED_BY_HR to ''."""
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    # NOTE(review): the expected value is the empty string — confirm this is
    # intended and not a literal whose content was lost.
    eq_('', RE_WHITESPACE.sub('', extracted))
# Passes a body made of "Reply", From/Date lines and "Quoted message" to
# quotations.extract_from_html and compares the whitespace-stripped result to
# an expected string via eq_.
# NOTE(review): the expected value is a single-quoted literal that continues
# onto a second physical line, which Python cannot parse. It looks incomplete
# (possibly lost markup) — restore it from the authoritative copy of this test.
def test_from_block_and_quotations_in_separate_divs():
msg_body = '''
Reply
From: bob@example.com
Date: Thu, 24 Mar 2016 08:07:12 -0700
Quoted message
'''
eq_('Reply
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def extract_reply_and_check(filename):
    """Extract the reply from an HTML file and compare it to the expected text.

    The file content is passed through ``quotations.extract_from_html``,
    converted with ``u.html_to_text`` and decoded as UTF-8. The result must
    equal the fixed expected reply once all whitespace is removed from both.

    :param filename: path of the HTML file to read.
    """
    import sys

    kwargs = {}
    # ``encoding`` is only passed to open() when running on Python 3.
    if sys.version_info > (3, 0):
        kwargs["encoding"] = "utf8"

    # The context manager closes the handle even if read() raises; the
    # previous code left the file open.
    with open(filename, **kwargs) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)
    plain_reply = plain_reply.decode('utf8')

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))
def test_gmail_reply():
    """Run the shared reply check on the gmail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/gmail.html"
    extract_reply_and_check(fixture_path)
def test_mail_ru_reply():
    """Run the shared reply check on the mail_ru.html fixture."""
    fixture_path = "tests/fixtures/html_replies/mail_ru.html"
    extract_reply_and_check(fixture_path)
def test_hotmail_reply():
    """Run the shared reply check on the hotmail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/hotmail.html"
    extract_reply_and_check(fixture_path)
def test_ms_outlook_2003_reply():
    """Run the shared reply check on the ms_outlook_2003.html fixture."""
    fixture_path = "tests/fixtures/html_replies/ms_outlook_2003.html"
    extract_reply_and_check(fixture_path)
def test_ms_outlook_2007_reply():
    """Run the shared reply check on the ms_outlook_2007.html fixture."""
    fixture_path = "tests/fixtures/html_replies/ms_outlook_2007.html"
    extract_reply_and_check(fixture_path)
def test_ms_outlook_2010_reply():
    """Run the shared reply check on the ms_outlook_2010.html fixture."""
    fixture_path = "tests/fixtures/html_replies/ms_outlook_2010.html"
    extract_reply_and_check(fixture_path)
def test_thunderbird_reply():
    """Run the shared reply check on the thunderbird.html fixture."""
    fixture_path = "tests/fixtures/html_replies/thunderbird.html"
    extract_reply_and_check(fixture_path)
def test_windows_mail_reply():
    """Run the shared reply check on the windows_mail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/windows_mail.html"
    extract_reply_and_check(fixture_path)
def test_yandex_ru_reply():
    """Run the shared reply check on the yandex_ru.html fixture."""
    fixture_path = "tests/fixtures/html_replies/yandex_ru.html"
    extract_reply_and_check(fixture_path)
# Calls quotations.extract_from_html twice: once on '\r\n' alone, and once on a
# body whose '\n' characters were replaced with '\r\n'. After each call it
# asserts that `symbol` does not occur in the result. The first result must be
# empty once whitespace is stripped; the second must equal "My\nreply\n".
# NOTE(review): `symbol` is a single-quoted literal that continues onto a
# second physical line, which Python cannot parse, and the docstring contains
# the same line break. The original character sequence looks like it was lost
# (presumably an escaped or encoded CR) — restore it from the authoritative
# copy of this test.
def test_CRLF():
"""CR is not converted to '
'
"""
symbol = '
'
extracted = quotations.extract_from_html('\r\n')
assert_false(symbol in extracted)
eq_('', RE_WHITESPACE.sub('', extracted))
msg_body = """My
reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
msg_body = msg_body.replace('\n', '\r\n')
extracted = quotations.extract_from_html(msg_body)
assert_false(symbol in extracted)
# Keep new lines otherwise "My reply" becomes one word - "Myreply"
eq_("My\nreply\n", extracted)
def test_gmail_forwarded_msg():
    """The body must come back unchanged when whitespace is ignored."""
    # NOTE(review): the body is an empty string here — confirm its content was
    # not lost.
    msg_body = """"""
    extracted = quotations.extract_from_html(msg_body)
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', extracted)
    eq_(expected, actual)
def test_readable_html_empty():
    """The body must come back unchanged when whitespace is ignored."""
    msg_body = """
Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    expected = RE_WHITESPACE.sub('', msg_body)
    actual = RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))
    eq_(expected, actual)
@patch.object(quotations, 'html_document_fromstring', Mock(return_value=None))
def test_bad_html():
    """With html_document_fromstring patched to return None, the input is returned as-is."""
    bad_html = ""
    result = quotations.extract_from_html(bad_html)
    eq_(bad_html, result)
def test_remove_namespaces():
    """Run substring checks on the rendered output of extract_from_html."""
    msg_body = """
Dear Sir,
Thank you for the email.
thing
"""
    rendered = quotations.extract_from_html(msg_body)

    # NOTE(review): three of the four checks search for the empty string, so
    # for a str result the first always holds and the last two never do. The
    # searched-for substrings look like they were lost — confirm against the
    # authoritative copy of this test.
    assert_true("" in rendered)
    assert_true("xmlns" in rendered)

    first_absent = "" not in rendered
    assert_true(first_absent)
    second_absent = "" not in rendered
    assert_true(second_absent)