# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
import regex as re
from flanker import mime
from talon import quotations
import html2text
RE_WHITESPACE = re.compile("\s")
RE_DOUBLE_WHITESPACE = re.compile("\s")
def test_quotation_splitter_inside_blockquote():
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_quotation_splitter_outside_blockquote():
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_regular_blockquote():
msg_body = """Reply
Regular
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
"""
eq_("Reply
Regular
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_no_blockquote():
msg_body = """
Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
reply = """
Reply
"""
eq_(RE_WHITESPACE.sub('', reply),
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_empty_body():
eq_('', quotations.extract_from_html(''))
def test_validate_output_html():
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
out = quotations.extract_from_html(msg_body)
ok_('' in out and '' in out,
'Invalid HTML - / tag not present')
ok_('' not in out,
'Invalid HTML output - element is not valid')
def test_gmail_quote():
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_unicode_in_reply():
msg_body = u"""Reply \xa0 \xa0 Text
Quote
""".encode("utf-8")
eq_("Reply Text
"
"",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_blockquote_disclaimer():
msg_body = """
disclaimer
"""
stripped_html = """
disclaimer
"""
eq_(RE_WHITESPACE.sub('', stripped_html),
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_date_block():
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_from_block():
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_reply_shares_div_with_from_block():
msg_body = '''
Blah
Date: Tue, 22 May 2012 18:29:16 -0600
To: xx@hotmail.ca
From: quickemail@ashleymadison.com
Subject: You Have New Mail From x!
'''
eq_('Blah
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_reply_quotations_share_block():
msg = mime.from_string(REPLY_QUOTATIONS_SHARE_BLOCK)
html_part = list(msg.walk())[1]
assert html_part.content_type == 'text/html'
stripped_html = quotations.extract_from_html(html_part.body)
ok_(stripped_html)
ok_('From' not in stripped_html)
def test_OLK_SRC_BODY_SECTION_stripped():
eq_('Reply
',
RE_WHITESPACE.sub(
'', quotations.extract_from_html(OLK_SRC_BODY_SECTION)))
def test_reply_separated_by_hr():
eq_('',
RE_WHITESPACE.sub(
'', quotations.extract_from_html(REPLY_SEPARATED_BY_HR)))
RE_REPLY = re.compile(r"^Hi\. I am fine\.\s*\n\s*Thanks,\s*\n\s*Alex\s*$")
def extract_reply_and_check(filename):
f = open(filename)
msg_body = f.read().decode("utf-8")
reply = quotations.extract_from_html(msg_body)
h = html2text.HTML2Text()
h.body_width = 0
plain_reply = h.handle(reply)
#remove spaces
plain_reply = plain_reply.replace(u'\xa0', u' ')
if RE_REPLY.match(plain_reply):
eq_(1, 1)
else:
eq_("Hi. I am fine.\n\nThanks,\nAlex", plain_reply)
def test_gmail_reply():
extract_reply_and_check("tests/fixtures/html_replies/gmail.html")
def test_mail_ru_reply():
extract_reply_and_check("tests/fixtures/html_replies/mail_ru.html")
def test_hotmail_reply():
extract_reply_and_check("tests/fixtures/html_replies/hotmail.html")
def test_ms_outlook_2003_reply():
extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2003.html")
def test_ms_outlook_2007_reply():
extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html")
def test_thunderbird_reply():
extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html")
def test_windows_mail_reply():
extract_reply_and_check("tests/fixtures/html_replies/windows_mail.html")
def test_yandex_ru_reply():
extract_reply_and_check("tests/fixtures/html_replies/yandex_ru.html")