# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
import regex as re
from talon import quotations, utils as u
# Matches one whitespace character; the tests below use it with .sub('', ...)
# to drop all whitespace before comparing strings. Written as a raw string so
# the backslash is not parsed as an (invalid) string escape sequence.
RE_WHITESPACE = re.compile(r"\s")
# NOTE(review): despite its name this pattern is identical to RE_WHITESPACE
# (a single whitespace character). Kept as-is to preserve behavior; confirm
# whether a two-character pattern was intended.
RE_DOUBLE_WHITESPACE = re.compile(r"\s")
def test_quotation_splitter_inside_blockquote():
# Passes a body made of a reply, an "On ... wrote:" attribution line and the
# quoted text "Test" to quotations.extract_from_html, then compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): the test name refers to a blockquote, but no markup is visible
# in msg_body -- presumably the tags were lost from the literals; confirm
# against the upstream version of this test.
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
# NOTE(review): the expected value below is a double-quoted literal that spans
# two lines, which Python does not accept; it must be restored before this
# test can run.
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_quotation_splitter_outside_blockquote():
# Passes a body made of a reply, an "On ... wrote:" attribution line and the
# quoted text "Test" to quotations.extract_from_html, then compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): the test name refers to a blockquote, but no markup is visible
# in msg_body -- presumably the tags were lost from the literals; confirm
# against the upstream version of this test.
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
# NOTE(review): the expected value below is a double-quoted literal that spans
# two lines, which Python does not accept; it must be restored before this
# test can run.
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_regular_blockquote():
# Passes a body whose "Regular" line precedes the "On ... wrote:" attribution
# line to quotations.extract_from_html and compares the whitespace-stripped
# result with the expected literal.
# NOTE(review): no blockquote markup is visible in msg_body although the test
# name mentions one -- presumably tags were lost from the literals; confirm.
msg_body = """Reply
Regular
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
"""
# NOTE(review): the expected value below is a double-quoted literal that spans
# several lines, which Python does not accept; it must be restored before this
# test can run.
eq_("Reply
Regular
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_no_blockquote():
    """Compare extract_from_html output with the bare reply text.

    All whitespace is removed from both sides before the comparison, so
    only the non-whitespace content has to match.
    """
    html = """
Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    expected = """
Reply
"""
    extracted = quotations.extract_from_html(html)
    eq_(RE_WHITESPACE.sub('', expected),
        RE_WHITESPACE.sub('', extracted))
def test_empty_body():
    """An empty input string must come back from extract_from_html as ''."""
    extracted = quotations.extract_from_html('')
    eq_('', extracted)
def test_validate_output_html():
    """Run extract_from_html and apply two substring checks to its output."""
    html = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
    extracted = quotations.extract_from_html(html)

    # NOTE(review): every substring literal here is empty. For a str result
    # `'' not in extracted` can never hold, so these literals presumably lost
    # their tag text -- confirm against the upstream version of this test.
    tags_present = '' in extracted and '' in extracted
    ok_(tags_present,
        'Invalid HTML - / tag not present')

    invalid_element_absent = '' not in extracted
    ok_(invalid_element_absent,
        'Invalid HTML output - element is not valid')
def test_gmail_quote():
# Passes a body made of a reply, an "On ... wrote:" attribution line and the
# quoted text "Test" to quotations.extract_from_html, then compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): nothing Gmail-specific is visible in msg_body -- presumably
# the markup was lost from the literals; confirm against upstream.
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
# NOTE(review): the expected value below is a double-quoted literal that spans
# two lines, which Python does not accept; it must be restored before this
# test can run.
eq_("Reply
",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_gmail_quote_blockquote():
    """Expect extract_from_html to return this body unchanged.

    The comparison ignores whitespace: it is stripped from both the input
    and the extracted result before they are compared.
    """
    html = """Message
My name is William Shakespeare.
"""
    extracted = quotations.extract_from_html(html)
    eq_(RE_WHITESPACE.sub('', html),
        RE_WHITESPACE.sub('', extracted))
def test_unicode_in_reply():
# Builds a unicode body containing non-breaking spaces (\xa0), encodes it to
# UTF-8 bytes and passes those bytes to quotations.extract_from_html; the
# whitespace-stripped result is compared with the expected literal.
msg_body = u"""Reply \xa0 \xa0 Text
Quote
""".encode("utf-8")
# NOTE(review): the first expected literal below spans two lines inside double
# quotes, which Python does not accept; presumably markup was lost from it --
# restore it before this test can run.
eq_("Reply Text
"
"",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_blockquote_disclaimer():
    """Compare extract_from_html output with the expected disclaimer text.

    Whitespace is stripped from both sides before they are compared.
    """
    html = """
disclaimer
"""
    expected = """
disclaimer
"""
    extracted = quotations.extract_from_html(html)
    eq_(RE_WHITESPACE.sub('', expected),
        RE_WHITESPACE.sub('', extracted))
def test_date_block():
# Passes msg_body to quotations.extract_from_html and compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): msg_body as shown holds only a newline, and the expected value
# is a single-quoted literal spanning two lines (not valid Python); presumably
# the content was lost -- restore both from upstream before running.
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_from_block():
# Passes msg_body to quotations.extract_from_html and compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): msg_body as shown holds only a newline, and the expected value
# is a single-quoted literal spanning two lines (not valid Python); presumably
# the content was lost -- restore both from upstream before running.
msg_body = """
"""
eq_('message
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_reply_shares_div_with_from_block():
# Passes a body in which the reply text "Blah" is followed by Date/To/From/
# Subject header lines to quotations.extract_from_html, then compares the
# whitespace-stripped result with the expected literal.
# NOTE(review): the test name mentions a shared div, but no markup is visible
# in msg_body -- presumably tags were lost from the literals; confirm.
msg_body = '''
Blah
Date: Tue, 22 May 2012 18:29:16 -0600
To: xx@hotmail.ca
From: quickemail@ashleymadison.com
Subject: You Have New Mail From x!
'''
# NOTE(review): the expected value below is a single-quoted literal that spans
# two lines, which Python does not accept; it must be restored before this
# test can run.
eq_('Blah
',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_reply_quotations_share_block():
    """Run extract_from_plain on the REPLY_QUOTATIONS_SHARE_BLOCK fixture.

    The extracted text must be truthy and must not contain 'From'.
    """
    reply = quotations.extract_from_plain(REPLY_QUOTATIONS_SHARE_BLOCK)
    ok_(reply)
    contains_from = 'From' in reply
    ok_(not contains_from)
def test_OLK_SRC_BODY_SECTION_stripped():
# Passes the OLK_SRC_BODY_SECTION fixture to quotations.extract_from_html and
# compares the whitespace-stripped result with the expected literal.
# NOTE(review): the expected value below is a single-quoted literal that spans
# two lines, which Python does not accept; presumably markup was lost from it
# -- restore it before this test can run.
eq_('Reply
',
RE_WHITESPACE.sub(
'', quotations.extract_from_html(OLK_SRC_BODY_SECTION)))
def test_reply_separated_by_hr():
    """Compare the whitespace-stripped extraction of REPLY_SEPARATED_BY_HR with ''."""
    extracted = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    without_whitespace = RE_WHITESPACE.sub('', extracted)
    eq_('', without_whitespace)
def extract_reply_and_check(filename):
    """Extract the reply from an HTML fixture file and check its text.

    Reads ``filename``, runs ``quotations.extract_from_html`` on the content,
    converts the result to text with ``u.html_to_text`` and asserts that,
    ignoring whitespace, it equals "Hi. I am fine.\\n\\nThanks,\\nAlex".

    :param filename: path of the HTML file to read.
    """
    # The context manager closes the file handle even if read() raises; the
    # original left the handle open.
    with open(filename) as f:
        msg_body = f.read()

    reply = quotations.extract_from_html(msg_body)
    plain_reply = u.html_to_text(reply)

    eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"),
        RE_WHITESPACE.sub('', plain_reply))
def test_gmail_reply():
    """Check reply extraction against the gmail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/gmail.html"
    extract_reply_and_check(fixture_path)
def test_mail_ru_reply():
    """Check reply extraction against the mail_ru.html fixture."""
    fixture_path = "tests/fixtures/html_replies/mail_ru.html"
    extract_reply_and_check(fixture_path)
def test_hotmail_reply():
    """Check reply extraction against the hotmail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/hotmail.html"
    extract_reply_and_check(fixture_path)
def test_ms_outlook_2003_reply():
    """Check reply extraction against the ms_outlook_2003.html fixture."""
    fixture_path = "tests/fixtures/html_replies/ms_outlook_2003.html"
    extract_reply_and_check(fixture_path)
def test_ms_outlook_2007_reply():
    """Check reply extraction against the ms_outlook_2007.html fixture."""
    fixture_path = "tests/fixtures/html_replies/ms_outlook_2007.html"
    extract_reply_and_check(fixture_path)
def test_thunderbird_reply():
    """Check reply extraction against the thunderbird.html fixture."""
    fixture_path = "tests/fixtures/html_replies/thunderbird.html"
    extract_reply_and_check(fixture_path)
def test_windows_mail_reply():
    """Check reply extraction against the windows_mail.html fixture."""
    fixture_path = "tests/fixtures/html_replies/windows_mail.html"
    extract_reply_and_check(fixture_path)
def test_yandex_ru_reply():
    """Check reply extraction against the yandex_ru.html fixture."""
    fixture_path = "tests/fixtures/html_replies/yandex_ru.html"
    extract_reply_and_check(fixture_path)
def test_CRLF():
# Two checks: (1) extract_from_html on a bare '\r\n' must not contain `symbol`
# and must be empty once whitespace is stripped; (2) a reply body whose '\n'
# line endings are rewritten to '\r\n' must not contain `symbol` either, and
# its whitespace-stripped extraction is compared with the expected literal.
# NOTE(review): the docstring and the `symbol` literal both contain a raw line
# break between single quotes; for `symbol` that is not valid Python.
# Presumably they originally held an encoded form of CR that was lost --
# confirm against upstream and restore before running.
"""CR is not converted to '
'
"""
symbol = '
'
extracted = quotations.extract_from_html('\r\n')
assert_false(symbol in extracted)
eq_('', RE_WHITESPACE.sub('', extracted))
msg_body = """Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:
Test
"""
# Rewrite line endings to CRLF before extracting.
msg_body = msg_body.replace('\n', '\r\n')
extracted = quotations.extract_from_html(msg_body)
assert_false(symbol in extracted)
# NOTE(review): the expected value below is a double-quoted literal that spans
# two lines, which Python does not accept; it must be restored as well.
eq_("Reply
",
RE_WHITESPACE.sub('', extracted))