""" out = quotations.extract_from_html(msg_body) ok_('' in out and '' in out, 'Invalid HTML - / tag not present') ok_('

' not in out, 'Invalid HTML output -

element is not valid') def test_gmail_quote(): msg_body = """Reply

On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:

Test

""" eq_("Reply", RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_gmail_quote_compact(): msg_body = 'Reply' \ '

' \ '

On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \ '

Test

' \ '

' eq_("Reply", RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_gmail_quote_blockquote(): msg_body = """Message

My name is William Shakespeare.

""" eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_unicode_in_reply(): msg_body = u"""Reply \xa0 \xa0 Text

Quote

""".encode("utf-8") eq_("Reply Text

" "", RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_blockquote_disclaimer(): msg_body = """

message

Quote

disclaimer

""" stripped_html = """

message

disclaimer

""" eq_(RE_WHITESPACE.sub('', stripped_html), RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_date_block(): msg_body = """

message

Date: Fri, 23 Mar 2012 12:35:31 -0600
To: bob@example.com
From: rob@example.com
Subject: You Have New Mail From Mary!

text

""" eq_('

message

', RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_from_block(): msg_body = """

message

From: bob@example.com
Date: Fri, 23 Mar 2012 12:35:31 -0600
To: rob@example.com
Subject: You Have New Mail From Mary!

text

""" eq_('

message

', RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_reply_shares_div_with_from_block(): msg_body = '''

Blah

Date: Tue, 22 May 2012 18:29:16 -0600
To: xx@hotmail.ca
From: quickemail@ashleymadison.com
Subject: You Have New Mail From x!

''' eq_('

Blah

', RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_reply_quotations_share_block(): stripped_html = quotations.extract_from_plain(REPLY_QUOTATIONS_SHARE_BLOCK) ok_(stripped_html) ok_('From' not in stripped_html) def test_OLK_SRC_BODY_SECTION_stripped(): eq_('

', RE_WHITESPACE.sub( '', quotations.extract_from_html(OLK_SRC_BODY_SECTION))) def test_reply_separated_by_hr(): eq_('

there

', RE_WHITESPACE.sub( '', quotations.extract_from_html(REPLY_SEPARATED_BY_HR))) def test_from_block_and_quotations_in_separate_divs(): msg_body = ''' Reply

From: bob@example.com Date: Thu, 24 Mar 2016 08:07:12 -0700

Quoted message

''' eq_('Reply

', RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def extract_reply_and_check(filename): import sys kwargs = {} if sys.version_info > (3, 0): kwargs["encoding"] = "utf8" f = open(filename, **kwargs) msg_body = f.read() reply = quotations.extract_from_html(msg_body) plain_reply = u.html_to_text(reply) plain_reply = plain_reply.decode('utf8') eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"), RE_WHITESPACE.sub('', plain_reply)) def test_gmail_reply(): extract_reply_and_check("tests/fixtures/html_replies/gmail.html") def test_mail_ru_reply(): extract_reply_and_check("tests/fixtures/html_replies/mail_ru.html") def test_hotmail_reply(): extract_reply_and_check("tests/fixtures/html_replies/hotmail.html") def test_ms_outlook_2003_reply(): extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2003.html") def test_ms_outlook_2007_reply(): extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html") def test_ms_outlook_2010_reply(): extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2010.html") def test_thunderbird_reply(): extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html") def test_windows_mail_reply(): extract_reply_and_check("tests/fixtures/html_replies/windows_mail.html") def test_yandex_ru_reply(): extract_reply_and_check("tests/fixtures/html_replies/yandex_ru.html") def test_CRLF(): """CR is not converted to ' ' """ symbol = ' ' extracted = quotations.extract_from_html('\r\n') assert_false(symbol in extracted) eq_('', RE_WHITESPACE.sub('', extracted)) msg_body = """My reply

On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:

Test

""" msg_body = msg_body.replace('\n', '\r\n') extracted = quotations.extract_from_html(msg_body) assert_false(symbol in extracted) # Keep new lines otherwise "My reply" becomes one word - "Myreply" eq_("My\nreply\n", extracted) def test_gmail_forwarded_msg(): msg_body = """

---------- Forwarded message ----------
From: Bob <bob@example.com>
Date: Fri, Feb 11, 2010 at 5:59 PM
Subject: Bob WFH today
To: Mary <mary@example.com>

eom

""" extracted = quotations.extract_from_html(msg_body) eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', extracted)) @patch.object(u, '_MAX_TAGS_COUNT', 4) def test_too_large_html(): msg_body = 'Reply' \ '

' \ '

On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \ '

Test

' \ '

' eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) def test_readable_html_empty(): msg_body = """

Reply
On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:

Test

""" eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) @patch.object(quotations, 'html_document_fromstring', Mock(return_value=None)) def test_bad_html(): bad_html = "" eq_(bad_html, quotations.extract_from_html(bad_html)) def test_remove_namespaces(): msg_body = """ Dear Sir, Thank you for the email.

thing

""" rendered = quotations.extract_from_html(msg_body) assert_true("

" in rendered) assert_true("xmlns" in rendered) assert_true("" not in rendered) assert_true("" not in rendered)