use html5lib to parse html
This commit is contained in:
@@ -27,7 +27,7 @@ def test_quotation_splitter_inside_blockquote():
|
||||
|
||||
</blockquote>"""
|
||||
|
||||
eq_("<html><body><p>Reply</p></body></html>",
|
||||
eq_("<html><head></head><body>Reply</body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ def test_quotation_splitter_outside_blockquote():
|
||||
</div>
|
||||
</blockquote>
|
||||
"""
|
||||
eq_("<html><body><p>Reply</p></body></html>",
|
||||
eq_("<html><head></head><body>Reply</body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -62,7 +62,7 @@ def test_regular_blockquote():
|
||||
</div>
|
||||
</blockquote>
|
||||
"""
|
||||
eq_("<html><body><p>Reply</p><blockquote>Regular</blockquote></body></html>",
|
||||
eq_("<html><head></head><body>Reply<blockquote>Regular</blockquote></body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -85,6 +85,7 @@ Reply
|
||||
|
||||
reply = """
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
Reply
|
||||
|
||||
@@ -128,7 +129,7 @@ def test_gmail_quote():
|
||||
</div>
|
||||
</div>
|
||||
</div>"""
|
||||
eq_("<html><body><p>Reply</p></body></html>",
|
||||
eq_("<html><head></head><body>Reply</body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -139,7 +140,7 @@ def test_gmail_quote_compact():
|
||||
'<div>Test</div>' \
|
||||
'</div>' \
|
||||
'</div>'
|
||||
eq_("<html><body><p>Reply</p></body></html>",
|
||||
eq_("<html><head></head><body>Reply</body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -166,7 +167,7 @@ def test_unicode_in_reply():
|
||||
Quote
|
||||
</blockquote>""".encode("utf-8")
|
||||
|
||||
eq_("<html><body><p>Reply  Text<br></p><div><br></div>"
|
||||
eq_("<html><head></head><body>Reply  Text<br><div><br></div>"
|
||||
"</body></html>",
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
@@ -192,6 +193,7 @@ def test_blockquote_disclaimer():
|
||||
|
||||
stripped_html = """
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
<div>
|
||||
<div>
|
||||
@@ -223,7 +225,7 @@ def test_date_block():
|
||||
</div>
|
||||
</div>
|
||||
"""
|
||||
eq_('<html><body><div>message<br></div></body></html>',
|
||||
eq_('<html><head></head><body><div>message<br></div></body></html>',
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -240,7 +242,7 @@ Subject: You Have New Mail From Mary!<br><br>
|
||||
text
|
||||
</div></div>
|
||||
"""
|
||||
eq_('<html><body><div>message<br></div></body></html>',
|
||||
eq_('<html><head></head><body><div>message<br></div></body></html>',
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -258,7 +260,7 @@ def test_reply_shares_div_with_from_block():
|
||||
|
||||
</div>
|
||||
</body>'''
|
||||
eq_('<html><body><div>Blah<br><br></div></body></html>',
|
||||
eq_('<html><head></head><body><div>Blah<br><br></div></body></html>',
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -269,13 +271,13 @@ def test_reply_quotations_share_block():
|
||||
|
||||
|
||||
def test_OLK_SRC_BODY_SECTION_stripped():
|
||||
eq_('<html><body><div>Reply</div></body></html>',
|
||||
eq_('<html><head></head><body><div>Reply</div></body></html>',
|
||||
RE_WHITESPACE.sub(
|
||||
'', quotations.extract_from_html(OLK_SRC_BODY_SECTION)))
|
||||
|
||||
|
||||
def test_reply_separated_by_hr():
|
||||
eq_('<html><body><div>Hi<div>there</div></div></body></html>',
|
||||
eq_('<html><head></head><body><div>Hi<div>there</div></div></body></html>',
|
||||
RE_WHITESPACE.sub(
|
||||
'', quotations.extract_from_html(REPLY_SEPARATED_BY_HR)))
|
||||
|
||||
@@ -296,7 +298,7 @@ Reply
|
||||
</div>
|
||||
</div>
|
||||
'''
|
||||
eq_('<html><body><p>Reply</p><div><hr></div></body></html>',
|
||||
eq_('<html><head></head><body>Reply<div><hr></div></body></html>',
|
||||
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
|
||||
|
||||
|
||||
@@ -373,7 +375,7 @@ reply
|
||||
extracted = quotations.extract_from_html(msg_body)
|
||||
assert_false(symbol in extracted)
|
||||
# Keep new lines otherwise "My reply" becomes one word - "Myreply"
|
||||
eq_("<html><body><p>My\nreply\n</p></body></html>", extracted)
|
||||
eq_("<html><head></head><body>My\nreply\n</body></html>", extracted)
|
||||
|
||||
|
||||
def test_gmail_forwarded_msg():
|
||||
|
||||
Reference in New Issue
Block a user