diff --git a/talon/quotations.py b/talon/quotations.py index 9999e6a..add0e02 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -391,7 +391,7 @@ def _extract_from_html(msg_body): if msg_body.strip() == b'': return msg_body - msg_body = msg_body.replace(b'\r\n', b'').replace(b'\n', b'') + msg_body = msg_body.replace(b'\r\n', b'\n') html_tree = html.document_fromstring( msg_body, parser=html.HTMLParser(encoding="utf-8") diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py index 03c66a8..4453033 100644 --- a/tests/html_quotations_test.py +++ b/tests/html_quotations_test.py @@ -356,7 +356,8 @@ def test_CRLF(): assert_false(symbol in extracted) eq_('', RE_WHITESPACE.sub('', extracted)) - msg_body = """Reply + msg_body = """My +reply
@@ -371,8 +372,8 @@ def test_CRLF(): msg_body = msg_body.replace('\n', '\r\n') extracted = quotations.extract_from_html(msg_body) assert_false(symbol in extracted) - eq_("

Reply

", - RE_WHITESPACE.sub('', extracted)) + # Keep newlines; otherwise "My reply" collapses into one word, "Myreply" + eq_("

My\nreply\n

", extracted) def test_gmail_forwarded_msg():