diff --git a/talon/html_quotations.py b/talon/html_quotations.py index 32bf634..7db2037 100644 --- a/talon/html_quotations.py +++ b/talon/html_quotations.py @@ -155,21 +155,33 @@ def cut_from_block(html_message): if block: block = block[-1] + parent_div = None while block.getparent() is not None: if block.tag == 'div': + parent_div = block + break + block = block.getparent() + if parent_div is not None: + maybe_body = parent_div.getparent() + # In cases where removing this enclosing div will remove all + # content, we should assume the quote is not enclosed in a tag. + parent_div_is_all_content = ( + maybe_body is not None and maybe_body.tag == 'body' and + len(maybe_body.getchildren()) == 1) + if not parent_div_is_all_content: block.getparent().remove(block) return True - else: - block = block.getparent() - else: - # handle the case when From: block goes right after e.g. <hr>
- # and not enclosed in some tag - block = html_message.xpath( - ("//*[starts-with(mg:tail(), 'From:')]|" - "//*[starts-with(mg:tail(), 'Date:')]")) - if block: - block = block[0] - while(block.getnext() is not None): - block.getparent().remove(block.getnext()) - block.getparent().remove(block) - return True + else: + return False + + # handle the case when From: block goes right after e.g. <hr>
+ # and not enclosed in some tag + block = html_message.xpath( + ("//*[starts-with(mg:tail(), 'From:')]|" + "//*[starts-with(mg:tail(), 'Date:')]")) + if block: + block = block[0] + while(block.getnext() is not None): + block.getparent().remove(block.getnext()) + block.getparent().remove(block) + return True diff --git a/tests/fixtures/html_replies/ms_outlook_2010.html b/tests/fixtures/html_replies/ms_outlook_2010.html new file mode 100644 index 0000000..9d26d0e --- /dev/null +++ b/tests/fixtures/html_replies/ms_outlook_2010.html @@ -0,0 +1,87 @@ + + + + + + + +
+

Hi. I am fine.

+

Thanks,

+

Alex

+

From: Foo [mailto:foo@bar.com] +On Behalf Of baz@bar.com
+Sent: Monday, January 01, 2000 12:00 AM
+To: john@bar.com
+Cc: jane@bar.io
+Subject: Conversation

+

 

+

Hello! How are you?

+

 

+
+ + diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py index 5c4118e..72a0496 100644 --- a/tests/html_quotations_test.py +++ b/tests/html_quotations_test.py @@ -300,6 +300,10 @@ def test_ms_outlook_2007_reply(): extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html") +def test_ms_outlook_2010_reply(): + extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2010.html") + + def test_thunderbird_reply(): extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html")