diff --git a/talon/html_quotations.py b/talon/html_quotations.py
index 9540db8..fcf5a8a 100644
--- a/talon/html_quotations.py
+++ b/talon/html_quotations.py
@@ -159,21 +159,33 @@ def cut_from_block(html_message):
if block:
block = block[-1]
+ parent_div = None
while block.getparent() is not None:
if block.tag == 'div':
+ parent_div = block
+ break
+ block = block.getparent()
+ if parent_div is not None:
+ maybe_body = parent_div.getparent()
+ # In cases where removing this enclosing div will remove all
+ # content, we should assume the quote is not enclosed in a tag.
+ parent_div_is_all_content = (
+ maybe_body is not None and maybe_body.tag == 'body' and
+ len(maybe_body.getchildren()) == 1)
+ if not parent_div_is_all_content:
block.getparent().remove(block)
return True
- else:
- block = block.getparent()
- else:
- # handle the case when From: block goes right after e.g. <hr>
- # and not enclosed in some tag
- block = html_message.xpath(
- ("//*[starts-with(mg:tail(), 'From:')]|"
- "//*[starts-with(mg:tail(), 'Date:')]"))
- if block:
- block = block[0]
- while(block.getnext() is not None):
- block.getparent().remove(block.getnext())
- block.getparent().remove(block)
- return True
+ else:
+ return False
+
+ # handle the case when From: block goes right after e.g. <hr>
+ # and not enclosed in some tag
+ block = html_message.xpath(
+ ("//*[starts-with(mg:tail(), 'From:')]|"
+ "//*[starts-with(mg:tail(), 'Date:')]"))
+ if block:
+ block = block[0]
+ while(block.getnext() is not None):
+ block.getparent().remove(block.getnext())
+ block.getparent().remove(block)
+ return True
diff --git a/tests/fixtures/html_replies/ms_outlook_2010.html b/tests/fixtures/html_replies/ms_outlook_2010.html
new file mode 100644
index 0000000..9d26d0e
--- /dev/null
+++ b/tests/fixtures/html_replies/ms_outlook_2010.html
@@ -0,0 +1,87 @@
+
+
+
+
+
+
+
+
+
Hi. I am fine.
+
Thanks,
+
Alex
+
From: Foo [mailto:foo@bar.com]
+On Behalf Of baz@bar.com
+Sent: Monday, January 01, 2000 12:00 AM
+To: john@bar.com
+Cc: jane@bar.io
+Subject: Conversation
+
+
Hello! How are you?
+
+
+
+
diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py
index 9bf488f..44f7ed2 100644
--- a/tests/html_quotations_test.py
+++ b/tests/html_quotations_test.py
@@ -299,6 +299,10 @@ def test_ms_outlook_2007_reply():
extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html")
+def test_ms_outlook_2010_reply():
+ extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2010.html")
+
+
def test_thunderbird_reply():
extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html")