From 44e70939d60ec61ea0522744c4604fc54157a92b Mon Sep 17 00:00:00 2001 From: Sergey Obukhov Date: Tue, 17 May 2016 15:31:01 -0700 Subject: [PATCH] fixes mailgun/talon#89 --- setup.py | 2 +- talon/html_quotations.py | 18 ++++++++++++++++-- tests/html_quotations_test.py | 20 ++++++++++++++++++++ 3 files changed, 37 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index a329e5f..1faf1a2 100755 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ from setuptools import setup, find_packages setup(name='talon', - version='1.2.6', + version='1.2.7', description=("Mailgun library " "to extract message quotations and signatures."), long_description=open("README.rst").read(), diff --git a/talon/html_quotations.py b/talon/html_quotations.py index 261232e..d56ec93 100644 --- a/talon/html_quotations.py +++ b/talon/html_quotations.py @@ -175,7 +175,21 @@ def cut_from_block(html_message): len(maybe_body.getchildren()) == 1) if not parent_div_is_all_content: - block.getparent().remove(block) + parent = block.getparent() + next_sibling = block.getnext() + + # remove all tags after found From block + # (From block and quoted message are in separate divs) + while next_sibling is not None: + parent.remove(block) + block = next_sibling + next_sibling = block.getnext() + + # remove the last sibling (or the + # From block if no siblings) + if block is not None: + parent.remove(block) + return True else: return False @@ -200,4 +214,4 @@ def cut_zimbra_quote(html_message): zDivider = html_message.xpath('//hr[@data-marker="__DIVIDER__"]') if zDivider: zDivider[0].getparent().remove(zDivider[0]) - return True \ No newline at end of file + return True diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py index 1658fe7..4002d0f 100644 --- a/tests/html_quotations_test.py +++ b/tests/html_quotations_test.py @@ -279,6 +279,26 @@ def test_reply_separated_by_hr(): '', quotations.extract_from_html(REPLY_SEPARATED_BY_HR))) +def test_from_block_and_quotations_in_separate_divs(): + msg_body = ''' +Reply +
+
+
+ + From: bob@example.com + Date: Thu, 24 Mar 2016 08:07:12 -0700 + +
+
+ Quoted message +
+
+''' + eq_('

Reply


', + RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) + + def extract_reply_and_check(filename): f = open(filename)