Merge pull request #90 from mailgun/sergey/89

fixes mailgun/talon#89
This commit is contained in:
Sergey Obukhov
2016-05-17 16:01:56 -07:00
3 changed files with 37 additions and 3 deletions

View File

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(name='talon',
version='1.2.6',
version='1.2.7',
description=("Mailgun library "
"to extract message quotations and signatures."),
long_description=open("README.rst").read(),

View File

@@ -175,7 +175,21 @@ def cut_from_block(html_message):
len(maybe_body.getchildren()) == 1)
if not parent_div_is_all_content:
block.getparent().remove(block)
parent = block.getparent()
next_sibling = block.getnext()
# remove all tags after found From block
# (From block and quoted message are in separate divs)
while next_sibling is not None:
parent.remove(block)
block = next_sibling
next_sibling = block.getnext()
# remove the last sibling (or the
# From block if no siblings)
if block is not None:
parent.remove(block)
return True
else:
return False
@@ -200,4 +214,4 @@ def cut_zimbra_quote(html_message):
zDivider = html_message.xpath('//hr[@data-marker="__DIVIDER__"]')
if zDivider:
zDivider[0].getparent().remove(zDivider[0])
return True
return True

View File

@@ -279,6 +279,26 @@ def test_reply_separated_by_hr():
'', quotations.extract_from_html(REPLY_SEPARATED_BY_HR)))
def test_from_block_and_quotations_in_separate_divs():
    """Check extraction when the From block and the quote are sibling divs.

    The input places the ``From:``/``Date:`` header block and the quoted
    message in two separate ``<div>`` elements under one wrapper ``<div>``
    that starts with an ``<hr/>``. The extracted HTML, with every
    ``RE_WHITESPACE`` match removed, is expected to keep only the reply
    paragraph and the wrapper ``<div>`` holding the ``<hr>``.
    """
    html = '''
Reply
<div>
<hr/>
<div>
<font>
<b>From: bob@example.com</b>
<b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b>
</font>
</div>
<div>
Quoted message
</div>
</div>
'''
    # Run the extraction first, then normalise the result for comparison.
    extracted = quotations.extract_from_html(html)
    normalised = RE_WHITESPACE.sub('', extracted)
    expected = '<html><body><p>Reply</p><div><hr></div></body></html>'
    eq_(expected, normalised)
def extract_reply_and_check(filename):
f = open(filename)