6 Commits

Author SHA1 Message Date
Sergey Obukhov
9dbe6a494b Merge pull request #90 from mailgun/sergey/89
fixes mailgun/talon#89
2016-05-17 16:01:56 -07:00
Sergey Obukhov
44e70939d6 fixes mailgun/talon#89 2016-05-17 15:31:01 -07:00
Sergey Obukhov
ab6066eafa Merge pull request #87 from mailgun/sergey/1.2.6
bump up version
2016-04-07 17:54:12 -07:00
Sergey Obukhov
42258cdd36 bump up version 2016-04-07 17:51:48 -07:00
Sergey Obukhov
d3de9e6893 Merge pull request #86 from dougkeen/master
Fix #85 (exception when stripping gmail quotes)
2016-04-07 17:47:38 -07:00
Doug Keen
333beb94af Fix #85 (exception when stripping gmail quotes) 2016-04-04 14:22:50 -07:00
3 changed files with 49 additions and 4 deletions

View File

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
setup(name='talon',
version='1.2.5',
version='1.2.7',
description=("Mailgun library "
"to extract message quotations and signatures."),
long_description=open("README.rst").read(),

View File

@@ -78,7 +78,7 @@ def delete_quotation_tags(html_note, counter, quotation_checkpoints):
def cut_gmail_quote(html_message):
''' Cuts the outermost block element with class gmail_quote. '''
gmail_quote = html_message.cssselect('div.gmail_quote')
if gmail_quote and not RE_FWD.match(gmail_quote[0].text):
if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)):
gmail_quote[0].getparent().remove(gmail_quote[0])
return True
@@ -175,7 +175,21 @@ def cut_from_block(html_message):
len(maybe_body.getchildren()) == 1)
if not parent_div_is_all_content:
block.getparent().remove(block)
parent = block.getparent()
next_sibling = block.getnext()
# remove all tags after found From block
# (From block and quoted message are in separate divs)
while next_sibling is not None:
parent.remove(block)
block = next_sibling
next_sibling = block.getnext()
# remove the last sibling (or the
# From block if no siblings)
if block is not None:
parent.remove(block)
return True
else:
return False
@@ -200,4 +214,4 @@ def cut_zimbra_quote(html_message):
zDivider = html_message.xpath('//hr[@data-marker="__DIVIDER__"]')
if zDivider:
zDivider[0].getparent().remove(zDivider[0])
return True
return True

View File

@@ -131,6 +131,17 @@ def test_gmail_quote():
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_gmail_quote_compact():
msg_body = 'Reply' \
'<div class="gmail_quote">' \
'<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:' \
'<div>Test</div>' \
'</div>' \
'</div>'
eq_("<html><body><p>Reply</p></body></html>",
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def test_gmail_quote_blockquote():
msg_body = """Message
<blockquote class="gmail_quote">
@@ -268,6 +279,26 @@ def test_reply_separated_by_hr():
'', quotations.extract_from_html(REPLY_SEPARATED_BY_HR)))
def test_from_block_and_quotations_in_separate_divs():
msg_body = '''
Reply
<div>
<hr/>
<div>
<font>
<b>From: bob@example.com</b>
<b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b>
</font>
</div>
<div>
Quoted message
</div>
</div>
'''
eq_('<html><body><p>Reply</p><div><hr></div></body></html>',
RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body)))
def extract_reply_and_check(filename):
f = open(filename)