Compare commits
	
		
			6 Commits
		
	
	
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|  | 9dbe6a494b | ||
|  | 44e70939d6 | ||
|  | ab6066eafa | ||
|  | 42258cdd36 | ||
|  | d3de9e6893 | ||
|  | 333beb94af | ||
							
								
								
									
										2
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								setup.py
									
									
									
									
									
								
							| @@ -2,7 +2,7 @@ from setuptools import setup, find_packages | |||||||
|  |  | ||||||
|  |  | ||||||
| setup(name='talon', | setup(name='talon', | ||||||
|       version='1.2.5', |       version='1.2.7', | ||||||
|       description=("Mailgun library " |       description=("Mailgun library " | ||||||
|                    "to extract message quotations and signatures."), |                    "to extract message quotations and signatures."), | ||||||
|       long_description=open("README.rst").read(), |       long_description=open("README.rst").read(), | ||||||
|   | |||||||
| @@ -78,7 +78,7 @@ def delete_quotation_tags(html_note, counter, quotation_checkpoints): | |||||||
| def cut_gmail_quote(html_message): | def cut_gmail_quote(html_message): | ||||||
|     ''' Cuts the outermost block element with class gmail_quote. ''' |     ''' Cuts the outermost block element with class gmail_quote. ''' | ||||||
|     gmail_quote = html_message.cssselect('div.gmail_quote') |     gmail_quote = html_message.cssselect('div.gmail_quote') | ||||||
|     if gmail_quote and not RE_FWD.match(gmail_quote[0].text): |     if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)): | ||||||
|         gmail_quote[0].getparent().remove(gmail_quote[0]) |         gmail_quote[0].getparent().remove(gmail_quote[0]) | ||||||
|         return True |         return True | ||||||
|  |  | ||||||
| @@ -175,7 +175,21 @@ def cut_from_block(html_message): | |||||||
|                 len(maybe_body.getchildren()) == 1) |                 len(maybe_body.getchildren()) == 1) | ||||||
|  |  | ||||||
|             if not parent_div_is_all_content: |             if not parent_div_is_all_content: | ||||||
|                 block.getparent().remove(block) |                 parent = block.getparent() | ||||||
|  |                 next_sibling = block.getnext() | ||||||
|  |  | ||||||
|  |                 # remove all tags after found From block | ||||||
|  |                 # (From block and quoted message are in separate divs) | ||||||
|  |                 while next_sibling is not None: | ||||||
|  |                     parent.remove(block) | ||||||
|  |                     block = next_sibling | ||||||
|  |                     next_sibling = block.getnext() | ||||||
|  |  | ||||||
|  |                 # remove the last sibling (or the | ||||||
|  |                 # From block if no siblings) | ||||||
|  |                 if block is not None: | ||||||
|  |                     parent.remove(block) | ||||||
|  |  | ||||||
|                 return True |                 return True | ||||||
|         else: |         else: | ||||||
|             return False |             return False | ||||||
|   | |||||||
| @@ -131,6 +131,17 @@ def test_gmail_quote(): | |||||||
|         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def test_gmail_quote_compact(): | ||||||
|  |     msg_body = 'Reply' \ | ||||||
|  |                '<div class="gmail_quote">' \ | ||||||
|  |                '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \ | ||||||
|  |                '<div>Test</div>' \ | ||||||
|  |                '</div>' \ | ||||||
|  |                '</div>' | ||||||
|  |     eq_("<html><body><p>Reply</p></body></html>", | ||||||
|  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_gmail_quote_blockquote(): | def test_gmail_quote_blockquote(): | ||||||
|     msg_body = """Message |     msg_body = """Message | ||||||
| <blockquote class="gmail_quote"> | <blockquote class="gmail_quote"> | ||||||
| @@ -268,6 +279,26 @@ def test_reply_separated_by_hr(): | |||||||
|             '', quotations.extract_from_html(REPLY_SEPARATED_BY_HR))) |             '', quotations.extract_from_html(REPLY_SEPARATED_BY_HR))) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def test_from_block_and_quotations_in_separate_divs(): | ||||||
|  |     msg_body = ''' | ||||||
|  | Reply | ||||||
|  | <div> | ||||||
|  |   <hr/> | ||||||
|  |   <div> | ||||||
|  |     <font> | ||||||
|  |       <b>From: bob@example.com</b> | ||||||
|  |       <b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b> | ||||||
|  |     </font> | ||||||
|  |   </div> | ||||||
|  |   <div> | ||||||
|  |     Quoted message | ||||||
|  |   </div> | ||||||
|  | </div> | ||||||
|  | ''' | ||||||
|  |     eq_('<html><body><p>Reply</p><div><hr></div></body></html>', | ||||||
|  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | ||||||
|  |  | ||||||
|  |  | ||||||
| def extract_reply_and_check(filename): | def extract_reply_and_check(filename): | ||||||
|     f = open(filename) |     f = open(filename) | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user