| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | # -*- coding: utf-8 -*- | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-07-12 17:25:46 +05:00
										 |  |  | from __future__ import absolute_import | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-05-23 16:10:13 -07:00
										 |  |  | # noinspection PyUnresolvedReferences | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-18 18:56:41 -08:00
										 |  |  | from talon import quotations, utils as u | 
					
						
							| 
									
										
										
										
											2017-05-23 15:39:50 -07:00
										 |  |  | from . import * | 
					
						
							|  |  |  | from .fixtures import * | 
					
						
							| 
									
										
										
										
											2019-05-08 11:01:04 -05:00
										 |  |  | from lxml import html | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | RE_WHITESPACE = re.compile("\s") | 
					
						
							|  |  |  | RE_DOUBLE_WHITESPACE = re.compile("\s") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_quotation_splitter_inside_blockquote(): | 
					
						
							|  |  |  |     msg_body = """Reply
 | 
					
						
							|  |  |  | <blockquote> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     Test | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </blockquote>"""
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply</body></html>", | 
					
						
							| 
									
										
										
										
											2015-12-05 00:37:02 -08:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_quotation_splitter_outside_blockquote(): | 
					
						
							|  |  |  |     msg_body = """Reply
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <blockquote> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     Test | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </blockquote> | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply</body></html>", | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-07-16 21:24:49 +02:00
										 |  |  | def test_regular_blockquote(): | 
					
						
							|  |  |  |     msg_body = """Reply
 | 
					
						
							|  |  |  | <blockquote>Regular</blockquote> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <blockquote> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     <blockquote>Nested</blockquote> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </blockquote> | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply<blockquote>Regular</blockquote></body></html>", | 
					
						
							| 
									
										
										
										
											2015-07-16 21:24:49 +02:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | def test_no_blockquote(): | 
					
						
							|  |  |  |     msg_body = """
 | 
					
						
							|  |  |  | <html> | 
					
						
							|  |  |  | <body> | 
					
						
							|  |  |  | Reply | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   Test | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | </body> | 
					
						
							|  |  |  | </html> | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     reply = """
 | 
					
						
							|  |  |  | <html> | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  | <head></head> | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | <body> | 
					
						
							|  |  |  | Reply | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </body></html>"""
 | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', reply), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_empty_body(): | 
					
						
							|  |  |  |     eq_('', quotations.extract_from_html('')) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_validate_output_html(): | 
					
						
							|  |  |  |     msg_body = """Reply
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     <blockquote> | 
					
						
							|  |  |  |       <div> | 
					
						
							|  |  |  |         Test | 
					
						
							|  |  |  |       </div> | 
					
						
							|  |  |  |     </blockquote> | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div/> | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  |     out = quotations.extract_from_html(msg_body) | 
					
						
							|  |  |  |     ok_('<html>' in out and '</html>' in out, | 
					
						
							|  |  |  |         'Invalid HTML - <html>/</html> tag not present') | 
					
						
							|  |  |  |     ok_('<div/>' not in out, | 
					
						
							|  |  |  |         'Invalid HTML output - <div/> element is not valid') | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_gmail_quote(): | 
					
						
							|  |  |  |     msg_body = """Reply
 | 
					
						
							|  |  |  | <div class="gmail_quote"> | 
					
						
							|  |  |  |   <div class="gmail_quote"> | 
					
						
							|  |  |  |     On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  |     <div> | 
					
						
							|  |  |  |       Test | 
					
						
							|  |  |  |     </div> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </div>"""
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply</body></html>", | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-04-04 14:22:50 -07:00
										 |  |  | def test_gmail_quote_compact(): | 
					
						
							|  |  |  |     msg_body = 'Reply' \ | 
					
						
							|  |  |  |                '<div class="gmail_quote">' \ | 
					
						
							|  |  |  |                '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \ | 
					
						
							|  |  |  |                '<div>Test</div>' \ | 
					
						
							|  |  |  |                '</div>' \ | 
					
						
							|  |  |  |                '</div>' | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply</body></html>", | 
					
						
							| 
									
										
										
										
											2016-04-04 14:22:50 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-05 00:37:02 -08:00
										 |  |  | def test_gmail_quote_blockquote(): | 
					
						
							|  |  |  |     msg_body = """Message
 | 
					
						
							|  |  |  | <blockquote class="gmail_quote"> | 
					
						
							|  |  |  |   <div class="gmail_default"> | 
					
						
							|  |  |  |     My name is William Shakespeare. | 
					
						
							|  |  |  |     <br/> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </blockquote>"""
 | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', msg_body), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | def test_unicode_in_reply(): | 
					
						
							|  |  |  |     msg_body = u"""Reply \xa0 \xa0 Text<br>
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   <br> | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-05 00:37:02 -08:00
										 |  |  | <blockquote> | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |   Quote | 
					
						
							|  |  |  | </blockquote>""".encode("utf-8")
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>Reply  Text<br><div><br></div>" | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         "</body></html>", | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_blockquote_disclaimer(): | 
					
						
							|  |  |  |     msg_body = """
 | 
					
						
							|  |  |  | <html> | 
					
						
							|  |  |  |   <body> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     <div> | 
					
						
							|  |  |  |       message | 
					
						
							|  |  |  |     </div> | 
					
						
							|  |  |  |     <blockquote> | 
					
						
							|  |  |  |       Quote | 
					
						
							|  |  |  |     </blockquote> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     disclaimer | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  |   </body> | 
					
						
							|  |  |  | </html> | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     stripped_html = """
 | 
					
						
							|  |  |  | <html> | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |   <head></head> | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |   <body> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     <div> | 
					
						
							|  |  |  |       message | 
					
						
							|  |  |  |     </div> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     disclaimer | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  |   </body> | 
					
						
							|  |  |  | </html> | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', stripped_html), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_date_block(): | 
					
						
							|  |  |  |     msg_body = """
 | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   message<br> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     <hr> | 
					
						
							|  |  |  |     Date: Fri, 23 Mar 2012 12:35:31 -0600<br> | 
					
						
							|  |  |  |     To: <a href="mailto:bob@example.com">bob@example.com</a><br> | 
					
						
							|  |  |  |     From: <a href="mailto:rob@example.com">rob@example.com</a><br> | 
					
						
							|  |  |  |     Subject: You Have New Mail From Mary!<br><br> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     text | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body><div>message<br></div></body></html>', | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_from_block(): | 
					
						
							|  |  |  |     msg_body = """<div>
 | 
					
						
							|  |  |  | message<br> | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  | <hr> | 
					
						
							|  |  |  | From: <a href="mailto:bob@example.com">bob@example.com</a><br> | 
					
						
							|  |  |  | Date: Fri, 23 Mar 2012 12:35:31 -0600<br> | 
					
						
							|  |  |  | To: <a href="mailto:rob@example.com">rob@example.com</a><br> | 
					
						
							|  |  |  | Subject: You Have New Mail From Mary!<br><br> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | text | 
					
						
							|  |  |  | </div></div> | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body><div>message<br></div></body></html>', | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_reply_shares_div_with_from_block(): | 
					
						
							|  |  |  |     msg_body = '''
 | 
					
						
							|  |  |  | <body> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     Blah<br><br> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     <hr>Date: Tue, 22 May 2012 18:29:16 -0600<br> | 
					
						
							|  |  |  |     To: xx@hotmail.ca<br> | 
					
						
							|  |  |  |     From: quickemail@ashleymadison.com<br> | 
					
						
							|  |  |  |     Subject: You Have New Mail From x!<br><br> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </body>'''
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body><div>Blah<br><br></div></body></html>', | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_reply_quotations_share_block(): | 
					
						
							| 
									
										
										
										
											2015-03-08 00:06:01 -05:00
										 |  |  |     stripped_html = quotations.extract_from_plain(REPLY_QUOTATIONS_SHARE_BLOCK) | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |     ok_(stripped_html) | 
					
						
							|  |  |  |     ok_('From' not in stripped_html) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_OLK_SRC_BODY_SECTION_stripped(): | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body><div>Reply</div></body></html>', | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub( | 
					
						
							|  |  |  |             '', quotations.extract_from_html(OLK_SRC_BODY_SECTION))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_reply_separated_by_hr(): | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body><div>Hi<div>there</div></div></body></html>', | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |         RE_WHITESPACE.sub( | 
					
						
							|  |  |  |             '', quotations.extract_from_html(REPLY_SEPARATED_BY_HR))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-05-17 15:31:01 -07:00
										 |  |  | def test_from_block_and_quotations_in_separate_divs(): | 
					
						
							|  |  |  |     msg_body = '''
 | 
					
						
							|  |  |  | Reply | 
					
						
							|  |  |  | <div> | 
					
						
							|  |  |  |   <hr/> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     <font> | 
					
						
							|  |  |  |       <b>From: bob@example.com</b> | 
					
						
							|  |  |  |       <b>Date: Thu, 24 Mar 2016 08:07:12 -0700</b> | 
					
						
							|  |  |  |     </font> | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     Quoted message | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | </div> | 
					
						
							|  |  |  | '''
 | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_('<html><head></head><body>Reply<div><hr></div></body></html>', | 
					
						
							| 
									
										
										
										
											2016-05-17 15:31:01 -07:00
										 |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | def extract_reply_and_check(filename): | 
					
						
							| 
									
										
										
										
											2017-05-23 16:10:13 -07:00
										 |  |  |     import sys | 
					
						
							|  |  |  |     kwargs = {} | 
					
						
							|  |  |  |     if sys.version_info > (3, 0): | 
					
						
							| 
									
										
										
										
											2017-05-23 16:17:39 -07:00
										 |  |  |         kwargs["encoding"] = "utf8" | 
					
						
							| 
									
										
										
										
											2017-05-23 16:10:13 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  |     f = open(filename, **kwargs) | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-09-18 04:47:23 -07:00
										 |  |  |     msg_body = f.read() | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  |     reply = quotations.extract_from_html(msg_body) | 
					
						
							| 
									
										
										
										
											2015-12-18 18:56:41 -08:00
										 |  |  |     plain_reply = u.html_to_text(reply) | 
					
						
							| 
									
										
										
										
											2016-07-13 11:11:06 +05:00
										 |  |  |     plain_reply = plain_reply.decode('utf8') | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-18 18:56:41 -08:00
										 |  |  |     eq_(RE_WHITESPACE.sub('', "Hi. I am fine.\n\nThanks,\nAlex"), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', plain_reply)) | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_gmail_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/gmail.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_mail_ru_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/mail_ru.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_hotmail_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/hotmail.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ms_outlook_2003_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2003.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_ms_outlook_2007_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-12-10 19:14:51 -08:00
										 |  |  | def test_ms_outlook_2010_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2010.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2014-07-23 21:12:54 -07:00
										 |  |  | def test_thunderbird_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_windows_mail_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/windows_mail.html") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_yandex_ru_reply(): | 
					
						
							|  |  |  |     extract_reply_and_check("tests/fixtures/html_replies/yandex_ru.html") | 
					
						
							| 
									
										
										
										
											2015-09-21 09:51:26 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_CRLF(): | 
					
						
							|  |  |  |     """CR is not converted to '
'
 | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2015-12-05 00:37:02 -08:00
										 |  |  |     symbol = '
' | 
					
						
							|  |  |  |     extracted = quotations.extract_from_html('<html>\r\n</html>') | 
					
						
							|  |  |  |     assert_false(symbol in extracted) | 
					
						
							|  |  |  |     eq_('<html></html>', RE_WHITESPACE.sub('', extracted)) | 
					
						
							| 
									
										
										
										
											2015-09-21 09:51:26 -07:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-08-11 20:17:37 -07:00
										 |  |  |     msg_body = """My
 | 
					
						
							|  |  |  | reply | 
					
						
							| 
									
										
										
										
											2015-09-21 09:51:26 -07:00
										 |  |  | <blockquote> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     Test | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </blockquote>"""
 | 
					
						
							|  |  |  |     msg_body = msg_body.replace('\n', '\r\n') | 
					
						
							| 
									
										
										
										
											2015-12-05 00:37:02 -08:00
										 |  |  |     extracted = quotations.extract_from_html(msg_body) | 
					
						
							| 
									
										
										
										
											2017-05-23 15:39:50 -07:00
										 |  |  |     assert_false(symbol in extracted) | 
					
						
							| 
									
										
										
										
											2016-08-11 20:17:37 -07:00
										 |  |  |     # Keep new lines otherwise "My reply" becomes one word - "Myreply"  | 
					
						
							| 
									
										
										
										
											2016-08-15 19:36:21 -07:00
										 |  |  |     eq_("<html><head></head><body>My\nreply\n</body></html>", extracted) | 
					
						
							| 
									
										
										
										
											2016-02-19 18:30:43 -08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_gmail_forwarded_msg(): | 
					
						
							|  |  |  |     msg_body = """<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------<br>From: <b class="gmail_sendername">Bob</b> <span dir="ltr"><<a href="mailto:bob@example.com">bob@example.com</a>></span><br>Date: Fri, Feb 11, 2010 at 5:59 PM<br>Subject: Bob WFH today<br>To: Mary <<a href="mailto:mary@example.com">mary@example.com</a>><br><br><br><div dir="ltr">eom</div>
 | 
					
						
							|  |  |  | </div><br></div>"""
 | 
					
						
							|  |  |  |     extracted = quotations.extract_from_html(msg_body) | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', extracted)) | 
					
						
							| 
									
										
										
										
											2016-08-09 17:15:49 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-09-14 09:58:41 -07:00
										 |  |  | @patch.object(u, '_MAX_TAGS_COUNT', 4) | 
					
						
							| 
									
										
										
										
											2016-08-09 17:15:49 -07:00
										 |  |  | def test_too_large_html(): | 
					
						
							|  |  |  |     msg_body = 'Reply' \ | 
					
						
							|  |  |  |                '<div class="gmail_quote">' \ | 
					
						
							|  |  |  |                '<div class="gmail_quote">On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote:' \ | 
					
						
							|  |  |  |                '<div>Test</div>' \ | 
					
						
							|  |  |  |                '</div>' \ | 
					
						
							|  |  |  |                '</div>' | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', msg_body), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							| 
									
										
										
										
											2016-08-11 19:54:53 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_readable_html_empty(): | 
					
						
							|  |  |  |     msg_body = """
 | 
					
						
							|  |  |  | <blockquote> | 
					
						
							|  |  |  |   Reply | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     On 11-Apr-2011, at 6:54 PM, Bob <bob@example.com> wrote: | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |   <div> | 
					
						
							|  |  |  |     Test | 
					
						
							|  |  |  |   </div> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | </blockquote>"""
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     eq_(RE_WHITESPACE.sub('', msg_body), | 
					
						
							|  |  |  |         RE_WHITESPACE.sub('', quotations.extract_from_html(msg_body))) | 
					
						
							| 
									
										
										
										
											2016-08-19 11:38:12 -07:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @patch.object(quotations, 'html_document_fromstring', Mock(return_value=None)) | 
					
						
							|  |  |  | def test_bad_html(): | 
					
						
							|  |  |  |     bad_html = "<html></html>" | 
					
						
							|  |  |  |     eq_(bad_html, quotations.extract_from_html(bad_html)) | 
					
						
							| 
									
										
										
										
											2019-05-08 11:01:04 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def test_remove_namespaces(): | 
					
						
							|  |  |  |     msg_body = """
 | 
					
						
							|  |  |  |     <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns="http://www.w3.org/TR/REC-html40"> | 
					
						
							|  |  |  |         <body> | 
					
						
							|  |  |  |             <o:p>Dear Sir,</o:p> | 
					
						
							|  |  |  |             <o:p>Thank you for the email.</o:p> | 
					
						
							|  |  |  |             <blockquote>thing</blockquote> | 
					
						
							|  |  |  |         </body> | 
					
						
							|  |  |  |     </html> | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     rendered = quotations.extract_from_html(msg_body) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert_true("<p>" in rendered) | 
					
						
							|  |  |  |     assert_true("xmlns" in rendered) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     assert_true("<o:p>" not in rendered) | 
					
						
							|  |  |  |     assert_true("<xmlns:o>" not in rendered) |