Compare commits
	
		
			11 Commits
		
	
	
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|  | f16ae5110b | ||
|  | ab5cbe5ec3 | ||
|  | be5da92f16 | ||
|  | 95954a65a0 | ||
|  | 0b55e8fa77 | ||
|  | 6f159e8959 | ||
|  | 5c413b4b00 | ||
|  | cca64d3ed1 | ||
|  | e11eaf6ff8 | ||
|  | 85a4c1d855 | ||
|  | 0f5e72623b | ||
							
								
								
									
										4
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								setup.py
									
									
									
									
									
								
							| @@ -29,7 +29,7 @@ class InstallCommand(install): | |||||||
|  |  | ||||||
|  |  | ||||||
| setup(name='talon', | setup(name='talon', | ||||||
|       version='1.3.4', |       version='1.3.7', | ||||||
|       description=("Mailgun library " |       description=("Mailgun library " | ||||||
|                    "to extract message quotations and signatures."), |                    "to extract message quotations and signatures."), | ||||||
|       long_description=open("README.rst").read(), |       long_description=open("README.rst").read(), | ||||||
| @@ -48,7 +48,7 @@ setup(name='talon', | |||||||
|           "regex>=1", |           "regex>=1", | ||||||
|           "numpy", |           "numpy", | ||||||
|           "scipy", |           "scipy", | ||||||
|           "scikit-learn==0.16.1", # pickled versions of classifier, else rebuild |           "scikit-learn>=0.16.1", # pickled versions of classifier, else rebuild | ||||||
|           'chardet>=1.0.1', |           'chardet>=1.0.1', | ||||||
|           'cchardet>=0.3.5', |           'cchardet>=0.3.5', | ||||||
|           'cssselect', |           'cssselect', | ||||||
|   | |||||||
| @@ -139,6 +139,21 @@ RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]?.* | |||||||
|         'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt', |         'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt', | ||||||
|     ))), re.I) |     ))), re.I) | ||||||
|  |  | ||||||
|  | # ---- John Smith wrote ---- | ||||||
|  | RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format( | ||||||
|  |     u'|'.join(( | ||||||
|  |         # English | ||||||
|  |         'wrote' | ||||||
|  |     ))), re.I) | ||||||
|  |  | ||||||
|  | # Support polymail.io reply format | ||||||
|  | # On Tue, Apr 11, 2017 at 10:07 PM John Smith | ||||||
|  | # | ||||||
|  | # < | ||||||
|  | # mailto:John Smith <johnsmith@gmail.com> | ||||||
|  | # > wrote: | ||||||
|  | RE_POLYMAIL = re.compile('On.*\s{2}<\smailto:.*\s> wrote:', re.I) | ||||||
|  |  | ||||||
| SPLITTER_PATTERNS = [ | SPLITTER_PATTERNS = [ | ||||||
|     RE_ORIGINAL_MESSAGE, |     RE_ORIGINAL_MESSAGE, | ||||||
|     RE_ON_DATE_SMB_WROTE, |     RE_ON_DATE_SMB_WROTE, | ||||||
| @@ -154,16 +169,17 @@ SPLITTER_PATTERNS = [ | |||||||
|     re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?' |     re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?' | ||||||
|                '( \S+){3,6}@\S+:'), |                '( \S+){3,6}@\S+:'), | ||||||
|     # Sent from Samsung MobileName <address@example.com> wrote: |     # Sent from Samsung MobileName <address@example.com> wrote: | ||||||
|     re.compile('Sent from Samsung .*@.*> wrote') |     re.compile('Sent from Samsung .*@.*> wrote'), | ||||||
|  |     RE_ANDROID_WROTE, | ||||||
|  |     RE_POLYMAIL | ||||||
|     ] |     ] | ||||||
|  |  | ||||||
|  |  | ||||||
| RE_LINK = re.compile('<(http://[^>]*)>') | RE_LINK = re.compile('<(http://[^>]*)>') | ||||||
| RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@') | RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@') | ||||||
|  |  | ||||||
| RE_PARENTHESIS_LINK = re.compile("\(https?://") | RE_PARENTHESIS_LINK = re.compile("\(https?://") | ||||||
|  |  | ||||||
| SPLITTER_MAX_LINES = 4 | SPLITTER_MAX_LINES = 6 | ||||||
| MAX_LINES_COUNT = 1000 | MAX_LINES_COUNT = 1000 | ||||||
| # an extensive research shows that exceeding this limit | # an extensive research shows that exceeding this limit | ||||||
| # leads to excessive processing time | # leads to excessive processing time | ||||||
|   | |||||||
| @@ -35,6 +35,19 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote: | |||||||
|  |  | ||||||
|     eq_("Test reply", quotations.extract_from_plain(msg_body)) |     eq_("Test reply", quotations.extract_from_plain(msg_body)) | ||||||
|  |  | ||||||
|  | def test_pattern_on_date_polymail(): | ||||||
|  |     msg_body = """Test reply | ||||||
|  |  | ||||||
|  | On Tue, Apr 11, 2017 at 10:07 PM John Smith | ||||||
|  |  | ||||||
|  | < | ||||||
|  | mailto:John Smith <johnsmith@gmail.com> | ||||||
|  | > wrote: | ||||||
|  | Test quoted data | ||||||
|  | """ | ||||||
|  |  | ||||||
|  |     eq_("Test reply", quotations.extract_from_plain(msg_body)) | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_pattern_sent_from_samsung_smb_wrote(): | def test_pattern_sent_from_samsung_smb_wrote(): | ||||||
|     msg_body = """Test reply |     msg_body = """Test reply | ||||||
| @@ -142,7 +155,8 @@ def _check_pattern_original_message(original_message_indicator): | |||||||
| -----{}----- | -----{}----- | ||||||
|  |  | ||||||
| Test""" | Test""" | ||||||
|     eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator)))) |     eq_('Test reply', quotations.extract_from_plain( | ||||||
|  |         msg_body.format(six.text_type(original_message_indicator)))) | ||||||
|  |  | ||||||
| def test_english_original_message(): | def test_english_original_message(): | ||||||
|     _check_pattern_original_message('Original Message') |     _check_pattern_original_message('Original Message') | ||||||
| @@ -165,6 +179,17 @@ Test reply""" | |||||||
|     eq_("Test reply", quotations.extract_from_plain(msg_body)) |     eq_("Test reply", quotations.extract_from_plain(msg_body)) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def test_android_wrote(): | ||||||
|  |     msg_body = """Test reply | ||||||
|  |  | ||||||
|  | ---- John Smith wrote ---- | ||||||
|  |  | ||||||
|  | > quoted | ||||||
|  | > text | ||||||
|  | """ | ||||||
|  |     eq_("Test reply", quotations.extract_from_plain(msg_body)) | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_reply_wraps_quotations(): | def test_reply_wraps_quotations(): | ||||||
|     msg_body = """Test reply |     msg_body = """Test reply | ||||||
|  |  | ||||||
|   | |||||||
| @@ -29,7 +29,9 @@ def test_unicode(): | |||||||
|  |  | ||||||
| def test_detect_encoding(): | def test_detect_encoding(): | ||||||
|     eq_ ('ascii', u.detect_encoding(b'qwe').lower()) |     eq_ ('ascii', u.detect_encoding(b'qwe').lower()) | ||||||
|     eq_ ('iso-8859-2', u.detect_encoding(u'Versi\xf3n'.encode('iso-8859-2')).lower()) |     ok_ (u.detect_encoding( | ||||||
|  |         u'Versi\xf3n'.encode('iso-8859-2')).lower() in [ | ||||||
|  |             'iso-8859-1', 'iso-8859-2']) | ||||||
|     eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower()) |     eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower()) | ||||||
|     # fallback to utf-8 |     # fallback to utf-8 | ||||||
|     with patch.object(u.chardet, 'detect') as detect: |     with patch.object(u.chardet, 'detect') as detect: | ||||||
| @@ -39,7 +41,9 @@ def test_detect_encoding(): | |||||||
|  |  | ||||||
| def test_quick_detect_encoding(): | def test_quick_detect_encoding(): | ||||||
|     eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower()) |     eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower()) | ||||||
|     eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower()) |     ok_ (u.quick_detect_encoding( | ||||||
|  |         u'Versi\xf3n'.encode('windows-1252')).lower() in [ | ||||||
|  |             'windows-1252', 'windows-1250']) | ||||||
|     eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower()) |     eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower()) | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user