Release talon 1.3.5: recognise Android-style "---- John Smith wrote ----"
reply splitters.

Changes carried by this patch:
  * setup.py: bump the version to 1.3.5 and relax the scikit-learn pin from
    ==0.16.1 to >=0.16.1.
    NOTE(review): the inline comment says the classifier is pickled; confirm
    newer scikit-learn releases can still load it.
  * talon/quotations.py: add RE_ANDROID_WROTE and register it in
    SPLITTER_PATTERNS. The alternation is built from a real 1-tuple
    ('wrote',); without the trailing comma u'|'.join() iterates over the
    characters of the string and produces 'w|r|o|t|e'.
  * tests: add test_android_wrote, wrap an over-long assertion, and accept
    either windows-1252 or windows-1250 from quick_detect_encoding.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; confirm it applies cleanly against
the base files before merging.

diff --git a/setup.py b/setup.py
index 2423df5..2b91f60 100755
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
 
 
 setup(name='talon',
-      version='1.3.4',
+      version='1.3.5',
       description=("Mailgun library "
                    "to extract message quotations and signatures."),
       long_description=open("README.rst").read(),
@@ -48,7 +48,7 @@ setup(name='talon',
           "regex>=1",
           "numpy",
           "scipy",
-          "scikit-learn==0.16.1", # pickled versions of classifier, else rebuild
+          "scikit-learn>=0.16.1", # pickled versions of classifier, else rebuild
           'chardet>=1.0.1',
           'cchardet>=0.3.5',
           'cssselect',
diff --git a/talon/quotations.py b/talon/quotations.py
index 232c69d..6016310 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -139,6 +139,13 @@ RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]?.*
         'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
     ))), re.I)
 
+# ---- John Smith wrote ----
+RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format(
+    u'|'.join((
+        # English
+        'wrote',  # trailing comma keeps this a 1-tuple, not a bare str
+    ))), re.I)
+
 SPLITTER_PATTERNS = [
     RE_ORIGINAL_MESSAGE,
     RE_ON_DATE_SMB_WROTE,
@@ -154,10 +161,10 @@ SPLITTER_PATTERNS = [
     re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
                '( \S+){3,6}@\S+:'),
     # Sent from Samsung MobileName <address@example.com> wrote:
-    re.compile('Sent from Samsung .*@.*> wrote')
+    re.compile('Sent from Samsung .*@.*> wrote'),
+    RE_ANDROID_WROTE
     ]
 
-
 
 RE_LINK = re.compile('<(http://[^>]*)>')
 RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')
diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py
index 622e84f..7a81c99 100644
--- a/tests/text_quotations_test.py
+++ b/tests/text_quotations_test.py
@@ -142,7 +142,8 @@ def _check_pattern_original_message(original_message_indicator):
 -----{}-----
 
 Test"""
-    eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator))))
+    eq_('Test reply', quotations.extract_from_plain(
+        msg_body.format(six.text_type(original_message_indicator))))
 
 def test_english_original_message():
     _check_pattern_original_message('Original Message')
@@ -165,6 +166,17 @@ Test reply"""
     eq_("Test reply", quotations.extract_from_plain(msg_body))
 
 
+def test_android_wrote():
+    msg_body = """Test reply
+
+---- John Smith wrote ----
+
+> quoted
+> text
+"""
+    eq_("Test reply", quotations.extract_from_plain(msg_body))
+
+
 def test_reply_wraps_quotations():
     msg_body = """Test reply
 
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 08d34bb..778e858 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -39,7 +39,9 @@ def test_detect_encoding():
 
 def test_quick_detect_encoding():
     eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
-    eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower())
+    ok_ (u.quick_detect_encoding(
+        u'Versi\xf3n'.encode('windows-1252')).lower() in [
+            'windows-1252', 'windows-1250'])
     eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
 
 