1 Commits

Author SHA1 Message Date
Sergey Obukhov
0f5e72623b add android quotation pattern 2017-04-10 16:33:21 -07:00
4 changed files with 27 additions and 6 deletions

View File

@@ -29,7 +29,7 @@ class InstallCommand(install):
setup(name='talon',
version='1.3.4',
version='1.3.5',
description=("Mailgun library "
"to extract message quotations and signatures."),
long_description=open("README.rst").read(),
@@ -48,7 +48,7 @@ setup(name='talon',
"regex>=1",
"numpy",
"scipy",
"scikit-learn==0.16.1", # pickled versions of classifier, else rebuild
"scikit-learn>=0.16.1", # pickled versions of classifier, else rebuild
'chardet>=1.0.1',
'cchardet>=0.3.5',
'cssselect',

View File

@@ -139,6 +139,13 @@ RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]?.*
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
))), re.I)
# ---- John Smith wrote ----
RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format(
u'|'.join((
# English
'wrote'
))), re.I)
SPLITTER_PATTERNS = [
RE_ORIGINAL_MESSAGE,
RE_ON_DATE_SMB_WROTE,
@@ -154,10 +161,10 @@ SPLITTER_PATTERNS = [
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
'( \S+){3,6}@\S+:'),
# Sent from Samsung MobileName <address@example.com> wrote:
re.compile('Sent from Samsung .*@.*> wrote')
re.compile('Sent from Samsung .*@.*> wrote'),
RE_ANDROID_WROTE
]
RE_LINK = re.compile('<(http://[^>]*)>')
RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')

View File

@@ -142,7 +142,8 @@ def _check_pattern_original_message(original_message_indicator):
-----{}-----
Test"""
eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator))))
eq_('Test reply', quotations.extract_from_plain(
msg_body.format(six.text_type(original_message_indicator))))
def test_english_original_message():
_check_pattern_original_message('Original Message')
@@ -165,6 +166,17 @@ Test reply"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_android_wrote():
msg_body = """Test reply
---- John Smith wrote ----
> quoted
> text
"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_reply_wraps_quotations():
msg_body = """Test reply

View File

@@ -39,7 +39,9 @@ def test_detect_encoding():
def test_quick_detect_encoding():
eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower())
ok_ (u.quick_detect_encoding(
u'Versi\xf3n'.encode('windows-1252')).lower() in [
'windows-1252', 'windows-1250'])
eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())