Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0b55e8fa77 | ||
|
|
6f159e8959 | ||
|
|
85a4c1d855 | ||
|
|
0f5e72623b |
4
setup.py
4
setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
|
|||||||
|
|
||||||
|
|
||||||
setup(name='talon',
|
setup(name='talon',
|
||||||
version='1.3.4',
|
version='1.3.6',
|
||||||
description=("Mailgun library "
|
description=("Mailgun library "
|
||||||
"to extract message quotations and signatures."),
|
"to extract message quotations and signatures."),
|
||||||
long_description=open("README.rst").read(),
|
long_description=open("README.rst").read(),
|
||||||
@@ -48,7 +48,7 @@ setup(name='talon',
|
|||||||
"regex>=1",
|
"regex>=1",
|
||||||
"numpy",
|
"numpy",
|
||||||
"scipy",
|
"scipy",
|
||||||
"scikit-learn==0.16.1", # pickled versions of classifier, else rebuild
|
"scikit-learn>=0.16.1", # pickled versions of classifier, else rebuild
|
||||||
'chardet>=1.0.1',
|
'chardet>=1.0.1',
|
||||||
'cchardet>=0.3.5',
|
'cchardet>=0.3.5',
|
||||||
'cssselect',
|
'cssselect',
|
||||||
|
|||||||
@@ -139,6 +139,13 @@ RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]?.*
|
|||||||
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
|
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
|
||||||
))), re.I)
|
))), re.I)
|
||||||
|
|
||||||
|
# ---- John Smith wrote ----
|
||||||
|
RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format(
|
||||||
|
u'|'.join((
|
||||||
|
# English
|
||||||
|
'wrote'
|
||||||
|
))), re.I)
|
||||||
|
|
||||||
SPLITTER_PATTERNS = [
|
SPLITTER_PATTERNS = [
|
||||||
RE_ORIGINAL_MESSAGE,
|
RE_ORIGINAL_MESSAGE,
|
||||||
RE_ON_DATE_SMB_WROTE,
|
RE_ON_DATE_SMB_WROTE,
|
||||||
@@ -154,10 +161,10 @@ SPLITTER_PATTERNS = [
|
|||||||
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
|
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
|
||||||
'( \S+){3,6}@\S+:'),
|
'( \S+){3,6}@\S+:'),
|
||||||
# Sent from Samsung MobileName <address@example.com> wrote:
|
# Sent from Samsung MobileName <address@example.com> wrote:
|
||||||
re.compile('Sent from Samsung .*@.*> wrote')
|
re.compile('Sent from Samsung .*@.*> wrote'),
|
||||||
|
RE_ANDROID_WROTE
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
RE_LINK = re.compile('<(http://[^>]*)>')
|
RE_LINK = re.compile('<(http://[^>]*)>')
|
||||||
RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')
|
RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')
|
||||||
|
|
||||||
|
|||||||
@@ -142,7 +142,8 @@ def _check_pattern_original_message(original_message_indicator):
|
|||||||
-----{}-----
|
-----{}-----
|
||||||
|
|
||||||
Test"""
|
Test"""
|
||||||
eq_('Test reply', quotations.extract_from_plain(msg_body.format(six.text_type(original_message_indicator))))
|
eq_('Test reply', quotations.extract_from_plain(
|
||||||
|
msg_body.format(six.text_type(original_message_indicator))))
|
||||||
|
|
||||||
def test_english_original_message():
|
def test_english_original_message():
|
||||||
_check_pattern_original_message('Original Message')
|
_check_pattern_original_message('Original Message')
|
||||||
@@ -165,6 +166,17 @@ Test reply"""
|
|||||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
|
||||||
|
def test_android_wrote():
|
||||||
|
msg_body = """Test reply
|
||||||
|
|
||||||
|
---- John Smith wrote ----
|
||||||
|
|
||||||
|
> quoted
|
||||||
|
> text
|
||||||
|
"""
|
||||||
|
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
|
||||||
def test_reply_wraps_quotations():
|
def test_reply_wraps_quotations():
|
||||||
msg_body = """Test reply
|
msg_body = """Test reply
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,9 @@ def test_unicode():
|
|||||||
|
|
||||||
def test_detect_encoding():
|
def test_detect_encoding():
|
||||||
eq_ ('ascii', u.detect_encoding(b'qwe').lower())
|
eq_ ('ascii', u.detect_encoding(b'qwe').lower())
|
||||||
eq_ ('iso-8859-2', u.detect_encoding(u'Versi\xf3n'.encode('iso-8859-2')).lower())
|
ok_ (u.detect_encoding(
|
||||||
|
u'Versi\xf3n'.encode('iso-8859-2')).lower() in [
|
||||||
|
'iso-8859-1', 'iso-8859-2'])
|
||||||
eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
|
eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
|
||||||
# fallback to utf-8
|
# fallback to utf-8
|
||||||
with patch.object(u.chardet, 'detect') as detect:
|
with patch.object(u.chardet, 'detect') as detect:
|
||||||
@@ -39,7 +41,9 @@ def test_detect_encoding():
|
|||||||
|
|
||||||
def test_quick_detect_encoding():
|
def test_quick_detect_encoding():
|
||||||
eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
|
eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
|
||||||
eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower())
|
ok_ (u.quick_detect_encoding(
|
||||||
|
u'Versi\xf3n'.encode('windows-1252')).lower() in [
|
||||||
|
'windows-1252', 'windows-1250'])
|
||||||
eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
|
eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user