Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
f16ae5110b | ||
|
|
ab5cbe5ec3 | ||
|
|
be5da92f16 | ||
|
|
95954a65a0 | ||
|
|
0b55e8fa77 | ||
|
|
6f159e8959 | ||
|
|
5c413b4b00 | ||
|
|
cca64d3ed1 | ||
|
|
e11eaf6ff8 | ||
|
|
85a4c1d855 |
2
setup.py
2
setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
|
|||||||
|
|
||||||
|
|
||||||
setup(name='talon',
|
setup(name='talon',
|
||||||
version='1.3.5',
|
version='1.3.7',
|
||||||
description=("Mailgun library "
|
description=("Mailgun library "
|
||||||
"to extract message quotations and signatures."),
|
"to extract message quotations and signatures."),
|
||||||
long_description=open("README.rst").read(),
|
long_description=open("README.rst").read(),
|
||||||
|
|||||||
@@ -146,6 +146,14 @@ RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format(
|
|||||||
'wrote'
|
'wrote'
|
||||||
))), re.I)
|
))), re.I)
|
||||||
|
|
||||||
|
# Support polymail.io reply format
|
||||||
|
# On Tue, Apr 11, 2017 at 10:07 PM John Smith
|
||||||
|
#
|
||||||
|
# <
|
||||||
|
# mailto:John Smith <johnsmith@gmail.com>
|
||||||
|
# > wrote:
|
||||||
|
RE_POLYMAIL = re.compile('On.*\s{2}<\smailto:.*\s> wrote:', re.I)
|
||||||
|
|
||||||
SPLITTER_PATTERNS = [
|
SPLITTER_PATTERNS = [
|
||||||
RE_ORIGINAL_MESSAGE,
|
RE_ORIGINAL_MESSAGE,
|
||||||
RE_ON_DATE_SMB_WROTE,
|
RE_ON_DATE_SMB_WROTE,
|
||||||
@@ -162,7 +170,8 @@ SPLITTER_PATTERNS = [
|
|||||||
'( \S+){3,6}@\S+:'),
|
'( \S+){3,6}@\S+:'),
|
||||||
# Sent from Samsung MobileName <address@example.com> wrote:
|
# Sent from Samsung MobileName <address@example.com> wrote:
|
||||||
re.compile('Sent from Samsung .*@.*> wrote'),
|
re.compile('Sent from Samsung .*@.*> wrote'),
|
||||||
RE_ANDROID_WROTE
|
RE_ANDROID_WROTE,
|
||||||
|
RE_POLYMAIL
|
||||||
]
|
]
|
||||||
|
|
||||||
RE_LINK = re.compile('<(http://[^>]*)>')
|
RE_LINK = re.compile('<(http://[^>]*)>')
|
||||||
@@ -170,7 +179,7 @@ RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')
|
|||||||
|
|
||||||
RE_PARENTHESIS_LINK = re.compile("\(https?://")
|
RE_PARENTHESIS_LINK = re.compile("\(https?://")
|
||||||
|
|
||||||
SPLITTER_MAX_LINES = 4
|
SPLITTER_MAX_LINES = 6
|
||||||
MAX_LINES_COUNT = 1000
|
MAX_LINES_COUNT = 1000
|
||||||
# an extensive research shows that exceeding this limit
|
# an extensive research shows that exceeding this limit
|
||||||
# leads to excessive processing time
|
# leads to excessive processing time
|
||||||
|
|||||||
@@ -35,6 +35,19 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:
|
|||||||
|
|
||||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
def test_pattern_on_date_polymail():
|
||||||
|
msg_body = """Test reply
|
||||||
|
|
||||||
|
On Tue, Apr 11, 2017 at 10:07 PM John Smith
|
||||||
|
|
||||||
|
<
|
||||||
|
mailto:John Smith <johnsmith@gmail.com>
|
||||||
|
> wrote:
|
||||||
|
Test quoted data
|
||||||
|
"""
|
||||||
|
|
||||||
|
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
|
||||||
def test_pattern_sent_from_samsung_smb_wrote():
|
def test_pattern_sent_from_samsung_smb_wrote():
|
||||||
msg_body = """Test reply
|
msg_body = """Test reply
|
||||||
@@ -54,7 +67,7 @@ def test_pattern_on_date_wrote_somebody():
|
|||||||
"""Lorem
|
"""Lorem
|
||||||
|
|
||||||
Op 13-02-2014 3:18 schreef Julius Caesar <pantheon@rome.com>:
|
Op 13-02-2014 3:18 schreef Julius Caesar <pantheon@rome.com>:
|
||||||
|
|
||||||
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
|
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
@@ -256,7 +269,7 @@ def test_with_indent():
|
|||||||
|
|
||||||
------On 12/29/1987 17:32 PM, Julius Caesar wrote-----
|
------On 12/29/1987 17:32 PM, Julius Caesar wrote-----
|
||||||
|
|
||||||
Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur.
|
Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur.
|
||||||
"""
|
"""
|
||||||
eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.", quotations.extract_from_plain(msg_body))
|
eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
@@ -381,11 +394,11 @@ Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny p
|
|||||||
|
|
||||||
def test_dutch_from_block():
|
def test_dutch_from_block():
|
||||||
eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
|
eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
|
||||||
"""Gluten-free culpa lo-fi et nesciunt nostrud.
|
"""Gluten-free culpa lo-fi et nesciunt nostrud.
|
||||||
|
|
||||||
Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende geschreven:
|
Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende geschreven:
|
||||||
|
|
||||||
Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
|
Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,9 @@ def test_unicode():
|
|||||||
|
|
||||||
def test_detect_encoding():
|
def test_detect_encoding():
|
||||||
eq_ ('ascii', u.detect_encoding(b'qwe').lower())
|
eq_ ('ascii', u.detect_encoding(b'qwe').lower())
|
||||||
eq_ ('iso-8859-2', u.detect_encoding(u'Versi\xf3n'.encode('iso-8859-2')).lower())
|
ok_ (u.detect_encoding(
|
||||||
|
u'Versi\xf3n'.encode('iso-8859-2')).lower() in [
|
||||||
|
'iso-8859-1', 'iso-8859-2'])
|
||||||
eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
|
eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
|
||||||
# fallback to utf-8
|
# fallback to utf-8
|
||||||
with patch.object(u.chardet, 'detect') as detect:
|
with patch.object(u.chardet, 'detect') as detect:
|
||||||
|
|||||||
Reference in New Issue
Block a user