fix appointments in text

This commit is contained in:
Sergey Obukhov
2017-10-23 16:24:20 -07:00
parent 60637ff13a
commit 0e6d5f993c
3 changed files with 36 additions and 4 deletions

View File

@@ -29,7 +29,7 @@ class InstallCommand(install):
setup(name='talon', setup(name='talon',
version='1.4.4', version='1.4.5',
description=("Mailgun library " description=("Mailgun library "
"to extract message quotations and signatures."), "to extract message quotations and signatures."),
long_description=open("README.rst").read(), long_description=open("README.rst").read(),

View File

@@ -165,15 +165,15 @@ SPLITTER_PATTERNS = [
RE_FROM_COLON_OR_DATE_COLON, RE_FROM_COLON_OR_DATE_COLON,
# 02.04.2012 14:20 пользователь "bob@example.com" < # 02.04.2012 14:20 пользователь "bob@example.com" <
# bob@xxx.mailgun.org> написал: # bob@xxx.mailgun.org> написал:
re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.S), re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*\s\S+@\S+", re.S),
# 2014-10-17 11:28 GMT+03:00 Bob < # 2014-10-17 11:28 GMT+03:00 Bob <
# bob@example.com>: # bob@example.com>:
re.compile("\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+GMT.*@", re.S), re.compile("\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+GMT.*\s\S+@\S+", re.S),
# Thu, 26 Jun 2014 14:00:51 +0400 Bob <bob@example.com>: # Thu, 26 Jun 2014 14:00:51 +0400 Bob <bob@example.com>:
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?' re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
'( \S+){3,6}@\S+:'), '( \S+){3,6}@\S+:'),
# Sent from Samsung MobileName <address@example.com> wrote: # Sent from Samsung MobileName <address@example.com> wrote:
re.compile('Sent from Samsung .*@.*> wrote'), re.compile('Sent from Samsung.* \S+@\S+> wrote'),
RE_ANDROID_WROTE, RE_ANDROID_WROTE,
RE_POLYMAIL RE_POLYMAIL
] ]

View File

@@ -119,6 +119,38 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> sent:
eq_("Test reply", quotations.extract_from_plain(msg_body)) eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_appointment():
msg_body = """Response
10/19/2017 @ 9:30 am for physical therapy
Bla
1517 4th Avenue Ste 300
London CA 19129, 555-421-6780
John Doe, FCLS
Mailgun Inc
555-941-0697
From: from@example.com [mailto:from@example.com]
Sent: Wednesday, October 18, 2017 2:05 PM
To: John Doer - SIU <jd@example.com>
Subject: RE: Claim # 5551188-1
Text"""
expected = """Response
10/19/2017 @ 9:30 am for physical therapy
Bla
1517 4th Avenue Ste 300
London CA 19129, 555-421-6780
John Doe, FCLS
Mailgun Inc
555-941-0697"""
eq_(expected, quotations.extract_from_plain(msg_body))
def test_line_starts_with_on(): def test_line_starts_with_on():
msg_body = """Blah-blah-blah msg_body = """Blah-blah-blah
On blah-blah-blah""" On blah-blah-blah"""