Add extra splitter expressions and tests for German and Danish.
Also refactor the splitter patterns to make it easier to add more languages.
This commit is contained in:
@@ -66,16 +66,32 @@ RE_EMPTY_QUOTATION = re.compile(
|
|||||||
e*
|
e*
|
||||||
''', re.VERBOSE)
|
''', re.VERBOSE)
|
||||||
|
|
||||||
|
# ------Original Message------ or ---- Reply Message ----
|
||||||
|
# With variations in other languages.
|
||||||
|
RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
|
||||||
|
u'|'.join((
|
||||||
|
# English
|
||||||
|
'Original Message', 'Reply Message',
|
||||||
|
# German
|
||||||
|
u'Ursprüngliche Nachricht', 'Antwort Nachricht',
|
||||||
|
# Danish
|
||||||
|
'Oprindelig meddelelse',
|
||||||
|
))), re.I)
|
||||||
|
|
||||||
|
RE_FROM_COLON_OR_DATE_COLON = re.compile('(_+\r?\n)?[\s]*(:?[*]?{}):[*]? .*'.format(
|
||||||
|
'|'.join((
|
||||||
|
# "From" in different languages.
|
||||||
|
'From', 'Van', 'De', 'Von', 'Fra',
|
||||||
|
# "Date" in different languages.
|
||||||
|
'Date', 'Datum',
|
||||||
|
))), re.I)
|
||||||
|
|
||||||
SPLITTER_PATTERNS = [
|
SPLITTER_PATTERNS = [
|
||||||
# ------Original Message------ or ---- Reply Message ----
|
RE_ORIGINAL_MESSAGE,
|
||||||
re.compile("[\s]*[-]+[ ]*(Original|Reply) Message[ ]*[-]+", re.I),
|
|
||||||
# <date> <person>
|
# <date> <person>
|
||||||
re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.VERBOSE),
|
re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.VERBOSE),
|
||||||
RE_ON_DATE_SMB_WROTE,
|
RE_ON_DATE_SMB_WROTE,
|
||||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?From|Date):[*]? .*'),
|
RE_FROM_COLON_OR_DATE_COLON,
|
||||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?Van|Datum):[*]? .*'),
|
|
||||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?De|Date):[*]? .*'),
|
|
||||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?Von|Datum):[*]? .*'),
|
|
||||||
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
|
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
|
||||||
'( \S+){3,6}@\S+:')
|
'( \S+){3,6}@\S+:')
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -99,21 +99,21 @@ bla-bla - bla"""
|
|||||||
|
|
||||||
|
|
||||||
def test_pattern_original_message():
|
def test_pattern_original_message():
|
||||||
msg_body = """Test reply
|
languages = (
|
||||||
|
'Original Message', # English
|
||||||
|
'Reply Message',
|
||||||
|
u'Ursprüngliche Nachricht', # German
|
||||||
|
'Antwort Nachricht',
|
||||||
|
'Oprindelig meddelelse', # Danish
|
||||||
|
)
|
||||||
|
msg_body = u"""Test reply
|
||||||
|
|
||||||
-----Original Message-----
|
-----{}-----
|
||||||
|
|
||||||
Test"""
|
Test"""
|
||||||
|
|
||||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
for language in languages:
|
||||||
|
eq_("Test reply", quotations.extract_from_plain(msg_body.format(unicode(language))))
|
||||||
msg_body = """Test reply
|
|
||||||
|
|
||||||
-----Original Message-----
|
|
||||||
|
|
||||||
Test"""
|
|
||||||
|
|
||||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
|
||||||
|
|
||||||
|
|
||||||
def test_reply_after_quotations():
|
def test_reply_after_quotations():
|
||||||
@@ -209,7 +209,7 @@ def test_pattern_date_email_with_unicode():
|
|||||||
|
|
||||||
|
|
||||||
def test_pattern_from_block():
|
def test_pattern_from_block():
|
||||||
msg_body = """Allo! Follow up MIME!
|
english = """Allo! Follow up MIME!
|
||||||
|
|
||||||
From: somebody@example.com
|
From: somebody@example.com
|
||||||
Sent: March-19-11 5:42 PM
|
Sent: March-19-11 5:42 PM
|
||||||
@@ -218,7 +218,30 @@ Subject: The manager has commented on your Loop
|
|||||||
|
|
||||||
Blah-blah-blah
|
Blah-blah-blah
|
||||||
"""
|
"""
|
||||||
eq_("Allo! Follow up MIME!", quotations.extract_from_plain(msg_body))
|
|
||||||
|
german = """Allo! Follow up MIME!
|
||||||
|
|
||||||
|
Von: somebody@example.com
|
||||||
|
Gesendet: Dienstag, 25. November 2014 14:59
|
||||||
|
An: Somebody
|
||||||
|
Betreff: The manager has commented on your Loop
|
||||||
|
|
||||||
|
Blah-blah-blah
|
||||||
|
"""
|
||||||
|
|
||||||
|
danish = """Allo! Follow up MIME!
|
||||||
|
|
||||||
|
Fra: somebody@example.com
|
||||||
|
Sendt: 19. march 2011 12:10
|
||||||
|
Til: Somebody
|
||||||
|
Emne: The manager has commented on your Loop
|
||||||
|
|
||||||
|
|
||||||
|
Blah-blah-blah
|
||||||
|
"""
|
||||||
|
|
||||||
|
for language in (english, german, danish):
|
||||||
|
eq_("Allo! Follow up MIME!", quotations.extract_from_plain(language))
|
||||||
|
|
||||||
|
|
||||||
def test_quotation_marker_false_positive():
|
def test_quotation_marker_false_positive():
|
||||||
|
|||||||
Reference in New Issue
Block a user