Add extra splitter expressions and tests for German and Danish.
Also refactor the splitter patterns to make it a bit easier to add more languages.
This commit is contained in:
@@ -66,16 +66,32 @@ RE_EMPTY_QUOTATION = re.compile(
|
||||
e*
|
||||
''', re.VERBOSE)
|
||||
|
||||
# Matches a dashed separator line that introduces a quoted message, e.g.
#   ------Original Message------   or   ---- Reply Message ----
# together with the German and Danish headings listed below.
# Group 1 captures the heading text that was found between the dashes.
# To support another language, add its heading(s) to the tuple.
RE_ORIGINAL_MESSAGE = re.compile(
    # The backslash is doubled so the literal carries no invalid "\s"
    # string escape; the regex engine still receives [\s]*.
    u'[\\s]*[-]+[ ]*({})[ ]*[-]+'.format(
        u'|'.join((
            # English
            u'Original Message', u'Reply Message',
            # German
            u'Ursprüngliche Nachricht', u'Antwort Nachricht',
            # Danish
            u'Oprindelig meddelelse',
        ))),
    # re.U is needed on Python 2 for re.I to fold non-ASCII letters such as
    # "ü"/"Ü"; it is redundant but harmless for str patterns on Python 3.
    re.I | re.U)
|
||||
|
||||
# Matches the first line of a quoted-header block such as
#   From: somebody@example.com      *Von:* somebody@example.com
#   Date: ...                       Datum: ...
# optionally preceded by a line of underscores.
# Group 1 is the optional underscore line (including its line break);
# group 2 is the header word together with any leading ":" / "*".
# To support another language, add its "From"/"Date" word to the tuple.
RE_FROM_COLON_OR_DATE_COLON = re.compile(
    # The alternatives are wrapped in a non-capturing group so that the
    # optional ":" / "*" prefix and the trailing ":" apply to every word,
    # not just to the first alternative. Group numbering is unaffected.
    r'(_+\r?\n)?[\s]*(:?[*]?(?:{})):[*]? .*'.format(
        '|'.join((
            # "From" in different languages.
            'From', 'Van', 'De', 'Von', 'Fra',
            # "Date" in different languages.
            'Date', 'Datum',
        ))),
    re.I)
|
||||
|
||||
SPLITTER_PATTERNS = [
|
||||
# ------Original Message------ or ---- Reply Message ----
|
||||
re.compile("[\s]*[-]+[ ]*(Original|Reply) Message[ ]*[-]+", re.I),
|
||||
RE_ORIGINAL_MESSAGE,
|
||||
# <date> <person>
|
||||
re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.VERBOSE),
|
||||
RE_ON_DATE_SMB_WROTE,
|
||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?From|Date):[*]? .*'),
|
||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?Van|Datum):[*]? .*'),
|
||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?De|Date):[*]? .*'),
|
||||
re.compile('(_+\r?\n)?[\s]*(:?[*]?Von|Datum):[*]? .*'),
|
||||
RE_FROM_COLON_OR_DATE_COLON,
|
||||
re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
|
||||
'( \S+){3,6}@\S+:')
|
||||
]
|
||||
|
||||
@@ -99,21 +99,21 @@ bla-bla - bla"""
|
||||
|
||||
|
||||
def test_pattern_original_message():
|
||||
msg_body = """Test reply
|
||||
languages = (
|
||||
'Original Message', # English
|
||||
'Reply Message',
|
||||
u'Ursprüngliche Nachricht', # German
|
||||
'Antwort Nachricht',
|
||||
'Oprindelig meddelelse', # Danish
|
||||
)
|
||||
msg_body = u"""Test reply
|
||||
|
||||
-----Original Message-----
|
||||
-----{}-----
|
||||
|
||||
Test"""
|
||||
|
||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||
|
||||
msg_body = """Test reply
|
||||
|
||||
-----Original Message-----
|
||||
|
||||
Test"""
|
||||
|
||||
eq_("Test reply", quotations.extract_from_plain(msg_body))
|
||||
for language in languages:
|
||||
eq_("Test reply", quotations.extract_from_plain(msg_body.format(unicode(language))))
|
||||
|
||||
|
||||
def test_reply_after_quotations():
|
||||
@@ -209,7 +209,7 @@ def test_pattern_date_email_with_unicode():
|
||||
|
||||
|
||||
def test_pattern_from_block():
|
||||
msg_body = """Allo! Follow up MIME!
|
||||
english = """Allo! Follow up MIME!
|
||||
|
||||
From: somebody@example.com
|
||||
Sent: March-19-11 5:42 PM
|
||||
@@ -218,7 +218,30 @@ Subject: The manager has commented on your Loop
|
||||
|
||||
Blah-blah-blah
|
||||
"""
|
||||
eq_("Allo! Follow up MIME!", quotations.extract_from_plain(msg_body))
|
||||
|
||||
german = """Allo! Follow up MIME!
|
||||
|
||||
Von: somebody@example.com
|
||||
Gesendet: Dienstag, 25. November 2014 14:59
|
||||
An: Somebody
|
||||
Betreff: The manager has commented on your Loop
|
||||
|
||||
Blah-blah-blah
|
||||
"""
|
||||
|
||||
danish = """Allo! Follow up MIME!
|
||||
|
||||
Fra: somebody@example.com
|
||||
Sendt: 19. march 2011 12:10
|
||||
Til: Somebody
|
||||
Emne: The manager has commented on your Loop
|
||||
|
||||
|
||||
Blah-blah-blah
|
||||
"""
|
||||
|
||||
for language in (english, german, danish):
|
||||
eq_("Allo! Follow up MIME!", quotations.extract_from_plain(language))
|
||||
|
||||
|
||||
def test_quotation_marker_false_positive():
|
||||
|
||||
Reference in New Issue
Block a user