Add extra splitter expressions and tests for German and Danish.
Also refactor the splitter patterns to make it a bit easier to add more languages.
This commit is contained in:
@@ -66,16 +66,32 @@ RE_EMPTY_QUOTATION = re.compile(
|
||||
e*
|
||||
''', re.VERBOSE)
|
||||
|
||||
# ------Original Message------ or ---- Reply Message ----
# With variations in other languages.
#
# Matches optional leading whitespace, a run of dashes, one of the phrases
# listed below (case-insensitively) and a closing run of dashes.  Group 1 is
# the phrase that matched.  The backslash is doubled because the pattern is a
# plain (non-raw) unicode literal; the compiled pattern text is unchanged.
RE_ORIGINAL_MESSAGE = re.compile(u'[\\s]*[-]+[ ]*({})[ ]*[-]+'.format(
    u'|'.join((
        # English
        u'Original Message', u'Reply Message',
        # German
        u'Ursprüngliche Nachricht', u'Antwort Nachricht',
        # Danish
        u'Oprindelig meddelelse',
    ))), re.I)
|
||||
|
||||
# Header-style lines such as "From: ..." or "Date: ...", optionally preceded
# by a line of underscores, in several languages (matched case-insensitively).
#
# The keywords are wrapped in their own non-capturing group so that the
# optional leading ':' / '*' applies to every keyword rather than only to the
# first alternative; group numbering is unaffected (group 2 is still the
# keyword together with its optional prefix).
RE_FROM_COLON_OR_DATE_COLON = re.compile(
    r'(_+\r?\n)?[\s]*(:?[*]?(?:{})):[*]? .*'.format(
        '|'.join((
            # "From" in different languages.
            'From', 'Van', 'De', 'Von', 'Fra',
            # "Date" in different languages.
            'Date', 'Datum',
        ))), re.I)
|
||||
|
||||
# Patterns that mark the start of quoted/forwarded content.  Every entry and
# its position is kept as-is; only the literals were turned into raw strings
# so the regex escapes (\s, \d, \S, \.) are no longer invalid string escapes.
SPLITTER_PATTERNS = [
    # ------Original Message------ or ---- Reply Message ----
    re.compile(r"[\s]*[-]+[ ]*(Original|Reply) Message[ ]*[-]+", re.I),
    RE_ORIGINAL_MESSAGE,
    # <date> <person>
    re.compile(r"(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.VERBOSE),
    RE_ON_DATE_SMB_WROTE,
    # NOTE(review): the four case-sensitive literal patterns below overlap
    # with RE_FROM_COLON_OR_DATE_COLON and the literal "Original|Reply"
    # pattern above overlaps with RE_ORIGINAL_MESSAGE -- confirm whether the
    # literals are still needed before removing them.
    re.compile(r'(_+\r?\n)?[\s]*(:?[*]?From|Date):[*]? .*'),
    re.compile(r'(_+\r?\n)?[\s]*(:?[*]?Van|Datum):[*]? .*'),
    re.compile(r'(_+\r?\n)?[\s]*(:?[*]?De|Date):[*]? .*'),
    re.compile(r'(_+\r?\n)?[\s]*(:?[*]?Von|Datum):[*]? .*'),
    RE_FROM_COLON_OR_DATE_COLON,
    # <weekday>, <day> <month> 20xx[,] <hh:mm[:ss]> <3-6 words>@<...>:
    re.compile(r'\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
               r'( \S+){3,6}@\S+:'),
]
|
||||
|
||||
Reference in New Issue
Block a user