Add better support for Scandinavian languages

This is a port of https://github.com/tictail/claw/pull/6 by @simonflore.
This commit is contained in:
Adam Renberg
2015-09-21 21:41:59 +02:00
parent d62d633215
commit 14e3a0d80b
2 changed files with 37 additions and 4 deletions

View File

@@ -34,7 +34,11 @@ RE_ON_DATE_SMB_WROTE = re.compile(
# Dutch
'Op',
# German
'Am'
'Am',
# Norwegian
u'På',
# Swedish, Danish
'Den',
)),
# Date and sender separator
u'|'.join((
@@ -54,7 +58,9 @@ RE_ON_DATE_SMB_WROTE = re.compile(
# Dutch
'schreef','verzond','geschreven',
# German
'schrieb'
'schrieb',
# Norwegian, Swedish
'skrev',
))
))
# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
@@ -125,9 +131,9 @@ RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format(
u'|'.join((
# "From" in different languages.
'From', 'Van', 'De', 'Von', 'Fra',
'From', 'Van', 'De', 'Von', 'Fra', u'Från',
# "Date" in different languages.
'Date', 'Datum', u'Envoyé'
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
))), re.I)
SPLITTER_PATTERNS = [