Support some Polish and French formats

This commit is contained in:
szymonsobczak
2015-02-24 11:39:12 +01:00
parent 3768d7ba31
commit b16060261a
2 changed files with 75 additions and 8 deletions

View File

@@ -23,14 +23,33 @@ log = logging.getLogger(__name__)
RE_FWD = re.compile("^[-]+[ ]*Forwarded message[ ]*[-]+$", re.I | re.M)
# Pattern for the attribution line that introduces quoted text in a reply,
# e.g. "On <date>, <sender> wrote:".
# NOTE(review): this hunk appears to show two versions of the pattern back to
# back without +/- diff markers -- the English-only VERBOSE literal and the
# multilingual one assembled with str.format(). Confirm against the real file
# which lines are actually present.
RE_ON_DATE_SMB_WROTE = re.compile(
r'''
(
-* # could include dashes
[ ]?On[ ].*, # date part ends with comma
(.*\n){0,2} # splitter takes 4 lines at most
.*(wrote|sent):
)
''', re.VERBOSE)
# Multilingual form: {0} is the opening word, {1} the date/sender separator
# and {2} the closing verb, each filled with a '|'-joined alternation.
# The doubled braces in `{{0,2}}` are str.format() escapes, so the compiled
# pattern still contains the `{0,2}` quantifier.
# Alternatives listed below allow lines such as
# "Le <date>, <sender> a écrit:" or
# "W dniu <date> użytkownik <sender> napisał:".
u'(-*[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):)'.format(
# Beginning of the line
u'|'.join((
# English
'On',
# French
'Le',
# Polish
'W dniu'
)),
# Date and sender separator
u'|'.join((
# most languages separate date and sender address by comma
',',
# Polish date and sender address separator
u'użytkownik'
)),
# Ending of the line
u'|'.join((
# English
'wrote', 'sent',
# French
u'a écrit',
# Polish
u'napisał'
))
))
RE_QUOTATION = re.compile(
r'''