Merge pull request #35 from futuresimple/more_formats
Support some Polish and French formats
This commit is contained in:
@@ -23,14 +23,33 @@ log = logging.getLogger(__name__)
|
|||||||
RE_FWD = re.compile("^[-]+[ ]*Forwarded message[ ]*[-]+$", re.I | re.M)
|
RE_FWD = re.compile("^[-]+[ ]*Forwarded message[ ]*[-]+$", re.I | re.M)
|
||||||
|
|
||||||
RE_ON_DATE_SMB_WROTE = re.compile(
|
RE_ON_DATE_SMB_WROTE = re.compile(
|
||||||
r'''
|
u'(-*[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):)'.format(
|
||||||
(
|
# Beginning of the line
|
||||||
-* # could include dashes
|
u'|'.join((
|
||||||
[ ]?On[ ].*, # date part ends with comma
|
# English
|
||||||
(.*\n){0,2} # splitter takes 4 lines at most
|
'On',
|
||||||
.*(wrote|sent):
|
# French
|
||||||
)
|
'Le',
|
||||||
''', re.VERBOSE)
|
# Polish
|
||||||
|
'W dniu'
|
||||||
|
)),
|
||||||
|
# Date and sender separator
|
||||||
|
u'|'.join((
|
||||||
|
# most languages separate date and sender address by comma
|
||||||
|
',',
|
||||||
|
# Polish date and sender address separator
|
||||||
|
u'użytkownik'
|
||||||
|
)),
|
||||||
|
# Ending of the line
|
||||||
|
u'|'.join((
|
||||||
|
# English
|
||||||
|
'wrote', 'sent',
|
||||||
|
# French
|
||||||
|
u'a écrit',
|
||||||
|
# Polish
|
||||||
|
u'napisał'
|
||||||
|
))
|
||||||
|
))
|
||||||
|
|
||||||
RE_QUOTATION = re.compile(
|
RE_QUOTATION = re.compile(
|
||||||
r'''
|
r'''
|
||||||
@@ -78,12 +97,12 @@ RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
|
|||||||
'Oprindelig meddelelse',
|
'Oprindelig meddelelse',
|
||||||
))), re.I)
|
))), re.I)
|
||||||
|
|
||||||
RE_FROM_COLON_OR_DATE_COLON = re.compile('(_+\r?\n)?[\s]*(:?[*]?{}):[*]? .*'.format(
|
RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format(
|
||||||
'|'.join((
|
u'|'.join((
|
||||||
# "From" in different languages.
|
# "From" in different languages.
|
||||||
'From', 'Van', 'De', 'Von', 'Fra',
|
'From', 'Van', 'De', 'Von', 'Fra',
|
||||||
# "Date" in different languages.
|
# "Date" in different languages.
|
||||||
'Date', 'Datum',
|
'Date', 'Datum', u'Envoyé'
|
||||||
))), re.I)
|
))), re.I)
|
||||||
|
|
||||||
SPLITTER_PATTERNS = [
|
SPLITTER_PATTERNS = [
|
||||||
|
|||||||
@@ -202,6 +202,24 @@ On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
|
|||||||
eq_("Hi", quotations.extract_from_plain(msg_body))
|
eq_("Hi", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
|
||||||
|
def test_short_quotation_with_newline():
|
||||||
|
msg_body = """Btw blah blah...
|
||||||
|
|
||||||
|
On Tue, Jan 27, 2015 at 12:42 PM -0800, "Company" <christine.XXX@XXX.com> wrote:
|
||||||
|
|
||||||
|
Hi Mark,
|
||||||
|
Blah blah?
|
||||||
|
Thanks,Christine
|
||||||
|
|
||||||
|
On Jan 27, 2015, at 11:55 AM, Mark XXX <mark@XXX.com> wrote:
|
||||||
|
|
||||||
|
Lorem ipsum?
|
||||||
|
Mark
|
||||||
|
|
||||||
|
Sent from Acompli"""
|
||||||
|
eq_("Btw blah blah...", quotations.extract_from_plain(msg_body))
|
||||||
|
|
||||||
|
|
||||||
def test_pattern_date_email_with_unicode():
|
def test_pattern_date_email_with_unicode():
|
||||||
msg_body = """Replying ok
|
msg_body = """Replying ok
|
||||||
2011/4/7 Nathan \xd0\xb8ova <support@example.com>
|
2011/4/7 Nathan \xd0\xb8ova <support@example.com>
|
||||||
@@ -233,6 +251,36 @@ Betreff: The manager has commented on your Loop
|
|||||||
Blah-blah-blah
|
Blah-blah-blah
|
||||||
"""))
|
"""))
|
||||||
|
|
||||||
|
def test_french_multiline_from_block():
|
||||||
|
eq_('Lorem ipsum', quotations.extract_from_plain(
|
||||||
|
u"""Lorem ipsum
|
||||||
|
|
||||||
|
De : Brendan xxx [mailto:brendan.xxx@xxx.com]
|
||||||
|
Envoyé : vendredi 23 janvier 2015 16:39
|
||||||
|
À : Camille XXX
|
||||||
|
Objet : Follow Up
|
||||||
|
|
||||||
|
Blah-blah-blah
|
||||||
|
"""))
|
||||||
|
|
||||||
|
def test_french_from_block():
|
||||||
|
eq_('Lorem ipsum', quotations.extract_from_plain(
|
||||||
|
u"""Lorem ipsum
|
||||||
|
|
||||||
|
Le 23 janv. 2015 à 22:03, Brendan xxx <brendan.xxx@xxx.com<mailto:brendan.xxx@xxx.com>> a écrit:
|
||||||
|
|
||||||
|
Bonjour!"""))
|
||||||
|
|
||||||
|
def test_polish_from_block():
|
||||||
|
eq_('Lorem ipsum', quotations.extract_from_plain(
|
||||||
|
u"""Lorem ipsum
|
||||||
|
|
||||||
|
W dniu 28 stycznia 2015 01:53 użytkownik Zoe xxx <zoe.xxx@xxx.com>
|
||||||
|
napisał:
|
||||||
|
|
||||||
|
Blah!
|
||||||
|
"""))
|
||||||
|
|
||||||
def test_danish_from_block():
|
def test_danish_from_block():
|
||||||
eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
|
eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
|
||||||
"""Allo! Follow up MIME!
|
"""Allo! Follow up MIME!
|
||||||
|
|||||||
Reference in New Issue
Block a user