Merge pull request #62 from tgwizard/better-support-for-scandinavian-languages

Add better support for Scandinavian languages
This commit is contained in:
Sergey Obukhov
2015-10-14 21:48:10 -07:00
2 changed files with 37 additions and 4 deletions

View File

@@ -34,7 +34,11 @@ RE_ON_DATE_SMB_WROTE = re.compile(
# Dutch
'Op',
# German
'Am'
'Am',
# Norwegian
u'',
# Swedish, Danish
'Den',
)),
# Date and sender separator
u'|'.join((
@@ -54,7 +58,9 @@ RE_ON_DATE_SMB_WROTE = re.compile(
# Dutch
'schreef','verzond','geschreven',
# German
'schrieb'
'schrieb',
# Norwegian, Swedish
'skrev',
))
))
# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
@@ -125,9 +131,9 @@ RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format(
u'|'.join((
# "From" in different languages.
'From', 'Van', 'De', 'Von', 'Fra',
'From', 'Van', 'De', 'Von', 'Fra', u'Från',
# "Date" in different languages.
'Date', 'Datum', u'Envoyé'
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
))), re.I)
SPLITTER_PATTERNS = [

View File

@@ -311,6 +311,33 @@ Emne: The manager has commented on your Loop
Blah-blah-blah
"""))
def test_swedish_from_block():
eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
u"""Allo! Follow up MIME!
Från: Anno Sportel [mailto:anno.spoel@hsbcssad.com]
Skickat: den 26 augusti 2015 14:45
Till: Isacson Leiff
Ämne: RE: Week 36
Blah-blah-blah
"""))
def test_swedish_from_line():
eq_('Lorem', quotations.extract_from_plain(
"""Lorem
Den 14 september, 2015 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
"""))
def test_norwegian_from_line():
eq_('Lorem', quotations.extract_from_plain(
u"""Lorem
På 14 september 2015 på 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
"""))
def test_dutch_from_block():
eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
"""Gluten-free culpa lo-fi et nesciunt nostrud.