Merge pull request #62 from tgwizard/better-support-for-scandinavian-languages
Add better support for Scandinavian languages
This commit is contained in:
@@ -34,7 +34,11 @@ RE_ON_DATE_SMB_WROTE = re.compile(
|
||||
# Dutch
|
||||
'Op',
|
||||
# German
|
||||
'Am'
|
||||
'Am',
|
||||
# Norwegian
|
||||
u'På',
|
||||
# Swedish, Danish
|
||||
'Den',
|
||||
)),
|
||||
# Date and sender separator
|
||||
u'|'.join((
|
||||
@@ -54,7 +58,9 @@ RE_ON_DATE_SMB_WROTE = re.compile(
|
||||
# Dutch
|
||||
'schreef','verzond','geschreven',
|
||||
# German
|
||||
'schrieb'
|
||||
'schrieb',
|
||||
# Norwegian, Swedish
|
||||
'skrev',
|
||||
))
|
||||
))
|
||||
# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
|
||||
@@ -125,9 +131,9 @@ RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format(
|
||||
# Case-insensitive (re.I) pattern for a header line that starts with a
# "From" or "Date" keyword in one of the listed languages, followed by a
# colon and a space; a preceding line of underscores is optional.
# NOTE(review): `(:?` opens a capturing group that begins with an optional
# literal ':' -- it looks like a typo for the non-capturing `(?:`; confirm
# before changing, since group numbering may be relied on elsewhere.
# NOTE(review): each keyword row appears twice below, presumably the old and
# the new line of a rendered diff -- confirm which one is current.
RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format(
|
||||
u'|'.join((
|
||||
# "From" in different languages.
|
||||
'From', 'Van', 'De', 'Von', 'Fra',
|
||||
'From', 'Van', 'De', 'Von', 'Fra', u'Från',
|
||||
# "Date" in different languages.
|
||||
'Date', 'Datum', u'Envoyé'
|
||||
'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt',
|
||||
))), re.I)
|
||||
|
||||
SPLITTER_PATTERNS = [
|
||||
|
||||
@@ -311,6 +311,33 @@ Emne: The manager has commented on your Loop
|
||||
Blah-blah-blah
|
||||
"""))
|
||||
|
||||
# Asserts that quotations.extract_from_plain() returns only the reply text
# 'Allo! Follow up MIME!' when the message continues with a Swedish header
# block (Från: / Skickat: / Till: / Ämne:) and the text after it.
def test_swedish_from_block():
|
||||
eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
|
||||
u"""Allo! Follow up MIME!
|
||||
Från: Anno Sportel [mailto:anno.spoel@hsbcssad.com]
|
||||
Skickat: den 26 augusti 2015 14:45
|
||||
Till: Isacson Leiff
|
||||
Ämne: RE: Week 36
|
||||
|
||||
Blah-blah-blah
|
||||
"""))
|
||||
|
||||
# Asserts that quotations.extract_from_plain() returns only 'Lorem' when the
# message continues with a Swedish attribution line of the form
# "Den <date>, <sender> skrev:" and the text after it.
def test_swedish_from_line():
|
||||
eq_('Lorem', quotations.extract_from_plain(
|
||||
"""Lorem
|
||||
Den 14 september, 2015 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
|
||||
|
||||
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
|
||||
"""))
|
||||
|
||||
# Asserts that quotations.extract_from_plain() returns only 'Lorem' when the
# message continues with a Norwegian attribution line of the form
# "På <date> på <time>, <sender> skrev:" and the text after it.
def test_norwegian_from_line():
|
||||
eq_('Lorem', quotations.extract_from_plain(
|
||||
u"""Lorem
|
||||
På 14 september 2015 på 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
|
||||
|
||||
Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
|
||||
"""))
|
||||
|
||||
def test_dutch_from_block():
|
||||
eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
|
||||
"""Gluten-free culpa lo-fi et nesciunt nostrud.
|
||||
|
||||
Reference in New Issue
Block a user