diff --git a/talon/quotations.py b/talon/quotations.py index d699acd..0aaa23b 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -34,7 +34,11 @@ RE_ON_DATE_SMB_WROTE = re.compile( # Dutch 'Op', # German - 'Am' + 'Am', + # Norwegian + u'På', + # Swedish, Danish + 'Den', )), # Date and sender separator u'|'.join(( @@ -54,7 +58,9 @@ RE_ON_DATE_SMB_WROTE = re.compile( # Dutch 'schreef','verzond','geschreven', # German - 'schrieb' + 'schrieb', + # Norwegian, Swedish + 'skrev', )) )) # Special case for languages where text is translated like this: 'on {date} wrote {somebody}:' @@ -125,9 +131,9 @@ RE_ORIGINAL_MESSAGE = re.compile(u'[\s]*[-]+[ ]*({})[ ]*[-]+'.format( RE_FROM_COLON_OR_DATE_COLON = re.compile(u'(_+\r?\n)?[\s]*(:?[*]?{})[\s]?:[*]? .*'.format( u'|'.join(( # "From" in different languages. - 'From', 'Van', 'De', 'Von', 'Fra', + 'From', 'Van', 'De', 'Von', 'Fra', u'Från', # "Date" in different languages. - 'Date', 'Datum', u'Envoyé' + 'Date', 'Datum', u'Envoyé', 'Skickat', 'Sendt', ))), re.I) SPLITTER_PATTERNS = [ diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py index 36dc8a5..ae481e1 100644 --- a/tests/text_quotations_test.py +++ b/tests/text_quotations_test.py @@ -311,6 +311,33 @@ Emne: The manager has commented on your Loop Blah-blah-blah """)) +def test_swedish_from_block(): + eq_('Allo! Follow up MIME!', quotations.extract_from_plain( + u"""Allo! Follow up MIME! +Från: Anno Sportel [mailto:anno.spoel@hsbcssad.com] +Skickat: den 26 augusti 2015 14:45 +Till: Isacson Leiff +Ämne: RE: Week 36 + +Blah-blah-blah +""")) + +def test_swedish_from_line(): + eq_('Lorem', quotations.extract_from_plain( + """Lorem +Den 14 september, 2015 02:23:18, Valentino Rudy (valentino@rudy.be) skrev: + +Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse. +""")) + +def test_norwegian_from_line(): + eq_('Lorem', quotations.extract_from_plain( + u"""Lorem +På 14 september 2015 på 02:23:18, Valentino Rudy (valentino@rudy.be) skrev: + +Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse. +""")) + def test_dutch_from_block(): eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain( """Gluten-free culpa lo-fi et nesciunt nostrud.