diff --git a/talon/quotations.py b/talon/quotations.py index cdd22b1..db6e0dc 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -32,7 +32,9 @@ RE_ON_DATE_SMB_WROTE = re.compile( # Polish 'W dniu', # Dutch - 'Op' + 'Op', + # German + 'Am' )), # Date and sender separator u'|'.join(( @@ -50,18 +52,26 @@ RE_ON_DATE_SMB_WROTE = re.compile( # Polish u'napisaƂ', # Dutch - 'schreef','verzond','geschreven' + 'schreef','verzond','geschreven', + # German + 'schrieb' )) )) # Special case for languages where text is translated like this: 'on {date} wrote {somebody}:' RE_ON_DATE_WROTE_SMB = re.compile( - u'(-*[ ]?({0})[ ].*(.*\n){{0,2}}.*({1})[ ].*:)'.format( + u'(-*[ ]?({0})[ ].*(.*\n){{0,2}}.*({1})[ ]*.*:)'.format( # Beginning of the line + u'|'.join(( 'Op', + #German + 'Am' + )), # Ending of the line u'|'.join(( # Dutch - 'schreef','verzond','geschreven' + 'schreef','verzond','geschreven', + # German + 'schrieb' )) ) ) @@ -181,6 +191,7 @@ def mark_message_lines(lines): else: # in case splitter is spread across several lines splitter = is_splitter('\n'.join(lines[i:i + SPLITTER_MAX_LINES])) + if splitter: # append as many splitter markers as lines in splitter splitter_lines = splitter.group().splitlines() @@ -293,12 +304,8 @@ def extract_from_plain(msg_body): delimiter = get_delimiter(msg_body) msg_body = preprocess(msg_body, delimiter) - lines = msg_body.splitlines() - # don't process too long messages - if len(lines) > MAX_LINES_COUNT: - return stripped_text - + lines = msg_body.splitlines()[:MAX_LINES_COUNT] markers = mark_message_lines(lines) lines = process_marked_lines(lines, markers) diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py index a56c48d..36dc8a5 100644 --- a/tests/text_quotations_test.py +++ b/tests/text_quotations_test.py @@ -12,11 +12,11 @@ from talon import quotations @patch.object(quotations, 'MAX_LINES_COUNT', 1) def test_too_many_lines(): msg_body = """Test reply - +Hi -----Original Message----- Test""" - eq_(msg_body, quotations.extract_from_plain(msg_body)) + eq_("Test reply", quotations.extract_from_plain(msg_body)) def test_pattern_on_date_somebody_wrote():