Files
talon/tests/text_quotations_test.py
Jeremy Schlatter 613d1fc815 Add extra splitter expressions and tests for German and Danish.
Also some refactoring to make it a bit easier to add more languages.
2014-12-23 15:44:04 -08:00

558 lines
14 KiB
Python

# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
import os
from flanker import mime
from talon import quotations
@patch.object(quotations, 'MAX_LINES_COUNT', 1)
def test_too_many_lines():
msg_body = """Test reply
-----Original Message-----
Test"""
eq_(msg_body, quotations.extract_from_plain(msg_body))
def test_pattern_on_date_somebody_wrote():
msg_body = """Test reply
On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:
>
> Test
>
> Roman"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_pattern_on_date_somebody_wrote_date_with_slashes():
msg_body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test.
>
> Roman"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_pattern_on_date_somebody_wrote_allows_space_in_front():
msg_body = """Thanks Thanmai
On Mar 8, 2012 9:59 AM, "Example.com" <
r+7f1b094ceb90e18cca93d53d3703feae@example.com> wrote:
>**
> Blah-blah-blah"""
eq_("Thanks Thanmai", quotations.extract_from_plain(msg_body))
def test_pattern_on_date_somebody_sent():
msg_body = """Test reply
On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> sent:
>
> Test
>
> Roman"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_line_starts_with_on():
msg_body = """Blah-blah-blah
On blah-blah-blah"""
eq_(msg_body, quotations.extract_from_plain(msg_body))
def test_reply_and_quotation_splitter_share_line():
# reply lines and 'On <date> <person> wrote:' splitter pattern
# are on the same line
msg_body = """reply On Wed, Apr 4, 2012 at 3:59 PM, bob@example.com wrote:
> Hi"""
eq_('reply', quotations.extract_from_plain(msg_body))
# test pattern '--- On <date> <person> wrote:' with reply text on
# the same line
msg_body = """reply--- On Wed, Apr 4, 2012 at 3:59 PM, me@domain.com wrote:
> Hi"""
eq_('reply', quotations.extract_from_plain(msg_body))
# test pattern '--- On <date> <person> wrote:' with reply text containing
# '-' symbol
msg_body = """reply
bla-bla - bla--- On Wed, Apr 4, 2012 at 3:59 PM, me@domain.com wrote:
> Hi"""
reply = """reply
bla-bla - bla"""
eq_(reply, quotations.extract_from_plain(msg_body))
def test_pattern_original_message():
languages = (
'Original Message', # English
'Reply Message',
u'Ursprüngliche Nachricht', # German
'Antwort Nachricht',
'Oprindelig meddelelse', # Danish
)
msg_body = u"""Test reply
-----{}-----
Test"""
for language in languages:
eq_("Test reply", quotations.extract_from_plain(msg_body.format(unicode(language))))
def test_reply_after_quotations():
msg_body = """On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test
Test reply"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_reply_wraps_quotations():
msg_body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test
Regards, Roman"""
reply = """Test reply
Regards, Roman"""
eq_(reply, quotations.extract_from_plain(msg_body))
def test_reply_wraps_nested_quotations():
msg_body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>Test test
>On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
>>
>> Test.
>>
>> Roman
Regards, Roman"""
reply = """Test reply
Regards, Roman"""
eq_(reply, quotations.extract_from_plain(msg_body))
def test_quotation_separator_takes_2_lines():
msg_body = """Test reply
On Fri, May 6, 2011 at 6:03 PM, Roman Tkachenko from Hacker News
<roman@definebox.com> wrote:
> Test.
>
> Roman
Regards, Roman"""
reply = """Test reply
Regards, Roman"""
eq_(reply, quotations.extract_from_plain(msg_body))
def test_quotation_separator_takes_3_lines():
msg_body = """Test reply
On Nov 30, 2011, at 12:47 PM, Somebody <
416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4@somebody.domain.com>
wrote:
Test message
"""
eq_("Test reply", quotations.extract_from_plain(msg_body))
def test_short_quotation():
msg_body = """Hi
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
> Hello"""
eq_("Hi", quotations.extract_from_plain(msg_body))
def test_pattern_date_email_with_unicode():
msg_body = """Replying ok
2011/4/7 Nathan \xd0\xb8ova <support@example.com>
> Cool beans, scro"""
eq_("Replying ok", quotations.extract_from_plain(msg_body))
def test_pattern_from_block():
english = """Allo! Follow up MIME!
From: somebody@example.com
Sent: March-19-11 5:42 PM
To: Somebody
Subject: The manager has commented on your Loop
Blah-blah-blah
"""
german = """Allo! Follow up MIME!
Von: somebody@example.com
Gesendet: Dienstag, 25. November 2014 14:59
An: Somebody
Betreff: The manager has commented on your Loop
Blah-blah-blah
"""
danish = """Allo! Follow up MIME!
Fra: somebody@example.com
Sendt: 19. march 2011 12:10
Til: Somebody
Emne: The manager has commented on your Loop
Blah-blah-blah
"""
for language in (english, german, danish):
eq_("Allo! Follow up MIME!", quotations.extract_from_plain(language))
def test_quotation_marker_false_positive():
msg_body = """Visit us now for assistance...
>>> >>> http://www.domain.com <<<
Visit our site by clicking the link above"""
eq_(msg_body, quotations.extract_from_plain(msg_body))
def test_link_closed_with_quotation_marker_on_new_line():
msg_body = '''8.45am-1pm
From: somebody@example.com
<http://email.example.com/c/dHJhY2tpbmdfY29kZT1mMDdjYzBmNzM1ZjYzMGIxNT
> <bob@example.com <mailto:bob@example.com> >
Requester: '''
eq_('8.45am-1pm', quotations.extract_from_plain(msg_body))
def test_link_breaks_quotation_markers_sequence():
# link starts and ends on the same line
msg_body = """Blah
On Thursday, October 25, 2012 at 3:03 PM, life is short. on Bob wrote:
>
> Post a response by replying to this email
>
(http://example.com/c/YzOTYzMmE) >
> life is short. (http://example.com/c/YzMmE)
>
"""
eq_("Blah", quotations.extract_from_plain(msg_body))
# link starts after some text on one line and ends on another
msg_body = """Blah
On Monday, 24 September, 2012 at 3:46 PM, bob wrote:
> [Ticket #50] test from bob
>
> View ticket (http://example.com/action
_nonce=3dd518)
>
"""
eq_("Blah", quotations.extract_from_plain(msg_body))
def test_from_block_starts_with_date():
msg_body = """Blah
Date: Wed, 16 May 2012 00:15:02 -0600
To: klizhentas@example.com"""
eq_('Blah', quotations.extract_from_plain(msg_body))
def test_bold_from_block():
msg_body = """Hi
*From:* bob@example.com [mailto:
bob@example.com]
*Sent:* Wednesday, June 27, 2012 3:05 PM
*To:* travis@example.com
*Subject:* Hello
"""
eq_("Hi", quotations.extract_from_plain(msg_body))
def test_weird_date_format_in_date_block():
msg_body = """Blah
Date: Fri=2C 28 Sep 2012 10:55:48 +0000
From: tickets@example.com
To: bob@example.com
Subject: [Ticket #8] Test
"""
eq_('Blah', quotations.extract_from_plain(msg_body))
def test_dont_parse_quotations_for_forwarded_messages():
msg_body = """FYI
---------- Forwarded message ----------
From: bob@example.com
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: rob@example.com
Text"""
eq_(msg_body, quotations.extract_from_plain(msg_body))
def test_forwarded_message_in_quotations():
msg_body = """Blah
-----Original Message-----
FYI
---------- Forwarded message ----------
From: bob@example.com
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: rob@example.com
"""
eq_("Blah", quotations.extract_from_plain(msg_body))
def test_mark_message_lines():
# e - empty line
# s - splitter line
# m - line starting with quotation marker '>'
# t - the rest
lines = ['Hello', '',
# next line should be marked as splitter
'_____________',
'From: foo@bar.com',
'',
'> Hi',
'',
'Signature']
eq_('tessemet', quotations.mark_message_lines(lines))
lines = ['Just testing the email reply',
'',
'Robert J Samson',
'Sent from my iPhone',
'',
# all 3 next lines should be marked as splitters
'On Nov 30, 2011, at 12:47 PM, Skapture <',
('416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4'
'@skapture-staging.mailgun.org>'),
'wrote:',
'',
'Tarmo Lehtpuu has posted the following message on']
eq_('tettessset', quotations.mark_message_lines(lines))
def test_process_marked_lines():
# quotations and last message lines are mixed
# consider all to be a last message
markers = 'tsemmtetm'
lines = [str(i) for i in range(len(markers))]
lines = [str(i) for i in range(len(markers))]
eq_(lines, quotations.process_marked_lines(lines, markers))
# no splitter => no markers
markers = 'tmm'
lines = ['1', '2', '3']
eq_(['1', '2', '3'], quotations.process_marked_lines(lines, markers))
# text after splitter without markers is quotation
markers = 'tst'
lines = ['1', '2', '3']
eq_(['1'], quotations.process_marked_lines(lines, markers))
# message + quotation + signature
markers = 'tsmt'
lines = ['1', '2', '3', '4']
eq_(['1', '4'], quotations.process_marked_lines(lines, markers))
# message + <quotation without markers> + nested quotation
markers = 'tstsmt'
lines = ['1', '2', '3', '4', '5', '6']
eq_(['1'], quotations.process_marked_lines(lines, markers))
# test links wrapped with paranthesis
# link starts on the marker line
markers = 'tsmttem'
lines = ['text',
'splitter',
'>View (http://example.com',
'/abc',
')',
'',
'> quote']
eq_(lines[:1], quotations.process_marked_lines(lines, markers))
# link starts on the new line
markers = 'tmmmtm'
lines = ['text',
'>'
'>',
'>',
'(http://example.com) > ',
'> life is short. (http://example.com) '
]
eq_(lines[:1], quotations.process_marked_lines(lines, markers))
# check all "inline" replies
markers = 'tsmtmtm'
lines = ['text',
'splitter',
'>',
'(http://example.com)',
'>',
'inline reply',
'>']
eq_(lines, quotations.process_marked_lines(lines, markers))
# inline reply with link not wrapped in paranthesis
markers = 'tsmtm'
lines = ['text',
'splitter',
'>',
'inline reply with link http://example.com',
'>']
eq_(lines, quotations.process_marked_lines(lines, markers))
# inline reply with link wrapped in paranthesis
markers = 'tsmtm'
lines = ['text',
'splitter',
'>',
'inline reply (http://example.com)',
'>']
eq_(lines, quotations.process_marked_lines(lines, markers))
def test_preprocess():
msg = ('Hello\n'
'See <http://google.com\n'
'> for more\n'
'information On Nov 30, 2011, at 12:47 PM, Somebody <\n'
'416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
'@example.com>'
'wrote:\n'
'\n'
'> Hi')
# test the link is rewritten
# 'On <date> <person> wrote:' pattern starts from a new line
prepared_msg = ('Hello\n'
'See @@http://google.com\n'
'@@ for more\n'
'information\n'
' On Nov 30, 2011, at 12:47 PM, Somebody <\n'
'416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
'@example.com>'
'wrote:\n'
'\n'
'> Hi')
eq_(prepared_msg, quotations.preprocess(msg, '\n'))
msg = """
> <http://teemcl.mailgun.org/u/**aD1mZmZiNGU5ODQwMDNkZWZlMTExNm**
> MxNjQ4Y2RmOTNlMCZyPXNlcmdleS5v**YnlraG92JTQwbWFpbGd1bmhxLmNvbS**
> Z0PSUyQSZkPWUwY2U<http://example.org/u/aD1mZmZiNGU5ODQwMDNkZWZlMTExNmMxNjQ4Y>
"""
eq_(msg, quotations.preprocess(msg, '\n'))
# 'On <date> <person> wrote' shouldn't be spread across too many lines
msg = ('Hello\n'
'How are you? On Nov 30, 2011, at 12:47 PM,\n '
'Example <\n'
'416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
'@example.org>'
'wrote:\n'
'\n'
'> Hi')
eq_(msg, quotations.preprocess(msg, '\n'))
msg = ('Hello On Nov 30, smb wrote:\n'
'Hi\n'
'On Nov 29, smb wrote:\n'
'hi')
prepared_msg = ('Hello\n'
' On Nov 30, smb wrote:\n'
'Hi\n'
'On Nov 29, smb wrote:\n'
'hi')
eq_(prepared_msg, quotations.preprocess(msg, '\n'))
def test_preprocess_postprocess_2_links():
msg_body = "<http://link1> <http://link2>"
eq_(msg_body, quotations.extract_from_plain(msg_body))
def test_standard_replies():
for filename in os.listdir(STANDARD_REPLIES):
filename = os.path.join(STANDARD_REPLIES, filename)
if os.path.isdir(filename):
continue
with open(filename) as f:
msg = f.read()
m = mime.from_string(msg)
for part in m.walk():
if part.content_type == 'text/plain':
text = part.body
stripped_text = quotations.extract_from_plain(text)
reply_text_fn = filename[:-4] + '_reply_text'
if os.path.isfile(reply_text_fn):
with open(reply_text_fn) as f:
reply_text = f.read()
else:
reply_text = 'Hello'
eq_(reply_text, stripped_text,
"'%(reply)s' != %(stripped)s for %(fn)s" %
{'reply': reply_text, 'stripped': stripped_text,
'fn': filename})