From e11eaf6ff8394e89136c2861915ee9dc985c4f2b Mon Sep 17 00:00:00 2001 From: Ethan Setnik Date: Tue, 11 Apr 2017 22:38:29 -0400 Subject: [PATCH 1/3] add support for polymail reply format --- talon/quotations.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/talon/quotations.py b/talon/quotations.py index 6016310..69f25f2 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -146,6 +146,14 @@ RE_ANDROID_WROTE = re.compile(u'[\s]*[-]+.*({})[ ]*[-]+'.format( 'wrote' ))), re.I) +# Support polymail.io reply format +# On Tue, Apr 11, 2017 at 10:07 PM John Smith +# +# < +# mailto:John Smith +# > wrote: +RE_POLYMAIL = re.compile('On.*\s{2}<\smailto:.*\s> wrote:', re.I) + SPLITTER_PATTERNS = [ RE_ORIGINAL_MESSAGE, RE_ON_DATE_SMB_WROTE, @@ -162,7 +170,8 @@ SPLITTER_PATTERNS = [ '( \S+){3,6}@\S+:'), # Sent from Samsung MobileName wrote: re.compile('Sent from Samsung .*@.*> wrote'), - RE_ANDROID_WROTE + RE_ANDROID_WROTE, + RE_POLYMAIL ] RE_LINK = re.compile('<(http://[^>]*)>') From cca64d3ed194ea57bd490c9c9d39810b57caa89b Mon Sep 17 00:00:00 2001 From: Ethan Setnik Date: Tue, 11 Apr 2017 23:36:36 -0400 Subject: [PATCH 2/3] add test case --- tests/text_quotations_test.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py index 7a81c99..c02c375 100644 --- a/tests/text_quotations_test.py +++ b/tests/text_quotations_test.py @@ -35,6 +35,19 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko wrote: eq_("Test reply", quotations.extract_from_plain(msg_body)) +def test_pattern_on_date_polymail(): + msg_body = """Test reply + +On Tue, Apr 11, 2017 at 10:07 PM John Smith + +< +mailto:John Smith +> wrote: +Test quoted data +""" + + eq_("Test reply", quotations.extract_from_plain(msg_body)) + def test_pattern_sent_from_samsung_smb_wrote(): msg_body = """Test reply @@ -54,7 +67,7 @@ def test_pattern_on_date_wrote_somebody(): """Lorem Op 13-02-2014 3:18 schreef Julius Caesar : - + Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse. """)) @@ -256,7 +269,7 @@ def test_with_indent(): ------On 12/29/1987 17:32 PM, Julius Caesar wrote----- -Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur. +Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur. """ eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.", quotations.extract_from_plain(msg_body)) @@ -381,11 +394,11 @@ Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny p def test_dutch_from_block(): eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain( - """Gluten-free culpa lo-fi et nesciunt nostrud. + """Gluten-free culpa lo-fi et nesciunt nostrud. Op 17-feb.-2015, om 13:18 heeft Julius Caesar het volgende geschreven: - -Small batch beard laboris tempor, non listicle hella Tumblr heirloom. + +Small batch beard laboris tempor, non listicle hella Tumblr heirloom. """)) From 5c413b4b00f3c378f85b4ddfea82cbf1a96a83fa Mon Sep 17 00:00:00 2001 From: Ethan Setnik Date: Wed, 12 Apr 2017 00:07:29 -0400 Subject: [PATCH 3/3] allow more lines since polymail has extra whitespace --- talon/quotations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/talon/quotations.py b/talon/quotations.py index 69f25f2..9286209 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -179,7 +179,7 @@ RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@') RE_PARENTHESIS_LINK = re.compile("\(https?://") -SPLITTER_MAX_LINES = 4 +SPLITTER_MAX_LINES = 6 MAX_LINES_COUNT = 1000 # an extensive research shows that exceeding this limit # leads to excessive processing time