From f0ed5d6c07a7baa90f0964e1d5efe0595683ceda Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Tue, 14 Apr 2015 18:22:48 +0200
Subject: [PATCH 1/7] New splitter pattern for Dutch mail replies

---
 talon/quotations.py | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/talon/quotations.py b/talon/quotations.py
index 292b39a..742f1dc 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -31,7 +31,9 @@ RE_ON_DATE_SMB_WROTE = re.compile(
             # French
             'Le',
             # Polish
-            'W dniu'
+            'W dniu',
+            # Dutch
+            'Op'
         )),
         # Date and sender separator
         u'|'.join((
@@ -47,9 +49,26 @@ RE_ON_DATE_SMB_WROTE = re.compile(
             # French
             u'a écrit',
             # Polish
-            u'napisał'
+            u'napisał',
+            # Dutch
+            'schreef','verzond'
         ))
     ))
+# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
+RE_ON_DATE_WROTE_SMB = re.compile(
+    u'(-*[ ]?({0}).*(.*\n){{0,2}}.*({1}).*:)'.format(
+        # Beginning of the line
+        u'|'.join((
+        	# Dutch
+            'Op'
+        )),
+        # Ending of the line
+        u'|'.join((
+            # Dutch
+            'schreef'
+        ))
+    )
+    )
 
 RE_QUOTATION = re.compile(
     r'''
@@ -110,6 +129,7 @@ SPLITTER_PATTERNS = [
     # <date> <person>
     re.compile("(\d+/\d+/\d+|\d+\.\d+\.\d+).*@", re.VERBOSE),
     RE_ON_DATE_SMB_WROTE,
+    RE_ON_DATE_WROTE_SMB,
     RE_FROM_COLON_OR_DATE_COLON,
     re.compile('\S{3,10}, \d\d? \S{3,10} 20\d\d,? \d\d?:\d\d(:\d\d)?'
                '( \S+){3,6}@\S+:')

From df3338192a1de189a2cf56cfe7c32b0eda0a3d75 Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Tue, 14 Apr 2015 18:49:26 +0200
Subject: [PATCH 2/7] Another submission for a Dutch variation

---
 talon/quotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/talon/quotations.py b/talon/quotations.py
index 742f1dc..ba4ac16 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -51,7 +51,7 @@ RE_ON_DATE_SMB_WROTE = re.compile(
             # Polish
             u'napisał',
             # Dutch
-            'schreef','verzond'
+            'schreef','verzond','geschreven'
         ))
     ))
 # Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'

From 105d16644de1dbb526752f3972ea547dfee8460b Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Tue, 14 Apr 2015 18:52:45 +0200
Subject: [PATCH 3/7] For patterns like this '---- On {date} {name} {mail}
 wrote ---- '

---
 talon/quotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/talon/quotations.py b/talon/quotations.py
index ba4ac16..28914f2 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -23,7 +23,7 @@ log = logging.getLogger(__name__)
 RE_FWD = re.compile("^[-]+[ ]*Forwarded message[ ]*[-]+$", re.I | re.M)
 
 RE_ON_DATE_SMB_WROTE = re.compile(
-    u'(-*[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):)'.format(
+    u'(-*[ ]?({0})[ ].*({1})(.*\n){{0,2}}.*({2}):?-*)'.format(
         # Beginning of the line
         u'|'.join((
             # English

From 072a4408379eb42c59cededdb6f8a1ceafd05fc0 Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Wed, 15 Apr 2015 13:55:17 +0200
Subject: [PATCH 4/7] Test cases for new patterns

---
 tests/text_quotations_test.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/tests/text_quotations_test.py b/tests/text_quotations_test.py
index b68b132..918ed29 100644
--- a/tests/text_quotations_test.py
+++ b/tests/text_quotations_test.py
@@ -33,6 +33,16 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:
     eq_("Test reply", quotations.extract_from_plain(msg_body))
 
 
+def test_pattern_on_date_wrote_somebody():
+    eq_('Lorem', quotations.extract_from_plain(
+    """Lorem
+
+Op 13-02-2014 3:18 schreef Julius Caesar <pantheon@rome.com>:
+    
+Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
+"""))
+
+
 def test_pattern_on_date_somebody_wrote_date_with_slashes():
     msg_body = """Test reply
 
@@ -201,6 +211,15 @@ On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
 > Hello"""
     eq_("Hi", quotations.extract_from_plain(msg_body))
 
+def test_with_indent():
+    msg_body = """YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.
+
+------On 12/29/1987 17:32 PM, Julius Caesar wrote-----
+
+Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur. 
+    """
+    eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.", quotations.extract_from_plain(msg_body))
+
 
 def test_short_quotation_with_newline():
     msg_body = """Btw blah blah...
@@ -293,6 +312,15 @@ Emne: The manager has commented on your Loop
 Blah-blah-blah
 """))
 
+def test_dutch_from_block():
+    eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
+    """Gluten-free culpa lo-fi et nesciunt nostrud. 
+
+Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende geschreven:
+    
+Small batch beard laboris tempor, non listicle hella Tumblr heirloom. 
+"""))
+
 
 def test_quotation_marker_false_positive():
     msg_body = """Visit us now for assistance...

From b4c180b9ff5e1a2b9bddb6dab80bf0551719ea25 Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Wed, 15 Apr 2015 13:55:59 +0200
Subject: [PATCH 5/7] Extra spaces check in RE_ON_DATE_WROTE_SMB regex

---
 talon/quotations.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/talon/quotations.py b/talon/quotations.py
index 28914f2..dc77fd4 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -56,16 +56,13 @@ RE_ON_DATE_SMB_WROTE = re.compile(
     ))
 # Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'
 RE_ON_DATE_WROTE_SMB = re.compile(
-    u'(-*[ ]?({0}).*(.*\n){{0,2}}.*({1}).*:)'.format(
+    u'(-*[ ]?({0})[ ].*(.*\n){{0,2}}.*({1})[ ].*:)'.format(
         # Beginning of the line
-        u'|'.join((
-        	# Dutch
-            'Op'
-        )),
+        	'Op',
         # Ending of the line
         u'|'.join((
             # Dutch
-            'schreef'
+            'schreef','verzond','geschreven'
         ))
     )
     )

From 84a83e865e0c2e2dcf36a4b1d0db1813ae64b1bf Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Thu, 16 Apr 2015 13:22:18 +0200
Subject: [PATCH 6/7] Change of behavior when msg_body has more than 1000 lines

---
 talon/quotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/talon/quotations.py b/talon/quotations.py
index dc77fd4..8e4f141 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -298,7 +298,7 @@ def extract_from_plain(msg_body):
 
     # don't process too long messages
     if len(lines) > MAX_LINES_COUNT:
-        return stripped_text
+        lines = stripped_text.split('\n', MAX_LINES_COUNT)
 
     markers = mark_message_lines(lines)
     lines = process_marked_lines(lines, markers)

From f9f428f4c3cb84d641095e1b3c35da6d8f1cbfe6 Mon Sep 17 00:00:00 2001
From: Simon <simon.flore.hf@gmail.com>
Date: Thu, 16 Apr 2015 13:26:17 +0200
Subject: [PATCH 7/7] Revert "Change of behavior when msg_body has more than
 1000 lines"

This reverts commit 84a83e865e0c2e2dcf36a4b1d0db1813ae64b1bf.
---
 talon/quotations.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/talon/quotations.py b/talon/quotations.py
index 8e4f141..dc77fd4 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -298,7 +298,7 @@ def extract_from_plain(msg_body):
 
     # don't process too long messages
     if len(lines) > MAX_LINES_COUNT:
-        lines = stripped_text.split('\n', MAX_LINES_COUNT)
+        return stripped_text
 
     markers = mark_message_lines(lines)
     lines = process_marked_lines(lines, markers)