From 139edd6104f6824a0b94a7577fd23d6ae7bf2d28 Mon Sep 17 00:00:00 2001
From: smitcona
Date: Wed, 1 Feb 2017 17:16:30 +0000
Subject: [PATCH] Add new method which marks as splitlines, lines which are splitlines but start with email quotation indents ("> ")

---
 talon/quotations.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/talon/quotations.py b/talon/quotations.py
index 8e2b2b7..514617f 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -469,12 +469,37 @@ def split_emails(msg):
     lines = msg_body.splitlines()[:MAX_LINES_COUNT]
     markers = mark_message_lines(lines)
 
+    markers = _mark_quoted_email_splitlines(markers, lines)
+
     # we don't want splitlines in header blocks
     markers = _correct_splitlines_in_headers(markers, lines)
 
     return markers
 
 
+def _mark_quoted_email_splitlines(markers, lines):
+    """Re-mark 'm' lines that contain a splitter pattern as 's'.
+
+    Splitter headers indented with '>' characters do not start with the
+    splitter text, so each SPLITTER_PATTERNS entry is tried with re.search,
+    which matches anywhere in the line.  The pattern list itself is used
+    unmodified.
+
+    ``markers`` holds one marker character per entry of ``lines`` (it must
+    be at least as long as ``lines``).  A new marker string is returned;
+    only 'm' markers can change.
+    """
+    # Strings are immutable, so the markers are edited through a list.
+    markerlist = list(markers)
+    for i, line in enumerate(lines):
+        if markerlist[i] != 'm':
+            continue
+        if any(re.search(pattern, line) for pattern in SPLITTER_PATTERNS):
+            markerlist[i] = 's'
+
+    return "".join(markerlist)
+
+
 def _correct_splitlines_in_headers(markers, lines):
     """Corrects markers by removing splitlines deemed to be inside header blocks"""
     updated_markers = ""