From 97b72ef767dba51e37275f5bf300d70a33b26026 Mon Sep 17 00:00:00 2001 From: smitcona Date: Tue, 22 Nov 2016 19:06:34 +0000 Subject: [PATCH] Adding in_header_block variable for reliability --- talon/quotations.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/talon/quotations.py b/talon/quotations.py index c8604d0..debf369 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -468,23 +468,37 @@ def split_emails(msg): # don't process too long messages lines = msg_body.splitlines()[:MAX_LINES_COUNT] markers = mark_message_lines(lines) + # we don't want splitlines in header blocks - markers = correct_splitlines_in_headers(markers, lines) + markers = _correct_splitlines_in_headers(markers, lines) return markers -def correct_splitlines_in_headers(markers, lines): +def _correct_splitlines_in_headers(markers, lines): """Corrects markers by removing splitlines deemed to be inside header blocks""" updated_markers = "" - i = -1 + i = 0 + in_header_block = False for m in markers: + # Only set in_header_block flag true when we hit an 's' and the line is a header. if m == 's': - if i > -1: - if bool(re.search(RE_HEADER, lines[i])): + if not in_header_block: + if i == 0: + in_header_block = True + elif i > 0 and not bool(re.search(RE_HEADER, lines[i-1])): + in_header_block = True + else: m = 't' + else: + m = 't' + # If the line is not a header line, set in_header_block false. + if not m == 's' and not bool(re.search(RE_HEADER, lines[i])): + in_header_block = False + + # Add the marker to the new updated markers string. updated_markers += m i += 1