Add a new method that marks lines as splitlines when they match a splitter pattern but start with email quotation indents ("> ").

This commit is contained in:
smitcona
2017-02-01 17:16:30 +00:00
parent 015c8d2a78
commit 139edd6104

View File

@@ -469,12 +469,33 @@ def split_emails(msg):
lines = msg_body.splitlines()[:MAX_LINES_COUNT] lines = msg_body.splitlines()[:MAX_LINES_COUNT]
markers = mark_message_lines(lines) markers = mark_message_lines(lines)
markers = _mark_quoted_email_splitlines(markers, lines)
# we don't want splitlines in header blocks # we don't want splitlines in header blocks
markers = _correct_splitlines_in_headers(markers, lines) markers = _correct_splitlines_in_headers(markers, lines)
return markers return markers
def _mark_quoted_email_splitlines(markers, lines):
    """Promote 'm' markers to 's' for lines that contain a splitter pattern.

    Each line whose marker is 'm' is searched against every regex in
    SPLITTER_PATTERNS. ``re.search`` is used, so a pattern can match
    anywhere in the line and not only at its start; a splitter header
    sitting behind a leading '>' quotation indent is therefore still found.
    Lines carrying any other marker are left untouched.

    NOTE(review): 'm' presumably denotes a line starting with an email
    quotation marker -- confirm against mark_message_lines.

    :param markers: sequence of one-character markers, one per line.
    :param lines: the message lines the markers describe.
    :return: the markers joined into a single string, with matching 'm'
        entries replaced by 's'.
    """
    # Work on a mutable copy so individual positions can be overwritten.
    updated = list(markers)
    for index, text in enumerate(lines):
        is_splitter = updated[index] == 'm' and any(
            re.search(splitter, text) for splitter in SPLITTER_PATTERNS
        )
        if is_splitter:
            updated[index] = 's'
    return "".join(updated)
def _correct_splitlines_in_headers(markers, lines): def _correct_splitlines_in_headers(markers, lines):
"""Corrects markers by removing splitlines deemed to be inside header blocks""" """Corrects markers by removing splitlines deemed to be inside header blocks"""
updated_markers = "" updated_markers = ""