Adding in_header_block variable for reliability

This commit is contained in:
smitcona
2016-11-22 19:06:34 +00:00
parent 31489848be
commit 97b72ef767

View File

@@ -468,23 +468,37 @@ def split_emails(msg):
# don't process too long messages # don't process too long messages
lines = msg_body.splitlines()[:MAX_LINES_COUNT] lines = msg_body.splitlines()[:MAX_LINES_COUNT]
markers = mark_message_lines(lines) markers = mark_message_lines(lines)
# we don't want splitlines in header blocks # we don't want splitlines in header blocks
markers = correct_splitlines_in_headers(markers, lines) markers = _correct_splitlines_in_headers(markers, lines)
return markers return markers
def correct_splitlines_in_headers(markers, lines): def _correct_splitlines_in_headers(markers, lines):
"""Corrects markers by removing splitlines deemed to be inside header blocks""" """Corrects markers by removing splitlines deemed to be inside header blocks"""
updated_markers = "" updated_markers = ""
i = -1 i = 0
in_header_block = False
for m in markers: for m in markers:
# Only set in_header_block flag true when we hit an 's' and the line is a header.
if m == 's': if m == 's':
if i > -1: if not in_header_block:
if bool(re.search(RE_HEADER, lines[i])): if i == 0:
in_header_block = True
elif i > 0 and not bool(re.search(RE_HEADER, lines[i-1])):
in_header_block = True
else:
m = 't' m = 't'
else:
m = 't'
# If the line is not a header line, set in_header_block false.
if not m == 's' and not bool(re.search(RE_HEADER, lines[i])):
in_header_block = False
# Add the marker to the new updated markers string.
updated_markers += m updated_markers += m
i += 1 i += 1