Adding in_header_block variable for reliability
This commit is contained in:
@@ -468,23 +468,37 @@ def split_emails(msg):
|
|||||||
# don't process too long messages
|
# don't process too long messages
|
||||||
lines = msg_body.splitlines()[:MAX_LINES_COUNT]
|
lines = msg_body.splitlines()[:MAX_LINES_COUNT]
|
||||||
markers = mark_message_lines(lines)
|
markers = mark_message_lines(lines)
|
||||||
|
|
||||||
# we don't want splitlines in header blocks
|
# we don't want splitlines in header blocks
|
||||||
markers = correct_splitlines_in_headers(markers, lines)
|
markers = _correct_splitlines_in_headers(markers, lines)
|
||||||
|
|
||||||
return markers
|
return markers
|
||||||
|
|
||||||
|
|
||||||
def correct_splitlines_in_headers(markers, lines):
|
def _correct_splitlines_in_headers(markers, lines):
|
||||||
"""Corrects markers by removing splitlines deemed to be inside header blocks"""
|
"""Corrects markers by removing splitlines deemed to be inside header blocks"""
|
||||||
updated_markers = ""
|
updated_markers = ""
|
||||||
i = -1
|
i = 0
|
||||||
|
in_header_block = False
|
||||||
|
|
||||||
for m in markers:
|
for m in markers:
|
||||||
|
# Only set in_header_block flag true when we hit an 's' and the line is a header.
|
||||||
if m == 's':
|
if m == 's':
|
||||||
if i > -1:
|
if not in_header_block:
|
||||||
if bool(re.search(RE_HEADER, lines[i])):
|
if i == 0:
|
||||||
|
in_header_block = True
|
||||||
|
elif i > 0 and not bool(re.search(RE_HEADER, lines[i-1])):
|
||||||
|
in_header_block = True
|
||||||
|
else:
|
||||||
|
m = 't'
|
||||||
|
else:
|
||||||
m = 't'
|
m = 't'
|
||||||
|
|
||||||
|
# If the line is not a header line, set in_header_block false.
|
||||||
|
if not m == 's' and not bool(re.search(RE_HEADER, lines[i])):
|
||||||
|
in_header_block = False
|
||||||
|
|
||||||
|
# Add the marker to the new updated markers string.
|
||||||
updated_markers += m
|
updated_markers += m
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user