Add a new method that marks lines as splitlines when they are splitter lines but begin with email quotation indents ("> ").
This commit is contained in:
@@ -469,12 +469,33 @@ def split_emails(msg):
|
|||||||
lines = msg_body.splitlines()[:MAX_LINES_COUNT]
|
lines = msg_body.splitlines()[:MAX_LINES_COUNT]
|
||||||
markers = mark_message_lines(lines)
|
markers = mark_message_lines(lines)
|
||||||
|
|
||||||
|
markers = _mark_quoted_email_splitlines(markers, lines)
|
||||||
|
|
||||||
# we don't want splitlines in header blocks
|
# we don't want splitlines in header blocks
|
||||||
markers = _correct_splitlines_in_headers(markers, lines)
|
markers = _correct_splitlines_in_headers(markers, lines)
|
||||||
|
|
||||||
return markers
|
return markers
|
||||||
|
|
||||||
|
|
||||||
|
def _mark_quoted_email_splitlines(markers, lines):
    """
    Re-mark 'm' lines that match a splitter pattern as splitlines ('s').

    Intended for headers indented with '>' characters: each line whose
    current marker is 'm' is searched against every pattern in
    SPLITTER_PATTERNS, and on the first hit its marker becomes 's'.
    Markers of all other lines are left as they are.

    Returns the updated markers joined back into a single string.
    """
    # Work on a list so that individual marker characters can be replaced.
    updated = list(markers)
    for index, text in enumerate(lines):
        # Only lines currently marked 'm' are candidates for re-marking.
        if updated[index] == 'm' and any(
                re.search(splitter, text) for splitter in SPLITTER_PATTERNS):
            updated[index] = 's'

    return "".join(updated)
|
||||||
|
|
||||||
|
|
||||||
def _correct_splitlines_in_headers(markers, lines):
|
def _correct_splitlines_in_headers(markers, lines):
|
||||||
"""Corrects markers by removing splitlines deemed to be inside header blocks"""
|
"""Corrects markers by removing splitlines deemed to be inside header blocks"""
|
||||||
updated_markers = ""
|
updated_markers = ""
|
||||||
|
|||||||
Reference in New Issue
Block a user