do not parse html quotations if html is longer than a certain threshold

This commit is contained in:
Sergey Obukhov
2016-08-09 17:08:58 -07:00
parent 10d9a930f9
commit 4ee46c0a97

View File

@@ -164,6 +164,7 @@ RE_PARENTHESIS_LINK = re.compile("\(https?://")
# NOTE(review): presumably the maximum number of lines a reply splitter may
# span -- confirm against the code that reads it.
SPLITTER_MAX_LINES = 4
# NOTE(review): presumably an upper bound on the number of message lines that
# get processed -- confirm against the code that reads it.
MAX_LINES_COUNT = 1000
# HTML bodies whose len() exceeds this value are returned unparsed by
# _extract_from_html instead of being searched for quotations.
MAX_HTML_LEN = 2794202
# Leading quotation marker: one or more '>' at the start of the string,
# optionally followed by a single space.
QUOT_PATTERN = re.compile(r'^>+ ?')
# A line whose first character is not '>' and that has at least one
# non-whitespace character after that first character.
# Raw string: '\S' is an invalid escape sequence in a normal string literal.
NO_QUOT_LINE = re.compile(r'^[^>].*[\S].*')
@@ -382,6 +383,9 @@ def _extract_from_html(msg_body):
then checking deleted checkpoints,
then deleting necessary tags.
"""
if len(msg_body) > MAX_HTML_LEN:
return msg_body
if msg_body.strip() == b'':
return msg_body