do not parse html quotations if html is longer then certain threshold
This commit is contained in:
@@ -164,6 +164,7 @@ RE_PARENTHESIS_LINK = re.compile("\(https?://")
|
|||||||
|
|
||||||
SPLITTER_MAX_LINES = 4
|
SPLITTER_MAX_LINES = 4
|
||||||
MAX_LINES_COUNT = 1000
|
MAX_LINES_COUNT = 1000
|
||||||
|
MAX_HTML_LEN = 2794202
|
||||||
|
|
||||||
QUOT_PATTERN = re.compile('^>+ ?')
|
QUOT_PATTERN = re.compile('^>+ ?')
|
||||||
NO_QUOT_LINE = re.compile('^[^>].*[\S].*')
|
NO_QUOT_LINE = re.compile('^[^>].*[\S].*')
|
||||||
@@ -382,6 +383,9 @@ def _extract_from_html(msg_body):
|
|||||||
then checking deleted checkpoints,
|
then checking deleted checkpoints,
|
||||||
then deleting necessary tags.
|
then deleting necessary tags.
|
||||||
"""
|
"""
|
||||||
|
if len(msg_body) > MAX_HTML_LEN:
|
||||||
|
return msg_body
|
||||||
|
|
||||||
if msg_body.strip() == b'':
|
if msg_body.strip() == b'':
|
||||||
return msg_body
|
return msg_body
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user