From 4ee46c0a977477e889209f3f5940f1f759f5bd5d Mon Sep 17 00:00:00 2001 From: Sergey Obukhov Date: Tue, 9 Aug 2016 17:08:58 -0700 Subject: [PATCH] do not parse html quotations if html is longer than a certain threshold --- talon/quotations.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/talon/quotations.py b/talon/quotations.py index d9dba21..f6122ff 100644 --- a/talon/quotations.py +++ b/talon/quotations.py @@ -164,6 +164,7 @@ RE_PARENTHESIS_LINK = re.compile("\(https?://") SPLITTER_MAX_LINES = 4 MAX_LINES_COUNT = 1000 +MAX_HTML_LEN = 2794202 QUOT_PATTERN = re.compile('^>+ ?') NO_QUOT_LINE = re.compile('^[^>].*[\S].*') @@ -382,6 +383,9 @@ def _extract_from_html(msg_body): then checking deleted checkpoints, then deleting necessary tags. """ + if len(msg_body) > MAX_HTML_LEN: + return msg_body + if msg_body.strip() == b'': return msg_body