do not parse html quotations if html is longer than a certain threshold

2016-08-09 17:08:58 -07:00
parent 10d9a930f9
commit 4ee46c0a97
1 changed files with 4 additions and 0 deletions
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -164,6 +164,7 @@ RE_PARENTHESIS_LINK = re.compile("\(https?://")

 SPLITTER_MAX_LINES = 4
 MAX_LINES_COUNT = 1000
+MAX_HTML_LEN = 2794202

 QUOT_PATTERN = re.compile('^>+ ?')
 NO_QUOT_LINE = re.compile('^[^>].*[\S].*')
@@ -382,6 +383,9 @@ def _extract_from_html(msg_body):
    then checking deleted checkpoints,
    then deleting necessary tags.
    """
+    if len(msg_body) > MAX_HTML_LEN:
+        return msg_body
+
    if msg_body.strip() == b'':
        return msg_body