diff --git a/setup.py b/setup.py
index 7bef4a0..27901de 100755
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
 
 
 setup(name='talon',
-      version='1.3.1',
+      version='1.3.2',
       description=("Mailgun library "
                    "to extract message quotations and signatures."),
       long_description=open("README.rst").read(),
diff --git a/talon/quotations.py b/talon/quotations.py
index 6f77124..7472b10 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -386,7 +386,7 @@ def _extract_from_html(msg_body):
     then checking deleted checkpoints,
     then deleting necessary tags.
     """
-    if len(msg_body) > MAX_HTML_LEN:
+    if _html_too_big(msg_body):
         return msg_body
 
     if msg_body.strip() == b'':
@@ -483,3 +483,19 @@ def register_xpath_extensions():
     ns.prefix = 'mg'
     ns['text_content'] = text_content
     ns['tail'] = tail
+
+
+def _html_too_big(msg_body):
+    """Return True if msg_body holds more than _MAX_TAGS_COUNT '<' chars.
+
+    The number of '<' characters is used as a cheap estimate of the tag
+    count. Both bytes and text are accepted: on Python 3 bytes.count()
+    rejects a text needle, so the needle is matched to msg_body's type.
+    """
+    needle = b'<' if isinstance(msg_body, (bytes, bytearray)) else u'<'
+    return msg_body.count(needle) > _MAX_TAGS_COUNT
+
+
+# Extensive research shows that exceeding this limit
+# might lead to excessive processing time.
+_MAX_TAGS_COUNT = 419
diff --git a/tests/html_quotations_test.py b/tests/html_quotations_test.py
index c087eef..b216710 100644
--- a/tests/html_quotations_test.py
+++ b/tests/html_quotations_test.py
@@ -385,7 +385,7 @@ def test_gmail_forwarded_msg():
     eq_(RE_WHITESPACE.sub('', msg_body), RE_WHITESPACE.sub('', extracted))
 
 
-@patch.object(quotations, 'MAX_HTML_LEN', 1)
+@patch.object(quotations, '_MAX_TAGS_COUNT', 4)
 def test_too_large_html():
     msg_body = 'Reply' \
                '