diff --git a/setup.py b/setup.py
index c54dd2d..d8b0554 100755
--- a/setup.py
+++ b/setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
 
 
 setup(name='talon',
-      version='1.2.15',
+      version='1.2.16',
       description=("Mailgun library "
                    "to extract message quotations and signatures."),
       long_description=open("README.rst").read(),
diff --git a/talon/utils.py b/talon/utils.py
index 2da73bf..70de98c 100644
--- a/talon/utils.py
+++ b/talon/utils.py
@@ -117,7 +117,16 @@ def html_tree_to_text(tree):
         style.getparent().remove(style)
 
     for c in tree.xpath('//comment()'):
-        c.getparent().remove(c)
+        parent = c.getparent()
+
+        # A comment with no parent (e.g. one that sits next to the root
+        # element) does not impact produced text, so there is nothing to
+        # remove. Compare against None explicitly: truth-testing an lxml
+        # element checks whether it has children, not whether it exists.
+        if parent is None:
+            continue
+
+        parent.remove(c)
 
     text = ""
     for el in tree.iter():
diff --git a/tests/utils_test.py b/tests/utils_test.py
index 37f4d7b..a902746 100644
--- a/tests/utils_test.py
+++ b/tests/utils_test.py
@@ -6,6 +6,7 @@ from . import *
 from talon import utils as u
 import cchardet
 import six
+from lxml import html
 
 
 def test_get_delimiter():
@@ -107,3 +108,10 @@ font: 13px 'Lucida Grande', Arial, sans-serif;
 TEXT 2
 """
     eq_(b'TEXT 1 \nTEXT 2', u.html_to_text(html))
+
+
+def test_comment_no_parent():
+    # a comment placed before the root element has no parent in the tree
+    s = "<!-- COMMENT 1 --> no comment"
+    d = html.document_fromstring(s)
+    eq_("no comment", u.html_tree_to_text(d))