Compare commits
3 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| | f53b5cc7a6 | | |
| | 27adde7aa7 | | |
| | a9719833e0 | | |
2
setup.py
2
setup.py
@@ -29,7 +29,7 @@ class InstallCommand(install):
|
|||||||
|
|
||||||
|
|
||||||
setup(name='talon',
|
setup(name='talon',
|
||||||
version='1.2.15',
|
version='1.2.16',
|
||||||
description=("Mailgun library "
|
description=("Mailgun library "
|
||||||
"to extract message quotations and signatures."),
|
"to extract message quotations and signatures."),
|
||||||
long_description=open("README.rst").read(),
|
long_description=open("README.rst").read(),
|
||||||
|
|||||||
@@ -117,7 +117,13 @@ def html_tree_to_text(tree):
|
|||||||
style.getparent().remove(style)
|
style.getparent().remove(style)
|
||||||
|
|
||||||
for c in tree.xpath('//comment()'):
|
for c in tree.xpath('//comment()'):
|
||||||
c.getparent().remove(c)
|
parent = c.getparent()
|
||||||
|
|
||||||
|
# comment with no parent does not impact produced text
|
||||||
|
if not parent:
|
||||||
|
continue
|
||||||
|
|
||||||
|
parent.remove(c)
|
||||||
|
|
||||||
text = ""
|
text = ""
|
||||||
for el in tree.iter():
|
for el in tree.iter():
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from . import *
|
|||||||
from talon import utils as u
|
from talon import utils as u
|
||||||
import cchardet
|
import cchardet
|
||||||
import six
|
import six
|
||||||
|
from lxml import html
|
||||||
|
|
||||||
|
|
||||||
def test_get_delimiter():
|
def test_get_delimiter():
|
||||||
@@ -107,3 +108,9 @@ font: 13px 'Lucida Grande', Arial, sans-serif;
|
|||||||
<p>TEXT 2 <!-- COMMENT 2 --></p>
|
<p>TEXT 2 <!-- COMMENT 2 --></p>
|
||||||
</div>"""
|
</div>"""
|
||||||
eq_(b'TEXT 1 \nTEXT 2', u.html_to_text(html))
|
eq_(b'TEXT 1 \nTEXT 2', u.html_to_text(html))
|
||||||
|
|
||||||
|
|
||||||
|
def test_comment_no_parent():
|
||||||
|
s = "<!-- COMMENT 1 --> no comment"
|
||||||
|
d = html.document_fromstring(s)
|
||||||
|
eq_("no comment", u.html_tree_to_text(d))
|
||||||
|
|||||||
Reference in New Issue
Block a user