diff --git a/talon/html_quotations.py b/talon/html_quotations.py
index 4aa7e74..44afb6b 100644
--- a/talon/html_quotations.py
+++ b/talon/html_quotations.py
@@ -6,6 +6,7 @@ messages (without quoted messages) from html
from __future__ import absolute_import
import regex as re
+from talon.utils import cssselect
CHECKPOINT_PREFIX = '#!%!'
CHECKPOINT_SUFFIX = '!%!#'
@@ -78,7 +79,7 @@ def delete_quotation_tags(html_note, counter, quotation_checkpoints):
def cut_gmail_quote(html_message):
''' Cuts the outermost block element with class gmail_quote. '''
- gmail_quote = html_message.cssselect('div.gmail_quote')
+ gmail_quote = cssselect('div.gmail_quote', html_message)
if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)):
gmail_quote[0].getparent().remove(gmail_quote[0])
return True
@@ -135,7 +136,7 @@ def cut_microsoft_quote(html_message):
def cut_by_id(html_message):
found = False
for quote_id in QUOTE_IDS:
- quote = html_message.cssselect('#{}'.format(quote_id))
+ quote = cssselect('#{}'.format(quote_id), html_message)
if quote:
found = True
quote[0].getparent().remove(quote[0])
diff --git a/talon/utils.py b/talon/utils.py
index 7b118f9..03314d4 100644
--- a/talon/utils.py
+++ b/talon/utils.py
@@ -114,6 +114,7 @@ def get_delimiter(msg_body):
return delimiter
+
def html_tree_to_text(tree):
for style in CSSSelector('style')(tree):
style.getparent().remove(style)
@@ -176,6 +177,10 @@ def html_document_fromstring(s):
return html5parser.document_fromstring(s, parser=_HTML5LIB_PARSER)
+def cssselect(expr, tree):  # helper: run the CSS selector string `expr` against `tree`
+ return CSSSelector(expr)(tree)  # build the selector object, call it on the tree, return its result as-is
+
+
def _contains_charset_spec(s):
"""Return True if the first 4KB contain charset spec
"""