diff --git a/talon/html_quotations.py b/talon/html_quotations.py
index 4aa7e74..44afb6b 100644
--- a/talon/html_quotations.py
+++ b/talon/html_quotations.py
@@ -6,6 +6,7 @@ messages (without quoted messages) from html
 from __future__ import absolute_import
 import regex as re
 
+from talon.utils import cssselect
 
 CHECKPOINT_PREFIX = '#!%!'
 CHECKPOINT_SUFFIX = '!%!#'
@@ -78,7 +79,7 @@ def delete_quotation_tags(html_note, counter, quotation_checkpoints):
 
 def cut_gmail_quote(html_message):
     ''' Cuts the outermost block element with class gmail_quote. '''
-    gmail_quote = html_message.cssselect('div.gmail_quote')
+    gmail_quote = cssselect('div.gmail_quote', html_message)
     if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)):
         gmail_quote[0].getparent().remove(gmail_quote[0])
         return True
@@ -135,7 +136,7 @@ def cut_microsoft_quote(html_message):
 def cut_by_id(html_message):
     found = False
     for quote_id in QUOTE_IDS:
-        quote = html_message.cssselect('#{}'.format(quote_id))
+        quote = cssselect('#{}'.format(quote_id), html_message)
         if quote:
             found = True
             quote[0].getparent().remove(quote[0])
diff --git a/talon/utils.py b/talon/utils.py
index 7b118f9..03314d4 100644
--- a/talon/utils.py
+++ b/talon/utils.py
@@ -114,6 +114,7 @@ def get_delimiter(msg_body):
 
     return delimiter
 
+
 def html_tree_to_text(tree):
     for style in CSSSelector('style')(tree):
         style.getparent().remove(style)
@@ -176,6 +177,10 @@ def html_document_fromstring(s):
     return html5parser.document_fromstring(s, parser=_HTML5LIB_PARSER)
 
 
+def cssselect(expr, tree):
+    return CSSSelector(expr)(tree)
+
+
 def _contains_charset_spec(s):
     """Return True if the first 4KB contain charset spec
     """