fix cssselect
This commit is contained in:
@@ -6,6 +6,7 @@ messages (without quoted messages) from html
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import regex as re
|
import regex as re
|
||||||
|
|
||||||
|
from talon.utils import cssselect
|
||||||
|
|
||||||
CHECKPOINT_PREFIX = '#!%!'
|
CHECKPOINT_PREFIX = '#!%!'
|
||||||
CHECKPOINT_SUFFIX = '!%!#'
|
CHECKPOINT_SUFFIX = '!%!#'
|
||||||
@@ -78,7 +79,7 @@ def delete_quotation_tags(html_note, counter, quotation_checkpoints):
|
|||||||
|
|
||||||
def cut_gmail_quote(html_message):
|
def cut_gmail_quote(html_message):
|
||||||
''' Cuts the outermost block element with class gmail_quote. '''
|
''' Cuts the outermost block element with class gmail_quote. '''
|
||||||
gmail_quote = html_message.cssselect('div.gmail_quote')
|
gmail_quote = cssselect('div.gmail_quote', html_message)
|
||||||
if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)):
|
if gmail_quote and (gmail_quote[0].text is None or not RE_FWD.match(gmail_quote[0].text)):
|
||||||
gmail_quote[0].getparent().remove(gmail_quote[0])
|
gmail_quote[0].getparent().remove(gmail_quote[0])
|
||||||
return True
|
return True
|
||||||
@@ -135,7 +136,7 @@ def cut_microsoft_quote(html_message):
|
|||||||
def cut_by_id(html_message):
|
def cut_by_id(html_message):
|
||||||
found = False
|
found = False
|
||||||
for quote_id in QUOTE_IDS:
|
for quote_id in QUOTE_IDS:
|
||||||
quote = html_message.cssselect('#{}'.format(quote_id))
|
quote = cssselect('#{}'.format(quote_id), html_message)
|
||||||
if quote:
|
if quote:
|
||||||
found = True
|
found = True
|
||||||
quote[0].getparent().remove(quote[0])
|
quote[0].getparent().remove(quote[0])
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ def get_delimiter(msg_body):
|
|||||||
|
|
||||||
return delimiter
|
return delimiter
|
||||||
|
|
||||||
|
|
||||||
def html_tree_to_text(tree):
|
def html_tree_to_text(tree):
|
||||||
for style in CSSSelector('style')(tree):
|
for style in CSSSelector('style')(tree):
|
||||||
style.getparent().remove(style)
|
style.getparent().remove(style)
|
||||||
@@ -176,6 +177,10 @@ def html_document_fromstring(s):
|
|||||||
return html5parser.document_fromstring(s, parser=_HTML5LIB_PARSER)
|
return html5parser.document_fromstring(s, parser=_HTML5LIB_PARSER)
|
||||||
|
|
||||||
|
|
||||||
|
def cssselect(expr, tree):
|
||||||
|
return CSSSelector(expr)(tree)
|
||||||
|
|
||||||
|
|
||||||
def _contains_charset_spec(s):
|
def _contains_charset_spec(s):
|
||||||
"""Return True if the first 4KB contain charset spec
|
"""Return True if the first 4KB contain charset spec
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user