clean up style and extra imports
This commit is contained in:
@@ -12,8 +12,7 @@ from copy import deepcopy
|
||||
from lxml import html, etree
|
||||
import html2text
|
||||
|
||||
- from talon.constants import RE_DELIMITER
|
||||
- from talon.utils import random_token, get_delimiter
|
||||
+ from talon.utils import get_delimiter
|
||||
from talon import html_quotations
|
||||
|
||||
|
||||
@@ -151,7 +150,7 @@ def extract_from(msg_body, content_type='text/plain'):
|
||||
return extract_from_plain(msg_body)
|
||||
elif content_type == 'text/html':
|
||||
return extract_from_html(msg_body)
|
||||
- except Exception, e:
|
||||
+ except Exception:
|
||||
log.exception('ERROR extracting message')
|
||||
|
||||
return msg_body
|
||||
@@ -344,7 +343,7 @@ def extract_from_html(msg_body):
|
||||
html_tree_copy = deepcopy(html_tree)
|
||||
|
||||
number_of_checkpoints = html_quotations.add_checkpoint(html_tree, 0)
|
||||
- quotation_checkpoints = [False for i in xrange(number_of_checkpoints)]
|
||||
+ quotation_checkpoints = [False] * number_of_checkpoints
|
||||
msg_with_checkpoints = html.tostring(html_tree)
|
||||
|
||||
h = html2text.HTML2Text()
|
||||
|
||||
@@ -21,11 +21,9 @@ trained against, don't forget to regenerate:
|
||||
"""
|
||||
|
||||
import os
|
||||
- import sys
|
||||
- from cStringIO import StringIO
|
||||
|
||||
from . import extraction
|
||||
- from . extraction import extract
|
||||
+ from . extraction import extract #noqa
|
||||
from . learning import classifier
|
||||
|
||||
|
||||
@@ -36,13 +34,5 @@ EXTRACTOR_DATA = os.path.join(DATA_DIR, 'train.data')
|
||||
|
||||
|
||||
def initialize():
|
||||
try:
|
||||
# redirect output
|
||||
so, sys.stdout = sys.stdout, StringIO()
|
||||
|
||||
extraction.EXTRACTOR = classifier.load(EXTRACTOR_FILENAME,
|
||||
EXTRACTOR_DATA)
|
||||
sys.stdout = so
|
||||
except Exception, e:
|
||||
raise Exception(
|
||||
"Failed initializing signature parsing with classifiers", e)
|
||||
|
||||
@@ -1,14 +1,10 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
import logging
|
||||
|
||||
import regex as re
|
||||
from PyML import SparseDataSet
|
||||
|
||||
from talon.constants import RE_DELIMITER
|
||||
from talon.signature.constants import (SIGNATURE_MAX_LINES,
|
||||
TOO_LONG_SIGNATURE_LINE)
|
||||
from talon.signature.learning.featurespace import features, build_pattern
|
||||
from talon.utils import get_delimiter
|
||||
from talon.signature.bruteforce import get_signature_candidate
|
||||
@@ -61,7 +57,7 @@ def extract(body, sender):
|
||||
text = delimiter.join(text)
|
||||
if text.strip():
|
||||
return (text, delimiter.join(signature))
|
||||
- except Exception, e:
|
||||
+ except Exception:
|
||||
log.exception('ERROR when extracting signature with classifiers')
|
||||
|
||||
return (body, None)
|
||||
|
||||
@@ -3,8 +3,6 @@
|
||||
from . import *
|
||||
from . fixtures import *
|
||||
|
||||
from flanker import mime
|
||||
|
||||
from talon import quotations
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user