clean up style and extra imports
This commit is contained in:
@@ -12,8 +12,7 @@ from copy import deepcopy
|
|||||||
from lxml import html, etree
|
from lxml import html, etree
|
||||||
import html2text
|
import html2text
|
||||||
|
|
||||||
from talon.constants import RE_DELIMITER
|
from talon.utils import get_delimiter
|
||||||
from talon.utils import random_token, get_delimiter
|
|
||||||
from talon import html_quotations
|
from talon import html_quotations
|
||||||
|
|
||||||
|
|
||||||
@@ -151,7 +150,7 @@ def extract_from(msg_body, content_type='text/plain'):
|
|||||||
return extract_from_plain(msg_body)
|
return extract_from_plain(msg_body)
|
||||||
elif content_type == 'text/html':
|
elif content_type == 'text/html':
|
||||||
return extract_from_html(msg_body)
|
return extract_from_html(msg_body)
|
||||||
except Exception, e:
|
except Exception:
|
||||||
log.exception('ERROR extracting message')
|
log.exception('ERROR extracting message')
|
||||||
|
|
||||||
return msg_body
|
return msg_body
|
||||||
@@ -344,7 +343,7 @@ def extract_from_html(msg_body):
|
|||||||
html_tree_copy = deepcopy(html_tree)
|
html_tree_copy = deepcopy(html_tree)
|
||||||
|
|
||||||
number_of_checkpoints = html_quotations.add_checkpoint(html_tree, 0)
|
number_of_checkpoints = html_quotations.add_checkpoint(html_tree, 0)
|
||||||
quotation_checkpoints = [False for i in xrange(number_of_checkpoints)]
|
quotation_checkpoints = [False] * number_of_checkpoints
|
||||||
msg_with_checkpoints = html.tostring(html_tree)
|
msg_with_checkpoints = html.tostring(html_tree)
|
||||||
|
|
||||||
h = html2text.HTML2Text()
|
h = html2text.HTML2Text()
|
||||||
|
|||||||
@@ -21,11 +21,9 @@ trained against, don't forget to regenerate:
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import sys
|
|
||||||
from cStringIO import StringIO
|
|
||||||
|
|
||||||
from . import extraction
|
from . import extraction
|
||||||
from . extraction import extract
|
from . extraction import extract #noqa
|
||||||
from . learning import classifier
|
from . learning import classifier
|
||||||
|
|
||||||
|
|
||||||
@@ -36,13 +34,5 @@ EXTRACTOR_DATA = os.path.join(DATA_DIR, 'train.data')
|
|||||||
|
|
||||||
|
|
||||||
def initialize():
|
def initialize():
|
||||||
try:
|
extraction.EXTRACTOR = classifier.load(EXTRACTOR_FILENAME,
|
||||||
# redirect output
|
EXTRACTOR_DATA)
|
||||||
so, sys.stdout = sys.stdout, StringIO()
|
|
||||||
|
|
||||||
extraction.EXTRACTOR = classifier.load(EXTRACTOR_FILENAME,
|
|
||||||
EXTRACTOR_DATA)
|
|
||||||
sys.stdout = so
|
|
||||||
except Exception, e:
|
|
||||||
raise Exception(
|
|
||||||
"Failed initializing signature parsing with classifiers", e)
|
|
||||||
|
|||||||
@@ -1,14 +1,10 @@
|
|||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
|
|
||||||
import os
|
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
import regex as re
|
import regex as re
|
||||||
from PyML import SparseDataSet
|
from PyML import SparseDataSet
|
||||||
|
|
||||||
from talon.constants import RE_DELIMITER
|
|
||||||
from talon.signature.constants import (SIGNATURE_MAX_LINES,
|
|
||||||
TOO_LONG_SIGNATURE_LINE)
|
|
||||||
from talon.signature.learning.featurespace import features, build_pattern
|
from talon.signature.learning.featurespace import features, build_pattern
|
||||||
from talon.utils import get_delimiter
|
from talon.utils import get_delimiter
|
||||||
from talon.signature.bruteforce import get_signature_candidate
|
from talon.signature.bruteforce import get_signature_candidate
|
||||||
@@ -61,7 +57,7 @@ def extract(body, sender):
|
|||||||
text = delimiter.join(text)
|
text = delimiter.join(text)
|
||||||
if text.strip():
|
if text.strip():
|
||||||
return (text, delimiter.join(signature))
|
return (text, delimiter.join(signature))
|
||||||
except Exception, e:
|
except Exception:
|
||||||
log.exception('ERROR when extracting signature with classifiers')
|
log.exception('ERROR when extracting signature with classifiers')
|
||||||
|
|
||||||
return (body, None)
|
return (body, None)
|
||||||
|
|||||||
@@ -3,8 +3,6 @@
|
|||||||
from . import *
|
from . import *
|
||||||
from . fixtures import *
|
from . fixtures import *
|
||||||
|
|
||||||
from flanker import mime
|
|
||||||
|
|
||||||
from talon import quotations
|
from talon import quotations
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user