This commit is contained in:
Sergey Obukhov
2015-09-18 05:19:59 -07:00
parent 77b62b0fef
commit d328c9d128
5 changed files with 7 additions and 4 deletions

Binary file not shown.

View File

@@ -16,7 +16,7 @@ from talon.signature.constants import SIGNATURE_MAX_LINES
rc = re.compile
RE_EMAIL = rc('@')
RE_EMAIL = rc('\S@\S')
RE_RELAX_PHONE = rc('(\(? ?[\d]{2,3} ?\)?.{,3}?){2,}')
RE_URL = rc(r'''https?://|www\.[\S]+\.[\S]''')
@@ -120,7 +120,7 @@ def contains_sender_names(sender):
names = names or sender
if names != '':
return binary_regex_search(re.compile(names))
return lambda s: False
return lambda s: 0
def extract_names(sender):

View File

@@ -6,7 +6,9 @@ from talon.signature.learning import featurespace as fs
def test_apply_features():
s = '''John Doe
s = '''This is John Doe
Tuesday @3pm suits. I'll chat to you then.
VP Research and Development, Xxxx Xxxx Xxxxx
@@ -19,11 +21,12 @@ john@example.com'''
# note that we don't consider the first line because signatures don't
# usually take all the text, empty lines are not considered
eq_(result, [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
with patch.object(fs, 'SIGNATURE_MAX_LINES', 4):
with patch.object(fs, 'SIGNATURE_MAX_LINES', 5):
features = fs.features(sender)
new_result = fs.apply_features(s, features)
# result remains the same because we don't consider empty lines