From 13dc43e960bbc2327d3e5ed405d2866f44e5f1ff Mon Sep 17 00:00:00 2001 From: Tarek Sheasha Date: Wed, 21 Jan 2015 15:54:57 +0100 Subject: [PATCH] Utilising the Constants Checking the length of a line to determine whether it may be a signature can be done in a more generic way by defining the maximum line length as a constant. This follows the principle of modifying the code in only one place and having that change propagate everywhere. This exact approach has already been used at: --- talon/signature/learning/featurespace.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/talon/signature/learning/featurespace.py b/talon/signature/learning/featurespace.py index 583fcf3..60676f9 100644 --- a/talon/signature/learning/featurespace.py +++ b/talon/signature/learning/featurespace.py @@ -7,7 +7,8 @@ The body and the message sender string are converted into unicode before applying features to them. """ -from talon.signature.constants import SIGNATURE_MAX_LINES +from talon.signature.constants import (SIGNATURE_MAX_LINES, + TOO_LONG_SIGNATURE_LINE) from talon.signature.learning.helpers import * @@ -20,7 +21,7 @@ def features(sender=''): # This one is not from paper. # Line is too long. # This one is less aggressive than `Line is too short` - lambda line: 1 if len(line) > 60 else 0, + lambda line: 1 if len(line) > TOO_LONG_SIGNATURE_LINE else 0, # Line contains email pattern. binary_regex_search(RE_EMAIL), # Line contains url.