From 8b78da5977bf7a56d24db270cb5c0c351ef8ca57 Mon Sep 17 00:00:00 2001
From: Pascal Borreli
Date: Fri, 25 Jul 2014 02:40:37 +0000
Subject: [PATCH] Fixed typos

---
 README.rst                               | 2 +-
 talon/quotations.py                      | 12 ++++++------
 talon/signature/bruteforce.py            | 4 ++--
 talon/signature/learning/featurespace.py | 8 ++++----
 talon/signature/learning/helpers.py      | 6 +++---
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/README.rst b/README.rst
index fc9ecbe..9e6182c 100644
--- a/README.rst
+++ b/README.rst
@@ -3,7 +3,7 @@ talon
 
 Mailgun library to extract message quotations and signatures.
 
-If you ever tried to parse message quotations or signatures you know that absense of any formatting standards in this area could make this task a nightmare. Hopefully this library will make your life much easier. The name of the project is inspired by TALON - multipurpose robot designed to perform missions ranging from reconnaissance to combat and operate in a number of hostile environments. That’s what a good quotations and signature parser should be like :smile:
+If you ever tried to parse message quotations or signatures you know that absence of any formatting standards in this area could make this task a nightmare. Hopefully this library will make your life much easier. The name of the project is inspired by TALON - multipurpose robot designed to perform missions ranging from reconnaissance to combat and operate in a number of hostile environments. That’s what a good quotations and signature parser should be like :smile:
 
 Usage
 -----
diff --git a/talon/quotations.py b/talon/quotations.py
index 896143b..52086dc 100644
--- a/talon/quotations.py
+++ b/talon/quotations.py
@@ -81,7 +81,7 @@ SPLITTER_PATTERNS = [
 
 RE_LINK = re.compile('<(http://[^>]*)>')
 RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@')
-RE_PARANTHESIS_LINK = re.compile("\(https?://")
+RE_PARENTHESIS_LINK = re.compile("\(https?://")
 
 SPLITTER_MAX_LINES = 4
 MAX_LINES_COUNT = 1000
@@ -169,8 +169,8 @@ def process_marked_lines(lines, markers, return_flags=[False, -1, -1]):
         # long links could break sequence of quotation lines but they shouldn't
         # be considered an inline reply
         links = (
-            RE_PARANTHESIS_LINK.search(lines[inline_reply.start() - 1]) or
-            RE_PARANTHESIS_LINK.match(lines[inline_reply.start()].strip()))
+            RE_PARENTHESIS_LINK.search(lines[inline_reply.start() - 1]) or
+            RE_PARENTHESIS_LINK.match(lines[inline_reply.start()].strip()))
         if not links:
             return_flags[:] = [False, -1, -1]
             return lines
@@ -197,7 +197,7 @@ def preprocess(msg_body, delimiter, content_type='text/plain'):
     """Prepares msg_body for being stripped.
 
     Replaces link brackets so that they couldn't be taken for quotation marker.
-    Splits line in two if splitter pattern preceeded by some text on the same
+    Splits line in two if splitter pattern preceded by some text on the same
     line (done only for 'On wrote:' pattern).
     """
     # normalize links i.e. replace '<', '>' wrapping the link with some symbols
@@ -213,7 +213,7 @@ def preprocess(msg_body, delimiter, content_type='text/plain'):
     msg_body = re.sub(RE_LINK, link_wrapper, msg_body)
 
     def splitter_wrapper(splitter):
-        """Wrapps splitter with new line"""
+        """Wraps splitter with new line"""
         if splitter.start() and msg_body[splitter.start() - 1] != '\n':
             return '%s%s' % (delimiter, splitter.group())
         else:
@@ -268,7 +268,7 @@ def extract_from_html(msg_body):
     then converting html to text,
     then extracting quotations from text,
     then checking deleted checkpoints,
-    then deleting neccessary tags.
+    then deleting necessary tags.
     """
 
     if msg_body.strip() == '':
diff --git a/talon/signature/bruteforce.py b/talon/signature/bruteforce.py
index 3aad5db..d3493bb 100644
--- a/talon/signature/bruteforce.py
+++ b/talon/signature/bruteforce.py
@@ -49,7 +49,7 @@ RE_PHONE_SIGNATURE = re.compile(r'''
 # c - could be signature line
 # d - line starts with dashes (could be signature or list item)
 # l - long line
-RE_SIGNATURE_CANDIDAATE = re.compile(r'''
+RE_SIGNATURE_CANDIDATE = re.compile(r'''
     (?P<candidate>c+d)[^d]
     |
     (?P<candidate>c+d)$
@@ -184,5 +184,5 @@ def _process_marked_candidate_indexes(candidate, markers):
     >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc')
     [15, 17]
     """
-    match = RE_SIGNATURE_CANDIDAATE.match(markers[::-1])
+    match = RE_SIGNATURE_CANDIDATE.match(markers[::-1])
     return candidate[-match.end('candidate'):] if match else []
diff --git a/talon/signature/learning/featurespace.py b/talon/signature/learning/featurespace.py
index 1bb9979..583fcf3 100644
--- a/talon/signature/learning/featurespace.py
+++ b/talon/signature/learning/featurespace.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 
-""" The module provides functions for convertion of a message body/body lines
+""" The module provides functions for conversion of a message body/body lines
 into classifiers features space.
 
 The body and the message sender string are converted into unicode before
@@ -47,9 +47,9 @@ def apply_features(body, features):
     '''Applies features to message body lines.
 
     Returns list of lists. Each of the lists corresponds to the body line
-    and is constituted by the numbers of features occurances (0 or 1).
+    and is constituted by the numbers of features occurrences (0 or 1).
     E.g. if element j of list i equals 1 this means that
-    feature j occured in line i (counting from the last line of the body).
+    feature j occurred in line i (counting from the last line of the body).
     '''
     # collect all non empty lines
     lines = [line for line in body.splitlines() if line.strip()]
@@ -66,7 +66,7 @@ def build_pattern(body, features):
     '''Converts body into a pattern i.e. a point in the features space.
 
     Applies features to the body lines and sums up the results.
-    Elements of the pattern indicate how many times a certain feature occured
+    Elements of the pattern indicate how many times a certain feature occurred
     in the last lines of the body.
     '''
     line_patterns = apply_features(body, features)
diff --git a/talon/signature/learning/helpers.py b/talon/signature/learning/helpers.py
index ffdb721..70a4820 100644
--- a/talon/signature/learning/helpers.py
+++ b/talon/signature/learning/helpers.py
@@ -94,7 +94,7 @@ def binary_regex_match(prog):
 
 
 def flatten_list(list_to_flatten):
-    """Simple list comprehesion to flatten list.
+    """Simple list comprehension to flatten list.
 
     >>> flatten_list([[1, 2], [3, 4, 5]])
     [1, 2, 3, 4, 5]
@@ -155,7 +155,7 @@ def extract_names(sender):
 
 
 def categories_percent(s, categories):
-    '''Returns category characters persent.
+    '''Returns category characters percent.
 
     >>> categories_percent("qqq ggg hhh", ["Po"])
     0.0
@@ -177,7 +177,7 @@ def categories_percent(s, categories):
 
 
 def punctuation_percent(s):
-    '''Returns punctuation persent.
+    '''Returns punctuation percent.
 
     >>> punctuation_percent("qqq ggg hhh")
     0.0