Fixed typos
This commit is contained in:
		| @@ -81,7 +81,7 @@ SPLITTER_PATTERNS = [ | ||||
| RE_LINK = re.compile('<(http://[^>]*)>') | ||||
| RE_NORMALIZED_LINK = re.compile('@@(http://[^>@]*)@@') | ||||
|  | ||||
| RE_PARANTHESIS_LINK = re.compile("\(https?://") | ||||
| RE_PARENTHESIS_LINK = re.compile("\(https?://") | ||||
|  | ||||
| SPLITTER_MAX_LINES = 4 | ||||
| MAX_LINES_COUNT = 1000 | ||||
| @@ -169,8 +169,8 @@ def process_marked_lines(lines, markers, return_flags=[False, -1, -1]): | ||||
|         # long links could break sequence of quotation lines but they shouldn't | ||||
|         # be considered an inline reply | ||||
|         links = ( | ||||
|             RE_PARANTHESIS_LINK.search(lines[inline_reply.start() - 1]) or | ||||
|             RE_PARANTHESIS_LINK.match(lines[inline_reply.start()].strip())) | ||||
|             RE_PARENTHESIS_LINK.search(lines[inline_reply.start() - 1]) or | ||||
|             RE_PARENTHESIS_LINK.match(lines[inline_reply.start()].strip())) | ||||
|         if not links: | ||||
|             return_flags[:] = [False, -1, -1] | ||||
|             return lines | ||||
| @@ -197,7 +197,7 @@ def preprocess(msg_body, delimiter, content_type='text/plain'): | ||||
|     """Prepares msg_body for being stripped. | ||||
|  | ||||
|     Replaces link brackets so that they couldn't be taken for quotation marker. | ||||
|     Splits line in two if splitter pattern preceeded by some text on the same | ||||
|     Splits line in two if splitter pattern preceded by some text on the same | ||||
|     line (done only for 'On <date> <person> wrote:' pattern). | ||||
|     """ | ||||
|     # normalize links i.e. replace '<', '>' wrapping the link with some symbols | ||||
| @@ -213,7 +213,7 @@ def preprocess(msg_body, delimiter, content_type='text/plain'): | ||||
|     msg_body = re.sub(RE_LINK, link_wrapper, msg_body) | ||||
|  | ||||
|     def splitter_wrapper(splitter): | ||||
|         """Wrapps splitter with new line""" | ||||
|         """Wraps splitter with new line""" | ||||
|         if splitter.start() and msg_body[splitter.start() - 1] != '\n': | ||||
|             return '%s%s' % (delimiter, splitter.group()) | ||||
|         else: | ||||
| @@ -268,7 +268,7 @@ def extract_from_html(msg_body): | ||||
|     then converting html to text, | ||||
|     then extracting quotations from text, | ||||
|     then checking deleted checkpoints, | ||||
|     then deleting neccessary tags. | ||||
|     then deleting necessary tags. | ||||
|     """ | ||||
|  | ||||
|     if msg_body.strip() == '': | ||||
|   | ||||
| @@ -49,7 +49,7 @@ RE_PHONE_SIGNATURE = re.compile(r''' | ||||
| # c - could be signature line | ||||
| # d - line starts with dashes (could be signature or list item) | ||||
| # l - long line | ||||
| RE_SIGNATURE_CANDIDAATE = re.compile(r''' | ||||
| RE_SIGNATURE_CANDIDATE = re.compile(r''' | ||||
|     (?P<candidate>c+d)[^d] | ||||
|     | | ||||
|     (?P<candidate>c+d)$ | ||||
| @@ -184,5 +184,5 @@ def _process_marked_candidate_indexes(candidate, markers): | ||||
|     >>> _process_marked_candidate_indexes([9, 12, 14, 15, 17], 'clddc') | ||||
|     [15, 17] | ||||
|     """ | ||||
|     match = RE_SIGNATURE_CANDIDAATE.match(markers[::-1]) | ||||
|     match = RE_SIGNATURE_CANDIDATE.match(markers[::-1]) | ||||
|     return candidate[-match.end('candidate'):] if match else [] | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| """ The module provides functions for convertion of a message body/body lines | ||||
| """ The module provides functions for conversion of a message body/body lines | ||||
| into classifiers features space. | ||||
|  | ||||
| The body and the message sender string are converted into unicode before | ||||
| @@ -47,9 +47,9 @@ def apply_features(body, features): | ||||
|     '''Applies features to message body lines. | ||||
|  | ||||
|     Returns list of lists. Each of the lists corresponds to the body line | ||||
|     and is constituted by the numbers of features occurances (0 or 1). | ||||
|     and is constituted by the numbers of features occurrences (0 or 1). | ||||
|     E.g. if element j of list i equals 1 this means that | ||||
|     feature j occured in line i (counting from the last line of the body). | ||||
|     feature j occurred in line i (counting from the last line of the body). | ||||
|     ''' | ||||
|     # collect all non empty lines | ||||
|     lines = [line for line in body.splitlines() if line.strip()] | ||||
| @@ -66,7 +66,7 @@ def build_pattern(body, features): | ||||
|     '''Converts body into a pattern i.e. a point in the features space. | ||||
|  | ||||
|     Applies features to the body lines and sums up the results. | ||||
|     Elements of the pattern indicate how many times a certain feature occured | ||||
|     Elements of the pattern indicate how many times a certain feature occurred | ||||
|     in the last lines of the body. | ||||
|     ''' | ||||
|     line_patterns = apply_features(body, features) | ||||
|   | ||||
| @@ -94,7 +94,7 @@ def binary_regex_match(prog): | ||||
|  | ||||
|  | ||||
| def flatten_list(list_to_flatten): | ||||
|     """Simple list comprehesion to flatten list. | ||||
|     """Simple list comprehension to flatten list. | ||||
|  | ||||
|     >>> flatten_list([[1, 2], [3, 4, 5]]) | ||||
|     [1, 2, 3, 4, 5] | ||||
| @@ -155,7 +155,7 @@ def extract_names(sender): | ||||
|  | ||||
|  | ||||
| def categories_percent(s, categories): | ||||
|     '''Returns category characters persent. | ||||
|     '''Returns category characters percent. | ||||
|  | ||||
|     >>> categories_percent("qqq ggg hhh", ["Po"]) | ||||
|     0.0 | ||||
| @@ -177,7 +177,7 @@ def categories_percent(s, categories): | ||||
|  | ||||
|  | ||||
| def punctuation_percent(s): | ||||
|     '''Returns punctuation persent. | ||||
|     '''Returns punctuation percent. | ||||
|  | ||||
|     >>> punctuation_percent("qqq ggg hhh") | ||||
|     0.0 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user