6 Commits

Author SHA1 Message Date
Maxim Vladimirskiy
24d0f2d00a Merge pull request #223 from mailgun/maxim/develop
PIP-1509: Optimise sender name check [python3]
2021-11-19 13:11:29 +03:00
Maxim Vladimirskiy
94007b0b92 Optimise sender name check 2021-11-19 11:12:26 +03:00
Maxim Vladimirskiy
1a5548f171 Merge pull request #222 from mailgun/maxim/develop
PIP-1409: Remove version pins from setup.py [python3]
2021-11-11 16:29:30 +03:00
Maxim Vladimirskiy
53c49b9121 Remove version pins from setup.py 2021-11-11 15:36:50 +03:00
Matt Dietz
bd50872043 Merge pull request #217 from mailgun/dietz/REP-1030
Drops Python 2 support [python3]
2021-06-15 09:46:29 -05:00
Matt Dietz
d37c4fd551 Drops Python 2 support
REP-1030

In addition to some python 2 => 3 fixes, this change bumps the scikit-learn
version to the latest release. The previously pinned version of scikit-learn failed
to compile its C modules under python 3.7+ because the bundled header files
weren't compatible with the C API implemented in python 3.7+.

Given the restrictive compatibility range supported by scikit-learn, it seemed
prudent to drop python 2 support altogether. Otherwise, we'd be stuck
with python 3.4 as the newest version we could support.

With this change, tests are currently passing under 3.9.2.

Lastly, this change imports the original training data. At some point, a new version
of the training data was committed to the repo, but no classifier was
trained from it. Using a classifier trained from that new data resulted
in most of the tests failing.
2021-06-10 14:03:25 -05:00
23 changed files with 2736 additions and 2512 deletions

20
.build/Dockerfile Normal file
View File

@@ -0,0 +1,20 @@
FROM python:3.9-slim-buster AS deps
RUN apt-get update && \
apt-get install -y build-essential git curl python3-dev libatlas3-base libatlas-base-dev liblapack-dev libxml2 libxml2-dev libffi6 libffi-dev musl-dev libxslt-dev
FROM deps AS testable
ARG REPORT_PATH
VOLUME ["/var/mailgun", "/etc/mailgun/ssl", ${REPORT_PATH}]
ADD . /app
WORKDIR /app
COPY wheel/* /wheel/
RUN mkdir -p ${REPORT_PATH}
RUN python ./setup.py build bdist_wheel -d /wheel && \
pip install --no-deps /wheel/*
ENTRYPOINT ["/bin/sh", "/app/run_tests.sh"]

3
.gitignore vendored
View File

@@ -54,3 +54,6 @@ _trial_temp
 # OSX
 .DS_Store
+
+# vim-backup
+*.bak

11
requirements.txt Normal file
View File

@@ -0,0 +1,11 @@
chardet>=1.0.1
cchardet>=0.3.5
cssselect
html5lib
joblib
lxml>=2.3.3
numpy
regex>=1
scikit-learn>=1.0.0
scipy
six>=1.10.0

4
run_tests.sh Executable file
View File

@@ -0,0 +1,4 @@
#!/usr/bin/env bash
set -ex
REPORT_PATH="${REPORT_PATH:-./}"
nosetests --with-xunit --with-coverage --cover-xml --cover-xml-file $REPORT_PATH/coverage.xml --xunit-file=$REPORT_PATH/nosetests.xml --cover-package=talon .

View File

@@ -19,17 +19,17 @@ class InstallCommand(install):
         if self.no_ml:
             dist = self.distribution
             dist.packages=find_packages(exclude=[
-                'tests',
-                'tests.*',
-                'talon.signature',
-                'talon.signature.*',
+                "tests",
+                "tests.*",
+                "talon.signature",
+                "talon.signature.*",
             ])
-            for not_required in ['numpy', 'scipy', 'scikit-learn==0.16.1']:
+            for not_required in ["numpy", "scipy", "scikit-learn==0.24.1"]:
                 dist.install_requires.remove(not_required)
 
 setup(name='talon',
-      version='1.4.8',
+      version='1.4.10',
       description=("Mailgun library "
                    "to extract message quotations and signatures."),
       long_description=open("README.rst").read(),
@@ -44,20 +44,21 @@ setup(name='talon',
       include_package_data=True,
       zip_safe=True,
       install_requires=[
-          "lxml>=2.3.3",
-          "regex>=1",
+          "lxml",
+          "regex",
           "numpy",
           "scipy",
-          "scikit-learn==0.16.1", # pickled versions of classifier, else rebuild
-          'chardet>=1.0.1',
-          'cchardet>=0.3.5',
-          'cssselect',
-          'six>=1.10.0',
-          'html5lib'
+          "scikit-learn>=1.0.0",
+          "chardet",
+          "cchardet",
+          "cssselect",
+          "six",
+          "html5lib",
+          "joblib",
           ],
       tests_require=[
           "mock",
-          "nose>=1.2.1",
+          "nose",
          "coverage"
           ]
       )
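For context, here is a minimal sketch of how a --no-ml style install option can prune the ML dependencies. The class name and the pruning loop mirror the diff above; the option wiring (user_options, boolean_options, initialize_options) is assumed rather than copied from talon's actual setup.py:

from setuptools import find_packages
from setuptools.command.install import install


class InstallCommand(install):
    # Assumed wiring for the --no-ml flag; the real setup.py may declare it differently.
    user_options = install.user_options + [
        ("no-ml", None, "install without the ML signature extractor"),
    ]
    boolean_options = install.boolean_options + ["no-ml"]

    def initialize_options(self):
        install.initialize_options(self)
        self.no_ml = False

    def run(self):
        if self.no_ml:
            dist = self.distribution
            # exclude the signature sub-package and drop the scientific stack
            dist.packages = find_packages(exclude=["tests", "tests.*",
                                                   "talon.signature",
                                                   "talon.signature.*"])
            for not_required in ["numpy", "scipy", "scikit-learn>=1.0.0"]:
                dist.install_requires.remove(not_required)
        install.run(self)

With cmdclass={'install': InstallCommand} passed to setup(), a slim install would then be requested via the --no-ml flag.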

View File

@@ -457,26 +457,20 @@ def _extract_from_html(msg_body):
     msg_body = msg_body.replace(b'\r\n', b'\n')
-    msg_body = re.sub(r"\<\?xml.+\?\>|\<\!DOCTYPE.+]\>", "", msg_body)
+    msg_body = re.sub(br"\<\?xml.+\?\>|\<\!DOCTYPE.+]\>", "", msg_body)
     html_tree = html_document_fromstring(msg_body)
     if html_tree is None:
         return msg_body
-    cut_quotations = False
-    try:
-        cut_quotations = (html_quotations.cut_gmail_quote(html_tree) or
-                          html_quotations.cut_zimbra_quote(html_tree) or
-                          html_quotations.cut_blockquote(html_tree) or
-                          html_quotations.cut_microsoft_quote(html_tree) or
-                          html_quotations.cut_by_id(html_tree) or
-                          html_quotations.cut_from_block(html_tree)
-                          )
-    except Exception as e:
-        log.exception('during html quotations cut')
-        pass
+    cut_quotations = (html_quotations.cut_gmail_quote(html_tree) or
+                      html_quotations.cut_zimbra_quote(html_tree) or
+                      html_quotations.cut_blockquote(html_tree) or
+                      html_quotations.cut_microsoft_quote(html_tree) or
+                      html_quotations.cut_by_id(html_tree) or
+                      html_quotations.cut_from_block(html_tree)
+                      )
     html_tree_copy = deepcopy(html_tree)
     number_of_checkpoints = html_quotations.add_checkpoint(html_tree, 0)
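A minimal standalone sketch (not talon code) of why the pattern literal becomes bytes here: under python 3, re will not mix a str pattern with a bytes subject, so both the pattern and the replacement have to be bytes.

import re

msg_body = b"<?xml version='1.0'?>\r\n<html><body>Reply</body></html>"
msg_body = msg_body.replace(b"\r\n", b"\n")
# bytes pattern + bytes replacement; a str pattern would raise TypeError on python 3
msg_body = re.sub(br"\<\?xml.+\?\>|\<\!DOCTYPE.+]\>", b"", msg_body)
print(msg_body)  # b'\n<html><body>Reply</body></html>'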

View File

@@ -23,17 +23,14 @@ trained against, don't forget to regenerate:
 from __future__ import absolute_import
 import os
-from . import extraction
-from . extraction import extract #noqa
-from . learning import classifier
+from talon.signature import extraction
+from talon.signature.extraction import extract
+from talon.signature.learning import classifier
 
-DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
-EXTRACTOR_FILENAME = os.path.join(DATA_DIR, 'classifier')
-EXTRACTOR_DATA = os.path.join(DATA_DIR, 'train.data')
 
 def initialize():
-    extraction.EXTRACTOR = classifier.load(EXTRACTOR_FILENAME,
-                                           EXTRACTOR_DATA)
+    data_dir = os.path.join(os.path.dirname(__file__), 'data')
+    extractor_filename = os.path.join(data_dir, 'classifier')
+    extractor_data_filename = os.path.join(data_dir, 'train.data')
+    extraction.EXTRACTOR = classifier.load(extractor_filename,
+                                           extractor_data_filename)
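Usage stays unchanged (the sketch below follows the README-style API, so treat the exact calls as an assumption): talon.init() runs signature.initialize(), which loads the pickled classifier from the paths built above, after which extract() can split a message into text and signature.

import talon
from talon import signature

talon.init()  # loads the classifier via signature.initialize()
text, sig = signature.extract(
    "Wave good bye!\n\nThanks,\nBob Smith\n555-123-4567",
    sender="bob.smith@example.com",
)
# sig should hold the trailing block if the classifier flags it, otherwise None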

View File

@@ -62,7 +62,7 @@ RE_SIGNATURE_CANDIDATE = re.compile(r'''
 def extract_signature(msg_body):
-    """
+    '''
     Analyzes message for a presence of signature block (by common patterns)
     and returns tuple with two elements: message text without signature block
     and the signature itself.
@@ -72,7 +72,7 @@ def extract_signature(msg_body):
     >>> extract_signature('Hey man!')
     ('Hey man!', None)
-    """
+    '''
     try:
         # identify line delimiter first
         delimiter = get_delimiter(msg_body)
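An illustrative call to this heuristic extractor (no trained classifier is required); the expected output for this made-up message follows the docstring shown above:

from talon.signature.bruteforce import extract_signature

text, sig = extract_signature("Wave good bye!\n\n--\nRoman Tkachenko\nSoftware Engineer")
print(text)  # 'Wave good bye!'
print(sig)   # '--\nRoman Tkachenko\nSoftware Engineer'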

Binary file not shown.

File diff suppressed because it is too large

View File

@@ -8,7 +8,7 @@ body belongs to the signature.
 from __future__ import absolute_import
 from numpy import genfromtxt
-from sklearn.externals import joblib
+import joblib
 from sklearn.svm import LinearSVC
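The import swap reflects joblib's removal from sklearn.externals in recent scikit-learn releases; a minimal sketch of the equivalent persistence calls with the standalone package (the estimator parameters and file name are only illustrative):

import joblib
from sklearn.svm import LinearSVC

clf = LinearSVC(C=10.0)               # hypothetical parameters
joblib.dump(clf, "classifier")        # was sklearn.externals.joblib.dump
restored = joblib.load("classifier")  # was sklearn.externals.joblib.load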

View File

@@ -102,7 +102,7 @@ def flatten_list(list_to_flatten):
 def contains_sender_names(sender):
-    '''Returns a functions to search sender\'s name or it\'s part.
+    """Returns a functions to search sender\'s name or it\'s part.
 
     >>> feature = contains_sender_names("Sergey N. Obukhov <xxx@example.com>")
     >>> feature("Sergey Obukhov")
@@ -115,7 +115,7 @@ def contains_sender_names(sender):
     1
     >>> contains_sender_names("<serobnic@mail.ru>")("serobnic")
     1
-    '''
+    """
     names = '( |$)|'.join(flatten_list([[e, e.capitalize()]
                                         for e in extract_names(sender)]))
     names = names or sender
@@ -140,10 +140,16 @@ def extract_names(sender):
     sender = "".join([char if char.isalpha() else ' ' for char in sender])
     # Remove too short words and words from "black" list i.e.
     # words like `ru`, `gmail`, `com`, `org`, etc.
-    sender = [word for word in sender.split() if len(word) > 1 and
-              not word in BAD_SENDER_NAMES]
-    # Remove duplicates
-    names = list(set(sender))
+    names = list()
+    for word in sender.split():
+        if len(word) < 2:
+            continue
+        if word in BAD_SENDER_NAMES:
+            continue
+        if word in names:
+            continue
+        names.append(word)
     return names
@@ -208,20 +214,26 @@ def many_capitalized_words(s):
 def has_signature(body, sender):
-    '''Checks if the body has signature. Returns True or False.'''
+    """Checks if the body has signature. Returns True or False."""
     non_empty = [line for line in body.splitlines() if line.strip()]
     candidate = non_empty[-SIGNATURE_MAX_LINES:]
     upvotes = 0
+    sender_check = contains_sender_names(sender)
     for line in candidate:
         # we check lines for sender's name, phone, email and url,
         # those signature lines don't take more then 27 lines
         if len(line.strip()) > 27:
             continue
-        elif contains_sender_names(sender)(line):
+        if sender_check(line):
             return True
-        elif (binary_regex_search(RE_RELAX_PHONE)(line) +
-              binary_regex_search(RE_EMAIL)(line) +
-              binary_regex_search(RE_URL)(line) == 1):
+        if (binary_regex_search(RE_RELAX_PHONE)(line) +
+                binary_regex_search(RE_EMAIL)(line) +
+                binary_regex_search(RE_URL)(line) == 1):
             upvotes += 1
     if upvotes > 1:
         return True
+    return False
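The PIP-1509 optimisation visible above builds the sender matcher once per message instead of once per candidate line. A simplified, standalone sketch of that pattern follows; the toy matcher is not talon's regex-based contains_sender_names, just a stand-in to show the hoisted closure:

def contains_sender_names(sender):
    # build the (comparatively expensive) matcher once ...
    tokens = [t for t in sender.replace("@", " ").replace("<", " ").split() if len(t) > 1]

    def check(line):
        return any(t in line for t in tokens)

    return check  # ... and return a cheap closure to call for every line


sender_check = contains_sender_names("Sergey N. Obukhov <serobnic@mail.ru>")
candidate = ["Thanks!", "Sergey Obukhov", "555-123-4567"]
print([sender_check(line) for line in candidate])  # [False, True, False]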

3
test-requirements.txt Normal file
View File

@@ -0,0 +1,3 @@
coverage
mock
nose>=1.2.1

View File

@@ -1,4 +1,6 @@
 from __future__ import absolute_import
+from nose.tools import *
+from mock import *
 import talon

View File

@@ -2,12 +2,14 @@
 from __future__ import absolute_import
-from tests.fixtures import REPLY_QUOTATIONS_SHARE_BLOCK, OLK_SRC_BODY_SECTION, REPLY_SEPARATED_BY_HR
-from nose.tools import eq_, ok_, assert_false, assert_true
-from talon import quotations, utils as u
-from mock import Mock, patch
+# noinspection PyUnresolvedReferences
 import re
+from talon import quotations, utils as u
+from . import *
+from .fixtures import *
+from lxml import html
 
 RE_WHITESPACE = re.compile("\s")
 RE_DOUBLE_WHITESPACE = re.compile("\s")

View File

@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-from mock import Mock, patch
+from . import *
+from . fixtures import *
 from talon import quotations
-from nose.tools import eq_
 
 @patch.object(quotations, 'extract_from_html')

View File

@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-from nose.tools import eq_
+from .. import *
 from talon.signature import bruteforce
-from mock import patch, Mock
 
 def test_empty_body():

View File

@@ -2,14 +2,14 @@
 from __future__ import absolute_import
+import os
+from six.moves import range
 from talon.signature import bruteforce, extraction, extract
 from talon.signature import extraction as e
 from talon.signature.learning import dataset
-from nose.tools import eq_
-from .. import STRIPPED, UNICODE_MSG
-from six.moves import range
-from mock import patch
-import os
+from .. import *
 
 def test_message_shorter_SIGNATURE_MAX_LINES():

View File

@@ -1,14 +1,15 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-from ... import EML_MSG_FILENAME, MSG_FILENAME_WITH_BODY_SUFFIX, TMP_DIR, EMAILS_DIR
-from talon.signature.learning.featurespace import features
-from talon.signature.learning import dataset as d
-from nose.tools import eq_, assert_false, ok_
-from numpy import genfromtxt
+from ... import *
 import os
+from numpy import genfromtxt
+from talon.signature.learning import dataset as d
+from talon.signature.learning.featurespace import features
 
 def test_is_sender_filename():
     assert_false(d.is_sender_filename("foo/bar"))

View File

@@ -1,10 +1,9 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
+from ... import *
 from talon.signature.learning import featurespace as fs
-from nose.tools import eq_, assert_false, ok_
-from mock import patch
 
 def test_apply_features():

View File

@@ -1,13 +1,13 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
+from ... import *
+import regex as re
 from talon.signature.learning import helpers as h
-from talon.signature.learning.helpers import RE_RELAX_PHONE, RE_NAME
-from nose.tools import eq_, ok_, assert_false, assert_in
-from mock import patch, Mock
+from talon.signature.learning.helpers import *
 from six.moves import range
-import re
 
 # First testing regex constants.
 VALID = '''

View File

@@ -1,16 +1,17 @@
 # -*- coding: utf-8 -*-
 from __future__ import absolute_import
-from tests.fixtures import STANDARD_REPLIES
-from talon import quotations
-from six.moves import range
-from nose.tools import eq_
-from mock import patch
-import email.iterators
-import six
+from . import *
+from . fixtures import *
 import os
+import email.iterators
+from talon import quotations
+import six
+from six.moves import range
+from six import StringIO
 
 @patch.object(quotations, 'MAX_LINES_COUNT', 1)
 def test_too_many_lines():
@@ -34,7 +35,6 @@ On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:
     eq_("Test reply", quotations.extract_from_plain(msg_body))
 
 def test_pattern_on_date_polymail():
     msg_body = """Test reply
@@ -190,17 +190,14 @@ Test"""
     eq_('Test reply', quotations.extract_from_plain(
         msg_body.format(six.text_type(original_message_indicator))))
 
 def test_english_original_message():
     _check_pattern_original_message('Original Message')
     _check_pattern_original_message('Reply Message')
 
 def test_german_original_message():
     _check_pattern_original_message(u'Ursprüngliche Nachricht')
     _check_pattern_original_message('Antwort Nachricht')
 
 def test_danish_original_message():
     _check_pattern_original_message('Oprindelig meddelelse')
@@ -299,7 +296,6 @@ On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
 > Hello"""
     eq_("Hi", quotations.extract_from_plain(msg_body))
 
 def test_with_indent():
     msg_body = """YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.
@@ -307,8 +303,7 @@ def test_with_indent():
 Brunch mumblecore pug Marfa tofu, irure taxidermy hoodie readymade pariatur.
     """
-    eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.",
-        quotations.extract_from_plain(msg_body))
+    eq_("YOLO salvia cillum kogi typewriter mumblecore cardigan skateboard Austin.", quotations.extract_from_plain(msg_body))
 
 def test_short_quotation_with_newline():
@@ -348,7 +343,6 @@ Subject: The manager has commented on your Loop
 Blah-blah-blah
 """))
 
 def test_german_from_block():
     eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
         """Allo! Follow up MIME!
@@ -361,7 +355,6 @@ Betreff: The manager has commented on your Loop
 Blah-blah-blah
 """))
 
 def test_french_multiline_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
         u"""Lorem ipsum
@@ -374,7 +367,6 @@ Objet : Follow Up
 Blah-blah-blah
 """))
 
 def test_french_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
         u"""Lorem ipsum
@@ -383,7 +375,6 @@ Le 23 janv. 2015 à 22:03, Brendan xxx <brendan.xxx@xxx.com<mailto:brendan.xxx@x
 Bonjour!"""))
 
 def test_polish_from_block():
     eq_('Lorem ipsum', quotations.extract_from_plain(
         u"""Lorem ipsum
@@ -394,7 +385,6 @@ napisał:
 Blah!
 """))
 
 def test_danish_from_block():
     eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
         """Allo! Follow up MIME!
@@ -407,7 +397,6 @@ Emne: The manager has commented on your Loop
 Blah-blah-blah
 """))
 
 def test_swedish_from_block():
     eq_('Allo! Follow up MIME!', quotations.extract_from_plain(
         u"""Allo! Follow up MIME!
@@ -419,7 +408,6 @@ Till: Isacson Leiff
 Blah-blah-blah
 """))
 
 def test_swedish_from_line():
     eq_('Lorem', quotations.extract_from_plain(
         """Lorem
@@ -428,7 +416,6 @@ Den 14 september, 2015 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
 Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
 """))
 
 def test_norwegian_from_line():
     eq_('Lorem', quotations.extract_from_plain(
         u"""Lorem
@@ -437,7 +424,6 @@ På 14 september 2015 på 02:23:18, Valentino Rudy (valentino@rudy.be) skrev:
 Veniam laborum mlkshk kale chips authentic. Normcore mumblecore laboris, fanny pack readymade eu blog chia pop-up freegan enim master cleanse.
 """))
 
 def test_dutch_from_block():
     eq_('Gluten-free culpa lo-fi et nesciunt nostrud.', quotations.extract_from_plain(
         """Gluten-free culpa lo-fi et nesciunt nostrud.
@@ -447,7 +433,6 @@ Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende g
 Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
 """))
 
 def test_vietnamese_from_block():
     eq_('Hello', quotations.extract_from_plain(
         u"""Hello
@@ -457,7 +442,6 @@ Vào 14:24 8 tháng 6, 2017, Hùng Nguyễn <hungnguyen@xxx.com> đã viết:
 > Xin chào
 """))
 
 def test_quotation_marker_false_positive():
     msg_body = """Visit us now for assistance...
 >>> >>> http://www.domain.com <<<
@@ -842,10 +826,10 @@ The user experience was unparallelled. Please continue production. I'm sending p
 that this line is intact."""
     parsed = quotations.extract_from_plain(msg_body)
-    eq_(msg_body, parsed.decode('utf8'))
+    eq_(msg_body, parsed)
 
-def test_appointment():
+def test_appointment_2():
     msg_body = """Invitation for an interview:
 
 Date: Wednesday 3, October 2011
@@ -854,4 +838,4 @@ Address: 130 Fox St
 Please bring in your ID."""
     parsed = quotations.extract_from_plain(msg_body)
-    eq_(msg_body, parsed.decode('utf8'))
+    eq_(msg_body, parsed)
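The dropped .decode('utf8') calls reflect the behaviour the rest of the py3 migration assumes: extract_from_plain now returns str rather than bytes. A small sketch using the same kind of message as the tests above:

from talon import quotations

msg_body = ("Test reply\n\n"
            "On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:\n"
            "> Hi")
parsed = quotations.extract_from_plain(msg_body)
assert isinstance(parsed, str)   # no .decode('utf8') needed any more
assert parsed == "Test reply"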

View File

@@ -2,13 +2,12 @@
 from __future__ import absolute_import
-from nose.tools import eq_, ok_, assert_false
-from talon import utils as u
-from mock import patch, Mock
 import cchardet
 import six
+from talon import utils as u
+from . import *
 
 def test_get_delimiter():
     eq_('\r\n', u.get_delimiter('abc\r\n123'))