7 Commits

Author SHA1 Message Date
Sergey Obukhov
b9ac866ea7 Merge pull request #151 from mailgun/sergey/reshape
reshape data as suggested by sklearn
2017-08-24 12:04:58 -07:00
Sergey Obukhov
678517dd89 reshape data as suggested by sklearn 2017-08-24 12:03:47 -07:00
Sergey Obukhov
a2aa345712 Merge pull request #148 from mailgun/sergey/v1.4.2
bump version after adding support for Vietnamese format
2017-07-10 11:44:46 -07:00
Sergey Obukhov
d998beaff3 bump version after adding support for Vietnamese format 2017-07-10 11:42:52 -07:00
Sergey Obukhov
a379bc4e7c Merge pull request #147 from hnx116/master
add support for Vietnamese reply format
2017-07-10 11:40:04 -07:00
Hung Nguyen
b8e1894f3b add test case 2017-07-10 13:28:33 +07:00
Hung Nguyen
0b5a44090f add support for Vietnamese reply format 2017-07-10 11:18:57 +07:00
4 changed files with 14 additions and 2 deletions

View File

@@ -29,7 +29,7 @@ class InstallCommand(install):
setup(name='talon',
version='1.4.1',
version='1.4.3',
description=("Mailgun library "
"to extract message quotations and signatures."),
long_description=open("README.rst").read(),

View File

@@ -42,6 +42,8 @@ RE_ON_DATE_SMB_WROTE = re.compile(
u'',
# Swedish, Danish
'Den',
# Vietnamese
u'Vào',
)),
# Date and sender separator
u'|'.join((
@@ -64,6 +66,8 @@ RE_ON_DATE_SMB_WROTE = re.compile(
'schrieb',
# Norwegian, Swedish
'skrev',
# Vietnamese
u'đã viết',
))
))
# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'

View File

@@ -32,7 +32,7 @@ RE_REVERSE_SIGNATURE = re.compile(r'''
def is_signature_line(line, sender, classifier):
'''Checks if the line belongs to signature. Returns True or False.'''
data = numpy.array(build_pattern(line, features(sender)))
data = numpy.array(build_pattern(line, features(sender))).reshape(1, -1)
return classifier.predict(data) > 0

View File

@@ -401,6 +401,14 @@ Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende g
Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
"""))
def test_vietnamese_from_block():
eq_('Hello', quotations.extract_from_plain(
u"""Hello
Vào 14:24 8 tháng 6, 2017, Hùng Nguyễn <hungnguyen@xxx.com> đã viết:
> Xin chào
"""))
def test_quotation_marker_false_positive():
msg_body = """Visit us now for assistance...