12 Commits

Author SHA1 Message Date
Sergey Obukhov
b9ac866ea7 Merge pull request #151 from mailgun/sergey/reshape
reshape data as suggested by sklearn
2017-08-24 12:04:58 -07:00
Sergey Obukhov
678517dd89 reshape data as suggested by sklearn 2017-08-24 12:03:47 -07:00
Sergey Obukhov
a2aa345712 Merge pull request #148 from mailgun/sergey/v1.4.2
bump version after adding support for Vietnamese format
2017-07-10 11:44:46 -07:00
Sergey Obukhov
d998beaff3 bump version after adding support for Vietnamese format 2017-07-10 11:42:52 -07:00
Sergey Obukhov
a379bc4e7c Merge pull request #147 from hnx116/master
add support for Vietnamese reply format
2017-07-10 11:40:04 -07:00
Hung Nguyen
b8e1894f3b add test case 2017-07-10 13:28:33 +07:00
Hung Nguyen
0b5a44090f add support for Vietnamese reply format 2017-07-10 11:18:57 +07:00
Sergey Obukhov
b40835eca2 Merge pull request #145 from mailgun/sergey/outlook-2013-version-bump
bump version after merging outlook 2013 support PR
2017-06-18 22:56:16 -07:00
Sergey Obukhov
b38562c7cc bump version after merging outlook 2013 support PR 2017-06-18 22:55:15 -07:00
Sergey Obukhov
70e9fb415e Merge pull request #139 from Savageman/patch-1
Added Outlook 2013 rules
2017-06-18 22:53:18 -07:00
Sergey Obukhov
64612099cd Merge branch 'master' into patch-1 2017-06-18 22:51:46 -07:00
Esperat Julian
e16dcf629e Added Outlook 2013 rules
Only the border color changes (compared to Outlook 2007, 2010) from `#B5C4DF` to `#E1E1E1`.
2017-04-27 11:34:01 +02:00
5 changed files with 20 additions and 2 deletions

View File

@@ -29,7 +29,7 @@ class InstallCommand(install):
setup(name='talon',
-version='1.4.0',
+version='1.4.3',
description=("Mailgun library "
"to extract message quotations and signatures."),
long_description=open("README.rst").read(),

View File

@@ -94,6 +94,12 @@ def cut_microsoft_quote(html_message):
#outlook 2007, 2010 (american)
"//div[@style='border:none;border-top:solid #B5C4DF 1.0pt;"
"padding:3.0pt 0in 0in 0in']|"
+#outlook 2013 (international)
+"//div[@style='border:none;border-top:solid #E1E1E1 1.0pt;"
+"padding:3.0pt 0cm 0cm 0cm']|"
+#outlook 2013 (american)
+"//div[@style='border:none;border-top:solid #E1E1E1 1.0pt;"
+"padding:3.0pt 0in 0in 0in']|"
#windows mail
"//div[@style='padding-top: 5px; "
"border-top-color: rgb(229, 229, 229); "

View File

@@ -42,6 +42,8 @@ RE_ON_DATE_SMB_WROTE = re.compile(
u'',
# Swedish, Danish
'Den',
+# Vietnamese
+u'Vào',
)),
# Date and sender separator
u'|'.join((
@@ -64,6 +66,8 @@ RE_ON_DATE_SMB_WROTE = re.compile(
'schrieb',
# Norwegian, Swedish
'skrev',
+# Vietnamese
+u'đã viết',
))
))
# Special case for languages where text is translated like this: 'on {date} wrote {somebody}:'

View File

@@ -32,7 +32,7 @@ RE_REVERSE_SIGNATURE = re.compile(r'''
def is_signature_line(line, sender, classifier):
'''Checks if the line belongs to signature. Returns True or False.'''
-data = numpy.array(build_pattern(line, features(sender)))
+data = numpy.array(build_pattern(line, features(sender))).reshape(1, -1)
return classifier.predict(data) > 0

View File

@@ -401,6 +401,14 @@ Op 17-feb.-2015, om 13:18 heeft Julius Caesar <pantheon@rome.com> het volgende g
Small batch beard laboris tempor, non listicle hella Tumblr heirloom.
"""))
+def test_vietnamese_from_block():
+eq_('Hello', quotations.extract_from_plain(
+u"""Hello
+Vào 14:24 8 tháng 6, 2017, Hùng Nguyễn <hungnguyen@xxx.com> đã viết:
+> Xin chào
+"""))
def test_quotation_marker_false_positive():
msg_body = """Visit us now for assistance...