initial commit

This commit is contained in:
Sergey Obukhov
2014-07-23 21:12:54 -07:00
commit 170f11038b
80 changed files with 7481 additions and 0 deletions

18
tests/__init__.py Normal file
View File

@@ -0,0 +1,18 @@
from nose.tools import *
from mock import *
import talon
# Fixture locations shared by the test modules.  All paths are relative,
# so they resolve against the directory the test run is started from.

# A sample reply message in .eml form.
EML_MSG_FILENAME = "tests/fixtures/standard_replies/yahoo.eml"

# Signature fixtures: the e-mail samples directory and a scratch directory.
EMAILS_DIR = "tests/fixtures/signature/emails"
TMP_DIR = "tests/fixtures/signature/tmp"

# Individual entries below EMAILS_DIR (note that STRIPPED keeps its
# trailing slash).
STRIPPED = EMAILS_DIR + "/stripped/"
MSG_FILENAME_WITH_BODY_SUFFIX = EMAILS_DIR + "/P/johndoeexamplecom_body"
UNICODE_MSG = EMAILS_DIR + "/P/unicode_msg"
# Call talon's init() as a side effect of importing this module; what it
# sets up is defined inside the talon package, not here.
talon.init()

View File

@@ -0,0 +1,16 @@
<html>
<body>
<div>Reply</div>
<span id="OLK_SRC_BODY_SECTION">
<div>
<span>From: </span>Bob &lt;<a href="mailto:bob@example.com">bob@example.com</a>&gt;<br>
<span>Date: </span>Tue, 01 Nov 2011 18:54:39 -0700<br>
<span>To: </span>Rob &lt;<a href="mailto:rob@example.com">rob@example.com</a>&gt;<br>
<span>Subject: </span>Test<br>
</div>
<div>
Hi
</div>
</span>
</body>
</html>

10
tests/fixtures/__init__.py vendored Normal file
View File

@@ -0,0 +1,10 @@
# Directory of the standard-reply sample messages (e.g. aol.eml, yahoo.eml).
STANDARD_REPLIES = "tests/fixtures/standard_replies"

# The fixtures below are read once, when this module is imported, and kept
# as module-level strings.  The paths are relative, so the import only
# succeeds when the current working directory is the repository root.

with open("tests/fixtures/reply-quotations-share-block.eml") as f:
    REPLY_QUOTATIONS_SHARE_BLOCK = f.read()

with open("tests/fixtures/OLK_SRC_BODY_SECTION.html") as f:
    OLK_SRC_BODY_SECTION = f.read()

with open("tests/fixtures/reply-separated-by-hr.html") as f:
    REPLY_SEPARATED_BY_HR = f.read()

View File

@@ -0,0 +1,6 @@
<div dir="ltr"><div class="gmail_default"><div class="gmail_default" style>Hi. I am fine.</div><div class="gmail_default" style><br></div><div class="gmail_default" style>Thanks,</div><div class="gmail_default" style>Alex</div>
</div></div><div class="gmail_extra"><br><br><div class="gmail_quote">On Thu, Jun 26, 2014 at 2:14 PM, Alexander L <span dir="ltr">&lt;<a href="mailto:abc@example.com" target="_blank">a@example.com</a>&gt;</span> wrote:<br>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr"><div class="gmail_default" style="font-size:small"><div class="gmail_default" style="font-family:arial,sans-serif">
Hello! How are you?</div><div class="gmail_default" style="font-family:arial,sans-serif"><br>
</div><div class="gmail_default" style="font-family:arial,sans-serif">Thanks,</div><div class="gmail_default" style="font-family:arial,sans-serif">Sasha.</div></div></div>
</blockquote></div><br></div>

View File

@@ -0,0 +1,17 @@
<html>
<head>
<style><!--
.hmmessage P
{
margin:0px;
padding:0px
}
body.hmmessage
{
font-size: 12pt;
font-family:Calibri
}
--></style></head>
<body class='hmmessage'><div dir='ltr'>Hi. I am fine.<div><br></div><div>Thanks,</div><div>Alex<br><br><div><hr id="stopSpelling">Date: Thu, 26 Jun 2014 13:53:45 +0400<br>Subject: Test message<br>From: abc@example.com<br>To: alex.l@example.com<br><br><div dir="ltr"><div class="ecxgmail_default" style="font-size:small;">Hello! How are you?</div><div class="ecxgmail_default" style="font-size:small;"><br></div><div class="ecxgmail_default" style="font-size:small;">Thanks,</div><div class="ecxgmail_default" style="font-size:small;">
Sasha.</div></div></div></div> </div></body>
</html>

View File

@@ -0,0 +1,57 @@
<HTML><BODY><p>Hi. I am fine.</p><p>Thanks,<br>Alex</p><br><br><br>Thu, 26 Jun 2014 14:00:51 +0400 от Alexander L &lt;abc@example.com&gt;:<br>
<blockquote style="border-left:1px solid #0857A6; margin:10px; padding:0 0 0 10px;">
<div id="">
<div class="js-helper js-readmsg-msg">
<style type="text/css"></style>
<div>
<base target="_self" href="https://e.mail.ru/">
<div id="style_14037768550000001020_BODY"><div dir="ltr"><div style="font-size:small"><div style="font-family:arial,sans-serif">Hello! How are you?</div><div style="font-family:arial,sans-serif"><br>
</div><div style="font-family:arial,sans-serif">Thanks,</div><div style="font-family:arial,sans-serif">Sasha.</div></div></div>
</div>
<base target="_self" href="https://e.mail.ru/">
</div>
</div>
</div>
</blockquote>
<br></BODY></HTML>

View File

@@ -0,0 +1,134 @@
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii">
<meta name=Generator content="Microsoft Word 11 (filtered medium)">
<!--[if !mso]>
<style>
v\:* {behavior:url(#default#VML);}
o\:* {behavior:url(#default#VML);}
w\:* {behavior:url(#default#VML);}
.shape {behavior:url(#default#VML);}
</style>
<![endif]-->
<style>
<!--
/* Font Definitions */
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman";}
a:link, span.MsoHyperlink
{color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-reply;
font-family:Arial;
color:navy;}
@page Section1
{size:595.3pt 841.9pt;
margin:2.0cm 42.5pt 2.0cm 3.0cm;}
div.Section1
{page:Section1;}
-->
</style>
<!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang=RU link=blue vlink=purple>
<div class=Section1>
<p class=MsoNormal><font size=2 color=navy face=Arial><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:navy'>Hi. I am fine.<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:navy'><o:p>&nbsp;</o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:navy'>Thanks,<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span lang=EN-US
style='font-size:10.0pt;font-family:Arial;color:navy'>Alex<o:p></o:p></span></font></p>
<p class=MsoNormal><font size=2 color=navy face=Arial><span style='font-size:
10.0pt;font-family:Arial;color:navy'><o:p>&nbsp;</o:p></span></font></p>
<div>
<div class=MsoNormal align=center style='text-align:center'><font size=3
face="Times New Roman"><span lang=EN-US style='font-size:12.0pt'>
<hr size=3 width="100%" align=center tabindex=-1>
</span></font></div>
<p class=MsoNormal><b><font size=2 face=Tahoma><span lang=EN-US
style='font-size:10.0pt;font-family:Tahoma;font-weight:bold'>From:</span></font></b><font
size=2 face=Tahoma><span lang=EN-US style='font-size:10.0pt;font-family:Tahoma'>
Alexander L [mailto:abc@example.com] <br>
<b><span style='font-weight:bold'>Sent:</span></b> Friday, June 27, 2014 12:06
PM<br>
<b><span style='font-weight:bold'>To:</span></b> Alexander<br>
<b><span style='font-weight:bold'>Subject:</span></b> Test message</span></font><span
lang=EN-US><o:p></o:p></span></p>
</div>
<p class=MsoNormal><font size=3 face="Times New Roman"><span style='font-size:
12.0pt'><o:p>&nbsp;</o:p></span></font></p>
<div>
<div>
<div>
<p class=MsoNormal><font size=3 face=Arial><span style='font-size:12.0pt;
font-family:Arial'>Hello! How are you?<o:p></o:p></span></font></p>
</div>
<div>
<p class=MsoNormal><font size=3 face=Arial><span style='font-size:12.0pt;
font-family:Arial'><o:p>&nbsp;</o:p></span></font></p>
</div>
<div>
<p class=MsoNormal><font size=3 face=Arial><span style='font-size:12.0pt;
font-family:Arial'>Thanks,<o:p></o:p></span></font></p>
</div>
<div>
<p class=MsoNormal><font size=3 face=Arial><span style='font-size:12.0pt;
font-family:Arial'>Sasha.<o:p></o:p></span></font></p>
</div>
</div>
</div>
</div>
</body>
</html>

View File

@@ -0,0 +1,42 @@
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><meta http-equiv=Content-Type content="text/html; charset=utf-8"><meta name=Generator content="Microsoft Word 12 (filtered medium)"><style><!--
/* Font Definitions */
@font-face
{font-family:"Cambria Math";
panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman","serif";}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-reply;
font-family:"Calibri","sans-serif";
color:#1F497D;}
.MsoChpDefault
{mso-style-type:export-only;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:2.0cm 42.5pt 2.0cm 3.0cm;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-US link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>Hi. I am fine.<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'> <o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>Thanks,<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>Alex<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><o:p>&nbsp;</o:p></span></p><div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm'><p class=MsoNormal><b><span lang=RU style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span lang=RU style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'> Alexander L [mailto:abc@example.com] <br><b>Sent:</b> Thursday, July 03, 2014 3:50 PM<br><b>To:</b> alex.l@example.com<br><b>Subject:</b> Test message<o:p></o:p></span></p></div><p class=MsoNormal><o:p>&nbsp;</o:p></p><div><div><div><p class=MsoNormal><span style='font-family:"Arial","sans-serif"'>Hello! How are you?<o:p></o:p></span></p></div><div><p class=MsoNormal><span style='font-family:"Arial","sans-serif"'><o:p>&nbsp;</o:p></span></p></div><div><p class=MsoNormal><span style='font-family:"Arial","sans-serif"'>Thanks,<o:p></o:p></span></p></div><div><p class=MsoNormal><span style='font-family:"Arial","sans-serif"'>Sasha.<o:p></o:p></span></p></div></div></div></div></body></html>

View File

@@ -0,0 +1,32 @@
<html>
<head>
<meta content="text/html; charset=UTF-8" http-equiv="Content-Type">
</head>
<body bgcolor="#FFFFFF" text="#000000">
Hi. I am fine.<br>
<br>
Thanks,<br>
Alex<br>
<div class="moz-cite-prefix">On 26.06.2014 14:41, Alexander L
wrote:<br>
</div>
<blockquote
cite="mid:CA+jEWTKBU6qc4OnH5m=-0sfwkAzZhcy0rd+ean2W6bFUVXaO7A@mail.gmail.com"
type="cite">
<div dir="ltr">
<div class="gmail_default" style="font-size:small">
<div class="gmail_default"
style="font-family:arial,sans-serif">Hello! How are you?</div>
<div class="gmail_default"
style="font-family:arial,sans-serif"><br>
</div>
<div class="gmail_default"
style="font-family:arial,sans-serif">Thanks,</div>
<div class="gmail_default"
style="font-family:arial,sans-serif">Sasha.</div>
</div>
</div>
</blockquote>
<br>
</body>
</html>

View File

@@ -0,0 +1,33 @@
<html>
<head>
<meta name="generator" content="Windows Mail 17.5.9600.20498">
<style data-externalstyle="true"><!--
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph {
margin-top:0in;
margin-right:0in;
margin-bottom:0in;
margin-left:.5in;
margin-bottom:.0001pt;
}
p.MsoNormal, li.MsoNormal, div.MsoNormal {
margin:0in;
margin-bottom:.0001pt;
}
p.MsoListParagraphCxSpFirst, li.MsoListParagraphCxSpFirst, div.MsoListParagraphCxSpFirst,
p.MsoListParagraphCxSpMiddle, li.MsoListParagraphCxSpMiddle, div.MsoListParagraphCxSpMiddle,
p.MsoListParagraphCxSpLast, li.MsoListParagraphCxSpLast, div.MsoListParagraphCxSpLast {
margin-top:0in;
margin-right:0in;
margin-bottom:0in;
margin-left:.5in;
margin-bottom:.0001pt;
line-height:115%;
}
--></style></head>
<body dir="ltr">
<div data-externalstyle="false" dir="ltr" style="font-family: 'Calibri', 'Segoe UI', 'Meiryo', 'Microsoft YaHei UI', 'Microsoft JhengHei UI', 'Malgun Gothic', 'sans-serif';font-size:12pt;"><div>Hi. I am fine.</div><div><br></div><div>Thanks,</div><div>Alex<br></div><div data-signatureblock="true"><div><br></div><div><br></div></div><div style="padding-top: 5px; border-top-color: rgb(229, 229, 229); border-top-width: 1px; border-top-style: solid;"><div><font face=" 'Calibri', 'Segoe UI', 'Meiryo', 'Microsoft YaHei UI', 'Microsoft JhengHei UI', 'Malgun Gothic', 'sans-serif'" style='line-height: 15pt; letter-spacing: 0.02em; font-family: "Calibri", "Segoe UI", "Meiryo", "Microsoft YaHei UI", "Microsoft JhengHei UI", "Malgun Gothic", "sans-serif"; font-size: 12pt;'><b>От:</b>&nbsp;<a href="mailto:abc@example.com" target="_parent">Alexander L</a><br><b>Отправлено:</b>&nbsp;‎четверг‎, 26 ‎июня‎ 2014 г. 15:05<br><b>Кому:</b>&nbsp;<a href="mailto:alex-ninja@example.com" target="_parent">Alex</a></font></div></div><div><br></div><div dir=""><div dir="ltr"><div class="gmail_default" style="font-size: small;"><div class="gmail_default" style="font-family: arial,sans-serif;">Hello! How are you?</div><div class="gmail_default" style="font-family: arial,sans-serif;"><br>
</div><div class="gmail_default" style="font-family: arial,sans-serif;">Thanks,</div><div class="gmail_default" style="font-family: arial,sans-serif;">Sasha.</div></div></div>
</div></div>
</body>
</html>

View File

@@ -0,0 +1 @@
<p>Hi. I am fine.<br /><br />Thanks,<br />Alex<br /><br />26.06.2014, 14:41, "Alexander L" &lt;<a href="mailto:abc@example.com">abc@example.com</a>&gt;:</p><blockquote> Hello! How are you?<br /><br /> Thanks,<br /> Sasha.</blockquote>

View File

@@ -0,0 +1,22 @@
Content-Type: multipart/alternative;
boundary="===============6853056845739363347=="
MIME-Version: 1.0
Date: Wed, 4 Apr 2012 22:22:42 -0700 (PDT)
From: Joe Doe <xxx@example.com>
Subject: Re: You've got a new booking inquiry!
--===============6853056845739363347==
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
SGkgS2F0aGFyaW5lLsKgIFNvdW5kcyBncmVhdC7CoCBBcmUgdGhlcmUgYW5kIGRpZXRyeSByZXN0cmljdGlvbnMgb3IgdGhpbmdzIHlvdXIgaHVzYmFuZCBkb2VzL2RvZXNuJ3QgbGlrZSB0byBlYXQ/wqAgV291bGQgeW91IGxpa2UgdG8gZG8gYSBmZXcgaG9ycyBkIG9ldXZyZXMgYW5kIHRoZW4gYcKgMyBvciA0wqBjb3Vyc2UgZGlubmVyP8KgIExldCBtZSBrbm93IHdoYXQgeW91IHRoaW5rIHdpbGwgd29yayBiZXN0IGFuZCBJIHdpbGwgc3RhcnQgd29ya2luZyBvbiBhIG1lbnUgYW5kIHByb3Bvc2FsLsKgIFRoYW5rcyBzbyBtdWNoIGFuZCBsb29rIGZvcndhcmQgdG8gaGVhcmluZyBmcm9tIHlvdSBzb29uLgrCoApKb2UgWFhYCgotLS0gT24gV2VkLCA0LzQvMTIsIHh4eEBleGFtcGxlLmNvbSA8eHh4QGV4YW1wbGUuY29tPiB3cm90ZToKCgpGcm9tOiB4eHhAZXhhbXBsZS5jb20gPHh4eEBleGFtcGxlLmNvbT4KU3ViamVjdDogWW91J3ZlIGdvdCBhIG5ldyBib29raW5nIGlucXVpcnkhClRvOiB4eHhAeWFob28uY29tCkRhdGU6IFdlZG5lc2RheSwgQXByaWwgNCwgMjAxMiwgMTA6MjMgUE0KCk5ldyBCb29raW5nIElucXVpcnkKCg==
--===============6853056845739363347==
MIME-Version: 1.0
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: base64
PHRhYmxlPjx0cj48dGQ+PERJVj5IaSBLYXRoYXJpbmUuJm5ic3A7IFNvdW5kcyBncmVhdC4mbmJzcDsgQXJlIHRoZXJlIGFuZCBkaWV0cnkgcmVzdHJpY3Rpb25zIG9yIHRoaW5ncyB5b3VyIGh1c2JhbmQgZG9lcy9kb2Vzbid0IGxpa2UgdG8gZWF0PyZuYnNwOyBXb3VsZCB5b3UgbGlrZSB0byBkbyBhIGZldyBob3JzIGQgb2V1dnJlcyBhbmQgdGhlbiBhJm5ic3A7MyBvciA0Jm5ic3A7Y291cnNlIGRpbm5lcj8mbmJzcDsgTGV0IG1lIGtub3cgd2hhdCB5b3UgdGhpbmsgd2lsbCB3b3JrIGJlc3QgYW5kIEkgd2lsbCBzdGFydCB3b3JraW5nIG9uIGEgbWVudSBhbmQgcHJvcG9zYWwuJm5ic3A7IFRoYW5rcyBzbyBtdWNoIGFuZCBsb29rIGZvcndhcmQgdG8gaGVhcmluZyBmcm9tIHlvdSBzb29uLjwvRElWPgo8RElWPiZuYnNwOzwvRElWPgo8RElWPkpob24gRG9lPEJSPjxCUj4tLS0gT24gPEI+V2VkLCA0LzQvMTIsIHh4eEBleGFtcGxlLmNvbSA8ST4mbHQ7eHh4QGV4YW1wbGUuY29tJmd0OzwvST48L0I+IHdyb3RlOjxCUj48L0RJVj4KPEJMT0NLUVVPVEU+PEJSPkZyb206IHh4eEBleGFtcGxlLmNvbSAmbHQ7eHh4QGV4YW1wbGUuY29tJmd0OzxCUj5TdWJqZWN0OiBZb3UndmUgZ290IGEgbmV3IGJvb2tpbmcgaW5xdWlyeSE8QlI+VG86IHh4eEB5YWhvby5jb208QlI+RGF0ZTogV2VkbmVzZGF5LCBBcHJpbCA0LCAyMDEyLCAxMDoyMyBQTTxCUj48QlI+CjxESVY+CjxESVY+CjxDRU5URVI+CjxUQUJMRT4KPFRCT0RZPgo8VFI+CjxURD4KPFRBQkxFPgo8VEJPRFk+CjxUUj4KPFREPgo8VEFCTEU+CjxUQk9EWT4KPFRSPgo8VEQ+CjxESVY+TmV3IEJvb2tpbmcgSW5xdWlyeSA8L0RJVj48L1REPgo8VEQ+CjxESVY+WW91ciBwbGFjZSBpcyB0aGUgaG9tZSBvZiBiZXNwb2tlIGRpbmluZyA8L0RJVj48L1REPjwvVFI+PC9UQk9EWT48L1RBQkxFPjwvVEQ+PC9UUj48L1RCT0RZPjwvVEFCTEU+CjxUQUJMRT4KPFRCT0RZPgo8VFI+CjxURD4KPFRBQkxFPgo8VEJPRFk+CjxUUj4KPFREPiA8L1REPjwvVFI+PC9UQk9EWT48L1RBQkxFPjwvVEQ+PC9UUj4KPFRSPgo8VEQ+CjxUQUJMRT4KPFRCT0RZPgo8VFI+CjxURD4KPFRBQkxFPgo8VEJPRFk+CjxUUj4KPFREPgo8RElWPjxCUj5Hb29kIE5ld3MhPEJSPjxCUj4KPFA+RXZlbnQgRGV0YWlsczwvRElWPkRhdGU6IEFwcmlsIDI4LCAyMDEyPEJSPkxvY2F0aW9uOiB4eHg8QlI+SGVhZGNvdW50OiA2IHRvIDg8QlI+VGFyZ2V0IEJ1ZGdldDogJDUwIHBlciBwZXJzb248QlI+PEJSPkJlc3QgRGVzY3JpcHRpb24gb2YgVGFyZ2V0IEJ1ZGdldDogSSdkIGxvdmUgdG8gaGVhciB3aGF0IHRoZSBjaGVmIHRoaW5rcyBpcyBiZXN0IGZvciBteSBldmVudCwgcHJvdmlkZWQgd2Ugc3RheSBjbG9zZSB0byB0aGlzIGJ1ZGdldCA8QlI+PEJSPkV2ZW50IERlc2NyaXB0aW9uOiBJIGFtIHdhbnRpbmcgdG8gc3VycHJpc2UgbXkgaHVzYmFuZCB3aXRoIGEgY2FzdWFsIGRpbm5lciBwYXJ0eSBpbiBvdXIgaG9tZSBpbiB4eHgu
IFdlIGhhdmUgYW4gYW1hemluZyBraXRjaGVuICh0aGF0IEkgZG9uJ3QgZG8ganVzdGljZSB0byBidXQgSSBiZXQgeW91IGNvdWxkISksIGFuZCBhIHJlYWxseSBuaWNlIGdhcmRlbiBmb3IgZGluaW5nLiBJIGFtIGZseWluZyBzb21lIG9mIGhpcyBiZXN0IGZyaWVuZHMgaW4gdG8gY2VsZWJyYXRlIGhpbS4gV2UgaGF2ZSBzbWFsbCBraWRzICh3aG8gd2lsbCBiZSBzbGVlcGluZyEpLCBzbyBJJ20gaG9waW5nIGZvciBhIGNhc3VhbCBidXQgcm9tYW50aWMgZGlubmVyIHBhcnR5LiA8QlI+PEJSPlZpZXcgbW9yZSBpbnF1aXJ5IGRldGFpbHMgb24geW91ciBFdmVudCBEYXNoYm9hcmQuIElmIHlvdSBsaWtlIHdoYXQgeW91IHNlZSwgcGxlYXNlIGNyZWF0ZSBhIHByb3Bvc2FsIGZvciB0aGUgZXZlbnQuIDxCUj48QlI+SWYgeW91IGRvIG5vdCBoYXZlIHRoZSB0aW1lIHRvIG1ha2UgYSBmdWxsIHByb3Bvc2FsIHJpZ2h0IG5vdywgd2UgZW5jb3VyYWdlIHlvdSB0byBhdCBsZWFzdCByZXNwb25kIHRvIHRoZSBob3N0IHdpdGggYSBxdWljayBtZXNzYWdlIHRvIGNvbmZpcm0gdGhhdCB5b3UndmUgZ290dGVuIHRoaXMgaW5xdWlyeSBhbmQgaGF2ZSBiZWd1biB0aGlua2luZyBhYm91dCB0aGUgZXZlbnQuIDxCUj48QlI+PFNUUk9ORz5Zb3UgY2FuIHJlcGx5IGRpcmVjdGx5IHRvIHRoaXMgZW1haWwgYW5kIHlvdXIgbWVzc2FnZSB3aWxsIGdvIHRvIHRoZSBob3N0IG9uIHRoZSBldmVudCBkYXNoYm9hcmQuPC9TVFJPTkc+IDxCUj48QlI+UmVtZW1iZXIsIHlvdSBoYXZlIGV4Y2x1c2l2ZSBhY2Nlc3MgdG8gdGhpcyBpbnF1aXJ5IGZvciB0aGUgbmV4dCAyNCBob3Vycy4gUGxlYXNlIG1ha2UgYSBwcm9wb3NhbCBvciBzZW5kIGEgbWVzc2FnZSB0byB0aGUgaG9zdCBpbiB0aGF0IHRpbWUuIElmIHRoZSBob3N0IGhhcyBub3QgaGVhcmQgYW55dGhpbmcgZnJvbSB5b3UgaW4gMjQgaG91cnMsIHdlIHdpbGwKIGZvcndhcmQgdGhlIGhvc3RzIGlucXVpcnkgdG8gYSBzbWFsbCBudW1iZXIgb2YgYWRkaXRpb25hbCBjaGVmcywgYW5kIHRoZXkgd2lsbCBoYXZlIHRoZSBvcHBvcnR1bml0eSB0byBtYWtlIGEgcHJvcG9zYWwuIFdlIGRvIHRoaXMgYXMgYSBjb3VydGVzeSB0byB0aGUgaG9zdHMuIDxCUj48QlI+SWYgeW91IGNhbm5vdCBhY2NlcHQgdGhpcyBib29raW5nIG9yIGRvIG5vdCB3YW50IHRvIGZvciBhbnkgcmVhc29uLCBwbGVhc2UgdGFrZSB0aGUgdGltZSB0byBkZWNsaW5lIG9uIHRoZSBFdmVudCBEYXNoYm9hcmQuIDxCUj48QlI+VGltZSB0byBnZXQgY29va2luJyA8QlI+PEJSPjwvRElWPjwvVEQ+PC9UUj48L1RCT0RZPjwvVEFCTEU+PC9URD48L1RSPjwvVEJPRFk+PC9UQUJMRT48L1REPjwvVFI+CjxUUj4KPFREPgo8VEFCTEU+CjxUQk9EWT4KPFRSPgo8VEQ+CjxUQUJMRT4KPFRCT0RZPgo8VFI+CjxURD4KPERJVj4mbmJzcDs8QSBocmVmPSJodHRwOi8vZXhhbXBsZS5jb20iPmZvbGxvdyBvbiBUd2l0dGVyPC9BPiB8IDxBIGhyZWY9Imh0dHA6Ly94
eHgiPmZyaWVuZCBvbiBGYWNlYm9vazwvQT4gfCA8QQogaHJlZj0iaHR0cDovL2V4YW1wbGUuY29tIj5Gb3J3YXJkIHRvIGEgRnJpZW5kPC9BPiZndDsmbmJzcDsgPC9ESVY+PC9URD48L1RSPgo8VFI+CjxURD4KPERJVj48RU0+Q29weXJpZ2g8L0VNPiA8L0RJVj48L1REPjwvVFI+PC9UQk9EWT48L1RBQkxFPjwvVEQ+PC9UUj48L1RCT0RZPjwvVEFCTEU+PC9URD48L1RSPjwvVEJPRFk+PC9UQUJMRT48QlI+PC9URD48L1RSPjwvVEJPRFk+PC9UQUJMRT48L0NFTlRFUj48SU1HIGFsdD0iIiBzcmM9Imh0dHA6Ly9leGFtcGxlLmNvbSI+IDwvRElWPjwvRElWPjwvQkxPQ0tRVU9URT48L3RkPjwvdHI+PC90YWJsZT4K
--===============6853056845739363347==--

View File

@@ -0,0 +1,21 @@
<html>
<body>
<div>
Hi
<div>
there
</div>
<div>
Bob
<hr>
<b>From: </b>bob@example.com<br>
<b>To: </b>xxx@comcast.net<br>
<b>Sent: </b>Friday, July 22, 2011 6:20:01 PM<br>
<b>Subject: </b>Hello<br><br>
<p>
Hello
</p>
</div>
</div>
</body>
</html>

24
tests/fixtures/signature/emails/P/102682_R_S vendored Executable file
View File

@@ -0,0 +1,24 @@
From: doe@example.com (John Doe)
Subject: Hello
Date: 7 Apr 94 17:35:09 GMT
#reply#rickc@example.com (xxx xxx) writes:
#reply#>In article <xxx.xxx.xxx@xxx-xxx>, xxx@example.com
#reply#>writes:
#reply#>|> I just wanted to let everyone know that I have lost what little respect
#reply#>|> I have
#reply#>|> for xxx xxx after seeing today's xxx game.
#reply#>|> A dishard xxx fan
#reply#>Yes, I also wonder if they can win with this manager.
#reply#>I never believed managers had that much to do with winning
#reply#>until I saw how much they had to do with losing....
I like the xxx a lot, but my heart belongs to the xxx...You can imagine
my frustration when I saw the xxx nabbing xxx...ARHGGHRGHH!
#sig# -John Doe
#sig#
#sig# doe@example.com

View File

@@ -0,0 +1,34 @@
(Please accept our apologies if you've already completed the Survey. Send a reply with "Did it" in the subject line to avoid future reminder messages.)
Dear Executive,
YOUR INPUT IS VERY VALUABLE. Over the past week or so, you've been invited to participate in a very important survey that will significantly improve the information products available to you in the power and energy industry.
Because we haven't heard from you yet, we're adding more prizes to encourage participation. There are now 12 prizes you could win; we've added 4 more inducements to the original 8 prizes.
YOUR CHANCE OF WINNING IS HUGE. We're hoping to draw a total of 200 respondents from this sector of the industry. When we receive your fully completed questionnaire, your e-mail address will one among 30 in a drawing that could bring you one of the 12 prizes. Those odds aren't bad at all! The prizes include:
-- FOUR $100 gift certificates
-- EIGHT $50 gift certificates
IF YOU WIN, you can choose from the following list where you'd like to spend your gift certificate:
-- Amazon.com,
-- REI.com,
-- GOLFDISCOUNT.com,
-- CABELLAS.com,
-- fogdog.com, or
-- a general American Express gift certificate.
Just click on the long blue URL listed in the section below this letter to connect to the welcome page for the survey.
THE DEADLINE FOR SUBMITTING YOUR SURVEY IS FRIDAY, SEPTEMBER 7.
Thanks for your participation, and we wish you the best of luck in the
drawing!
#sig#John E. Doe, Ph.D.
#sig#President
#sig#Xxx Research
#sig#john@example.com
#sig#www.example.com

View File

@@ -0,0 +1 @@
john@example.com

View File

@@ -0,0 +1,10 @@
From: Сергей Обухов
Добрый день.
Мы являемся стартапом работающим над созданием платформы
почтовых электронных сообщений. Нам бы хотелось использовать Ваш продукт
для решения задач парсинга сообщений. Если Вы заинтересованы, пожалуйста
ответьте на это письмо.
С уважением,
Сергей Обухов

View File

@@ -0,0 +1,9 @@
Martin, can we get:
1 L Male
1 M Male
1 M Female
to 111 Xxxxxx ST Xxx Xxxxxxxxx XX 94133
That'd be awesome! Really cool shirts!

View File

@@ -0,0 +1,5 @@
Thanks Sasha, I can't go any higher and is why I limited it to the
homepage.
Stavros Xxxxxx
via mobile

View File

@@ -0,0 +1 @@
Stavros Xxxxxx

View File

@@ -0,0 +1,2 @@
Stavros Xxxxxx
via mobile

View File

@@ -0,0 +1,34 @@
(Please accept our apologies if you've already completed the Survey. Send a reply with "Did it" in the subject line to avoid future reminder messages.)
Dear Executive,
YOUR INPUT IS VERY VALUABLE. Over the past week or so, you've been invited to participate in a very important survey that will significantly improve the information products available to you in the power and energy industry.
Because we haven't heard from you yet, we're adding more prizes to encourage participation. There are now 12 prizes you could win; we've added 4 more inducements to the original 8 prizes.
YOUR CHANCE OF WINNING IS HUGE. We're hoping to draw a total of 200 respondents from this sector of the industry. When we receive your fully completed questionnaire, your e-mail address will one among 30 in a drawing that could bring you one of the 12 prizes. Those odds aren't bad at all! The prizes include:
-- FOUR $100 gift certificates
-- EIGHT $50 gift certificates
IF YOU WIN, you can choose from the following list where you'd like to spend your gift certificate:
-- Amazon.com,
-- REI.com,
-- GOLFDISCOUNT.com,
-- CABELLAS.com,
-- fogdog.com, or
-- a general American Express gift certificate.
Just click on the long blue URL listed in the section below this letter to connect to the welcome page for the survey.
THE DEADLINE FOR SUBMITTING YOUR SURVEY IS FRIDAY, SEPTEMBER 7.
Thanks for your participation, and we wish you the best of luck in the
drawing!
#sig#John E. Doe, Ph.D.
#sig#President
#sig#Xxx Research
#sig#john@example.com
#sig#www.example.com

View File

@@ -0,0 +1 @@
john@example.com

View File

@@ -0,0 +1,5 @@
#sig#John E. Doe, Ph.D.
#sig#President
#sig#Xxx Research
#sig#john@example.com
#sig#www.example.com

View File

@@ -0,0 +1,8 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce nec est enim. Vestibulum vel enim urna, sed facilisis augue. Vestibulum dui nibh, pulvinar id adipiscing id, congue id turpis.
Suspendisse non posuere erat. Ut porta luctus augue, laoreet accumsan sem auctor quis. Fusce feugiat elit et dolor tempor lobortis. Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed molestie gravida mi, id faucibus risus tempus vel. Mauris dictum enim nec lectus iaculis ac eleifend libero vestibulum. Morbi imperdiet lobortis erat non molestie. Sed non aliquam lacus.
Gina Xxxxxxxxx
gina@example.com
(555) 346-9947
www.example.com

View File

@@ -0,0 +1 @@
Gina Xxxxxxxxx

View File

@@ -0,0 +1,4 @@
Gina Xxxxxxxxx
gina@example.com
(555) 346-9947
www.example.com

View File

@@ -0,0 +1,6 @@
Simone,
It is 'example.com'. Please let me know what you see.
Thank you,
Noam

View File

@@ -0,0 +1 @@
Noam

View File

@@ -0,0 +1,2 @@
Thank you,
Noam

View File

@@ -0,0 +1,23 @@
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce nec est enim. Vestibulum vel enim urna, sed facilisis augue. Vestibulum dui nibh, pulvinar id adipiscing id, congue id turpis.
Suspendisse non posuere erat. Ut porta luctus augue, laoreet accumsan sem auctor quis. Fusce feugiat elit et dolor tempor lobortis. Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Sed molestie gravida mi, id faucibus risus tempus vel. Mauris dictum enim nec lectus iaculis ac eleifend libero vestibulum. Morbi imperdiet lobortis erat non molestie. Sed non aliquam lacus.
----------------
AM_LOGO_CLR
Me John Doe
Internet xxx / xxxxx d'internet
Xxxxxxx du Québec
t. 555-931-0702
skype: xxx
f. 555-875-7611
http://example.com/
http://www.example.com/in/xxx

View File

@@ -0,0 +1 @@
John Doe

View File

@@ -0,0 +1,19 @@
----------------
AM_LOGO_CLR
Me John Doe
Internet xxx / xxxxx d'internet
Xxxxxxx du Québec
t. 555-931-0702
skype: xxx
f. 555-875-7611
http://example.com/
http://www.example.com/in/xxx

View File

@@ -0,0 +1 @@
*.data

View File

@@ -0,0 +1,24 @@
Content-Type: multipart/alternative;
boundary="===============0934372227844987316=="
MIME-Version: 1.0
Date: Mon, 2 Apr 2012 18:22:10 +0400
Message-Id: <CAEAsyCZ-sCHxZtoKyM3JmT5gSYpZd5GwY-cVNiV8H329zgJT4g@mail.gmail.com>
Subject: Re: Test
From: Sergey Obykhov <bob@example.com>
To: "bob@xxx.mailgun.org" <bob@xxx.mailgun.org>
--===============0934372227844987316==
MIME-Version: 1.0
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: base64
SGVsbG8KMDIuMDQuMjAxMiAxNDoyMCDQv9C+0LvRjNC30L7QstCw0YLQtdC70YwgImJvYkB4eHgubWFpbGd1bi5vcmciIDwKYm9iQHh4eC5tYWlsZ3VuLm9yZz4g0L3QsNC/0LjRgdCw0Ls6Cgo+IEhpCj4KCg==
--===============0934372227844987316==
MIME-Version: 1.0
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: base64
PHA+SGVsbG88L3A+CjxkaXYgY2xhc3M9ImdtYWlsX3F1b3RlIj4wMi4wNC4yMDEyIDE0OjIwINC/0L7Qu9GM0LfQvtCy0LDRgtC10LvRjCAmcXVvdDs8YSBocmVmPSJtYWlsdG86Ym9iQHh4eC5tYWlsZ3VuLm9yZyI+Ym9iQHh4eC5tYWlsZ3VuLm9yZzwvYT4mcXVvdDsgJmx0OzxhIGhyZWY9Im1haWx0bzpib2JAeHh4Lm1haWxndW4ub3JnIj5ib2JAeHh4Lm1haWxndW4ub3JnPC9hPiZndDsg0L3QsNC/0LjRgdCw0Ls6PGJyIHR5cGU9ImF0dHJpYnV0aW9uIj4KPGJsb2NrcXVvdGUgY2xhc3M9ImdtYWlsX3F1b3RlIiBzdHlsZT0ibWFyZ2luOjAgMCAwIC44ZXg7Ym9yZGVyLWxlZnQ6MXB4ICNjY2Mgc29saWQ7cGFkZGluZy1sZWZ0OjFleCI+SGk8YnI+CjwvYmxvY2txdW90ZT48L2Rpdj4KCg==
--===============0934372227844987316==--

65
tests/fixtures/standard_replies/aol.eml vendored Normal file
View File

@@ -0,0 +1,65 @@
Content-Type: multipart/alternative;
boundary="===============7429987408351918371=="
MIME-Version: 1.0
To: bob@example.com
Subject: Re: Test
From: Megan Odin <xxx@aol.com>
Message-Id: <8CEDEEFBEF4733B-1E5C-73DF@webmail-d070.sysops.aol.com>
Date: Mon, 2 Apr 2012 09:57:58 -0400 (EDT)
--===============7429987408351918371==
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
Hello
-----Original Message-----
From: bob <bob@example.com>
To: xxx <xxx@gmail.com>; xxx <xxx@hotmail.com>; xxx <xxx@yahoo.com>; xxx <xxx@aol.com>; xxx <xxx@comcast.net>; xxx <xxx@nyc.rr.com>
Sent: Mon, Apr 2, 2012 5:49 pm
Subject: Test
Hi
--===============7429987408351918371==
Content-Type: text/html; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
<font color='black' size='2' face='arial'>Hello<br>
<br>
<br>
<div style="font-family:arial,helvetica;font-size:10pt;color:black">-----Original Message-----<br>
From: bob &lt;bob@example.com&gt;<br>
To: xxx &lt;xxx@gmail.com&gt;; xxx &lt;xxx@hotmail.com&gt;; xxx &lt;xxx@yahoo.com&gt;; xxx &lt;xxx@aol.com&gt;; xxx &lt;xxx@comcast.net&gt;; xxx &lt;xxx@nyc.rr.com&gt;<br>
Sent: Mon, Apr 2, 2012 5:49 pm<br>
Subject: Test<br>
<br>
<div id="AOLMsgPart_0_4d68a632-fe65-4f6d-ace2-292ac1b91f1f" style="margin: 0px;font-family: Tahoma, Verdana, Arial, Sans-Serif;font-size: 12px;color: #000;background-color: #fff;">
<pre style="font-size: 9pt;"><tt>Hi
</tt></pre>
</div>
<!-- end of AOLMsgPart_0_4d68a632-fe65-4f6d-ace2-292ac1b91f1f -->
</div>
</font>
--===============7429987408351918371==--

View File

@@ -0,0 +1,15 @@
Content-Type: text/plain; charset=iso-8859-1
Mime-Version: 1.0 (Apple Message framework v1257)
Subject: Re: Test
From: xxx <xxx@gmail.com>
Date: Tue, 3 Apr 2012 16:55:26 +0400
Content-Transfer-Encoding: 7bit
Message-Id: <9A1EA6A5-4FD3-4AD0-8DFD-2420E670DB53@gmail.com>
To: bob <bob@example.com>
X-Mailer: Apple Mail (2.1257)
Hello
On Apr 3, 2012, at 4:19 PM, bob wrote:
> Hi

View File

@@ -0,0 +1,33 @@
Content-Type: multipart/alternative;
boundary="===============3552566137977633461=="
MIME-Version: 1.0
Date: Mon, 2 Apr 2012 13:56:12 +0000 (UTC)
From: xxx@comcast.net
To: bob@xxx.mailgun.org
Message-Id: <650787974.741595.1333374972389.JavaMail.root@sz0152a.westchester.pa.mail.comcast.net>
Subject: Re: Test
X-Mailer: Zimbra 6.0.13_GA_2944 (ZimbraWebClient - SAF3 (Linux)/6.0.13_GA_2944)
--===============3552566137977633461==
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Hello
----- Original Message -----
From: bob@xxx.mailgun.org
To: xxx@gmail.com, xxx@hotmail.com, xxx@yahoo.com, xxx@aol.com, xxx@comcast.net, lsloan6@nyc.rr.com
Sent: Monday, April 2, 2012 5:44:22 PM
Subject: Test
Hi
--===============3552566137977633461==
MIME-Version: 1.0
Content-Type: text/html; charset="us-ascii"
Content-Transfer-Encoding: 7bit
<html><head><style type='text/css'>p { margin: 0; }</style></head><body><div style='font-family: Arial; font-size: 12pt; color: #000000'>Hello<br><br><hr id="zwchr"><b>From: </b>bob@xxx.mailgun.org<br><b>To: </b>xxx@gmail.com, xxx@hotmail.com, xxx@yahoo.com, xxx@aol.com, xxx@comcast.net, lsloan6@nyc.rr.com<br><b>Sent: </b>Monday, April 2, 2012 5:44:22 PM<br><b>Subject: </b>Test<br><br>Hi<br></div></body></html>
--===============3552566137977633461==--

View File

@@ -0,0 +1,31 @@
Content-Type: multipart/alternative;
boundary="===============3455449757443551301=="
MIME-Version: 1.0
Date: Mon, 2 Apr 2012 20:21:52 +0400
Message-Id: <CAKsfaBW4hj0Gek6TwbR3erng4P1y0CZzJ0d=pXtCNnYnbe7PLg@mail.gmail.com>
Subject: Re: Test
From: Megan One <xxx@gmail.com>
To: bob@example.com
--===============3455449757443551301==
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Hello
On Mon, Apr 2, 2012 at 6:26 PM, Megan One <xxx@gmail.com> wrote:
> Hi
--===============3455449757443551301==
MIME-Version: 1.0
Content-Type: text/html; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Hello<br><br><div class="gmail_quote">On Mon, Apr 2, 2012 at 6:26 PM, Megan One <span dir="ltr">&lt;<a href="mailto:xxx@gmail.com">xxx@gmail.com</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Hi
</blockquote></div><br>
--===============3455449757443551301==--

View File

@@ -0,0 +1,50 @@
Content-Type: multipart/alternative;
boundary="===============5499446768842282638=="
MIME-Version: 1.0
Message-Id: <DUB102-W192C6E94759954C4885B92B14C0@phx.gbl>
From: Alexey Q <xxx@hotmail.com>
To: <bob@xxx.mailgun.org>
Subject: RE: Test
Date: Mon, 2 Apr 2012 21:47:37 +0800
X-Originalarrivaltime: 02 Apr 2012 13:47:37.0935 (UTC)
FILETIME=[2A6C0DF0:01CD10D7]
--===============5499446768842282638==
MIME-Version: 1.0
Content-Type: text/plain; charset="us-ascii"
Content-Transfer-Encoding: 7bit
Hello
> Subject: Test
> From: bob@xxx.mailgun.org
> To: xxx@gmail.com; xxx@hotmail.com; xxx@yahoo.com; xxx@aol.com; xxx@comcast.net; xxx@nyc.rr.com
> Date: Mon, 2 Apr 2012 17:44:22 +0400
>
> Hi
--===============5499446768842282638==
MIME-Version: 1.0
Content-Type: text/html; charset="us-ascii"
Content-Transfer-Encoding: 7bit
<html>
<head>
<style><!--
.hmmessage P
{
margin:0px;
padding:0px
}
body.hmmessage
{
font-size: 10pt;
font-family:Tahoma
}
--></style></head>
<body class='hmmessage'><div dir='ltr'>
Hello<br><br><div><div id="SkyDrivePlaceholder"></div>&gt; Subject: Test<br>&gt; From: bob@xxx.mailgun.org<br>&gt; To: xxx@gmail.com; xxx@hotmail.com; xxx@yahoo.com; xxx@aol.com; xxx@comcast.net; xxx@nyc.rr.com<br>&gt; Date: Mon, 2 Apr 2012 17:44:22 +0400<br>&gt; <br>&gt; Hi<br></div> </div></body>
</html>
--===============5499446768842282638==--

View File

@@ -0,0 +1,19 @@
Subject: Re: Test
From: xxx <xxx@gmail.com>
Content-Type: text/plain;
charset=us-ascii
X-Mailer: iPhone Mail (9B176)
Message-Id: <06C90B12-13B9-4C5F-A9EF-4A809D94C078@gmail.com>
Date: Tue, 3 Apr 2012 16:23:59 +0400
To: bob <bob@example.com>
Content-Transfer-Encoding: quoted-printable
Mime-Version: 1.0 (1.0)
hello
Sent from my iPhone
On Apr 3, 2012, at 4:19 PM, bob <bob@example.com> wr=
ote:
> Hi

View File

@@ -0,0 +1,85 @@
Subject: Test
From: me@example.com
To: you@example.com
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary=0016364c440b2e8b63049acd5370
X-Mailgun-Tag: tag
X-Mailgun-Mailing-List-Id: 1q
--0016364c440b2e8b63049acd5370
Content-Type: text/plain; charset=ISO-8859-1
Hello
From: xxx@xxx.mailgun.org [mailto:xxx@xxx.mailgun.org]
Sent: March-09-12 4:22 PM
To: Dan Le
Subject: The manager has commented on your Loop
Hi dan.le@example.com<mailto:dan.le@example.com>,
The manager's comment:
"Hello Allan! Did you ask for some MIME? "
Loop details:
xxx at Dan
I'm not happy
""
Your Loop is here<http://dev.xxx.com/loop/view/4f50f20e160839c95a000bb3?_uid=4f3541a7ac63e655040008e3>.
We will be in touch again with any further updates,
xxx
If you did not sign up to receive emails from us you can use the link below to unsubscribe. We apologize for any inconvenience.
Unsubscribe<http://dev.xxx.com/user/unsubscribe/dan.le@example.com?verify=4a400554148256338956101abdf06406>
--0016364c440b2e8b63049acd5370
Content-Type: text/html; charset=ISO-8859-1
<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><META HTTP-EQUIV="Content-Type" CONTENT="text/html; charset=us-ascii"><meta name=Generator content="Microsoft Word 14 (filtered medium)"><style><!--
/* Font Definitions */
@font-face
{font-family:Calibri;
panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
{font-family:Tahoma;
panose-1:2 11 6 4 3 5 4 4 2 4;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
{margin:0cm;
margin-bottom:.0001pt;
font-size:12.0pt;
font-family:"Times New Roman","serif";}
a:link, span.MsoHyperlink
{mso-style-priority:99;
color:blue;
text-decoration:underline;}
a:visited, span.MsoHyperlinkFollowed
{mso-style-priority:99;
color:purple;
text-decoration:underline;}
span.EmailStyle17
{mso-style-type:personal-reply;
font-family:"Calibri","sans-serif";
color:#1F497D;}
.MsoChpDefault
{mso-style-type:export-only;
font-family:"Calibri","sans-serif";
mso-fareast-language:EN-US;}
@page WordSection1
{size:612.0pt 792.0pt;
margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
{page:WordSection1;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]--></head><body lang=EN-CA link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>Allo! Follow up MIME!<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><o:p>&nbsp;</o:p></span></p><p class=MsoNormal><b><span lang=EN-US style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span lang=EN-US style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'> xxx@xxx.mailgun.org [mailto:xxx@xxx.mailgun.org] <br><b>Sent:</b> March-09-12 4:22 PM<br><b>To:</b> Dan Le<br><b>Subject:</b> The manager has commented on your Loop<o:p></o:p></span></p><p class=MsoNormal><o:p>&nbsp;</o:p></p><p class=MsoNormal>Hi <a href="mailto:dan.le@example.com">dan.le@example.com</a>,<br><br>The manager's comment:<br>&quot;Hello Allan! Did you ask for some MIME? &quot;<br><br>Loop details:<br><br>xxx at Dan<br>I'm not happy<br>&quot;&quot;<br><br>Your Loop is <a href="http://dev.xxx.com/loop/view/4f50f20e160839c95a000bb3?_uid=4f3541a7ac63e655040008e3">here</a>.<br><br>We will be in touch again with any further updates,<br><br>xxx<br><br>If you did not sign up to receive emails from us you can use the link below to unsubscribe. We apologize for any inconvenience.<br><br><a href="http://dev.xxx.com/user/unsubscribe/dan.le@example.com?verify=4a400554148256338956101abdf06406">Unsubscribe</a> <o:p></o:p></p></div></body></html>
--0016364c440b2e8b63049acd5370--

View File

@@ -0,0 +1,61 @@
Date: Tue, 3 Apr 2012 16:58:35 +0400
From: xxx <xxx@gmail.com>
To: bob <bob@example.com>
Message-ID: <5BB86EF4B6E24E4C9DA4BBEF59DA9809@gmail.com>
Subject: Re: Test
X-Mailer: sparrow 1.5 (build 1043)
MIME-Version: 1.0
Content-Type: multipart/alternative; boundary="4f7af3fb_749abb43_300"
--4f7af3fb_749abb43_300
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline
Hello
--
xxx
Sent with Sparrow (http://www.sparrowmailapp.com/?sig)
On Tuesday, April 3, 2012 at 4:55 PM, xxx wrote:
> Hello
>
> On Apr 3, 2012, at 4:19 PM, bob wrote:
>
> > Hi
--4f7af3fb_749abb43_300
Content-Type: text/html; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline
<div>
<span style=3D=22font-size: 12px;=22>Hello</span>
</div>
<div><div><br></div><div>--&nbsp;</div><div>xx=
x</div><div>Sent with <a href=3D=22http://www.sparrowmailapp.com/=3Fsig=22=
>Sparrow</a></div><div><br></div></div>
=20
<p style=3D=22color: =23A0A0A8;=22>On Tuesday, April 3, 2=
012 at 4:55 PM, xxx wrote:</p>
<blockquote type=3D=22cite=22 style=3D=22border-left-styl=
e:solid;border-width:1px;margin-left:0px;padding-left:10px;=22>
<span><div><div><div>Hello</div><div><br></div><div>O=
n Apr 3, 2012, at 4:19 PM, bob wrote:</div><div><br></div><blo=
ckquote type=3D=22cite=22><div>Hi</div></blockquote></div></div></span>
=20
=20
=20
=20
</blockquote>
=20
<div>
<br>
</div>
--4f7af3fb_749abb43_300--

View File

@@ -0,0 +1,5 @@
Hello
--
xxx
Sent with Sparrow (http://www.sparrowmailapp.com/?sig)

View File

@@ -0,0 +1,15 @@
MIME-Version: 1.0
Message-Id: <4F79B73C.9030506@xxx.mailgun.org>
Date: Mon, 02 Apr 2012 18:27:08 +0400
From: bob <bob@xxx.mailgun.org>
User-Agent: Mozilla/5.0 (X11; U; Linux i686; en-US;
rv:1.9.2.28) Gecko/20120313 Thunderbird/3.1.20
To: Megan One <xxx@gmail.com>
Subject: Re: Test
Sender: bob@xxx.mailgun.org
Content-Type: text/plain; charset="us-ascii"; format="flowed"
Content-Transfer-Encoding: 7bit
On 04/02/2012 06:26 PM, Megan One wrote:
> Hi
Hello

View File

@@ -0,0 +1,22 @@
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
X-Mailer: YahooMailWebService/0.8.117.340979
Message-Id: <1333374330.68772.YahooMailNeo@web114411.mail.gq1.yahoo.com>
Date: Mon, 2 Apr 2012 06:45:30 -0700 (PDT)
From: Alex Q <xxx@yahoo.com>
Subject: Re: Test
To: "bob@xxx.mailgun.org" <bob@xxx.mailgun.org>
In-Reply-To: <1333374262.7063.15.camel@mg5>
Content-Transfer-Encoding: 7bit
Hello
----- Original Message -----
From: "bob@xxx.mailgun.org" <bob@xxx.mailgun.org>
To: xxx@gmail.com; xxx@hotmail.com; xxx@yahoo.com; xxx@aol.com; xxx@comcast.net; xxx@nyc.rr.com
Cc:
Sent: Monday, April 2, 2012 5:44 PM
Subject: Test
Hi

View File

@@ -0,0 +1,298 @@
# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
import regex as re
from flanker import mime
from talon import quotations
import html2text
# Matches a single whitespace character. The tests below delete every match
# so that HTML output can be compared independently of its formatting.
# Raw strings are used so that `\s` is not parsed as a string escape.
RE_WHITESPACE = re.compile(r"\s")
# NOTE(review): despite its name this pattern is identical to RE_WHITESPACE.
# It is kept unchanged to preserve behaviour -- confirm whether a pattern for
# runs of whitespace was intended.
RE_DOUBLE_WHITESPACE = re.compile(r"\s")
def test_quotation_splitter_inside_blockquote():
    # The "On ... wrote:" line lives inside the <blockquote>; only the reply
    # text is expected to remain after extraction.
    html = """Reply
<blockquote>
<div>
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
</div>
<div>
Test
</div>
</blockquote>"""
    stripped = quotations.extract_from_html(html)
    eq_("<html><body><p>Reply</p></body></html>",
        RE_WHITESPACE.sub('', stripped))
def test_quotation_splitter_outside_blockquote():
    # The "On ... wrote:" line sits in its own <div> before the <blockquote>;
    # the expected output keeps the reply plus an emptied <div>.
    html = """Reply
<div>
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
</div>
<blockquote>
<div>
Test
</div>
</blockquote>
"""
    expected = "<html><body><p>Reply</p><div></div></body></html>"
    stripped = quotations.extract_from_html(html)
    eq_(expected, RE_WHITESPACE.sub('', stripped))
def test_no_blockquote():
    # The quotation is introduced only by an "On ... wrote:" line, without
    # any <blockquote> element.
    html = """
<html>
<body>
Reply
<div>
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
</div>
<div>
Test
</div>
</body>
</html>
"""
    expected_reply = """
<html>
<body>
Reply
</body></html>"""
    actual = quotations.extract_from_html(html)
    # compare both documents with all whitespace removed
    eq_(RE_WHITESPACE.sub('', expected_reply),
        RE_WHITESPACE.sub('', actual))
def test_empty_body():
    # an empty document is expected to come back as an empty string
    extracted = quotations.extract_from_html('')
    eq_('', extracted)
def test_validate_output_html():
    # The output must be wrapped in <html> tags and must not contain the
    # self-closing <div/> present in the input.
    html = """Reply
<div>
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
<blockquote>
<div>
Test
</div>
</blockquote>
</div>
<div/>
"""
    out = quotations.extract_from_html(html)
    has_html_tags = '<html>' in out and '</html>' in out
    ok_(has_html_tags, 'Invalid HTML - <html>/</html> tag not present')
    has_self_closing_div = '<div/>' in out
    ok_(not has_self_closing_div,
        'Invalid HTML output - <div/> element is not valid')
def test_gmail_quote():
    # Nested elements carrying the "gmail_quote" class are expected to be
    # removed completely, leaving only the reply.
    html = """Reply
<div class="gmail_quote">
<div class="gmail_quote">
On 11-Apr-2011, at 6:54 PM, Bob &lt;bob@example.com&gt; wrote:
<div>
Test
</div>
</div>
</div>"""
    stripped = quotations.extract_from_html(html)
    eq_("<html><body><p>Reply</p></body></html>",
        RE_WHITESPACE.sub('', stripped))
def test_unicode_in_reply():
    # The input is UTF-8 encoded bytes containing non-breaking spaces; the
    # expected output carries them as &#160; entities.
    raw_html = u"""Reply \xa0 \xa0 Text<br>
<div>
<br>
</div>
<blockquote class="gmail_quote">
Quote
</blockquote>""".encode("utf-8")
    expected = ("<html><body><p>Reply&#160;&#160;Text<br></p><div><br></div>"
                "</body></html>")
    stripped = quotations.extract_from_html(raw_html)
    eq_(expected, RE_WHITESPACE.sub('', stripped))
def test_blockquote_disclaimer():
    # Only the <blockquote> is expected to be removed; the disclaimer <div>
    # that follows the quotation must survive.
    html = """
<html>
<body>
<div>
<div>
message
</div>
<blockquote>
Quote
</blockquote>
</div>
<div>
disclaimer
</div>
</body>
</html>
"""
    expected_html = """
<html>
<body>
<div>
<div>
message
</div>
</div>
<div>
disclaimer
</div>
</body>
</html>
"""
    actual = quotations.extract_from_html(html)
    eq_(RE_WHITESPACE.sub('', expected_html),
        RE_WHITESPACE.sub('', actual))
def test_date_block():
    # Quotation header block that starts with "Date:" after an <hr>.
    html = """
<div>
message<br>
<div>
<hr>
Date: Fri, 23 Mar 2012 12:35:31 -0600<br>
To: <a href="mailto:bob@example.com">bob@example.com</a><br>
From: <a href="mailto:rob@example.com">rob@example.com</a><br>
Subject: You Have New Mail From Mary!<br><br>
text
</div>
</div>
"""
    expected = '<html><body><div>message<br></div></body></html>'
    stripped = quotations.extract_from_html(html)
    eq_(expected, RE_WHITESPACE.sub('', stripped))
def test_from_block():
    # Quotation header block that starts with "From:" after an <hr>.
    html = """<div>
message<br>
<div>
<hr>
From: <a href="mailto:bob@example.com">bob@example.com</a><br>
Date: Fri, 23 Mar 2012 12:35:31 -0600<br>
To: <a href="mailto:rob@example.com">rob@example.com</a><br>
Subject: You Have New Mail From Mary!<br><br>
text
</div></div>
"""
    expected = '<html><body><div>message<br></div></body></html>'
    stripped = quotations.extract_from_html(html)
    eq_(expected, RE_WHITESPACE.sub('', stripped))
def test_reply_shares_div_with_from_block():
    # The reply text and the quoted header block live in the same <div>;
    # only the part before the <hr> is expected to remain.
    html = '''
<body>
<div>
Blah<br><br>
<hr>Date: Tue, 22 May 2012 18:29:16 -0600<br>
To: xx@hotmail.ca<br>
From: quickemail@ashleymadison.com<br>
Subject: You Have New Mail From x!<br><br>
</div>
</body>'''
    expected = '<html><body><div>Blah<br><br></div></body></html>'
    stripped = quotations.extract_from_html(html)
    eq_(expected, RE_WHITESPACE.sub('', stripped))
def test_reply_quotations_share_block():
    # Parse the fixture message and take the second part yielded by walk(),
    # which must be the HTML alternative.
    message = mime.from_string(REPLY_QUOTATIONS_SHARE_BLOCK)
    parts = list(message.walk())
    html_part = parts[1]
    assert html_part.content_type == 'text/html'
    stripped_html = quotations.extract_from_html(html_part.body)
    # something must be left, and the quoted "From" header must be gone
    ok_(stripped_html)
    ok_('From' not in stripped_html)
def test_OLK_SRC_BODY_SECTION_stripped():
    # The fixture's OLK_SRC_BODY_SECTION span is expected to be removed.
    stripped = quotations.extract_from_html(OLK_SRC_BODY_SECTION)
    eq_('<html><body><div>Reply</div></body></html>',
        RE_WHITESPACE.sub('', stripped))
def test_reply_separated_by_hr():
    # Fixture in which the reply is separated from the quotation by <hr>.
    stripped = quotations.extract_from_html(REPLY_SEPARATED_BY_HR)
    eq_('<html><body><div>Hi<div>there</div></div></body></html>',
        RE_WHITESPACE.sub('', stripped))
# Plain-text shape of the reply expected by extract_reply_and_check():
# "Hi. I am fine." / "Thanks," / "Alex" on separate lines, tolerating extra
# whitespace around the line breaks and at the end.
RE_REPLY = re.compile(r"^Hi\. I am fine\.\s*\n\s*Thanks,\s*\n\s*Alex\s*$")
def extract_reply_and_check(filename):
    """Strip quotations from an HTML fixture and check the remaining reply.

    The file is read and decoded as UTF-8, passed through
    ``quotations.extract_from_html`` and converted to plain text with
    html2text. The result must match ``RE_REPLY``; otherwise the test fails
    with a diff against the canonical reply text.

    :param filename: path to the HTML fixture file.
    """
    # use a context manager so the fixture file is always closed
    with open(filename) as f:
        msg_body = f.read().decode("utf-8")

    reply = quotations.extract_from_html(msg_body)

    h = html2text.HTML2Text()
    h.body_width = 0  # presumably disables line wrapping -- see html2text docs
    plain_reply = h.handle(reply)
    # remove &nbsp; spaces
    plain_reply = plain_reply.replace(u'\xa0', u' ')

    # A regex match is a pass. On mismatch, eq_ is used only to report the
    # difference from the canonical reply in the failure message.
    if not RE_REPLY.match(plain_reply):
        eq_("Hi. I am fine.\n\nThanks,\nAlex", plain_reply)
# One test per mail client: each feeds that client's HTML reply fixture
# through extract_reply_and_check().
def test_gmail_reply():
    extract_reply_and_check("tests/fixtures/html_replies/gmail.html")


def test_mail_ru_reply():
    extract_reply_and_check("tests/fixtures/html_replies/mail_ru.html")


def test_hotmail_reply():
    extract_reply_and_check("tests/fixtures/html_replies/hotmail.html")


def test_ms_outlook_2003_reply():
    extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2003.html")


def test_ms_outlook_2007_reply():
    extract_reply_and_check("tests/fixtures/html_replies/ms_outlook_2007.html")


def test_thunderbird_reply():
    extract_reply_and_check("tests/fixtures/html_replies/thunderbird.html")


def test_windows_mail_reply():
    extract_reply_and_check("tests/fixtures/html_replies/windows_mail.html")


def test_yandex_ru_reply():
    extract_reply_and_check("tests/fixtures/html_replies/yandex_ru.html")

33
tests/quotations_test.py Normal file
View File

@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
from flanker import mime
from talon import quotations
@patch.object(quotations, 'extract_from_html')
@patch.object(quotations, 'extract_from_plain')
def test_extract_from_respects_content_type(extract_from_plain,
                                            extract_from_html):
    body = "Hi there"

    # text/plain goes to the plain-text extractor
    quotations.extract_from(body, 'text/plain')
    extract_from_plain.assert_called_with(body)

    # text/html goes to the HTML extractor
    quotations.extract_from(body, 'text/html')
    extract_from_html.assert_called_with(body)

    # any other content type returns the body untouched
    untouched = quotations.extract_from(body, 'text/blah')
    eq_(body, untouched)
@patch.object(quotations, 'extract_from_plain', Mock(side_effect=Exception()))
def test_crash_inside_extract_from():
    # the patched extractor raises; the original body must still come back
    body = "Hi there"
    result = quotations.extract_from(body, 'text/plain')
    eq_(body, result)
def test_empty_body():
    # an empty plain-text body is expected to come back as an empty string
    extracted = quotations.extract_from_plain('')
    eq_('', extracted)

View File

View File

@@ -0,0 +1,238 @@
# -*- coding: utf-8 -*-
from .. import *
import os
from flanker import mime
from talon.signature import bruteforce
def test_empty_body():
    # empty input: empty text and no signature
    expected = ('', None)
    eq_(expected, bruteforce.extract_signature(''))
def test_no_signature():
    # a one-line message comes back unchanged with no signature
    body = 'Hey man!'
    result = bruteforce.extract_signature(body)
    eq_((body, None), result)
def test_signature_only():
    # a message made only of a signature is returned whole as the text
    body = '--\nRoman'
    result = bruteforce.extract_signature(body)
    eq_((body, None), result)
def test_signature_separated_by_dashes():
    # (message body, expected (text, signature)) pairs, checked in order
    cases = [
        ('''Hey man! How r u?
---
Roman''',
         ('Hey man! How r u?', '---\nRoman')),
        ('''Hey!
-roman''',
         ('Hey!', '-roman')),
        ('''Hey!
- roman''',
         ('Hey!', '- roman')),
        ('''Wow. Awesome!
--
Bob Smith''',
         ('Wow. Awesome!', '--\nBob Smith')),
    ]
    for body, expected in cases:
        eq_(expected, bruteforce.extract_signature(body))
def test_signature_words():
    # (message body, expected (text, signature)) pairs, checked in order
    cases = [
        ('''Hey!
Thanks!
Roman''',
         ('Hey!', 'Thanks!\nRoman')),
        ('''Hey!
--
Best regards,
Roman''',
         ('Hey!', '--\nBest regards,\n\nRoman')),
        ('''Hey!
--
--
Regards,
Roman''',
         ('Hey!', '--\n--\nRegards,\nRoman')),
    ]
    for body, expected in cases:
        eq_(expected, bruteforce.extract_signature(body))
def test_iphone_signature():
    # "Sent from my iPhone!" is expected to be split off as the signature
    body = '''Hey!
Sent from my iPhone!'''
    expected = ('Hey!', 'Sent from my iPhone!')
    eq_(expected, bruteforce.extract_signature(body))
def test_mailbox_for_iphone_signature():
    # "Sent from Mailbox for iPhone" is expected to be the signature
    body = """Blah
Sent from Mailbox for iPhone"""
    expected = ("Blah", "Sent from Mailbox for iPhone")
    eq_(expected, bruteforce.extract_signature(body))
def test_line_starts_with_signature_word():
    # "Thanks for your attention." starts with a signature word but is
    # expected to stay in the text; the signature begins at the dashes.
    body = '''Hey man!
Thanks for your attention.
--
Thanks!
Roman'''
    expected_text = 'Hey man!\nThanks for your attention.'
    expected_signature = '--\nThanks!\nRoman'
    eq_((expected_text, expected_signature),
        bruteforce.extract_signature(body))
def test_line_starts_with_dashes():
    # lines such as "--> one" begin with dashes but are expected to stay in
    # the text; only the bare "--" line starts the signature
    body = '''Hey man!
Look at this:
--> one
--> two
--
Roman'''
    expected_text = 'Hey man!\nLook at this:\n\n--> one\n--> two'
    expected_signature = '--\nRoman'
    eq_((expected_text, expected_signature),
        bruteforce.extract_signature(body))
def test_blank_lines_inside_signature():
    # (message body, expected (text, signature)) pairs, checked in order
    cases = [
        ('''Blah.
-Lev.
Sent from my HTC smartphone!''',
         ('Blah.', '-Lev.\n\nSent from my HTC smartphone!')),
        ('''Blah
--
John Doe''',
         ('Blah', '--\n\nJohn Doe')),
    ]
    for body, expected in cases:
        eq_(expected, bruteforce.extract_signature(body))
def test_blackberry_signature():
    # everything after the first line is expected to be the signature
    body = """Heeyyoooo.
Sent wirelessly from my BlackBerry device on the Bell network.
Envoyé sans fil par mon terminal mobile BlackBerry sur le réseau de Bell."""
    expected_signature = body[len('Heeyyoooo.\n'):]
    eq_(('Heeyyoooo.', expected_signature),
        bruteforce.extract_signature(body))

    # same check with a unicode body
    unicode_body = u"""Blah
Enviado desde mi oficina móvil BlackBerry® de Telcel"""
    expected = ('Blah',
                u'Enviado desde mi oficina móvil BlackBerry® de Telcel')
    eq_(expected, bruteforce.extract_signature(unicode_body))
@patch.object(bruteforce, 'get_delimiter', Mock(side_effect=Exception()))
def test_crash_in_extract_signature():
    # get_delimiter is patched to raise; the body must come back unchanged
    # with no signature
    body = '''Hey!
-roman'''
    result = bruteforce.extract_signature(body)
    eq_((body, None), result)
def test_signature_cant_start_from_first_line():
    # the leading "Thanks," is expected to stay in the text even though it
    # looks like a signature word
    body = """Thanks,
Blah
regards
John Doe"""
    expected_text = 'Thanks,\n\nBlah'
    expected_signature = 'regards\n\nJohn Doe'
    eq_((expected_text, expected_signature),
        bruteforce.extract_signature(body))
@patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 2)
def test_signature_max_lines_ignores_empty_lines():
    # runs with SIGNATURE_MAX_LINES patched to 2
    body = """Thanks,
Blah
regards
John Doe"""
    expected_text = 'Thanks,\nBlah'
    expected_signature = 'regards\n\n\nJohn Doe'
    eq_((expected_text, expected_signature),
        bruteforce.extract_signature(body))
# Exercises bruteforce.get_signature_candidate() on short inputs, with
# SIGNATURE_MAX_LINES and TOO_LONG_SIGNATURE_LINE patched where noted.
def test_get_signature_candidate():
# if there aren't at least 2 non-empty lines there should be no signature
for lines in [], [''], ['', ''], ['abc']:
eq_([], bruteforce.get_signature_candidate(lines))
# first line never included
lines = ['text', 'signature']
eq_(['signature'], bruteforce.get_signature_candidate(lines))
# test when message is shorter than SIGNATURE_MAX_LINES
with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 3):
lines = ['text', '', '', 'signature']
eq_(['signature'], bruteforce.get_signature_candidate(lines))
# test when message is longer than SIGNATURE_MAX_LINES
with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 2):
lines = ['text1', 'text2', 'signature1', '', 'signature2']
eq_(['signature1', '', 'signature2'],
bruteforce.get_signature_candidate(lines))
# test long lines are not included
with patch.object(bruteforce, 'TOO_LONG_SIGNATURE_LINE', 3):
lines = ['BR,', 'long', 'Bob']
eq_(['Bob'], bruteforce.get_signature_candidate(lines))
# test list (with dashes as bullet points) not included
lines = ['List:,', '- item 1', '- item 2', '--', 'Bob']
eq_(['--', 'Bob'], bruteforce.get_signature_candidate(lines))
# Checks the marker string returned by bruteforce._mark_candidate_indexes()
# for a list of lines plus the candidate indexes into it, with
# TOO_LONG_SIGNATURE_LINE patched to 3.
def test_mark_candidate_indexes():
with patch.object(bruteforce, 'TOO_LONG_SIGNATURE_LINE', 3):
# spaces are not considered when checking line length
eq_('clc',
bruteforce._mark_candidate_indexes(
['BR, ', 'long', 'Bob'],
[0, 1, 2]))
# only candidate lines are marked
# if line has only dashes it's a candidate line
eq_('ccdc',
bruteforce._mark_candidate_indexes(
['-', 'long', '-', '- i', 'Bob'],
[0, 2, 3, 4]))
def test_process_marked_candidate_indexes():
    # (candidate indexes, markers, expected result) triples, checked in order
    cases = [
        ([2, 13, 15], 'dcc', [2, 13, 15]),
        ([2, 13, 15], 'ddc', [15]),
        ([13, 15], 'cc', [13, 15]),
        ([15], 'lc', [15]),
        ([13, 15], 'ld', [15]),
    ]
    for candidate, markers, expected in cases:
        eq_(expected,
            bruteforce._process_marked_candidate_indexes(candidate, markers))

View File

@@ -0,0 +1,148 @@
# -*- coding: utf-8 -*-
from .. import *
import os
from PyML import SparseDataSet
from talon.signature.learning import dataset
from talon import signature
from talon.signature import extraction as e
from talon.signature import bruteforce
def test_message_shorter_SIGNATURE_MAX_LINES():
    sender = "bob@foo.bar"
    body = """Call me ASAP, please.This is about the last changes you deployed.
Thanks in advance,
Bob"""
    text, extracted_signature = signature.extract(body, sender)
    # the first two lines are expected as text, the last two as signature
    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-2:])
    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
def test_messages_longer_SIGNATURE_MAX_LINES():
    # For every "*_body" fixture in STRIPPED, compare the extracted signature
    # and the remaining text with the sibling "*_signature" file.
    for name in os.listdir(STRIPPED):
        filename = os.path.join(STRIPPED, name)
        if not filename.endswith('_body'):
            continue

        sender, body = dataset.parse_msg_sender(filename)
        text, extracted_signature = signature.extract(body, sender)
        extracted_signature = extracted_signature or ''

        signature_filename = filename[:-len('body')] + 'signature'
        with open(signature_filename) as ms:
            msg_signature = ms.read()

        eq_(msg_signature.strip(), extracted_signature.strip())

        # the expected text is the body with the signature cut off its end
        stripped_body = body.strip()
        stripped_msg = stripped_body[:len(stripped_body)-len(msg_signature)]
        eq_(stripped_msg.strip(), text.strip())
def test_text_line_in_signature():
    # test signature should consist of one solid part
    sender = "bob@foo.bar"
    body = """Call me ASAP, please.This is about the last changes you deployed.
Thanks in advance,
some text which doesn't seem to be a signature at all
Bob"""
    text, extracted_signature = signature.extract(body, sender)
    body_lines = body.splitlines()
    expected_text = '\n'.join(body_lines[:2])
    expected_signature = '\n'.join(body_lines[-3:])
    eq_(expected_text, text)
    eq_(expected_signature, extracted_signature)
def test_long_line_in_signature():
    # A long line inside the would-be signature: only the last line ("Bob")
    # is expected to be extracted.
    sender = "bob@foo.bar"
    body = """Call me ASAP, please.This is about the last changes you deployed.
Thanks in advance,
some long text here which doesn't seem to be a signature at all
Bob"""
    text, extracted_signature = signature.extract(body, sender)
    eq_('\n'.join(body.splitlines()[:-1]), text)
    eq_('Bob', extracted_signature)

    # A message that ends with a long text line has no signature. The
    # original built a tuple here and discarded it, so nothing was checked.
    # Only the signature is asserted (as in
    # test_over_2_text_lines_after_signature), because whether the returned
    # text keeps the trailing whitespace is not visible from this file.
    body = """Thanks David,
some *long* text here which doesn't seem to be a signature at all
"""
    text, extracted_signature = signature.extract(body, "david@example.com")
    eq_(None, extracted_signature)
def test_basic():
    # body with \r\n line endings; the signature is expected to keep them
    body = 'Blah\r\n--\r\n\r\nSergey Obukhov'
    expected = ('Blah', '--\r\n\r\nSergey Obukhov')
    eq_(expected, signature.extract(body, 'Sergey'))
def test_over_2_text_lines_after_signature():
    # more than two text lines follow the sender's name, so no signature is
    # expected
    message = """Blah
Bob,
If there are more than
2 non signature lines in the end
It's not signature
"""
    text, extracted_signature = signature.extract(message, "Bob")
    eq_(extracted_signature, None)
def test_no_signature():
    # a one-word body comes back unchanged with no signature
    sender = "bob@foo.bar"
    body = "Hello"
    result = signature.extract(body, sender)
    eq_((body, None), result)
def test_handles_unicode():
    # no assertion on the result: the test passes if extraction of the
    # unicode fixture returns a pair without raising
    parsed = dataset.parse_msg_sender(UNICODE_MSG)
    sender, body = parsed
    text, extracted_signature = signature.extract(body, sender)
@patch.object(signature.extraction, 'has_signature')
def test_signature_extract_crash(has_signature):
    # has_signature is mocked to raise; the body must come back unchanged
    # with no signature
    has_signature.side_effect = Exception('Bam!')
    body = u'Blah\r\n--\r\n\r\nСергей'
    result = signature.extract(body, 'Сергей')
    eq_((body, None), result)
def test_mark_lines():
    with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 2):
        # we analyse the 2nd line as well though it's the 6th line
        # (starting from the bottom) because we don't count empty line
        lines = ['Bob Smith',
                 'Bob Smith',
                 'Bob Smith',
                 '',
                 'some text']
        eq_('ttset', e._mark_lines(lines, 'Bob Smith'))

    with patch.object(bruteforce, 'SIGNATURE_MAX_LINES', 3):
        # we don't analyse the 1st line because
        # signature can't start from the 1st line
        lines = ['Bob Smith',
                 'Bob Smith',
                 '',
                 'some text']
        eq_('tset', e._mark_lines(lines, 'Bob Smith'))
def test_process_marked_lines():
    # (lines, markers, expected (text, signature)) triples, checked in order
    cases = [
        # no signature found
        (range(5), 'telt', (range(5), None)),
        # signature in the middle of the text
        (range(9), 'tesestelt', (range(9), None)),
        # long line splits signature
        (range(9), 'tsslsless', (range(7), [7, 8])),
        (range(21), 'ttttttstttesllelelets', (range(20), [20])),
        # some signature lines could be identified as text
        (range(9), 'tsetetest', ([0], range(1, 9))),
        (range(5), "ststt", ([], range(5))),
    ]
    for lines, markers, expected in cases:
        eq_(expected, e._process_marked_lines(lines, markers))

View File

View File

@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
from ... import *
import os
from PyML import SparseDataSet
from talon.utils import to_unicode
from talon.signature.learning import dataset as d
from talon.signature.learning.featurespace import features
def test_is_sender_filename():
    # only names ending in "_sender" are accepted
    for path in ("foo/bar", "foo/bar_body"):
        assert_false(d.is_sender_filename(path))
    ok_(d.is_sender_filename("foo/bar_sender"))
def test_build_sender_filename():
    # the "_body" suffix is expected to be replaced with "_sender"
    built = d.build_sender_filename("foo/bar_body")
    eq_("foo/bar_sender", built)
def test_parse_msg_sender():
    # if the message in eml format
    sender, msg = d.parse_msg_sender(EML_MSG_FILENAME)
    with open(EML_MSG_FILENAME) as f:
        eml_content = f.read()
    eq_(sender,
        " Alex Q <xxx@yahoo.com>")
    eq_(msg, eml_content)

    # if the message sender is stored in a separate file
    sender, msg = d.parse_msg_sender(MSG_FILENAME_WITH_BODY_SUFFIX)
    with open(MSG_FILENAME_WITH_BODY_SUFFIX) as f:
        body_content = f.read()
    eq_(sender, u"john@example.com")
    eq_(msg, body_content)
def test_build_extraction_dataset():
    dataset_path = os.path.join(TMP_DIR, 'extraction.data')
    # start from a clean slate: drop a dataset left by a previous run
    if os.path.exists(dataset_path):
        os.remove(dataset_path)

    emails_path = os.path.join(EMAILS_DIR, 'P')
    d.build_extraction_dataset(emails_path, dataset_path, 1)
    test_data = SparseDataSet(dataset_path, labelsColumn=-1)

    # the result is a loadable signature extraction dataset
    # 32 comes from 3 emails in emails/P folder, 11 lines checked to be
    # a signature, one email has only 10 lines
    eq_(test_data.size(), 32)
    eq_(len(features('')), test_data.numFeatures)

View File

@@ -0,0 +1,44 @@
# -*- coding: utf-8 -*-
from ... import *
from talon.signature.learning import featurespace as fs
def test_apply_features():
    text = '''John Doe
VP Research and Development, Xxxx Xxxx Xxxxx
555-226-2345
john@example.com'''
    sender = 'John <john@example.com>'
    result = fs.apply_features(text, fs.features(sender))
    # note that we don't consider the first line because signatures don't
    # usually take all the text, empty lines are not considered
    expected = [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
                [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
                [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
    eq_(result, expected)

    with patch.object(fs, 'SIGNATURE_MAX_LINES', 4):
        patched_features = fs.features(sender)
        new_result = fs.apply_features(text, patched_features)
    # result remains the same because we don't consider empty lines
    eq_(result, new_result)
def test_build_pattern():
    # expects a single pattern row for the whole four-line text
    text = '''John Doe
VP Research and Development, Xxxx Xxxx Xxxxx
555-226-2345
john@example.com'''
    sender = 'John <john@example.com>'
    pattern = fs.build_pattern(text, fs.features(sender))
    expected = [2, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1]
    eq_(pattern, expected)

View File

@@ -0,0 +1,236 @@
# -*- coding: utf-8 -*-
from ... import *
import regex as re
from talon.signature.learning import helpers as h
from talon.signature.learning.helpers import *
# First testing regex constants.
# Phone numbers, one per line, that the relaxed phone regex is expected to
# match.
VALID = '''
15615552323
1-561-555-1212
5613333
18008793262
800-879-3262
0-800.879.3262
04 3452488
04 -3452488
04 - 3452499
(610) 310-5555 x5555
533-1123
(021)1234567
(021)123456
(000)000000
+7 920 34 57 23
+7(920) 34 57 23
+7(920)345723
+7920345723
8920345723
21143
2-11-43
2 - 11 - 43
'''
# The non-blank lines of VALID with surrounding whitespace removed.
VALID_PHONE_NUMBERS = [
    number
    for number in (raw.strip() for raw in VALID.splitlines())
    if number
]
def test_match_phone_numbers():
    # every number in VALID_PHONE_NUMBERS must match RE_RELAX_PHONE
    for phone in VALID_PHONE_NUMBERS:
        matched = RE_RELAX_PHONE.match(phone)
        ok_(matched, "{} should be matched".format(phone))
def test_match_names():
    # every sample name must match RE_NAME
    for name in ('John R. Doe',):
        matched = RE_NAME.match(name)
        ok_(matched, "{} should be matched".format(name))
def test_sender_with_name():
    # sender lines that carry a name and must match
    matching = (
        'Sergey Obukhov <serobnic@example.com>',
        '\tSergey <serobnic@example.com>',
        ('"Doe, John (TX)"'
         '<DowJ@example.com>@EXAMPLE'
         '<IMCEANOTES-+22Doe+2C+20John+20'
         '+28TX+29+22+20+3CDoeJ+40example+2Ecom+3E'
         '+40EXAMPLE@EXAMPLE.com>'),
        ('Company Sleuth <csleuth@email.xxx.com>'
         '@EXAMPLE <XXX-Company+20Sleuth+20+3Ccsleuth'
         '+40email+2Exxx+2Ecom+3E+40EXAMPLE@EXAMPLE.com>'),
        ('Doe III, John '
         '</O=EXAMPLE/OU=NA/CN=RECIPIENTS/CN=jDOE5>'),
    )
    for line in matching:
        ok_(RE_SENDER_WITH_NAME.match(line),
            '{} should be matched'.format(line))

    # sender lines that must not match
    non_matching = ('', '<serobnic@xxx.ru>', 'Sergey serobnic@xxx.ru')
    for line in non_matching:
        assert_false(RE_SENDER_WITH_NAME.match(line),
                     '{} should not be matched'.format(line))
# Now test the helper functions
def test_binary_regex_search():
    # the returned callable yields 1 when the pattern is found, 0 otherwise
    search_12 = h.binary_regex_search(re.compile("12"))
    eq_(1, search_12("12"))
    search_12 = h.binary_regex_search(re.compile("12"))
    eq_(0, search_12("34"))
def test_binary_regex_match():
    # Renamed from `binary_regex_match(prog)`: without the `test_` prefix the
    # runner never collected it, and the unused `prog` parameter made it
    # uncallable as a test. The old name also shadowed the helper of the same
    # name brought in by the star import.
    # The returned callable yields 1 only when the pattern matches at the
    # start of the string, 0 otherwise.
    eq_(1, h.binary_regex_match(re.compile("12"))("12 3"))
    eq_(0, h.binary_regex_match(re.compile("12"))("3 12"))
def test_flatten_list():
    # one level of nesting is expected to be flattened, order preserved
    nested = [[1, 2], [3, 4, 5]]
    eq_([1, 2, 3, 4, 5], h.flatten_list(nested))
# h.re.compile is patched for the whole test; the mock is passed in as
# `re_compile` and is not otherwise used here.
# NOTE(review): with compile() mocked, the checks below run against mock
# objects, not real patterns -- confirm that is what the test intends.
@patch.object(h.re, 'compile')
def test_contains_sender_names(re_compile):
# extract_names is mocked so the names derived from the sender are fixed
with patch.object(h, 'extract_names',
Mock(return_value=['bob', 'smith'])) as extract_names:
has_sender_names = h.contains_sender_names("bob.smith@example.com")
extract_names.assert_called_with("bob.smith@example.com")
for name in ["bob", "Bob", "smith", "Smith"]:
ok_(has_sender_names(name))
extract_names.return_value = ''
has_sender_names = h.contains_sender_names("bob.smith@example.com")
# if no names could be extracted fallback to the email address
ok_(has_sender_names('bob.smith@example.com'))
# don't crash if there is no sender
extract_names.return_value = ''
has_sender_names = h.contains_sender_names("")
assert_false(has_sender_names(''))
def test_extract_names():
    """Check the name tokens `h.extract_names` pulls out of sender strings.

    For every sample sender, each expected token must be present in the
    extracted list, and the extracted list joined with `|` must compile as
    a regular expression.
    """
    senders_names = {
        # from example dataset
        ('Jay Rickerts <eCenter@example.com>@EXAMPLE <XXX-Jay+20Rickerts'
         '+20+3CeCenter+40example+2Ecom+3E+40EXAMPLE@EXAMPLE.com>'):
        ['Jay', 'Rickerts'],
        # if `,` is used in sender's name
        'Williams III, Bill </O=EXAMPLE/OU=NA/CN=RECIPIENTS/CN=BWILLIA5>':
        ['Williams', 'III', 'Bill'],
        # if somehow `'` or `"` are used in sender's name
        'Laura" "Goldberg <laura.goldberg@example.com>':
        ['Laura', 'Goldberg'],
        # extract from sender's email address
        '<sergey@xxx.ru>': ['sergey'],
        # extract from sender's email address
        # if dots are used in the email address
        '<sergey.obukhov@xxx.ru>': ['sergey', 'obukhov'],
        # extract from sender's email address
        # if dashes are used in the email address
        '<sergey-obukhov@xxx.ru>': ['sergey', 'obukhov'],
        # extract from sender's email address
        # if `_` are used in the email address
        '<sergey_obukhov@xxx.ru>': ['sergey', 'obukhov'],
        # old style From field, found in jangada dataset
        'wcl@example.com (Wayne Long)': ['Wayne', 'Long'],
        # if only sender's name provided
        'Wayne Long': ['Wayne', 'Long'],
        # if middle name is shortened with dot
        'Sergey N. Obukhov <serobnic@xxx.ru>': ['Sergey', 'Obukhov'],
        # not only spaces could be used as name splitters
        ' Sergey Obukhov <serobnic@xxx.ru>': ['Sergey', 'Obukhov'],
        # finally normal example
        'Sergey <serobnic@xxx.ru>': ['Sergey'],
        # if middle name is shortened with `,`
        'Sergey N, Obukhov': ['Sergey', 'Obukhov'],
        # if mailto used with email address and sender's name is specified
        'Sergey N, Obukhov [mailto: serobnic@xxx.ru]': ['Sergey', 'Obukhov'],
        # when only email address is given
        'serobnic@xxx.ru': ['serobnic'],
        # when nothing is given
        '': [],
        # if phone is specified in the `From:` header
        'wcl@example.com (Wayne Long +7 920 -256 - 35-09)': ['Wayne', 'Long'],
        # from crash reports `nothing to repeat`
        '* * * * <the_pod1@example.com>': ['the', 'pod'],
        '"**Bobby B**" <copymycashsystem@example.com>':
        ['Bobby', 'copymycashsystem'],
        # from crash reports `bad escape`
        '"M Ali B Azlan \(GHSE/PETH\)" <aliazlan@example.com>':
        ['Ali', 'Azlan'],
        ('"Ridthauddin B A Rahim \(DD/PCSB\)"'
         ' <ridthauddin_arahim@example.com>'): ['Ridthauddin', 'Rahim'],
        ('"Boland, Patrick \(Global Xxx Group, Ireland \)"'
         ' <Patrick.Boland@example.com>'): ['Boland', 'Patrick'],
        '"Mates Rate \(Wine\)" <amen@example.com.com>':
        ['Mates', 'Rate', 'Wine'],
        ('"Morgan, Paul \(Business Xxx RI, Xxx Xxx Group\)"'
         ' <paul.morgan@example.com>'): ['Morgan', 'Paul'],
        '"David DECOSTER \(Domicile\)" <decosterdavid@xxx.be>':
        ['David', 'DECOSTER', 'Domicile']
    }

    for sender, expected_names in senders_names.items():
        extracted_names = h.extract_names(sender)

        # check that extracted names could be compiled
        try:
            re.compile("|".join(extracted_names))
        # `as` form is valid on Python 2.6+ and Python 3; the old
        # `except Exception, e` spelling is a syntax error on Python 3
        except Exception as e:
            ok_(False, ("Failed to compile extracted names {}"
                        "\n\nReason: {}").format(extracted_names, e))

        if expected_names:
            # only membership is checked: extra extracted tokens are allowed
            for name in expected_names:
                assert_in(name, extracted_names)
        else:
            eq_(expected_names, extracted_names)

    # words like `ru`, `gmail`, `com`, `org`, etc. are not considered
    # sender's names
    for word in h.BAD_SENDER_NAMES:
        eq_(h.extract_names(word), [])

    # duplicates are not allowed
    eq_(h.extract_names("sergey <sergey@example.com"), ["sergey"])
def test_categories_percent():
    """categories_percent reports the share of characters in given categories."""
    cases = [
        # (expected percent, text, unicode categories)
        (0.0, "qqq ggg hhh", ["Po"]),
        (50.0, "q,w.", ["Po"]),
        (0.0, "qqq ggg hhh", ["Nd"]),
        (50.0, "q5", ["Nd"]),
        (50.0, "s.s,5s", ["Po", "Nd"]),
        (0.0, "", ["Po", "Nd"]),
    ]
    for expected, text, categories in cases:
        eq_(expected, h.categories_percent(text, categories))
@patch.object(h, 'categories_percent')
def test_punctuation_percent(categories_percent):
    """punctuation_percent must call categories_percent with ['Po']."""
    sample = "qqq"
    h.punctuation_percent(sample)
    categories_percent.assert_called_with(sample, ['Po'])
def test_capitalized_words_percent():
    """capitalized_words_percent on empty, mixed and numeric-heavy samples."""
    cases = [
        # (expected percent, text)
        (0.0, ''),
        (100.0, 'Example Corp'),
        (50.0, 'Qqq qqq QQQ 123 sss'),
        (100.0, 'Cell 713-444-7368'),
        (100.0, '8th Floor'),
        (0.0, '(212) 230-9276'),
    ]
    for expected, text in cases:
        eq_(expected, h.capitalized_words_percent(text))
def test_has_signature():
    """has_signature detects sender names, phones, emails and URLs.

    Covers positive samples, a URL that merely contains digits, an overly
    long line, the empty string, and trailing empty lines.
    """
    ok_(h.has_signature('sender', 'sender@example.com'))
    ok_(h.has_signature('http://www.example.com\n555 555 5555',
                        'sender@example.com'))
    ok_(h.has_signature('http://www.example.com\naddress@example.com',
                        'sender@example.com'))
    assert_false(h.has_signature('http://www.example.com/555-555-5555',
                                 'sender@example.com'))

    # a 28-character filler prefix; presumably pushes the line past the
    # helper's length limit for signature lines -- confirm against `h`
    long_line = 'q' * 28
    assert_false(h.has_signature(long_line + ' sender', 'sender@example.com'))

    # wont crash on an empty string
    assert_false(h.has_signature('', ''))

    # dont consider empty strings when analysing signature
    with patch.object(h, 'SIGNATURE_MAX_LINES', 1):
        # the original asserted on the string literal itself (always truthy)
        # and never invoked has_signature
        ok_(h.has_signature('sender\n\n', 'sender@example.com'))

View File

@@ -0,0 +1,534 @@
# -*- coding: utf-8 -*-
from . import *
from . fixtures import *
import os
from flanker import mime
from talon import quotations
@patch.object(quotations, 'MAX_LINES_COUNT', 1)
def test_too_many_lines():
    """With MAX_LINES_COUNT patched to 1 the body is returned unchanged."""
    body = """Test reply
-----Original Message-----
Test"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
def test_pattern_on_date_somebody_wrote():
    """An `On <date>, <person> wrote:` line starts the quotation."""
    body = """Test reply
On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> wrote:
>
> Test
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
def test_pattern_on_date_somebody_wrote_date_with_slashes():
    """The `On ... wrote:` splitter also works with a slash-separated date."""
    body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test.
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
def test_pattern_on_date_somebody_wrote_allows_space_in_front():
    """An `On ... wrote:` splitter wrapped over two lines is still detected."""
    body = """Thanks Thanmai
On Mar 8, 2012 9:59 AM, "Example.com" <
r+7f1b094ceb90e18cca93d53d3703feae@example.com> wrote:
>**
> Blah-blah-blah"""
    extracted = quotations.extract_from_plain(body)
    eq_("Thanks Thanmai", extracted)
def test_pattern_on_date_somebody_sent():
    """An `On <date>, <person> sent:` line is treated as a splitter too."""
    body = """Test reply
On 11-Apr-2011, at 6:54 PM, Roman Tkachenko <romant@example.com> sent:
>
> Test
>
> Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
def test_line_starts_with_on():
    """A line that merely starts with `On` is not a quotation splitter."""
    body = """Blah-blah-blah
On blah-blah-blah"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
def test_reply_and_quotation_splitter_share_line():
    """Reply text and the splitter pattern may sit on the same line."""
    # reply text followed by 'On <date> <person> wrote:' on one line
    body = """reply On Wed, Apr 4, 2012 at 3:59 PM, bob@example.com wrote:
> Hi"""
    extracted = quotations.extract_from_plain(body)
    eq_('reply', extracted)

    # '--- On <date> <person> wrote:' glued to the reply text
    body = """reply--- On Wed, Apr 4, 2012 at 3:59 PM, me@domain.com wrote:
> Hi"""
    extracted = quotations.extract_from_plain(body)
    eq_('reply', extracted)

    # '--- On <date> <person> wrote:' where the reply itself contains '-'
    body = """reply
bla-bla - bla--- On Wed, Apr 4, 2012 at 3:59 PM, me@domain.com wrote:
> Hi"""
    expected = """reply
bla-bla - bla"""
    extracted = quotations.extract_from_plain(body)
    eq_(expected, extracted)
def test_pattern_original_message():
    """`-----Original Message-----` separates the reply from the quotation."""
    first_body = """Test reply
-----Original Message-----
Test"""
    first_extracted = quotations.extract_from_plain(first_body)
    eq_("Test reply", first_extracted)

    second_body = """Test reply
-----Original Message-----
Test"""
    second_extracted = quotations.extract_from_plain(second_body)
    eq_("Test reply", second_extracted)
def test_reply_after_quotations():
    """Reply text placed below the quoted block is what gets extracted."""
    body = """On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test
Test reply"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
def test_reply_wraps_quotations():
    """Reply text both above and below the quotation is kept."""
    body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
> Test
Regards, Roman"""
    expected = """Test reply
Regards, Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_(expected, extracted)
def test_reply_wraps_nested_quotations():
    """A nested quotation in the middle is removed, the wrapping reply stays."""
    body = """Test reply
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>Test test
>On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
>
>>
>> Test.
>>
>> Roman
Regards, Roman"""
    expected = """Test reply
Regards, Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_(expected, extracted)
def test_quotation_separator_takes_2_lines():
    """The `On ... wrote:` separator may be wrapped across two lines."""
    body = """Test reply
On Fri, May 6, 2011 at 6:03 PM, Roman Tkachenko from Hacker News
<roman@definebox.com> wrote:
> Test.
>
> Roman
Regards, Roman"""
    expected = """Test reply
Regards, Roman"""
    extracted = quotations.extract_from_plain(body)
    eq_(expected, extracted)
def test_quotation_separator_takes_3_lines():
    """The `On ... wrote:` separator may be wrapped across three lines."""
    body = """Test reply
On Nov 30, 2011, at 12:47 PM, Somebody <
416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4@somebody.domain.com>
wrote:
Test message
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Test reply", extracted)
def test_short_quotation():
    """A one-line quotation after the splitter is still stripped."""
    body = """Hi
On 04/19/2011 07:10 AM, Roman Tkachenko wrote:
> Hello"""
    extracted = quotations.extract_from_plain(body)
    eq_("Hi", extracted)
def test_pattern_date_email_with_unicode():
    """A `<date> <name> <email>` splitter works with non-ASCII name bytes."""
    body = """Replying ok
2011/4/7 Nathan \xd0\xb8ova <support@example.com>
> Cool beans, scro"""
    extracted = quotations.extract_from_plain(body)
    eq_("Replying ok", extracted)
def test_pattern_from_block():
    """A From/Sent/To/Subject header block marks the start of the quotation."""
    body = """Allo! Follow up MIME!
From: somebody@example.com
Sent: March-19-11 5:42 PM
To: Somebody
Subject: The manager has commented on your Loop
Blah-blah-blah
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Allo! Follow up MIME!", extracted)
def test_quotation_marker_false_positive():
    """Decorative `>>>` around a link must not be taken for a quotation."""
    body = """Visit us now for assistance...
>>> >>> http://www.domain.com <<<
Visit our site by clicking the link above"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
def test_link_closed_with_quotation_marker_on_new_line():
    """A link whose closing `>` lands on the next line does not break parsing."""
    body = '''8.45am-1pm
From: somebody@example.com
<http://email.example.com/c/dHJhY2tpbmdfY29kZT1mMDdjYzBmNzM1ZjYzMGIxNT
> <bob@example.com <mailto:bob@example.com> >
Requester: '''
    extracted = quotations.extract_from_plain(body)
    eq_('8.45am-1pm', extracted)
def test_link_breaks_quotation_markers_sequence():
    """Links interrupting the run of `>` lines are still part of the quote."""
    # the link starts and ends on the same line
    body = """Blah
On Thursday, October 25, 2012 at 3:03 PM, life is short. on Bob wrote:
>
> Post a response by replying to this email
>
(http://example.com/c/YzOTYzMmE) >
> life is short. (http://example.com/c/YzMmE)
>
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Blah", extracted)

    # the link starts after some text on one line and ends on another
    body = """Blah
On Monday, 24 September, 2012 at 3:46 PM, bob wrote:
> [Ticket #50] test from bob
>
> View ticket (http://example.com/action
_nonce=3dd518)
>
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Blah", extracted)
def test_from_block_starts_with_date():
    """A header block may begin with `Date:` instead of `From:`."""
    body = """Blah
Date: Wed, 16 May 2012 00:15:02 -0600
To: klizhentas@example.com"""
    extracted = quotations.extract_from_plain(body)
    eq_('Blah', extracted)
def test_bold_from_block():
    """Header names wrapped in `*` (bold markup) still form a from-block."""
    body = """Hi
*From:* bob@example.com [mailto:
bob@example.com]
*Sent:* Wednesday, June 27, 2012 3:05 PM
*To:* travis@example.com
*Subject:* Hello
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Hi", extracted)
def test_weird_date_format_in_date_block():
    """A `Date:` header containing `=2C` is still recognised."""
    body = """Blah
Date: Fri=2C 28 Sep 2012 10:55:48 +0000
From: tickets@example.com
To: bob@example.com
Subject: [Ticket #8] Test
"""
    extracted = quotations.extract_from_plain(body)
    eq_('Blah', extracted)
def test_dont_parse_quotations_for_forwarded_messages():
    """A forwarded message is returned untouched."""
    body = """FYI
---------- Forwarded message ----------
From: bob@example.com
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: rob@example.com
Text"""
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
def test_forwarded_message_in_quotations():
    """A forwarded block sitting inside the quotation is stripped with it."""
    body = """Blah
-----Original Message-----
FYI
---------- Forwarded message ----------
From: bob@example.com
Date: Tue, Sep 4, 2012 at 1:35 PM
Subject: Two
line subject
To: rob@example.com
"""
    extracted = quotations.extract_from_plain(body)
    eq_("Blah", extracted)
def test_mark_message_lines():
    """mark_message_lines returns one marker character per input line.

    Marker legend:
        e - empty line
        s - splitter line
        m - line starting with quotation marker '>'
        t - the rest
    """
    from_block_message = [
        'Hello',
        '',
        # next line should be marked as splitter
        '_____________',
        'From: foo@bar.com',
        '',
        '> Hi',
        '',
        'Signature',
    ]
    markers = quotations.mark_message_lines(from_block_message)
    eq_('tessemet', markers)

    wrapped_splitter_message = [
        'Just testing the email reply',
        '',
        'Robert J Samson',
        'Sent from my iPhone',
        '',
        # all 3 next lines should be marked as splitters
        'On Nov 30, 2011, at 12:47 PM, Skapture <',
        ('416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4'
         '@skapture-staging.mailgun.org>'),
        'wrote:',
        '',
        'Tarmo Lehtpuu has posted the following message on',
    ]
    markers = quotations.mark_message_lines(wrapped_splitter_message)
    eq_('tettessset', markers)
def test_process_marked_lines():
    """process_marked_lines drops quotation lines according to the markers.

    Each case pairs a marker string (one character per line) with the list
    of lines and checks which lines survive.
    """
    # quotations and last message lines are mixed
    # consider all to be a last message
    markers = 'tsemmtetm'
    lines = [str(i) for i in range(len(markers))]
    eq_(lines, quotations.process_marked_lines(lines, markers))

    # no splitter => no markers
    markers = 'tmm'
    lines = ['1', '2', '3']
    eq_(['1', '2', '3'], quotations.process_marked_lines(lines, markers))

    # text after splitter without markers is quotation
    markers = 'tst'
    lines = ['1', '2', '3']
    eq_(['1'], quotations.process_marked_lines(lines, markers))

    # message + quotation + signature
    markers = 'tsmt'
    lines = ['1', '2', '3', '4']
    eq_(['1', '4'], quotations.process_marked_lines(lines, markers))

    # message + <quotation without markers> + nested quotation
    markers = 'tstsmt'
    lines = ['1', '2', '3', '4', '5', '6']
    eq_(['1'], quotations.process_marked_lines(lines, markers))

    # test links wrapped with parentheses
    # link starts on the marker line
    markers = 'tsmttem'
    lines = ['text',
             'splitter',
             '>View (http://example.com',
             '/abc',
             ')',
             '',
             '> quote']
    eq_(lines[:1], quotations.process_marked_lines(lines, markers))

    # link starts on the new line
    markers = 'tmmmtm'
    # one line per marker: a missing comma used to fuse the first two '>'
    # entries into a single '>>' item, leaving 5 lines for 6 markers
    lines = ['text',
             '>',
             '>',
             '>',
             '(http://example.com) > ',
             '> life is short. (http://example.com) '
             ]
    eq_(lines[:1], quotations.process_marked_lines(lines, markers))

    # check all "inline" replies
    markers = 'tsmtmtm'
    lines = ['text',
             'splitter',
             '>',
             '(http://example.com)',
             '>',
             'inline reply',
             '>']
    eq_(lines, quotations.process_marked_lines(lines, markers))

    # inline reply with link not wrapped in parentheses
    markers = 'tsmtm'
    lines = ['text',
             'splitter',
             '>',
             'inline reply with link http://example.com',
             '>']
    eq_(lines, quotations.process_marked_lines(lines, markers))

    # inline reply with link wrapped in parentheses
    markers = 'tsmtm'
    lines = ['text',
             'splitter',
             '>',
             'inline reply (http://example.com)',
             '>']
    eq_(lines, quotations.process_marked_lines(lines, markers))
def test_preprocess():
    """preprocess rewrites links and moves the splitter onto its own line."""
    raw = ('Hello\n'
           'See <http://google.com\n'
           '> for more\n'
           'information On Nov 30, 2011, at 12:47 PM, Somebody <\n'
           '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
           '@example.com>'
           'wrote:\n'
           '\n'
           '> Hi')

    # the link brackets are rewritten to `@@` and the
    # 'On <date> <person> wrote:' pattern is pushed onto a new line
    expected = ('Hello\n'
                'See @@http://google.com\n'
                '@@ for more\n'
                'information\n'
                ' On Nov 30, 2011, at 12:47 PM, Somebody <\n'
                '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
                '@example.com>'
                'wrote:\n'
                '\n'
                '> Hi')
    processed = quotations.preprocess(raw, '\n')
    eq_(expected, processed)

    # this quoted multi-line link must come back unchanged
    raw = """
> <http://teemcl.mailgun.org/u/**aD1mZmZiNGU5ODQwMDNkZWZlMTExNm**
> MxNjQ4Y2RmOTNlMCZyPXNlcmdleS5v**YnlraG92JTQwbWFpbGd1bmhxLmNvbS**
> Z0PSUyQSZkPWUwY2U<http://example.org/u/aD1mZmZiNGU5ODQwMDNkZWZlMTExNmMxNjQ4Y>
"""
    processed = quotations.preprocess(raw, '\n')
    eq_(raw, processed)

    # 'On <date> <person> wrote' shouldn't be spread across too many lines
    raw = ('Hello\n'
           'How are you? On Nov 30, 2011, at 12:47 PM,\n '
           'Example <\n'
           '416ffd3258d4d2fa4c85cfa4c44e1721d66e3e8f4\n'
           '@example.org>'
           'wrote:\n'
           '\n'
           '> Hi')
    processed = quotations.preprocess(raw, '\n')
    eq_(raw, processed)

    # only the splitter that shares a line with reply text is moved
    raw = ('Hello On Nov 30, smb wrote:\n'
           'Hi\n'
           'On Nov 29, smb wrote:\n'
           'hi')
    expected = ('Hello\n'
                ' On Nov 30, smb wrote:\n'
                'Hi\n'
                'On Nov 29, smb wrote:\n'
                'hi')
    processed = quotations.preprocess(raw, '\n')
    eq_(expected, processed)
def test_preprocess_postprocess_2_links():
    """Two bracketed links on one line survive extraction unchanged."""
    body = "<http://link1> <http://link2>"
    extracted = quotations.extract_from_plain(body)
    eq_(body, extracted)
def test_standard_replies():
    """Run extract_from_plain over every fixture in STANDARD_REPLIES.

    The expected reply is read from a sibling `<name>_reply_text` file when
    one exists, otherwise it is 'Hello'.
    """
    for entry in os.listdir(STANDARD_REPLIES):
        path = os.path.join(STANDARD_REPLIES, entry)
        if os.path.isdir(path):
            continue

        with open(path) as message_file:
            raw_message = message_file.read()

        parsed = mime.from_string(raw_message)
        for part in parsed.walk():
            if part.content_type != 'text/plain':
                continue

            stripped_text = quotations.extract_from_plain(part.body)

            # fixture name minus its last 4 characters, plus the suffix
            reply_text_fn = path[:-4] + '_reply_text'
            if os.path.isfile(reply_text_fn):
                with open(reply_text_fn) as reply_file:
                    reply_text = reply_file.read()
            else:
                reply_text = 'Hello'

            failure_message = (
                "'%(reply)s' != %(stripped)s for %(fn)s" %
                {'reply': reply_text, 'stripped': stripped_text,
                 'fn': path})
            eq_(reply_text, stripped_text, failure_message)

9
tests/utils_test.py Normal file
View File

@@ -0,0 +1,9 @@
from . import *
from talon import utils
def test_get_delimiter():
    """get_delimiter returns the line ending used, '\\n' when none is found."""
    cases = [
        # (expected delimiter, text)
        ('\r\n', 'abc\r\n123'),
        ('\n', 'abc\n123'),
        ('\n', 'abc'),
    ]
    for expected, text in cases:
        eq_(expected, utils.get_delimiter(text))