Merge pull request #95 from mailgun/sergey/forge
open-sourcing email dataset
This commit is contained in:
11
README.rst
11
README.rst
@@ -116,8 +116,19 @@ or
|
|||||||
from talon.signature.learning.classifier import train, init
|
from talon.signature.learning.classifier import train, init
|
||||||
train(init(), EXTRACTOR_DATA, EXTRACTOR_FILENAME)
|
train(init(), EXTRACTOR_DATA, EXTRACTOR_FILENAME)
|
||||||
|
|
||||||
|
Open-source Dataset
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
Recently we started a `forge`_ project to create an open-source, annotated dataset of raw emails. In the project we
|
||||||
|
used a subset of `ENRON`_ data, cleansed of private, health and financial information by `EDRM`_. At the moment over 190
|
||||||
|
emails are annotated. Contributions to and collaboration on the project are welcome. Once the dataset is ready, we plan to
|
||||||
|
start using it for talon.
|
||||||
|
|
||||||
.. _scikit-learn: http://scikit-learn.org
|
.. _scikit-learn: http://scikit-learn.org
|
||||||
.. _ENRON: https://www.cs.cmu.edu/~enron/
|
.. _ENRON: https://www.cs.cmu.edu/~enron/
|
||||||
|
.. _EDRM: http://www.edrm.net/resources/data-sets/edrm-enron-email-data-set
|
||||||
|
.. _forge: https://github.com/mailgun/forge
|
||||||
|
|
||||||
|
|
||||||
Research
|
Research
|
||||||
--------
|
--------
|
||||||
|
|||||||
2
setup.py
2
setup.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
|
|
||||||
setup(name='talon',
|
setup(name='talon',
|
||||||
version='1.2.9',
|
version='1.2.10',
|
||||||
description=("Mailgun library "
|
description=("Mailgun library "
|
||||||
"to extract message quotations and signatures."),
|
"to extract message quotations and signatures."),
|
||||||
long_description=open("README.rst").read(),
|
long_description=open("README.rst").read(),
|
||||||
|
|||||||
Reference in New Issue
Block a user