Compare commits
	
		
			4 Commits
		
	
	
		
			thrawn/dev
			...
			v1.4.9
		
	
| Author | SHA1 | Date | |
|---|---|---|---|
|  | 1a5548f171 | ||
|  | 53c49b9121 | ||
|  | bd50872043 | ||
|  | d37c4fd551 | 
							
								
								
									
										20
									
								
								.build/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								.build/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | |||||||
|  | FROM python:3.9-slim-buster AS deps | ||||||
|  |  | ||||||
|  | RUN apt-get update && \ | ||||||
|  |     apt-get install -y build-essential git curl python3-dev libatlas3-base libatlas-base-dev liblapack-dev libxml2 libxml2-dev libffi6 libffi-dev musl-dev libxslt-dev | ||||||
|  |  | ||||||
|  | FROM deps AS testable | ||||||
|  | ARG REPORT_PATH | ||||||
|  |  | ||||||
|  | VOLUME ["/var/mailgun", "/etc/mailgun/ssl", ${REPORT_PATH}] | ||||||
|  |  | ||||||
|  | ADD . /app | ||||||
|  | WORKDIR /app | ||||||
|  | COPY wheel/* /wheel/ | ||||||
|  |  | ||||||
|  | RUN mkdir -p ${REPORT_PATH} | ||||||
|  |  | ||||||
|  | RUN python ./setup.py build bdist_wheel -d /wheel && \ | ||||||
|  |     pip install --no-deps /wheel/* | ||||||
|  |  | ||||||
|  | ENTRYPOINT ["/bin/sh", "/app/run_tests.sh"] | ||||||
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -54,3 +54,6 @@ _trial_temp | |||||||
|  |  | ||||||
| # OSX | # OSX | ||||||
| .DS_Store | .DS_Store | ||||||
|  |  | ||||||
|  | # vim-backup | ||||||
|  | *.bak | ||||||
|   | |||||||
							
								
								
									
										11
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | |||||||
|  | chardet>=1.0.1 | ||||||
|  | cchardet>=0.3.5 | ||||||
|  | cssselect | ||||||
|  | html5lib | ||||||
|  | joblib | ||||||
|  | lxml>=2.3.3 | ||||||
|  | numpy | ||||||
|  | regex>=1 | ||||||
|  | scikit-learn>=1.0.0 | ||||||
|  | scipy | ||||||
|  | six>=1.10.0 | ||||||
							
								
								
									
										4
									
								
								run_tests.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										4
									
								
								run_tests.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,4 @@ | |||||||
|  | #!/usr/bin/env bash | ||||||
|  | set -ex | ||||||
|  | REPORT_PATH="${REPORT_PATH:-./}" | ||||||
|  | nosetests --with-xunit --with-coverage --cover-xml --cover-xml-file $REPORT_PATH/coverage.xml --xunit-file=$REPORT_PATH/nosetests.xml --cover-package=talon . | ||||||
							
								
								
									
										31
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								setup.py
									
									
									
									
									
								
							| @@ -19,17 +19,17 @@ class InstallCommand(install): | |||||||
|         if self.no_ml: |         if self.no_ml: | ||||||
|             dist = self.distribution |             dist = self.distribution | ||||||
|             dist.packages=find_packages(exclude=[ |             dist.packages=find_packages(exclude=[ | ||||||
|                 'tests', |                 "tests", | ||||||
|                 'tests.*', |                 "tests.*", | ||||||
|                 'talon.signature', |                 "talon.signature", | ||||||
|                 'talon.signature.*', |                 "talon.signature.*", | ||||||
|             ]) |             ]) | ||||||
|             for not_required in ['numpy', 'scipy', 'scikit-learn==0.16.1']: |             for not_required in ["numpy", "scipy", "scikit-learn==0.24.1"]: | ||||||
|                 dist.install_requires.remove(not_required) |                 dist.install_requires.remove(not_required) | ||||||
|  |  | ||||||
|  |  | ||||||
| setup(name='talon', | setup(name='talon', | ||||||
|       version='1.4.8', |       version='1.4.9', | ||||||
|       description=("Mailgun library " |       description=("Mailgun library " | ||||||
|                    "to extract message quotations and signatures."), |                    "to extract message quotations and signatures."), | ||||||
|       long_description=open("README.rst").read(), |       long_description=open("README.rst").read(), | ||||||
| @@ -44,20 +44,21 @@ setup(name='talon', | |||||||
|       include_package_data=True, |       include_package_data=True, | ||||||
|       zip_safe=True, |       zip_safe=True, | ||||||
|       install_requires=[ |       install_requires=[ | ||||||
|           "lxml>=2.3.3", |           "lxml", | ||||||
|           "regex>=1", |           "regex", | ||||||
|           "numpy", |           "numpy", | ||||||
|           "scipy", |           "scipy", | ||||||
|           "scikit-learn==0.16.1", # pickled versions of classifier, else rebuild |           "scikit-learn>=1.0.0", | ||||||
|           'chardet>=1.0.1', |           "chardet", | ||||||
|           'cchardet>=0.3.5', |           "cchardet", | ||||||
|           'cssselect', |           "cssselect", | ||||||
|           'six>=1.10.0', |           "six", | ||||||
|           'html5lib' |           "html5lib", | ||||||
|  |           "joblib", | ||||||
|           ], |           ], | ||||||
|       tests_require=[ |       tests_require=[ | ||||||
|           "mock", |           "mock", | ||||||
|           "nose>=1.2.1", |           "nose", | ||||||
|           "coverage" |           "coverage" | ||||||
|           ] |           ] | ||||||
|       ) |       ) | ||||||
|   | |||||||
| @@ -457,7 +457,7 @@ def _extract_from_html(msg_body): | |||||||
|  |  | ||||||
|     msg_body = msg_body.replace(b'\r\n', b'\n') |     msg_body = msg_body.replace(b'\r\n', b'\n') | ||||||
|  |  | ||||||
|     msg_body = re.sub(r"\<\?xml.+\?\>|\<\!DOCTYPE.+]\>", "", msg_body) |     msg_body = re.sub(br"\<\?xml.+\?\>|\<\!DOCTYPE.+]\>", "", msg_body) | ||||||
|  |  | ||||||
|     html_tree = html_document_fromstring(msg_body) |     html_tree = html_document_fromstring(msg_body) | ||||||
|  |  | ||||||
|   | |||||||
										
											Binary file not shown.
										
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -8,7 +8,7 @@ body belongs to the signature. | |||||||
| from __future__ import absolute_import | from __future__ import absolute_import | ||||||
|  |  | ||||||
| from numpy import genfromtxt | from numpy import genfromtxt | ||||||
| from sklearn.externals import joblib | import joblib | ||||||
| from sklearn.svm import LinearSVC | from sklearn.svm import LinearSVC | ||||||
|  |  | ||||||
|  |  | ||||||
|   | |||||||
							
								
								
									
										3
									
								
								test-requirements.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								test-requirements.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | |||||||
|  | coverage | ||||||
|  | mock | ||||||
|  | nose>=1.2.1 | ||||||
| @@ -826,10 +826,10 @@ The user experience was unparallelled. Please continue production. I'm sending p | |||||||
| that this line is intact.""" | that this line is intact.""" | ||||||
|  |  | ||||||
|     parsed = quotations.extract_from_plain(msg_body) |     parsed = quotations.extract_from_plain(msg_body) | ||||||
|     eq_(msg_body, parsed.decode('utf8')) |     eq_(msg_body, parsed) | ||||||
|  |  | ||||||
|  |  | ||||||
| def test_appointment(): | def test_appointment_2(): | ||||||
|     msg_body = """Invitation for an interview: |     msg_body = """Invitation for an interview: | ||||||
|  |  | ||||||
| Date: Wednesday 3, October 2011  | Date: Wednesday 3, October 2011  | ||||||
| @@ -838,4 +838,4 @@ Address: 130 Fox St | |||||||
|  |  | ||||||
| Please bring in your ID.""" | Please bring in your ID.""" | ||||||
|     parsed = quotations.extract_from_plain(msg_body) |     parsed = quotations.extract_from_plain(msg_body) | ||||||
|     eq_(msg_body, parsed.decode('utf8')) |     eq_(msg_body, parsed) | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user