Make utils compatible with Python 3.

This commit is contained in:
Umair Khan
2016-07-13 13:00:24 +05:00
parent 7901f5d1dc
commit 622a98d6d5
2 changed files with 19 additions and 17 deletions

View File

@@ -59,6 +59,7 @@ def detect_encoding(string):
Defaults to UTF-8. Defaults to UTF-8.
""" """
assert isinstance(string, bytes)
try: try:
detected = chardet.detect(string) detected = chardet.detect(string)
if detected: if detected:
@@ -74,6 +75,7 @@ def quick_detect_encoding(string):
Uses cchardet. Falls back to detect_encoding. Uses cchardet. Falls back to detect_encoding.
""" """
assert isinstance(string, bytes)
try: try:
detected = cchardet.detect(string) detected = cchardet.detect(string)
if detected: if detected:

View File

@@ -23,42 +23,42 @@ def test_unicode():
eq_ (u"привет", u.to_unicode('привет')) eq_ (u"привет", u.to_unicode('привет'))
eq_ (u"привет", u.to_unicode(u'привет')) eq_ (u"привет", u.to_unicode(u'привет'))
# some latin1 stuff # some latin1 stuff
eq_ (u"Versión", u.to_unicode('Versi\xf3n', precise=True)) eq_ (u"Versión", u.to_unicode(u'Versi\xf3n'.encode('iso-8859-2'), precise=True))
def test_detect_encoding(): def test_detect_encoding():
eq_ ('ascii', u.detect_encoding('qwe').lower()) eq_ ('ascii', u.detect_encoding(b'qwe').lower())
eq_ ('iso-8859-2', u.detect_encoding('Versi\xf3n').lower()) eq_ ('iso-8859-2', u.detect_encoding(u'Versi\xf3n'.encode('iso-8859-2')).lower())
eq_ ('utf-8', u.detect_encoding('привет').lower()) eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
# fallback to utf-8 # fallback to utf-8
with patch.object(u.chardet, 'detect') as detect: with patch.object(u.chardet, 'detect') as detect:
detect.side_effect = Exception detect.side_effect = Exception
eq_ ('utf-8', u.detect_encoding('qwe').lower()) eq_ ('utf-8', u.detect_encoding('qwe'.encode('utf8')).lower())
def test_quick_detect_encoding(): def test_quick_detect_encoding():
eq_ ('ascii', u.quick_detect_encoding('qwe').lower()) eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
eq_ ('windows-1252', u.quick_detect_encoding('Versi\xf3n').lower()) eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower())
eq_ ('utf-8', u.quick_detect_encoding('привет').lower()) eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
@patch.object(cchardet, 'detect') @patch.object(cchardet, 'detect')
@patch.object(u, 'detect_encoding') @patch.object(u, 'detect_encoding')
def test_quick_detect_encoding_edge_cases(detect_encoding, cchardet_detect): def test_quick_detect_encoding_edge_cases(detect_encoding, cchardet_detect):
cchardet_detect.return_value = {'encoding': 'ascii'} cchardet_detect.return_value = {'encoding': 'ascii'}
eq_('ascii', u.quick_detect_encoding("qwe")) eq_('ascii', u.quick_detect_encoding(b"qwe"))
cchardet_detect.assert_called_once_with("qwe") cchardet_detect.assert_called_once_with(b"qwe")
# fallback to detect_encoding # fallback to detect_encoding
cchardet_detect.return_value = {} cchardet_detect.return_value = {}
detect_encoding.return_value = 'utf-8' detect_encoding.return_value = 'utf-8'
eq_('utf-8', u.quick_detect_encoding("qwe")) eq_('utf-8', u.quick_detect_encoding(b"qwe"))
# exception # exception
detect_encoding.reset_mock() detect_encoding.reset_mock()
cchardet_detect.side_effect = Exception() cchardet_detect.side_effect = Exception()
detect_encoding.return_value = 'utf-8' detect_encoding.return_value = 'utf-8'
eq_('utf-8', u.quick_detect_encoding("qwe")) eq_('utf-8', u.quick_detect_encoding(b"qwe"))
ok_(detect_encoding.called) ok_(detect_encoding.called)
@@ -75,11 +75,11 @@ Haha
</p> </p>
</body>""" </body>"""
text = u.html_to_text(html) text = u.html_to_text(html)
eq_("Hello world! \n\n * One! \n * Two \nHaha", text) eq_(b"Hello world! \n\n * One! \n * Two \nHaha", text)
eq_("привет!", u.html_to_text("<b>привет!</b>")) eq_(u"привет!", u.html_to_text("<b>привет!</b>").decode('utf8'))
html = '<body><br/><br/>Hi</body>' html = '<body><br/><br/>Hi</body>'
eq_ ('Hi', u.html_to_text(html)) eq_ (b'Hi', u.html_to_text(html))
html = """Hi html = """Hi
<style type="text/css"> <style type="text/css">
@@ -99,11 +99,11 @@ font: 13px 'Lucida Grande', Arial, sans-serif;
} }
</style>""" </style>"""
eq_ ('Hi', u.html_to_text(html)) eq_ (b'Hi', u.html_to_text(html))
html = """<div> html = """<div>
<!-- COMMENT 1 --> <!-- COMMENT 1 -->
<span>TEXT 1</span> <span>TEXT 1</span>
<p>TEXT 2 <!-- COMMENT 2 --></p> <p>TEXT 2 <!-- COMMENT 2 --></p>
</div>""" </div>"""
eq_('TEXT 1 \nTEXT 2', u.html_to_text(html)) eq_(b'TEXT 1 \nTEXT 2', u.html_to_text(html))