Make utils compatible with Python 3.
This commit is contained in:
@@ -59,6 +59,7 @@ def detect_encoding(string):
|
|||||||
|
|
||||||
Defaults to UTF-8.
|
Defaults to UTF-8.
|
||||||
"""
|
"""
|
||||||
|
assert isinstance(string, bytes)
|
||||||
try:
|
try:
|
||||||
detected = chardet.detect(string)
|
detected = chardet.detect(string)
|
||||||
if detected:
|
if detected:
|
||||||
@@ -74,6 +75,7 @@ def quick_detect_encoding(string):
|
|||||||
|
|
||||||
Uses cchardet. Fallbacks to detect_encoding.
|
Uses cchardet. Fallbacks to detect_encoding.
|
||||||
"""
|
"""
|
||||||
|
assert isinstance(string, bytes)
|
||||||
try:
|
try:
|
||||||
detected = cchardet.detect(string)
|
detected = cchardet.detect(string)
|
||||||
if detected:
|
if detected:
|
||||||
|
|||||||
@@ -23,42 +23,42 @@ def test_unicode():
|
|||||||
eq_ (u"привет", u.to_unicode('привет'))
|
eq_ (u"привет", u.to_unicode('привет'))
|
||||||
eq_ (u"привет", u.to_unicode(u'привет'))
|
eq_ (u"привет", u.to_unicode(u'привет'))
|
||||||
# some latin1 stuff
|
# some latin1 stuff
|
||||||
eq_ (u"Versión", u.to_unicode('Versi\xf3n', precise=True))
|
eq_ (u"Versión", u.to_unicode(u'Versi\xf3n'.encode('iso-8859-2'), precise=True))
|
||||||
|
|
||||||
|
|
||||||
def test_detect_encoding():
|
def test_detect_encoding():
|
||||||
eq_ ('ascii', u.detect_encoding('qwe').lower())
|
eq_ ('ascii', u.detect_encoding(b'qwe').lower())
|
||||||
eq_ ('iso-8859-2', u.detect_encoding('Versi\xf3n').lower())
|
eq_ ('iso-8859-2', u.detect_encoding(u'Versi\xf3n'.encode('iso-8859-2')).lower())
|
||||||
eq_ ('utf-8', u.detect_encoding('привет').lower())
|
eq_ ('utf-8', u.detect_encoding(u'привет'.encode('utf8')).lower())
|
||||||
# fallback to utf-8
|
# fallback to utf-8
|
||||||
with patch.object(u.chardet, 'detect') as detect:
|
with patch.object(u.chardet, 'detect') as detect:
|
||||||
detect.side_effect = Exception
|
detect.side_effect = Exception
|
||||||
eq_ ('utf-8', u.detect_encoding('qwe').lower())
|
eq_ ('utf-8', u.detect_encoding('qwe'.encode('utf8')).lower())
|
||||||
|
|
||||||
|
|
||||||
def test_quick_detect_encoding():
|
def test_quick_detect_encoding():
|
||||||
eq_ ('ascii', u.quick_detect_encoding('qwe').lower())
|
eq_ ('ascii', u.quick_detect_encoding(b'qwe').lower())
|
||||||
eq_ ('windows-1252', u.quick_detect_encoding('Versi\xf3n').lower())
|
eq_ ('windows-1252', u.quick_detect_encoding(u'Versi\xf3n'.encode('windows-1252')).lower())
|
||||||
eq_ ('utf-8', u.quick_detect_encoding('привет').lower())
|
eq_ ('utf-8', u.quick_detect_encoding(u'привет'.encode('utf8')).lower())
|
||||||
|
|
||||||
|
|
||||||
@patch.object(cchardet, 'detect')
|
@patch.object(cchardet, 'detect')
|
||||||
@patch.object(u, 'detect_encoding')
|
@patch.object(u, 'detect_encoding')
|
||||||
def test_quick_detect_encoding_edge_cases(detect_encoding, cchardet_detect):
|
def test_quick_detect_encoding_edge_cases(detect_encoding, cchardet_detect):
|
||||||
cchardet_detect.return_value = {'encoding': 'ascii'}
|
cchardet_detect.return_value = {'encoding': 'ascii'}
|
||||||
eq_('ascii', u.quick_detect_encoding("qwe"))
|
eq_('ascii', u.quick_detect_encoding(b"qwe"))
|
||||||
cchardet_detect.assert_called_once_with("qwe")
|
cchardet_detect.assert_called_once_with(b"qwe")
|
||||||
|
|
||||||
# fallback to detect_encoding
|
# fallback to detect_encoding
|
||||||
cchardet_detect.return_value = {}
|
cchardet_detect.return_value = {}
|
||||||
detect_encoding.return_value = 'utf-8'
|
detect_encoding.return_value = 'utf-8'
|
||||||
eq_('utf-8', u.quick_detect_encoding("qwe"))
|
eq_('utf-8', u.quick_detect_encoding(b"qwe"))
|
||||||
|
|
||||||
# exception
|
# exception
|
||||||
detect_encoding.reset_mock()
|
detect_encoding.reset_mock()
|
||||||
cchardet_detect.side_effect = Exception()
|
cchardet_detect.side_effect = Exception()
|
||||||
detect_encoding.return_value = 'utf-8'
|
detect_encoding.return_value = 'utf-8'
|
||||||
eq_('utf-8', u.quick_detect_encoding("qwe"))
|
eq_('utf-8', u.quick_detect_encoding(b"qwe"))
|
||||||
ok_(detect_encoding.called)
|
ok_(detect_encoding.called)
|
||||||
|
|
||||||
|
|
||||||
@@ -75,11 +75,11 @@ Haha
|
|||||||
</p>
|
</p>
|
||||||
</body>"""
|
</body>"""
|
||||||
text = u.html_to_text(html)
|
text = u.html_to_text(html)
|
||||||
eq_("Hello world! \n\n * One! \n * Two \nHaha", text)
|
eq_(b"Hello world! \n\n * One! \n * Two \nHaha", text)
|
||||||
eq_("привет!", u.html_to_text("<b>привет!</b>"))
|
eq_(u"привет!", u.html_to_text("<b>привет!</b>").decode('utf8'))
|
||||||
|
|
||||||
html = '<body><br/><br/>Hi</body>'
|
html = '<body><br/><br/>Hi</body>'
|
||||||
eq_ ('Hi', u.html_to_text(html))
|
eq_ (b'Hi', u.html_to_text(html))
|
||||||
|
|
||||||
html = """Hi
|
html = """Hi
|
||||||
<style type="text/css">
|
<style type="text/css">
|
||||||
@@ -99,11 +99,11 @@ font: 13px 'Lucida Grande', Arial, sans-serif;
|
|||||||
|
|
||||||
}
|
}
|
||||||
</style>"""
|
</style>"""
|
||||||
eq_ ('Hi', u.html_to_text(html))
|
eq_ (b'Hi', u.html_to_text(html))
|
||||||
|
|
||||||
html = """<div>
|
html = """<div>
|
||||||
<!-- COMMENT 1 -->
|
<!-- COMMENT 1 -->
|
||||||
<span>TEXT 1</span>
|
<span>TEXT 1</span>
|
||||||
<p>TEXT 2 <!-- COMMENT 2 --></p>
|
<p>TEXT 2 <!-- COMMENT 2 --></p>
|
||||||
</div>"""
|
</div>"""
|
||||||
eq_('TEXT 1 \nTEXT 2', u.html_to_text(html))
|
eq_(b'TEXT 1 \nTEXT 2', u.html_to_text(html))
|
||||||
|
|||||||
Reference in New Issue
Block a user