108 lines
2.7 KiB
Python
108 lines
2.7 KiB
Python
# coding:utf-8
|
|
|
|
import logging
|
|
from random import shuffle
|
|
import chardet
|
|
import cchardet
|
|
|
|
from talon.constants import RE_DELIMITER
|
|
|
|
|
|
def safe_format(format_string, *args, **kwargs):
    """
    Helper: formats string with any combination of bytestrings/unicode
    strings without raising exceptions.

    Returns `format_string` unchanged when neither positional nor keyword
    arguments are given. If formatting fails with an encoding error, every
    piece is converted with `to_utf8` and formatting is retried once. Any
    other failure (including a failure of the retry) yields an empty
    unicode string.
    """
    # nothing to interpolate: hand the template back untouched
    if not args and not kwargs:
        return format_string

    try:
        return format_string.format(*args, **kwargs)

    # catch encoding errors and transform everything into utf-8 string
    # before formatting again:
    except (UnicodeEncodeError, UnicodeDecodeError):
        try:
            format_string = to_utf8(format_string)
            args = [to_utf8(p) for p in args]
            kwargs = {k: to_utf8(v) for k, v in kwargs.items()}
            return format_string.format(*args, **kwargs)
        # an exception raised inside an `except` clause is not seen by the
        # sibling handlers, so the retry needs its own guard
        except Exception:
            return u''

    # ignore other errors; `Exception` (not a bare except) so that
    # KeyboardInterrupt / SystemExit still propagate
    except Exception:
        return u''
|
|
|
|
|
|
def to_unicode(str_or_unicode, precise=False):
    """
    Safely returns a unicode version of a given string
    >>> utils.to_unicode('привет')
    u'привет'
    >>> utils.to_unicode(u'привет')
    u'привет'
    If `precise` flag is True, tries to guess the correct encoding first.

    Values that are not byte strings (`str`) are returned unchanged.
    Bytes that cannot be decoded with the chosen encoding are substituted
    ('replace' error handling) instead of raising.
    """
    # Check the type first: encoding detection is only useful for values
    # that are actually going to be decoded.
    if not isinstance(str_or_unicode, str):
        return str_or_unicode

    encoding = quick_detect_encoding(str_or_unicode) if precise else 'utf-8'
    return unicode(str_or_unicode, encoding, 'replace')
|
|
|
|
|
|
def detect_encoding(string):
    """
    Tries to detect the encoding of the passed string.

    Asks chardet for a guess and returns the reported encoding name.
    Defaults to UTF-8 when chardet reports no encoding or when detection
    raises.
    """
    try:
        detected = chardet.detect(string)
        if detected:
            return detected.get('encoding') or 'utf-8'
    except Exception:
        # Detection is best-effort: record the failure through logging
        # (instead of printing to stdout) and fall back to the default.
        logging.getLogger(__name__).warning(
            'chardet failed to detect encoding, defaulting to utf-8',
            exc_info=True)
    return 'utf-8'
|
|
|
|
|
|
def quick_detect_encoding(string):
    """
    Tries to detect the encoding of the passed string.

    Uses cchardet. Falls back to detect_encoding when cchardet reports no
    encoding or when it raises.
    """
    try:
        detected = cchardet.detect(string)
        if detected:
            return detected.get('encoding') or detect_encoding(string)
    except Exception:
        # Detection is best-effort: record the failure through logging
        # (instead of printing to stdout) and try the fallback detector.
        logging.getLogger(__name__).warning(
            'cchardet failed to detect encoding, '
            'falling back to detect_encoding',
            exc_info=True)
    return detect_encoding(string)
|
|
|
|
|
|
def to_utf8(str_or_unicode):
    """
    Safely returns a UTF-8 version of a given string
    >>> utils.to_utf8(u'hi')
    'hi'

    Unicode input is encoded as UTF-8, silently dropping anything that
    cannot be encoded ('ignore' error handling). Any other value is
    passed through `str()`.
    """
    # guard clause: only unicode objects need encoding
    if not isinstance(str_or_unicode, unicode):
        return str(str_or_unicode)

    return str_or_unicode.encode("utf-8", "ignore")
|
|
|
|
|
|
def random_token(length=7):
    """
    Returns a random token built from lowercase ASCII letters and digits.

    The 36-character alphabet is shuffled in place and its first `length`
    characters are joined, so no character repeats within a token and a
    token is never longer than 36 characters.
    """
    alphabet = list("abcdefghijklmnopqrstuvwxyz0123456789")
    shuffle(alphabet)
    token_chars = alphabet[:length]
    return ''.join(token_chars)
|
|
|
|
|
|
def get_delimiter(msg_body):
    """
    Returns the delimiter used in the message body.

    Searches `msg_body` with RE_DELIMITER and returns the text of the
    first match; when nothing matches, returns a newline.
    """
    # NOTE(review): RE_DELIMITER is defined in talon.constants; presumably
    # it matches line endings -- confirm against that module.
    found = RE_DELIMITER.search(msg_body)
    return found.group() if found else '\n'
|