You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

140 lines
3.8 KiB

14 years ago
from string import ascii_letters, digits
from urllib import quote_plus
import os
14 years ago
import re
import traceback
14 years ago
import unicodedata
11 years ago
from chardet import detect
11 years ago
from couchpotato.core.logger import CPLog
11 years ago
import six
14 years ago
11 years ago
14 years ago
log = CPLog(__name__)
14 years ago
14 years ago
def toSafeString(original):
    """Reduce ``original`` to a conservative ASCII-only string.

    The value is converted with ``toUnicode``, NFKD-normalised and encoded to
    ASCII, silently dropping anything that cannot be encoded. Of the remaining
    characters only ASCII letters, digits, the space and ``-_.()`` are kept.
    Runs of spaces are then collapsed to one and leading/trailing spaces are
    removed.
    """
    allowed = "-_.() %s%s" % (ascii_letters, digits)

    # NFKD splits accented characters into base + combining mark, so the
    # ASCII encode below keeps the base letter and discards the mark.
    decomposed = unicodedata.normalize('NFKD', toUnicode(original))
    ascii_only = decomposed.encode('ASCII', 'ignore')

    kept = [char for char in ascii_only if char in allowed]
    return ' '.join(''.join(kept).split())
14 years ago
11 years ago
14 years ago
def simplifyString(original):
    """Return a lower-cased, accent-free, space-separated form of ``original``.

    Steps, in order:
      1. lower-case the input and strip accents (``stripAccents``);
      2. replace every run of non-word characters with a single space and
         pass the result through ``toSafeString``;
      3. split again on non-word runs and on each underscore, then re-join
         the pieces with spaces.

    The result is returned through ``toUnicode``.

    NOTE: underscores are split one at a time, so a leading, trailing or
    doubled underscore produces an empty field and therefore an extra space
    in the result.
    """
    lowered = stripAccents(original.lower())
    # Raw strings: '\W' is not a valid string escape, so the non-raw form only
    # worked by accident and is deprecated on newer interpreters.
    safe = toSafeString(' '.join(re.split(r'\W+', lowered)))
    words = re.split(r'\W+|_', safe.lower())
    return toUnicode(' '.join(words))
11 years ago
14 years ago
def toUnicode(original, *args):
    """Best-effort conversion of ``original`` to a unicode string.

    Conversion strategy, in order:
      1. a value that is already unicode is returned unchanged;
      2. ``six.text_type(original, *args)`` is tried (``args`` may carry an
         encoding and error mode);
      3. if that fails, ``chardet.detect`` is consulted: data detected as
         ``utf-8`` is decoded as UTF-8, anything else is handed to ``ek``;
      4. if any of the above raises, the failure is logged and the value is
         converted with ``str(...).encode('string_escape')`` (a Python 2 only
         codec that yields an escaped ASCII byte string), which is then
         converted by a recursive call.

    Only ``Exception`` subclasses are handled, so KeyboardInterrupt and
    SystemExit propagate instead of being swallowed and logged.
    """
    try:
        if isinstance(original, six.text_type):
            return original

        try:
            return six.text_type(original, *args)
        except Exception:
            # Plain conversion failed; fall back to encoding detection.
            detected = detect(original)
            if detected.get('encoding') == 'utf-8':
                return original.decode('utf-8')
            return ek(original, *args)
    except Exception:
        log.error('Unable to decode value "%s..." : %s ', (repr(original)[:20], traceback.format_exc()))
        # Last resort: escape every non-ASCII byte so the retry cannot fail on
        # decoding.
        ascii_text = str(original).encode('string_escape')
        return toUnicode(ascii_text)
def toUTF8(original):
    """Re-encode a non-empty ``str`` as UTF-8 using a detected source encoding.

    A non-empty ``str`` is run through ``detect``; the reported encoding is
    used to decode the value, which is then encoded as UTF-8. Any other value
    (empty string, non-``str`` object) is returned unchanged.

    Errors raised by detection or decoding propagate to the caller.
    """
    if not (isinstance(original, str) and len(original) > 0):
        return original

    # Try to detect
    guess = detect(original)
    source_encoding = guess.get('encoding')
    return original.decode(source_encoding).encode('utf-8')
11 years ago
def ss(original, *args):
    """Encode ``original`` to a byte string, degrading gracefully on failure.

    The value is first converted with ``toUnicode(original, *args)`` and then
    encoded, trying in order:
      1. the encoding returned by ``Env.get('encoding')``, strict mode;
      2. the same encoding with the ``'replace'`` error handler;
      3. UTF-8 with the ``'replace'`` error handler.

    A failure of step 1 is logged at debug level. Only ``Exception``
    subclasses trigger a fallback, so KeyboardInterrupt and SystemExit
    propagate.
    """
    u_original = toUnicode(original, *args)
    try:
        # Function-scope import (presumably to avoid a circular import at
        # module load time; confirm before hoisting).
        from couchpotato.environment import Env
        return u_original.encode(Env.get('encoding'))
    except Exception as e:
        log.debug('Failed ss encoding char, force UTF8: %s', e)
        try:
            # If the import above failed, ``Env`` is unbound here; the
            # resulting NameError lands in the UTF-8 fallback below.
            return u_original.encode(Env.get('encoding'), 'replace')
        except Exception:
            return u_original.encode('utf-8', 'replace')
11 years ago
def sp(path, *args):
    """Standardise a filesystem path.

    Empty or falsy paths are returned untouched. Otherwise the path is
    encoded with ``ss``, normalised with ``os.path.normpath`` and stripped of
    trailing separators, with a few special cases handled along the way (see
    the inline comments).
    """
    # Nothing to standardise
    if not path or len(path) == 0:
        return path

    sep = os.path.sep

    # Convert a windows path (from a remote box) to a *nix path: drop the
    # drive colon, flip the backslashes and anchor it at the root.
    if sep == '/' and '\\' in path:
        path = '/' + path.replace(':', '').replace('\\', '/')

    path = os.path.normpath(ss(path, *args))

    # Remove any trailing path separators, but keep a bare root as is
    if path != sep:
        path = path.rstrip(sep)

    # A bare windows drive ("C:") gets its separator back (crashes guessit)
    if len(path) == 2 and path[1] == ':':
        path += sep

    # Replace the *NIX ambiguous '//' at the beginning of a path with '/'
    # (crashes guessit)
    return re.sub('^//', '/', path)
11 years ago
def ek(original, *args):
    """Decode a byte string using the encoding from ``Env.get('encoding')``.

    * ``unicode`` input is returned unchanged;
    * ``str`` (byte string) input is decoded with the configured encoding;
      a ``UnicodeDecodeError`` propagates to the caller;
    * any other value is returned unchanged.

    ``args`` is accepted for call compatibility and is not used.
    """
    # Already text. Calling .decode() on a Python 2 unicode object first
    # encodes it implicitly as ASCII, which raises UnicodeEncodeError for any
    # non-ASCII character, so such values must bypass the decode.
    if isinstance(original, unicode):
        return original

    if isinstance(original, str):
        from couchpotato.environment import Env
        return original.decode(Env.get('encoding'))

    return original
11 years ago
def isInt(value):
    """Return True when ``int(value)`` succeeds, False otherwise.

    Besides unparsable strings (ValueError), values of an unsupported type
    such as ``None`` or a list (TypeError) and infinite floats (OverflowError)
    are reported as False instead of raising.
    """
    try:
        int(value)
    except (ValueError, TypeError, OverflowError):
        return False
    return True
11 years ago
def stripAccents(s):
    """Remove combining accent marks from ``s``.

    The value is converted with ``toUnicode`` and NFD-normalised, which splits
    accented characters into a base character followed by combining marks.
    Every character in Unicode category ``Mn`` (nonspacing mark) is then
    dropped and the rest is joined back together.
    """
    decomposed = unicodedata.normalize('NFD', toUnicode(s))
    return ''.join(char for char in decomposed if unicodedata.category(char) != 'Mn')
11 years ago
def tryUrlencode(s):
    """URL-encode a value, or a dict of values, on a best-effort basis.

    * For a dict, each item becomes ``key=<encoded value>`` (values are
      encoded recursively) and the items are joined with ``&``. Keys are
      inserted as-is; they are not URL-encoded.
    * For anything else, the value is converted with ``ss`` and every element
      of the result is passed through ``quote_plus``; an element that cannot
      be quoted is kept unchanged.

    Returns a unicode string.
    """
    if isinstance(s, dict):
        pairs = [six.u('&%s=%s') % (key, tryUrlencode(value)) for key, value in s.items()]
        # Every pair carries a leading '&'; drop the first one.
        return six.u('').join(pairs)[1:]

    # Collect the pieces and join once instead of growing a unicode string
    # with += on every character.
    encoded = []
    for letter in ss(s):
        try:
            encoded.append(quote_plus(letter))
        except Exception:
            encoded.append(letter)
    return six.u('').join(encoded)