CouchPotatoServer/couchpotato/core/helpers/encoding.py

from string import ascii_letters, digits
from urllib import quote_plus
import os
import re
import traceback
import unicodedata

from chardet import detect
from couchpotato.core.logger import CPLog
import six


log = CPLog(__name__)


def toSafeString(original):
    """Reduce ``original`` to a conservative, ASCII-only string.

    The value is converted with ``toUnicode``, NFKD-normalised and encoded
    to ASCII (characters with no ASCII form are dropped). Only letters,
    digits, spaces and the characters ``-_.()`` are kept, and runs of
    whitespace are collapsed to single spaces.
    """
    allowed = "-_.() %s%s" % (ascii_letters, digits)

    # NFKD splits accented characters into base + combining mark, so the
    # ASCII encode with 'ignore' keeps the base letter and drops the mark.
    ascii_only = unicodedata.normalize('NFKD', toUnicode(original)).encode('ASCII', 'ignore')

    kept = [char for char in ascii_only if char in allowed]

    # split()/join collapses duplicate spaces and trims both ends.
    return ' '.join(''.join(kept).split())


def simplifyString(original):
    """Return a lower-cased, accent-free, space-separated form of ``original``.

    Steps: lower-case and strip accents, replace every run of non-word
    characters with a single space, pass the result through
    ``toSafeString``, then split once more on non-word runs and on
    underscores. The pieces are joined with spaces and returned via
    ``toUnicode``.
    """
    string = stripAccents(original.lower())

    # Raw string literals: '\W' is not a valid string escape, so the
    # pattern must not be written as a plain string literal.
    string = toSafeString(' '.join(re.split(r'\W+', string)))

    # Underscore counts as a word character for \W, so split on it explicitly.
    split = re.split(r'\W+|_', string.lower())
    return toUnicode(' '.join(split))


def toUnicode(original, *args):
    """Convert ``original`` to a unicode string on a best-effort basis.

    Conversion order:

    1. ``unicode`` input is returned unchanged.
    2. ``six.text_type(original, *args)`` is tried.
    3. If that fails, the encoding is guessed with ``detect``; UTF-8 input
       is decoded as UTF-8, anything else is handed to ``ek``.
    4. If every step fails, the error is logged and the ``string_escape``
       form of ``str(original)`` is converted instead.

    :param original: value to convert.
    :param args: extra positional arguments forwarded to ``six.text_type``
        and ``ek`` (not forwarded to the last-resort fallback).
    :return: the converted text.
    """
    try:
        if isinstance(original, unicode):
            return original

        try:
            return six.text_type(original, *args)
        except Exception:
            # Plain conversion failed; guess the encoding instead.
            detected = detect(original)
            if detected.get('encoding') == 'utf-8':
                return original.decode('utf-8')
            return ek(original, *args)
    # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt and
    # SystemExit are not swallowed and turned into an escaped string.
    except Exception:
        log.error('Unable to decode value "%s..." : %s ', (repr(original)[:20], traceback.format_exc()))
        # Last resort: escape the raw value and convert that instead.
        ascii_text = str(original).encode('string_escape')
        return toUnicode(ascii_text)

def toUTF8(original):
    """Re-encode a non-empty ``str`` as UTF-8 using a detected source encoding.

    Values that are not ``str`` instances, and empty strings, are returned
    unchanged. Any error raised while detecting or transcoding propagates
    to the caller.
    """
    # Pass through anything that is not a non-empty str.
    if not isinstance(original, str) or len(original) == 0:
        return original

    # Guess the source encoding, then transcode to UTF-8.
    source_encoding = detect(original).get('encoding')
    decoded = original.decode(source_encoding)
    return decoded.encode('utf-8')

def ss(original, *args):
    """Encode ``original`` to a byte string in the configured encoding.

    The value is first converted with ``toUnicode``. Encoding is attempted
    strictly with ``Env.get('encoding')``; if that fails the failure is
    logged at debug level and the same encoding is retried with
    ``'replace'``, and finally UTF-8 with ``'replace'`` is used.
    """
    text = toUnicode(original, *args)

    try:
        from couchpotato.environment import Env
        target_encoding = Env.get('encoding')
        return text.encode(target_encoding)
    except Exception as err:
        log.debug('Failed ss encoding char, force UTF8: %s', err)

    # Strict encoding failed: substitute unencodable characters instead.
    try:
        return text.encode(Env.get('encoding'), 'replace')
    except:
        return text.encode('utf-8', 'replace')


def sp(path, *args):
    """Encode ``path`` with ``ss`` and tidy it up as a filesystem path.

    Falsy or empty input is returned unchanged. Otherwise the path is
    encoded and normalised with ``os.path.normpath``, trailing separators
    are removed, a bare drive such as ``C:`` gets a trailing separator and
    a leading ``//`` is reduced to ``/``.
    """
    if not path:
        return path

    # A Windows-style path (e.g. reported by a remote box) seen on a host
    # that uses '/' as separator: drop the colon and flip the slashes.
    runs_on_posix = os.path.sep == '/'
    if runs_on_posix and '\\' in path:
        path = '/' + path.replace(':', '').replace('\\', '/')

    cleaned = os.path.normpath(ss(path, *args))

    # Strip trailing separators, but leave a lone root separator intact.
    if cleaned != os.path.sep:
        cleaned = cleaned.rstrip(os.path.sep)

    # A bare Windows drive ('C:') needs its separator back (crashes guessit).
    is_bare_drive = len(cleaned) == 2 and cleaned[1] == ':'
    if is_bare_drive:
        cleaned += os.path.sep

    # A leading '//' is ambiguous on *NIX (crashes guessit); reduce it to '/'.
    return re.sub('^//', '/', cleaned)


def ek(original, *args):
    """Decode a byte string using the encoding configured in ``Env``.

    :param original: value to decode.
    :param args: accepted for call compatibility; not used.
    :return: ``original`` decoded with ``Env.get('encoding')`` when it is a
        byte ``str``; ``unicode`` input and any other type are returned
        unchanged.
    :raises UnicodeDecodeError: if the bytes are not valid in the
        configured encoding.
    """
    # Text that is already unicode must not be decoded again: on Python 2
    # ``unicode.decode()`` first encodes implicitly with the default codec
    # and raises UnicodeEncodeError for non-ASCII characters.
    if isinstance(original, unicode):
        return original

    if isinstance(original, str):
        # Function-level import kept from the original code (presumably to
        # avoid a circular import -- confirm before moving it).
        from couchpotato.environment import Env
        return original.decode(Env.get('encoding'))

    return original


def isInt(value):
    """Return True if ``int(value)`` succeeds, False otherwise.

    Values that ``int()`` rejects by type (``None``, lists, ...) or that
    cannot be represented (infinity) count as "not an int" instead of
    raising.
    """
    try:
        int(value)
    except (TypeError, ValueError, OverflowError):
        # TypeError: unsupported type such as None or a list.
        # ValueError: malformed string or NaN.
        # OverflowError: float infinity.
        return False
    return True


def stripAccents(s):
    """Return ``s`` with combining accent marks removed.

    The value is converted with ``toUnicode`` and NFD-decomposed, then
    every character in Unicode category ``Mn`` is dropped.
    """
    decomposed = unicodedata.normalize('NFD', toUnicode(s))
    base_chars = [char for char in decomposed if unicodedata.category(char) != 'Mn']
    return ''.join(base_chars)


def tryUrlencode(s):
    """URL-encode a value, or a dict of values, on a best-effort basis.

    For a dict, every value is encoded recursively and the items are
    joined as ``key=value`` pairs separated by ``&``; keys are inserted as
    they are. Any other value is encoded with ``ss`` and quoted one
    character at a time with ``quote_plus``; a character that cannot be
    quoted is appended unchanged.
    """
    if isinstance(s, dict):
        pairs = [six.u('&%s=%s') % (key, tryUrlencode(value)) for key, value in s.items()]
        # Every pair carries a leading '&'; drop the first one.
        return six.u('').join(pairs)[1:]

    encoded = six.u('')
    for letter in ss(s):
        try:
            encoded += quote_plus(letter)
        except:
            encoded += letter

    return encoded
Movie add JSONP 14 years ago			`from string import ascii_letters, digits`
Use custom urlencode function. fixes #121 13 years ago			`from urllib import quote_plus`
Introduce path cleaning A new function sp is introduced. It does the same as ss but also cleans the path. 12 years ago			`import os`
Movie add JSONP 14 years ago			`import re`
Add traceback to unicode encoding 13 years ago			`import traceback`
Movie add JSONP 14 years ago			`import unicodedata`
Optimize imports 11 years ago
Replace non existing chars 11 years ago			`from chardet import detect`
Optimize imports 11 years ago			`from couchpotato.core.logger import CPLog`
py3k port helpers 11 years ago			`import six`
Movie add JSONP 14 years ago
Optimize imports 11 years ago
Movie add JSONP 14 years ago			`log = CPLog(__name__)`

Notifications 14 years ago
Movie add JSONP 14 years ago			`def toSafeString(original):`
			`valid_chars = "-_.() %s%s" % (ascii_letters, digits)`
Code cleanup 11 years ago			`cleaned_filename = unicodedata.normalize('NFKD', toUnicode(original)).encode('ASCII', 'ignore')`
			`valid_string = ''.join(c for c in cleaned_filename if c in valid_chars)`
Remove duplicate spaces 12 years ago			`return ' '.join(valid_string.split())`
Movie add JSONP 14 years ago
Code cleanup 11 years ago
Movie add JSONP 14 years ago			`def simplifyString(original):`
Strip accents before simplifying string 13 years ago			`string = stripAccents(original.lower())`
			`string = toSafeString(' '.join(re.split('\W+', string)))`
Simplifystring also strip on underscore 13 years ago			`split = re.split('\W+|_', string.lower())`
Movie add JSONP 14 years ago			`return toUnicode(' '.join(split))`

Code cleanup 11 years ago
Movie add JSONP 14 years ago			`def toUnicode(original, *args):`
			`try:`
Use custom urlencode function. fixes #121 13 years ago			`if isinstance(original, unicode):`
Movie add JSONP 14 years ago			`return original`
			`else:`
Fixes 13 years ago			`try:`
py3k port helpers 11 years ago			`return six.text_type(original, *args)`
Fixes 13 years ago			`except:`
			`try:`
Replace non existing chars 11 years ago			`detected = detect(original)`
			`if detected.get('encoding') == 'utf-8':`
			`return original.decode('utf-8')`
Fixes 13 years ago			`return ek(original, *args)`
			`except:`
			`raise`
Add traceback to unicode encoding 13 years ago			`except:`
			`log.error('Unable to decode value "%s..." : %s ', (repr(original)[:20], traceback.format_exc()))`
Movie add JSONP 14 years ago			`ascii_text = str(original).encode('string_escape')`
Fixes 13 years ago			`return toUnicode(ascii_text)`
Movie search Profile Settings 14 years ago
Force logging utf8 11 years ago			`def toUTF8(original):`
			`try:`
			`if isinstance(original, str) and len(original) > 0:`
			`# Try to detect`
			`detected = detect(original)`
			`return original.decode(detected.get('encoding')).encode('utf-8')`
			`else:`
			`return original`
			`except:`
			`#log.error('Failed encoding to UTF8: %s', traceback.format_exc())`
			`raise`
Code cleanup 11 years ago
Encoding file paths 13 years ago			`def ss(original, *args):`
Revert back to UTF8 when ss encoding fails. fix #2220 12 years ago
			`u_original = toUnicode(original, *args)`
			`try:`
			`from couchpotato.environment import Env`
			`return u_original.encode(Env.get('encoding'))`
Code cleanup 11 years ago			`except Exception as e:`
Revert back to UTF8 when ss encoding fails. fix #2220 12 years ago			`log.debug('Failed ss encoding char, force UTF8: %s', e)`
Replace non existing chars 11 years ago			`try:`
			`return u_original.encode(Env.get('encoding'), 'replace')`
			`except:`
			`return u_original.encode('utf-8', 'replace')`
Encoding file paths 13 years ago
Code cleanup 11 years ago
Introduce path cleaning A new function sp is introduced. It does the same as ss but also cleans the path. 12 years ago			`def sp(path, *args):`
Normalize path sp function 12 years ago
Introduce path cleaning A new function sp is introduced. It does the same as ss but also cleans the path. 12 years ago			`# Standardise encoding, normalise case, path and strip trailing '/' or '\'`
Normalize path sp function 12 years ago			`if not path or len(path) == 0:`
			`return path`

Convert windows path to *nix path in sp Fixes #2594 Note that os.path.normath converts '/' to '\\' on windows machines, but unfortunately not the other way around... 12 years ago			`# convert windows path (from remote box) to *nix path`
			`if os.path.sep == '/' and '\\' in path:`
			`path = '/' + path.replace(':', '').replace('\\', '/')`

Don't normcase in sp function 11 years ago			`path = os.path.normpath(ss(path, *args))`
Normalize path sp function 12 years ago
Add a trailing separator for windows drive path Fixes #2581, #2526 12 years ago			`# Remove any trailing path separators`
Normalize path sp function 12 years ago			`if path != os.path.sep:`
			`path = path.rstrip(os.path.sep)`

Remove leading '//' from *NIX paths Fixes #2506, #2021 12 years ago			`# Add a trailing separator in case it is a root folder on windows (crashes guessit)`
Add a trailing separator for windows drive path Fixes #2581, #2526 12 years ago			`if len(path) == 2 and path[1] == ':':`
			`path = path + os.path.sep`

Remove leading '//' from *NIX paths Fixes #2506, #2021 12 years ago			`# Replace *NIX ambiguous '//' at the beginning of a path with '/' (crashes guessit)`
			`path = re.sub('^//', '/', path)`

More path encoding 11 years ago			`return path`
Introduce path cleaning A new function sp is introduced. It does the same as ss but also cleans the path. 12 years ago
Code cleanup 11 years ago
Some more encoding 14 years ago			`def ek(original, *args):`
Use custom urlencode function. fixes #121 13 years ago			`if isinstance(original, (str, unicode)):`
Some more encoding 14 years ago			`try:`
			`from couchpotato.environment import Env`
			`return original.decode(Env.get('encoding'))`
			`except UnicodeDecodeError:`
Fixes 13 years ago			`raise`
Some more encoding 14 years ago
			`return original`
Movie search Profile Settings 14 years ago
Code cleanup 11 years ago
Scheduler NZBmatrix, newznab Scores 14 years ago			`def isInt(value):`
Movie search Profile Settings 14 years ago			`try:`
			`int(value)`
			`return True`
			`except ValueError:`
			`return False`
Strip accents before simplifying string 13 years ago
Code cleanup 11 years ago
Strip accents before simplifying string 13 years ago			`def stripAccents(s):`
toUnicode before stripaccents 13 years ago			`return ''.join((c for c in unicodedata.normalize('NFD', toUnicode(s)) if unicodedata.category(c) != 'Mn'))`
Use custom urlencode function. fixes #121 13 years ago
Code cleanup 11 years ago
Use custom urlencode function. fixes #121 13 years ago			`def tryUrlencode(s):`
py3k port helpers 11 years ago			`new = six.u('')`
Code cleanup 12 years ago			`if isinstance(s, dict):`
py3k port helpers 11 years ago			`for key, value in s.items():`
			`new += six.u('&%s=%s') % (key, tryUrlencode(value))`
Use custom urlencode function. fixes #121 13 years ago
			`return new[1:]`
			`else:`
Encoding issues. fix #974 13 years ago			`for letter in ss(s):`
Use custom urlencode function. fixes #121 13 years ago			`try:`
			`new += quote_plus(letter)`
			`except:`
			`new += letter`

			`return new`