CouchPotatoServer/libs/cssutils/_codec3.py


								#!/usr/bin/env python

								"""Python codec for CSS."""

								__docformat__ = 'restructuredtext'

								__author__ = 'Walter Doerwald'

								__version__ = '$Id: util.py 1114 2008-03-05 13:22:59Z cthedot $'


								import sys

								import codecs

								import marshal


								# We're using bits to store all possible candidate encodings (or variants, i.e.

								# we have two bits for the variants of UTF-16 and two for the

								# variants of UTF-32).

								#

								# Prefixes for various CSS encodings

								# UTF-8-SIG   xEF  xBB  xBF

								# UTF-16 (LE) xFF  xFE ~x00|~x00

								# UTF-16 (BE) xFE  xFF

								# UTF-16-LE    @   x00   @   x00

								# UTF-16-BE   x00   @

								# UTF-32 (LE) xFF  xFE  x00  x00

								# UTF-32 (BE) x00  x00  xFE  xFF

								# UTF-32-LE    @   x00  x00  x00

								# UTF-32-BE   x00  x00  x00   @

								# CHARSET      @    c    h    a  ...


								def chars(bytestring):

								    return ''.join(chr(byte) for byte in bytestring)


								def detectencoding_str(input, final=False):

								    """

								    Detect the encoding of the byte string ``input``, which contains the

								    beginning of a CSS file. This function returns the detected encoding (or

								    ``None`` if it hasn't got enough data), and a flag that indicates whether

								    that encoding has been detected explicitely or implicitely. To detect the

								    encoding the first few bytes are used (or if ``input`` is ASCII compatible

								    and starts with a charset rule the encoding name from the rule). "Explicit"

								    detection means that the bytes start with a BOM or a charset rule.


								    If the encoding can't be detected yet, ``None`` is returned as the encoding.

								    ``final`` specifies whether more data will be available in later calls or

								    not. If ``final`` is true, ``detectencoding_str()`` will never return

								    ``None`` as the encoding.

								    """


								    # A bit for every candidate

								    CANDIDATE_UTF_8_SIG    =   1

								    CANDIDATE_UTF_16_AS_LE =   2

								    CANDIDATE_UTF_16_AS_BE =   4

								    CANDIDATE_UTF_16_LE    =   8

								    CANDIDATE_UTF_16_BE    =  16

								    CANDIDATE_UTF_32_AS_LE =  32

								    CANDIDATE_UTF_32_AS_BE =  64

								    CANDIDATE_UTF_32_LE    = 128

								    CANDIDATE_UTF_32_BE    = 256

								    CANDIDATE_CHARSET      = 512


								    candidates = 1023 # all candidates


								    #input = chars(input)

								    li = len(input)

								    if li>=1:

								        # Check first byte

								        c = input[0]

								        if c != b"\xef"[0]:

								            candidates &= ~CANDIDATE_UTF_8_SIG

								        if c != b"\xff"[0]:

								            candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_16_AS_LE)

								        if c != b"\xfe"[0]:

								            candidates &= ~CANDIDATE_UTF_16_AS_BE

								        if c != b"@"[0]:

								            candidates &= ~(CANDIDATE_UTF_32_LE|CANDIDATE_UTF_16_LE|CANDIDATE_CHARSET)

								        if c != b"\x00"[0]:

								            candidates &= ~(CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_BE|CANDIDATE_UTF_16_BE)

								        if li>=2:

								            # Check second byte

								            c = input[1]

								            if c != b"\xbb"[0]:

								                candidates &= ~CANDIDATE_UTF_8_SIG

								            if c != b"\xfe"[0]:

								                candidates &= ~(CANDIDATE_UTF_16_AS_LE|CANDIDATE_UTF_32_AS_LE)

								            if c != b"\xff"[0]:

								                candidates &= ~CANDIDATE_UTF_16_AS_BE

								            if c != b"\x00"[0]:

								                candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_BE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)

								            if c != b"@"[0]:

								                candidates &= ~CANDIDATE_UTF_16_BE

								            if c != b"c"[0]:

								                candidates &= ~CANDIDATE_CHARSET

								            if li>=3:

								                # Check third byte

								                c = input[2]

								                if c != b"\xbf"[0]:

								                    candidates &= ~CANDIDATE_UTF_8_SIG

								                if c != b"c"[0]:

								                    candidates &= ~CANDIDATE_UTF_16_LE

								                if c != b"\x00"[0]:

								                    candidates &= ~(CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE|CANDIDATE_UTF_32_BE)

								                if c != b"\xfe"[0]:

								                    candidates &= ~CANDIDATE_UTF_32_AS_BE

								                if c != b"h"[0]:

								                    candidates &= ~CANDIDATE_CHARSET

								                if li>=4:

								                    # Check fourth byte

								                    c = input[3]

								                    if input[2:4] == b"\x00\x00"[0:2]:

								                        candidates &= ~CANDIDATE_UTF_16_AS_LE

								                    if c != b"\x00"[0]:

								                        candidates &= ~(CANDIDATE_UTF_16_LE|CANDIDATE_UTF_32_AS_LE|CANDIDATE_UTF_32_LE)

								                    if c != b"\xff"[0]:

								                        candidates &= ~CANDIDATE_UTF_32_AS_BE

								                    if c != b"@"[0]:

								                        candidates &= ~CANDIDATE_UTF_32_BE

								                    if c != b"a"[0]:

								                        candidates &= ~CANDIDATE_CHARSET

								    if candidates == 0:

								        return ("utf-8", False)

								    if not (candidates & (candidates-1)): # only one candidate remaining

								        if candidates == CANDIDATE_UTF_8_SIG and li >= 3:

								            return ("utf-8-sig", True)

								        elif candidates == CANDIDATE_UTF_16_AS_LE and li >= 2:

								            return ("utf-16", True)

								        elif candidates == CANDIDATE_UTF_16_AS_BE and li >= 2:

								            return ("utf-16", True)

								        elif candidates == CANDIDATE_UTF_16_LE and li >= 4:

								            return ("utf-16-le", False)

								        elif candidates == CANDIDATE_UTF_16_BE and li >= 2:

								            return ("utf-16-be", False)

								        elif candidates == CANDIDATE_UTF_32_AS_LE and li >= 4:

								            return ("utf-32", True)

								        elif candidates == CANDIDATE_UTF_32_AS_BE and li >= 4:

								            return ("utf-32", True)

								        elif candidates == CANDIDATE_UTF_32_LE and li >= 4:

								            return ("utf-32-le", False)

								        elif candidates == CANDIDATE_UTF_32_BE and li >= 4:

								            return ("utf-32-be", False)

								        elif candidates == CANDIDATE_CHARSET and li >= 4:

								            prefix = '@charset "'

								            charsinput = chars(input)

								            if charsinput[:len(prefix)] == prefix:

								                pos = charsinput.find('"', len(prefix))

								                if pos >= 0:

								                    # TODO: return str and not bytes!

								                    return (charsinput[len(prefix):pos], True)

								    # if this is the last call, and we haven't determined an encoding yet,

								    # we default to UTF-8

								    if final:

								        return ("utf-8", False)

								    return (None, False) # dont' know yet


								def detectencoding_unicode(input, final=False):

								    """

								    Detect the encoding of the unicode string ``input``, which contains the

								    beginning of a CSS file. The encoding is detected from the charset rule

								    at the beginning of ``input``. If there is no charset rule, ``"utf-8"``

								    will be returned.


								    If the encoding can't be detected yet, ``None`` is returned. ``final``

								    specifies whether more data will be available in later calls or not. If

								    ``final`` is true, ``detectencoding_unicode()`` will never return ``None``.

								    """

								    prefix = '@charset "'

								    if input.startswith(prefix):

								        pos = input.find('"', len(prefix))

								        if pos >= 0:

								            return (input[len(prefix):pos], True)

								    elif final or not prefix.startswith(input):

								        # if this is the last call, and we haven't determined an encoding yet,

								        # (or the string definitely doesn't start with prefix) we default to UTF-8

								        return ("utf-8", False)

								    return (None, False) # don't know yet


								def _fixencoding(input, encoding, final=False):

								    """

								    Replace the name of the encoding in the charset rule at the beginning of

								    ``input`` with ``encoding``. If ``input`` doesn't starts with a charset

								    rule, ``input`` will be returned unmodified.


								    If the encoding can't be found yet, ``None`` is returned. ``final``

								    specifies whether more data will be available in later calls or not.

								    If ``final`` is true, ``_fixencoding()`` will never return ``None``.

								    """

								    prefix = '@charset "'

								    if len(input) > len(prefix):

								        if input.startswith(prefix):

								            pos = input.find('"', len(prefix))

								            if pos >= 0:

								                if encoding.replace("_", "-").lower() == "utf-8-sig":

								                    encoding = "utf-8"

								                return prefix + encoding + input[pos:]

								            # we haven't seen the end of the encoding name yet => fall through

								        else:

								            return input # doesn't start with prefix, so nothing to fix

								    elif not prefix.startswith(input) or final:

								        # can't turn out to be a @charset rule later (or there is no "later")

								        return input

								    if final:

								        return input

								    return None # don't know yet


								def decode(input, errors="strict", encoding=None, force=True):

								    try:

								        # py 3 only, memory?! object to bytes

								        input = input.tobytes()

								    except AttributeError as e:

								        pass


								    if encoding is None or not force:

								        (_encoding, explicit) = detectencoding_str(input, True)

								        if _encoding == "css":

								            raise ValueError("css not allowed as encoding name")

								        if (explicit and not force) or encoding is None: # Take the encoding from the input

								            encoding = _encoding


								    # NEEDS: change in parse.py (str to bytes!)

								    (input, consumed) = codecs.getdecoder(encoding)(input, errors)

								    return (_fixencoding(input, str(encoding), True), consumed)


								def encode(input, errors="strict", encoding=None):

								    consumed = len(input)

								    if encoding is None:

								        encoding = detectencoding_unicode(input, True)[0]

								        if encoding.replace("_", "-").lower() == "utf-8-sig":

								            input = _fixencoding(input, "utf-8", True)

								    else:

								        input = _fixencoding(input, str(encoding), True)

								    if encoding == "css":

								        raise ValueError("css not allowed as encoding name")

								    encoder = codecs.getencoder(encoding)

								    return (encoder(input, errors)[0], consumed)


								def _bytes2int(bytes):

								    # Helper: convert an 8 bit string into an ``int``.

								    i = 0

								    for byte in bytes:

								        i = (i<<8) + ord(byte)

								    return i


								def _int2bytes(i):

								    # Helper: convert an ``int`` into an 8-bit string.

								    v = []

								    while i:

								        v.insert(0, chr(i&0xff))

								        i >>= 8

								    return "".join(v)


								if hasattr(codecs, "IncrementalDecoder"):

								    class IncrementalDecoder(codecs.IncrementalDecoder):

								        def __init__(self, errors="strict", encoding=None, force=True):

								            self.decoder = None

								            self.encoding = encoding

								            self.force = force

								            codecs.IncrementalDecoder.__init__(self, errors)

								            # Store ``errors`` somewhere else,

								            # because we have to hide it in a property

								            self._errors = errors

								            self.buffer = b""

								            self.headerfixed = False


								        def iterdecode(self, input):

								            for part in input:

								                result = self.decode(part, False)

								                if result:

								                    yield result

								            result = self.decode("", True)

								            if result:

								                yield result


								        def decode(self, input, final=False):

								            # We're doing basically the same as a ``BufferedIncrementalDecoder``,

								            # but since the buffer is only relevant until the encoding has been

								            # detected (in which case the buffer of the underlying codec might

								            # kick in), we're implementing buffering ourselves to avoid some

								            # overhead.

								            if self.decoder is None:

								                input = self.buffer + input

								                # Do we have to detect the encoding from the input?

								                if self.encoding is None or not self.force:

								                    (encoding, explicit) = detectencoding_str(input, final)

								                    if encoding is None: # no encoding determined yet

								                        self.buffer = input # retry the complete input on the next call

								                        return "" # no encoding determined yet, so no output

								                    elif encoding == "css":

								                        raise ValueError("css not allowed as encoding name")

								                    if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input

								                        self.encoding = encoding

								                self.buffer = "" # drop buffer, as the decoder might keep its own

								                decoder = codecs.getincrementaldecoder(self.encoding)

								                self.decoder = decoder(self._errors)

								            if self.headerfixed:

								                return self.decoder.decode(input, final)

								            # If we haven't fixed the header yet,

								            # the content of ``self.buffer`` is a ``unicode`` object

								            output = self.buffer + self.decoder.decode(input, final)

								            encoding = self.encoding

								            if encoding.replace("_", "-").lower() == "utf-8-sig":

								                encoding = "utf-8"

								            newoutput = _fixencoding(output, str(encoding), final)

								            if newoutput is None:

								                # retry fixing the @charset rule (but keep the decoded stuff)

								                self.buffer = output

								                return ""

								            self.headerfixed = True

								            return newoutput


								        def reset(self):

								            codecs.IncrementalDecoder.reset(self)

								            self.decoder = None

								            self.buffer = b""

								            self.headerfixed = False


								        def _geterrors(self):

								            return self._errors


								        def _seterrors(self, errors):

								            # Setting ``errors`` must be done on the real decoder too

								            if self.decoder is not None:

								                self.decoder.errors = errors

								            self._errors = errors

								        errors = property(_geterrors, _seterrors)


								        def getstate(self):

								            if self.decoder is not None:

								                state = (self.encoding, self.buffer, self.headerfixed, True, self.decoder.getstate())

								            else:

								                state = (self.encoding, self.buffer, self.headerfixed, False, None)

								            return ("", _bytes2int(marshal.dumps(state)))


								        def setstate(self, state):

								            state = _int2bytes(marshal.loads(state[1])) # ignore buffered input

								            self.encoding = state[0]

								            self.buffer = state[1]

								            self.headerfixed = state[2]

								            if state[3] is not None:

								                self.decoder = codecs.getincrementaldecoder(self.encoding)(self._errors)

								                self.decoder.setstate(state[4])

								            else:

								                self.decoder = None


								if hasattr(codecs, "IncrementalEncoder"):

								    class IncrementalEncoder(codecs.IncrementalEncoder):

								        def __init__(self, errors="strict", encoding=None):

								            self.encoder = None

								            self.encoding = encoding

								            codecs.IncrementalEncoder.__init__(self, errors)

								            # Store ``errors`` somewhere else,

								            # because we have to hide it in a property

								            self._errors = errors

								            self.buffer = ""


								        def iterencode(self, input):

								            for part in input:

								                result = self.encode(part, False)

								                if result:

								                    yield result

								            result = self.encode("", True)

								            if result:

								                yield result


								        def encode(self, input, final=False):

								            if self.encoder is None:

								                input = self.buffer + input

								                if self.encoding is not None:

								                    # Replace encoding in the @charset rule with the specified one

								                    encoding = self.encoding

								                    if encoding.replace("_", "-").lower() == "utf-8-sig":

								                        encoding = "utf-8"

								                    newinput = _fixencoding(input, str(encoding), final)

								                    if newinput is None: # @charset rule incomplete => Retry next time

								                        self.buffer = input

								                        return ""

								                    input = newinput

								                else:

								                    # Use encoding from the @charset declaration

								                    self.encoding = detectencoding_unicode(input, final)[0]

								                if self.encoding is not None:

								                    if self.encoding == "css":

								                        raise ValueError("css not allowed as encoding name")

								                    info = codecs.lookup(self.encoding)

								                    encoding = self.encoding

								                    if self.encoding.replace("_", "-").lower() == "utf-8-sig":

								                        input = _fixencoding(input, "utf-8", True)

								                    self.encoder = info.incrementalencoder(self._errors)

								                    self.buffer = ""

								                else:

								                    self.buffer = input

								                    return ""

								            return self.encoder.encode(input, final)


								        def reset(self):

								            codecs.IncrementalEncoder.reset(self)

								            self.encoder = None

								            self.buffer = ""


								        def _geterrors(self):

								            return self._errors


								        def _seterrors(self, errors):

								            # Setting ``errors ``must be done on the real encoder too

								            if self.encoder is not None:

								                self.encoder.errors = errors

								            self._errors = errors

								        errors = property(_geterrors, _seterrors)


								        def getstate(self):

								            if self.encoder is not None:

								                state = (self.encoding, self.buffer, True, self.encoder.getstate())

								            else:

								                state = (self.encoding, self.buffer, False, None)

								            return _bytes2int(marshal.dumps(state))


								        def setstate(self, state):

								            state = _int2bytes(marshal.loads(state))

								            self.encoding = state[0]

								            self.buffer = state[1]

								            if state[2] is not None:

								                self.encoder = codecs.getincrementalencoder(self.encoding)(self._errors)

								                self.encoder.setstate(state[4])

								            else:

								                self.encoder = None


								class StreamWriter(codecs.StreamWriter):

								    def __init__(self, stream, errors="strict", encoding=None, header=False):

								        codecs.StreamWriter.__init__(self, stream, errors)

								        self.streamwriter = None

								        self.encoding = encoding

								        self._errors = errors

								        self.buffer = ""


								    def encode(self, input, errors='strict'):

								        li = len(input)

								        if self.streamwriter is None:

								            input = self.buffer + input

								            li = len(input)

								            if self.encoding is not None:

								                # Replace encoding in the @charset rule with the specified one

								                encoding = self.encoding

								                if encoding.replace("_", "-").lower() == "utf-8-sig":

								                    encoding = "utf-8"

								                newinput = _fixencoding(input, str(encoding), False)

								                if newinput is None: # @charset rule incomplete => Retry next time

								                    self.buffer = input

								                    return ("", 0)

								                input = newinput

								            else:

								                # Use encoding from the @charset declaration

								                self.encoding = detectencoding_unicode(input, False)[0]

								            if self.encoding is not None:

								                if self.encoding == "css":

								                    raise ValueError("css not allowed as encoding name")

								                self.streamwriter = codecs.getwriter(self.encoding)(self.stream, self._errors)

								                encoding = self.encoding

								                if self.encoding.replace("_", "-").lower() == "utf-8-sig":

								                    input = _fixencoding(input, "utf-8", True)

								                self.buffer = ""

								            else:

								                self.buffer = input

								                return ("", 0)

								        return (self.streamwriter.encode(input, errors)[0], li)


								    def _geterrors(self):

								        return self._errors


								    def _seterrors(self, errors):

								        # Setting ``errors`` must be done on the streamwriter too

								        try:

								            if self.streamwriter is not None:

								                self.streamwriter.errors = errors

								        except AttributeError as e:

								            # TODO: py3 only exception?

								            pass


								        self._errors = errors

								    errors = property(_geterrors, _seterrors)


								class StreamReader(codecs.StreamReader):

								    def __init__(self, stream, errors="strict", encoding=None, force=True):

								        codecs.StreamReader.__init__(self, stream, errors)

								        self.streamreader = None

								        self.encoding = encoding

								        self.force = force

								        self._errors = errors


								    def decode(self, input, errors='strict'):

								        if self.streamreader is None:

								            if self.encoding is None or not self.force:

								                (encoding, explicit) = detectencoding_str(input, False)

								                if encoding is None: # no encoding determined yet

								                    return ("", 0) # no encoding determined yet, so no output

								                elif encoding == "css":

								                    raise ValueError("css not allowed as encoding name")

								                if (explicit and not self.force) or self.encoding is None: # Take the encoding from the input

								                    self.encoding = encoding

								            streamreader = codecs.getreader(self.encoding)

								            streamreader = streamreader(self.stream, self._errors)

								            (output, consumed) = streamreader.decode(input, errors)

								            encoding = self.encoding

								            if encoding.replace("_", "-").lower() == "utf-8-sig":

								                encoding = "utf-8"

								            newoutput = _fixencoding(output, str(encoding), False)

								            if newoutput is not None:

								                self.streamreader = streamreader

								                return (newoutput, consumed)

								            return ("", 0) # we will create a new streamreader on the next call

								        return self.streamreader.decode(input, errors)


								    def _geterrors(self):

								        return self._errors


								    def _seterrors(self, errors):

								        # Setting ``errors`` must be done on the streamreader too

								        try:

								            if self.streamreader is not None:

								                self.streamreader.errors = errors

								        except AttributeError as e:

								            # TODO: py3 only exception?

								            pass


								        self._errors = errors

								    errors = property(_geterrors, _seterrors)


								if hasattr(codecs, "CodecInfo"):

								    # We're running on Python 2.5 or better

								    def search_function(name):

								        if name == "css":

								            return codecs.CodecInfo(

								                name="css",

								                encode=encode,

								                decode=decode,

								                incrementalencoder=IncrementalEncoder,

								                incrementaldecoder=IncrementalDecoder,

								                streamwriter=StreamWriter,

								                streamreader=StreamReader,

								            )

								else:

								    # If we're running on Python 2.4, define the utf-8-sig codec here

								    def utf8sig_encode(input, errors='strict'):

								        return (codecs.BOM_UTF8 + codecs.utf_8_encode(input, errors)[0], len(input))


								    def utf8sig_decode(input, errors='strict'):

								        prefix = 0

								        if input[:3] == codecs.BOM_UTF8:

								            input = input[3:]

								            prefix = 3

								        (output, consumed) = codecs.utf_8_decode(input, errors, True)

								        return (output, consumed+prefix)


								    class UTF8SigStreamWriter(codecs.StreamWriter):

								        def reset(self):

								            codecs.StreamWriter.reset(self)

								            try:

								                del self.encode

								            except AttributeError:

								                pass


								        def encode(self, input, errors='strict'):

								            self.encode = codecs.utf_8_encode

								            return utf8sig_encode(input, errors)


								    class UTF8SigStreamReader(codecs.StreamReader):

								        def reset(self):

								            codecs.StreamReader.reset(self)

								            try:

								                del self.decode

								            except AttributeError:

								                pass


								        def decode(self, input, errors='strict'):

								            if len(input) < 3 and codecs.BOM_UTF8.startswith(input):

								                # not enough data to decide if this is a BOM

								                # => try again on the next call

								                return ("", 0)

								            self.decode = codecs.utf_8_decode

								            return utf8sig_decode(input, errors)


								    def search_function(name):

								        import encodings

								        name = encodings.normalize_encoding(name)

								        if name == "css":

								            return (encode, decode, StreamReader, StreamWriter)

								        elif name == "utf_8_sig":

								            return (utf8sig_encode, utf8sig_decode, UTF8SigStreamReader, UTF8SigStreamWriter)


								codecs.register(search_function)


								# Error handler for CSS escaping


								def cssescape(exc):

								    if not isinstance(exc, UnicodeEncodeError):

								        raise TypeError("don't know how to handle %r" % exc)

								    return ("".join("\\%06x" % ord(c) for c in exc.object[exc.start:exc.end]), exc.end)


								codecs.register_error("cssescape", cssescape)