CouchPotatoServer/libs/tornado/httputil.py

#!/usr/bin/env python
#
# Copyright 2009 Facebook
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""HTTP utility code shared by clients and servers."""

from __future__ import absolute_import, division, with_statement

import logging
import urllib
import re

from tornado.escape import native_str, parse_qs_bytes, utf8
from tornado.util import b, ObjectDict


class HTTPHeaders(dict):
    """A dictionary that maintains Http-Header-Case for all keys.

    Supports multiple values per key via a pair of new methods,
    add() and get_list().  The regular dictionary interface returns a single
    value per key, with multiple values joined by a comma.

    Internally the joined value lives in the dict itself while the
    individual values are kept in ``_as_list`` (normalized name -> list of
    values).  Only the methods overridden below normalize names and keep
    both structures in sync.

    >>> h = HTTPHeaders({"content-type": "text/html"})
    >>> list(h.keys())
    ['Content-Type']
    >>> h["Content-Type"]
    'text/html'

    >>> h.add("Set-Cookie", "A=B")
    >>> h.add("Set-Cookie", "C=D")
    >>> h["set-cookie"]
    'A=B,C=D'
    >>> h.get_list("set-cookie")
    ['A=B', 'C=D']

    >>> for (k,v) in sorted(h.get_all()):
    ...    print('%s: %s' % (k,v))
    ...
    Content-Type: text/html
    Set-Cookie: A=B
    Set-Cookie: C=D
    """
    def __init__(self, *args, **kwargs):
        """Builds the headers from another HTTPHeaders or dict-style args.

        A single HTTPHeaders positional argument is copied value by value
        (so repeated headers are preserved); anything else is handled like
        ``dict(*args, **kwargs)`` and routed through ``update()``.
        """
        # Don't pass args or kwargs to dict.__init__, as it will bypass
        # our __setitem__
        dict.__init__(self)
        # normalized name -> list of the individual values added for it
        self._as_list = {}
        # normalized name of the most recently added header; used by
        # parse_line() to attach continuation lines
        self._last_key = None
        if (len(args) == 1 and len(kwargs) == 0 and
            isinstance(args[0], HTTPHeaders)):
            # Copy constructor
            for k, v in args[0].get_all():
                self.add(k, v)
        else:
            # Dict-style initialization
            self.update(*args, **kwargs)

    # new public methods

    def add(self, name, value):
        """Adds a new value for the given key.

        If the header already exists the value is appended: the joined
        string gains ``',' + value`` and the per-header list gains one entry.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        self._last_key = norm_name
        if norm_name in self:
            # bypass our override of __setitem__ since it modifies _as_list
            dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)
            self._as_list[norm_name].append(value)
        else:
            self[norm_name] = value

    def get_list(self, name):
        """Returns all values for the given header as a list.

        An unknown header yields an empty list.
        """
        norm_name = HTTPHeaders._normalize_name(name)
        return self._as_list.get(norm_name, [])

    def get_all(self):
        """Returns an iterable of all (name, value) pairs.

        If a header has multiple values, multiple pairs will be
        returned with the same name.
        """
        # items() (rather than the Python 2-only iteritems()) keeps this
        # usable on both Python 2 and Python 3.
        for name, values in self._as_list.items():
            for value in values:
                yield (name, value)

    def parse_line(self, line):
        """Updates the dictionary with a single header line.

        A line starting with whitespace is treated as the continuation of
        the previously added header and is appended to its last value,
        separated by a single space.  Such a line raises KeyError when no
        header has been added yet.  Any other line is split at the first
        colon into name and value; a line without a colon raises
        ValueError.  ``line`` must not be empty.

        >>> h = HTTPHeaders()
        >>> h.parse_line("Content-Type: text/html")
        >>> h.get('content-type')
        'text/html'
        """
        if line[0].isspace():
            # continuation of a multi-line header
            new_part = ' ' + line.lstrip()
            self._as_list[self._last_key][-1] += new_part
            dict.__setitem__(self, self._last_key,
                             self[self._last_key] + new_part)
        else:
            name, value = line.split(":", 1)
            self.add(name, value.strip())

    @classmethod
    def parse(cls, headers):
        """Returns a dictionary from HTTP header text.

        Empty lines in the input are skipped.

        >>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")
        >>> sorted(h.items())
        [('Content-Length', '42'), ('Content-Type', 'text/html')]
        """
        h = cls()
        for line in headers.splitlines():
            if line:
                h.parse_line(line)
        return h

    # dict implementation overrides

    def __setitem__(self, name, value):
        # Assignment replaces every previous value of the header.
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__setitem__(self, norm_name, value)
        self._as_list[norm_name] = [value]

    def __getitem__(self, name):
        return dict.__getitem__(self, HTTPHeaders._normalize_name(name))

    def __delitem__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        dict.__delitem__(self, norm_name)
        del self._as_list[norm_name]

    def __contains__(self, name):
        norm_name = HTTPHeaders._normalize_name(name)
        return dict.__contains__(self, norm_name)

    def get(self, name, default=None):
        return dict.get(self, HTTPHeaders._normalize_name(name), default)

    def update(self, *args, **kwargs):
        # dict.update bypasses our __setitem__
        for k, v in dict(*args, **kwargs).items():
            self[k] = v

    def copy(self):
        # default implementation returns dict(self), not the subclass
        return HTTPHeaders(self)

    _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$')
    # Class-wide memo of raw name -> normalized name.
    _normalized_headers = {}
    # Upper bound on the number of memoized names.  Header names come from
    # remote peers, so an unbounded memo could be grown without limit.
    _NORMALIZED_CACHE_MAX = 1000

    @staticmethod
    def _normalize_name(name):
        """Converts a name to Http-Header-Case.

        Results are memoized in a class-wide cache that is emptied once it
        holds ``_NORMALIZED_CACHE_MAX`` entries.

        >>> HTTPHeaders._normalize_name("coNtent-TYPE")
        'Content-Type'
        """
        cache = HTTPHeaders._normalized_headers
        try:
            return cache[name]
        except KeyError:
            if HTTPHeaders._NORMALIZED_HEADER_RE.match(name):
                # already in normalized form; skip the split/join work
                normalized = name
            else:
                normalized = "-".join([w.capitalize() for w in name.split("-")])
            if len(cache) >= HTTPHeaders._NORMALIZED_CACHE_MAX:
                # Dropping everything is crude but keeps memory bounded;
                # common names are re-added on their next use.
                cache.clear()
            cache[name] = normalized
            return normalized


def url_concat(url, args):
    """Concatenate url and argument dictionary regardless of whether
    url has existing query parameters.

    ``args`` is passed to ``urlencode``.  When it is empty or None, ``url``
    is returned unchanged.  Otherwise a "?" or "&" separator is inserted
    unless ``url`` already ends with one; an empty ``url`` therefore
    yields "?" followed by the encoded arguments.

    >>> url_concat("http://example.com/foo?a=b", dict(c="d"))
    'http://example.com/foo?a=b&c=d'
    """
    if not args:
        return url
    # urlencode lives in urllib on Python 2 and in urllib.parse on Python 3.
    try:
        from urllib import urlencode
    except ImportError:
        from urllib.parse import urlencode
    # Slice instead of indexing so that an empty url does not raise
    # IndexError.
    if url[-1:] not in ('?', '&'):
        url += '&' if ('?' in url) else '?'
    return url + urlencode(args)


class HTTPFile(ObjectDict):
    """Represents an HTTP file.

    The instance attributes listed below can also be read as dictionary
    keys, which is kept for backwards compatibility.

    :ivar filename:
    :ivar body:
    :ivar content_type: Taken from the HTTP header supplied with the file.
        That header is easily forged, so the value should not be trusted
        outright.
    """


def parse_body_arguments(content_type, body, arguments, files):
    """Parses a request body according to its content type.

    For "application/x-www-form-urlencoded" bodies every non-empty value
    is appended to the list stored under its name in ``arguments``.  For
    "multipart/form-data" bodies the boundary is taken from the
    ``content_type`` parameters and the work is delegated to
    parse_multipart_form_data(), which fills ``arguments`` and ``files``;
    a warning is logged when no boundary parameter is present.  Any other
    content type leaves both dictionaries untouched.
    """
    if content_type.startswith("application/x-www-form-urlencoded"):
        parsed = parse_qs_bytes(native_str(body))
        for key, raw_values in parsed.iteritems():
            # blank values are dropped; names with only blank values are
            # not added at all
            non_empty = [item for item in raw_values if item]
            if non_empty:
                arguments.setdefault(key, []).extend(non_empty)
        return
    if not content_type.startswith("multipart/form-data"):
        return
    for chunk in content_type.split(";"):
        param, _, param_value = chunk.strip().partition("=")
        if param == "boundary" and param_value:
            parse_multipart_form_data(utf8(param_value), body, arguments,
                                      files)
            return
    # no usable boundary parameter was found
    logging.warning("Invalid multipart/form-data")


def parse_multipart_form_data(boundary, data, arguments, files):
    """Parses a multipart/form-data body.

    The boundary and data parameters are both byte strings.
    The dictionaries given in the arguments and files parameters
    will be updated with the contents of the body: parts that carry a
    filename are appended to ``files`` as HTTPFile objects, all other
    parts are appended to ``arguments`` as raw values.  Malformed parts
    are skipped with a logged warning.
    """
    dashes = b("--")
    crlf = b("\r\n")
    quote = b('"')
    # The standard allows for the boundary to be quoted in the header,
    # although it's rare (it happens at least for google app engine
    # xmpp).  Backslash-escapes inside the quoted boundary are not
    # handled here.
    if boundary.startswith(quote) and boundary.endswith(quote):
        boundary = boundary[1:-1]
    delimiter = dashes + boundary
    closing_at = data.rfind(delimiter + dashes)
    if closing_at == -1:
        logging.warning("Invalid multipart/form-data: no final boundary")
        return
    header_terminator = crlf + crlf
    for section in data[:closing_at].split(delimiter + crlf):
        if not section:
            continue
        header_end = section.find(header_terminator)
        if header_end == -1:
            logging.warning("multipart/form-data missing headers")
            continue
        part_headers = HTTPHeaders.parse(section[:header_end].decode("utf-8"))
        disposition, params = _parse_header(
            part_headers.get("Content-Disposition", ""))
        if not (disposition == "form-data" and section.endswith(crlf)):
            logging.warning("Invalid multipart/form-data")
            continue
        field_name = params.get("name")
        if not field_name:
            logging.warning("multipart/form-data value missing name")
            continue
        # skip the 4-byte blank line after the headers and the trailing
        # CRLF that precedes the next delimiter
        payload = section[header_end + 4:-2]
        upload_name = params.get("filename")
        if not upload_name:
            arguments.setdefault(field_name, []).append(payload)
            continue
        declared_type = part_headers.get("Content-Type",
                                         "application/unknown")
        files.setdefault(field_name, []).append(HTTPFile(
            filename=upload_name, body=payload,
            content_type=declared_type))


# _parseparam and _parse_header are copied and modified from python2.7's cgi.py
# The original 2.7 version of this code did not correctly support some
# combinations of semicolons and double quotes.
def _parseparam(s):
    while s[:1] == ';':
        s = s[1:]
        end = s.find(';')
        while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:
            end = s.find(';', end + 1)
        if end < 0:
            end = len(s)
        f = s[:end]
        yield f.strip()
        s = s[end:]


def _parse_header(line):
    """Parse a Content-type like header.

    Return the main content-type and a dictionary of options.

    Option names are stripped and lower-cased.  An option value enclosed
    in double quotes has the quotes removed and its backslash escapes
    (``\\\\`` and ``\\"``) undone.  Pieces without an "=" are ignored.
    """
    # The leading ';' guarantees _parseparam yields at least one piece, so
    # index 0 always exists.  Building a list avoids the generator's
    # ``.next()`` method, which only exists on Python 2.
    parts = list(_parseparam(';' + line))
    key = parts[0]
    pdict = {}
    for p in parts[1:]:
        name, sep, value = p.partition('=')
        if not sep:
            continue
        name = name.strip().lower()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] == '"':
            value = value[1:-1]
            value = value.replace('\\\\', '\\').replace('\\"', '"')
        pdict[name] = value
    return key, pdict


def doctests():
    """Returns the suite produced by ``doctest.DocTestSuite()``."""
    from doctest import DocTestSuite
    # Called without a module argument, and directly from this function.
    return DocTestSuite()
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`#!/usr/bin/env python`
			`#`
			`# Copyright 2009 Facebook`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License"); you may`
			`# not use this file except in compliance with the License. You may obtain`
			`# a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT`
			`# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the`
			`# License for the specific language governing permissions and limitations`
			`# under the License.`

			`"""HTTP utility code shared by clients and servers."""`

Update tornado 13 years ago			`from __future__ import absolute_import, division, with_statement`

Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`import logging`
			`import urllib`
			`import re`

Update tornado 13 years ago			`from tornado.escape import native_str, parse_qs_bytes, utf8`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`from tornado.util import b, ObjectDict`

Update tornado 13 years ago
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`class HTTPHeaders(dict):`
			`"""A dictionary that maintains Http-Header-Case for all keys.`

			`Supports multiple values per key via a pair of new methods,`
			`add() and get_list(). The regular dictionary interface returns a single`
			`value per key, with multiple values joined by a comma.`

			`>>> h = HTTPHeaders({"content-type": "text/html"})`
			`>>> h.keys()`
			`['Content-Type']`
			`>>> h["Content-Type"]`
			`'text/html'`

			`>>> h.add("Set-Cookie", "A=B")`
			`>>> h.add("Set-Cookie", "C=D")`
			`>>> h["set-cookie"]`
			`'A=B,C=D'`
			`>>> h.get_list("set-cookie")`
			`['A=B', 'C=D']`

			`>>> for (k,v) in sorted(h.get_all()):`
			`... print '%s: %s' % (k,v)`
			`...`
			`Content-Type: text/html`
			`Set-Cookie: A=B`
			`Set-Cookie: C=D`
			`"""`
			`def __init__(self, *args, **kwargs):`
			`# Don't pass args or kwargs to dict.__init__, as it will bypass`
			`# our __setitem__`
			`dict.__init__(self)`
			`self._as_list = {}`
			`self._last_key = None`
Update tornado 13 years ago			`if (len(args) == 1 and len(kwargs) == 0 and`
			`isinstance(args[0], HTTPHeaders)):`
			`# Copy constructor`
			`for k, v in args[0].get_all():`
			`self.add(k, v)`
			`else:`
			`# Dict-style initialization`
			`self.update(*args, **kwargs)`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago
			`# new public methods`

			`def add(self, name, value):`
			`"""Adds a new value for the given key."""`
			`norm_name = HTTPHeaders._normalize_name(name)`
			`self._last_key = norm_name`
			`if norm_name in self:`
			`# bypass our override of __setitem__ since it modifies _as_list`
			`dict.__setitem__(self, norm_name, self[norm_name] + ',' + value)`
			`self._as_list[norm_name].append(value)`
			`else:`
			`self[norm_name] = value`

			`def get_list(self, name):`
			`"""Returns all values for the given header as a list."""`
			`norm_name = HTTPHeaders._normalize_name(name)`
			`return self._as_list.get(norm_name, [])`

			`def get_all(self):`
			`"""Returns an iterable of all (name, value) pairs.`

			`If a header has multiple values, multiple pairs will be`
			`returned with the same name.`
			`"""`
			`for name, list in self._as_list.iteritems():`
			`for value in list:`
			`yield (name, value)`

			`def parse_line(self, line):`
			`"""Updates the dictionary with a single header line.`

			`>>> h = HTTPHeaders()`
			`>>> h.parse_line("Content-Type: text/html")`
			`>>> h.get('content-type')`
			`'text/html'`
			`"""`
			`if line[0].isspace():`
			`# continuation of a multi-line header`
			`new_part = ' ' + line.lstrip()`
			`self._as_list[self._last_key][-1] += new_part`
			`dict.__setitem__(self, self._last_key,`
			`self[self._last_key] + new_part)`
			`else:`
			`name, value = line.split(":", 1)`
			`self.add(name, value.strip())`

			`@classmethod`
			`def parse(cls, headers):`
			`"""Returns a dictionary from HTTP header text.`

			`>>> h = HTTPHeaders.parse("Content-Type: text/html\\r\\nContent-Length: 42\\r\\n")`
			`>>> sorted(h.iteritems())`
			`[('Content-Length', '42'), ('Content-Type', 'text/html')]`
			`"""`
			`h = cls()`
			`for line in headers.splitlines():`
			`if line:`
			`h.parse_line(line)`
			`return h`

			`# dict implementation overrides`

			`def __setitem__(self, name, value):`
			`norm_name = HTTPHeaders._normalize_name(name)`
			`dict.__setitem__(self, norm_name, value)`
			`self._as_list[norm_name] = [value]`

			`def __getitem__(self, name):`
			`return dict.__getitem__(self, HTTPHeaders._normalize_name(name))`

			`def __delitem__(self, name):`
			`norm_name = HTTPHeaders._normalize_name(name)`
			`dict.__delitem__(self, norm_name)`
			`del self._as_list[norm_name]`

			`def __contains__(self, name):`
			`norm_name = HTTPHeaders._normalize_name(name)`
			`return dict.__contains__(self, norm_name)`

			`def get(self, name, default=None):`
			`return dict.get(self, HTTPHeaders._normalize_name(name), default)`

			`def update(self, *args, **kwargs):`
			`# dict.update bypasses our __setitem__`
			`for k, v in dict(*args, **kwargs).iteritems():`
			`self[k] = v`

Update tornado 13 years ago			`def copy(self):`
			`# default implementation returns dict(self), not the subclass`
			`return HTTPHeaders(self)`

Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`_NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$')`
			`_normalized_headers = {}`

			`@staticmethod`
			`def _normalize_name(name):`
			`"""Converts a name to Http-Header-Case.`

			`>>> HTTPHeaders._normalize_name("coNtent-TYPE")`
			`'Content-Type'`
			`"""`
			`try:`
			`return HTTPHeaders._normalized_headers[name]`
			`except KeyError:`
			`if HTTPHeaders._NORMALIZED_HEADER_RE.match(name):`
			`normalized = name`
			`else:`
			`normalized = "-".join([w.capitalize() for w in name.split("-")])`
			`HTTPHeaders._normalized_headers[name] = normalized`
			`return normalized`


			`def url_concat(url, args):`
			`"""Concatenate url and argument dictionary regardless of whether`
			`url has existing query parameters.`

			`>>> url_concat("http://example.com/foo?a=b", dict(c="d"))`
			`'http://example.com/foo?a=b&c=d'`
			`"""`
Update tornado 13 years ago			`if not args:`
			`return url`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`if url[-1] not in ('?', '&'):`
			`url += '&' if ('?' in url) else '?'`
			`return url + urllib.urlencode(args)`


			`class HTTPFile(ObjectDict):`
			`"""Represents an HTTP file. For backwards compatibility, its instance`
			`attributes are also accessible as dictionary keys.`

			`:ivar filename:`
			`:ivar body:`
			`:ivar content_type: The content_type comes from the provided HTTP header`
			`and should not be trusted outright given that it can be easily forged.`
			`"""`
			`pass`


Update tornado 13 years ago			`def parse_body_arguments(content_type, body, arguments, files):`
			`if content_type.startswith("application/x-www-form-urlencoded"):`
			`uri_arguments = parse_qs_bytes(native_str(body))`
			`for name, values in uri_arguments.iteritems():`
			`values = [v for v in values if v]`
			`if values:`
			`arguments.setdefault(name, []).extend(values)`
			`elif content_type.startswith("multipart/form-data"):`
			`fields = content_type.split(";")`
			`for field in fields:`
			`k, sep, v = field.strip().partition("=")`
			`if k == "boundary" and v:`
			`parse_multipart_form_data(utf8(v), body, arguments, files)`
			`break`
			`else:`
			`logging.warning("Invalid multipart/form-data")`


Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`def parse_multipart_form_data(boundary, data, arguments, files):`
			`"""Parses a multipart/form-data body.`

			`The boundary and data parameters are both byte strings.`
			`The dictionaries given in the arguments and files parameters`
			`will be updated with the contents of the body.`
			`"""`
			`# The standard allows for the boundary to be quoted in the header,`
			`# although it's rare (it happens at least for google app engine`
			`# xmpp). I think we're also supposed to handle backslash-escapes`
			`# here but I'll save that until we see a client that uses them`
			`# in the wild.`
			`if boundary.startswith(b('"')) and boundary.endswith(b('"')):`
			`boundary = boundary[1:-1]`
Update tornado 13 years ago			`final_boundary_index = data.rfind(b("--") + boundary + b("--"))`
			`if final_boundary_index == -1:`
			`logging.warning("Invalid multipart/form-data: no final boundary")`
			`return`
			`parts = data[:final_boundary_index].split(b("--") + boundary + b("\r\n"))`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`for part in parts:`
Update tornado 13 years ago			`if not part:`
			`continue`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`eoh = part.find(b("\r\n\r\n"))`
			`if eoh == -1:`
			`logging.warning("multipart/form-data missing headers")`
			`continue`
			`headers = HTTPHeaders.parse(part[:eoh].decode("utf-8"))`
			`disp_header = headers.get("Content-Disposition", "")`
			`disposition, disp_params = _parse_header(disp_header)`
			`if disposition != "form-data" or not part.endswith(b("\r\n")):`
			`logging.warning("Invalid multipart/form-data")`
			`continue`
			`value = part[eoh + 4:-2]`
			`if not disp_params.get("name"):`
			`logging.warning("multipart/form-data value missing name")`
			`continue`
			`name = disp_params["name"]`
			`if disp_params.get("filename"):`
			`ctype = headers.get("Content-Type", "application/unknown")`
			`files.setdefault(name, []).append(HTTPFile(`
			`filename=disp_params["filename"], body=value,`
			`content_type=ctype))`
			`else:`
			`arguments.setdefault(name, []).append(value)`


			`# _parseparam and _parse_header are copied and modified from python2.7's cgi.py`
			`# The original 2.7 version of this code did not correctly support some`
			`# combinations of semicolons and double quotes.`
			`def _parseparam(s):`
			`while s[:1] == ';':`
			`s = s[1:]`
			`end = s.find(';')`
			`while end > 0 and (s.count('"', 0, end) - s.count('\\"', 0, end)) % 2:`
			`end = s.find(';', end + 1)`
			`if end < 0:`
			`end = len(s)`
			`f = s[:end]`
			`yield f.strip()`
			`s = s[end:]`

Update tornado 13 years ago
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`def _parse_header(line):`
			`"""Parse a Content-type like header.`

			`Return the main content-type and a dictionary of options.`

			`"""`
			`parts = _parseparam(';' + line)`
			`key = parts.next()`
			`pdict = {}`
			`for p in parts:`
			`i = p.find('=')`
			`if i >= 0:`
			`name = p[:i].strip().lower()`
Update tornado 13 years ago			`value = p[i + 1:].strip()`
Use tornado wsgi server as werkzeug crashes on Internet Explorer 13 years ago			`if len(value) >= 2 and value[0] == value[-1] == '"':`
			`value = value[1:-1]`
			`value = value.replace('\\\\', '\\').replace('\\"', '"')`
			`pdict[name] = value`
			`return key, pdict`


			`def doctests():`
			`import doctest`
			`return doctest.DocTestSuite()`