diff --git a/libs/tornado/__init__.py b/libs/tornado/__init__.py index 68434e1..4a264c3 100755 --- a/libs/tornado/__init__.py +++ b/libs/tornado/__init__.py @@ -25,5 +25,5 @@ from __future__ import absolute_import, division, print_function, with_statement # is zero for an official release, positive for a development branch, # or negative for a release candidate or beta (after the base version # number has been incremented) -version = "3.1.dev2" -version_info = (3, 1, 0, -99) +version = "3.1b1" +version_info = (3, 1, 0, -98) diff --git a/libs/tornado/auth.py b/libs/tornado/auth.py index df95884..42400e1 100755 --- a/libs/tornado/auth.py +++ b/libs/tornado/auth.py @@ -43,7 +43,7 @@ Example usage for Google OpenID:: user = yield self.get_authenticated_user() # Save the user with e.g. set_secure_cookie() else: - self.authenticate_redirect() + yield self.authenticate_redirect() """ from __future__ import absolute_import, division, print_function, with_statement @@ -119,8 +119,10 @@ class OpenIdMixin(object): * ``_OPENID_ENDPOINT``: the identity provider's URI. """ + @return_future def authenticate_redirect(self, callback_uri=None, - ax_attrs=["name", "email", "language", "username"]): + ax_attrs=["name", "email", "language", "username"], + callback=None): """Redirects to the authentication URL for this service. After authentication, the service will redirect back to the given @@ -130,10 +132,17 @@ class OpenIdMixin(object): default (name, email, language, and username). If you don't need all those attributes for your app, you can request fewer with the ax_attrs keyword argument. + + .. versionchanged:: 3.1 + Returns a `.Future` and takes an optional callback. These are + not strictly necessary as this method is synchronous, + but they are supplied for consistency with + `OAuthMixin.authorize_redirect`. """ callback_uri = callback_uri or self.request.uri args = self._openid_args(callback_uri, ax_attrs=ax_attrs) self.redirect(self._OPENID_ENDPOINT + "?" + urllib_parse.urlencode(args)) + callback() @_auth_return_future def get_authenticated_user(self, callback, http_client=None): @@ -291,9 +300,9 @@ class OAuthMixin(object): Subclasses must also override the `_oauth_get_user_future` and `_oauth_consumer_token` methods. """ - + @return_future def authorize_redirect(self, callback_uri=None, extra_params=None, - http_client=None): + http_client=None, callback=None): """Redirects the user to obtain OAuth authorization for this service. The ``callback_uri`` may be omitted if you have previously @@ -305,6 +314,17 @@ class OAuthMixin(object): This method sets a cookie called ``_oauth_request_token`` which is subsequently used (and cleared) in `get_authenticated_user` for security purposes. + + Note that this method is asynchronous, although it calls + `.RequestHandler.finish` for you so it may not be necessary + to pass a callback or use the `.Future` it returns. However, + if this method is called from a function decorated with + `.gen.coroutine`, you must call it with ``yield`` to keep the + response from being closed prematurely. + + .. versionchanged:: 3.1 + Now returns a `.Future` and takes an optional callback, for + compatibility with `.gen.coroutine`. 
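+
+        For illustration only, a coroutine-style handler might use this
+        method as follows (the handler class and its URL mapping are
+        hypothetical, not part of this patch)::
+
+            class TwitterLoginHandler(tornado.web.RequestHandler,
+                                      tornado.auth.TwitterMixin):
+                @tornado.gen.coroutine
+                def get(self):
+                    if self.get_argument("oauth_token", None):
+                        user = yield self.get_authenticated_user()
+                        # save the user here
+                    else:
+                        yield self.authorize_redirect()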
""" if callback_uri and getattr(self, "_OAUTH_NO_CALLBACKS", False): raise Exception("This service does not support oauth_callback") @@ -317,13 +337,15 @@ class OAuthMixin(object): self.async_callback( self._on_request_token, self._OAUTH_AUTHORIZE_URL, - callback_uri)) + callback_uri, + callback)) else: http_client.fetch( self._oauth_request_token_url(), self.async_callback( self._on_request_token, self._OAUTH_AUTHORIZE_URL, - callback_uri)) + callback_uri, + callback)) @_auth_return_future def get_authenticated_user(self, callback, http_client=None): @@ -384,9 +406,10 @@ class OAuthMixin(object): args["oauth_signature"] = signature return url + "?" + urllib_parse.urlencode(args) - def _on_request_token(self, authorize_url, callback_uri, response): + def _on_request_token(self, authorize_url, callback_uri, callback, + response): if response.error: - raise Exception("Could not get request token") + raise Exception("Could not get request token: %s" % response.error) request_token = _oauth_parse_response(response.body) data = (base64.b64encode(escape.utf8(request_token["key"])) + b"|" + base64.b64encode(escape.utf8(request_token["secret"]))) @@ -394,11 +417,13 @@ class OAuthMixin(object): args = dict(oauth_token=request_token["key"]) if callback_uri == "oob": self.finish(authorize_url + "?" + urllib_parse.urlencode(args)) + callback() return elif callback_uri: args["oauth_callback"] = urlparse.urljoin( self.request.full_url(), callback_uri) self.redirect(authorize_url + "?" + urllib_parse.urlencode(args)) + callback() def _oauth_access_token_url(self, request_token): consumer_token = self._oauth_consumer_token() @@ -521,9 +546,10 @@ class OAuth2Mixin(object): * ``_OAUTH_AUTHORIZE_URL``: The service's authorization url. * ``_OAUTH_ACCESS_TOKEN_URL``: The service's access token url. """ - + @return_future def authorize_redirect(self, redirect_uri=None, client_id=None, - client_secret=None, extra_params=None): + client_secret=None, extra_params=None, + callback=None): """Redirects the user to obtain OAuth authorization for this service. Some providers require that you register a redirect URL with @@ -531,6 +557,12 @@ class OAuth2Mixin(object): should call this method to log the user in, and then call ``get_authenticated_user`` in the handler for your redirect URL to complete the authorization process. + + .. versionchanged:: 3.1 + Returns a `.Future` and takes an optional callback. These are + not strictly necessary as this method is synchronous, + but they are supplied for consistency with + `OAuthMixin.authorize_redirect`. """ args = { "redirect_uri": redirect_uri, @@ -540,6 +572,7 @@ class OAuth2Mixin(object): args.update(extra_params) self.redirect( url_concat(self._OAUTH_AUTHORIZE_URL, args)) + callback() def _oauth_request_token_url(self, redirect_uri=None, client_id=None, client_secret=None, code=None, @@ -578,35 +611,42 @@ class TwitterMixin(OAuthMixin): user = yield self.get_authenticated_user() # Save the user using e.g. 
set_secure_cookie() else: - self.authorize_redirect() + yield self.authorize_redirect() The user object returned by `~OAuthMixin.get_authenticated_user` includes the attributes ``username``, ``name``, ``access_token``, and all of the custom Twitter user attributes described at https://dev.twitter.com/docs/api/1.1/get/users/show """ - _OAUTH_REQUEST_TOKEN_URL = "http://api.twitter.com/oauth/request_token" - _OAUTH_ACCESS_TOKEN_URL = "http://api.twitter.com/oauth/access_token" - _OAUTH_AUTHORIZE_URL = "http://api.twitter.com/oauth/authorize" - _OAUTH_AUTHENTICATE_URL = "http://api.twitter.com/oauth/authenticate" + _OAUTH_REQUEST_TOKEN_URL = "https://api.twitter.com/oauth/request_token" + _OAUTH_ACCESS_TOKEN_URL = "https://api.twitter.com/oauth/access_token" + _OAUTH_AUTHORIZE_URL = "https://api.twitter.com/oauth/authorize" + _OAUTH_AUTHENTICATE_URL = "https://api.twitter.com/oauth/authenticate" _OAUTH_NO_CALLBACKS = False - _TWITTER_BASE_URL = "http://api.twitter.com/1" + _TWITTER_BASE_URL = "https://api.twitter.com/1.1" - def authenticate_redirect(self, callback_uri=None): + @return_future + def authenticate_redirect(self, callback_uri=None, callback=None): """Just like `~OAuthMixin.authorize_redirect`, but auto-redirects if authorized. This is generally the right interface to use if you are using Twitter for single-sign on. + + .. versionchanged:: 3.1 + Now returns a `.Future` and takes an optional callback, for + compatibility with `.gen.coroutine`. """ http = self.get_auth_http_client() - http.fetch(self._oauth_request_token_url(callback_uri=callback_uri), self.async_callback( - self._on_request_token, self._OAUTH_AUTHENTICATE_URL, None)) + http.fetch(self._oauth_request_token_url(callback_uri=callback_uri), + self.async_callback( + self._on_request_token, self._OAUTH_AUTHENTICATE_URL, + None, callback)) @_auth_return_future def twitter_request(self, path, callback=None, access_token=None, post_args=None, **args): - """Fetches the given API path, e.g., ``/statuses/user_timeline/btaylor`` + """Fetches the given API path, e.g., ``statuses/user_timeline/btaylor`` The path should not include the format or API version number. (we automatically use JSON format and API version 1). @@ -635,7 +675,7 @@ class TwitterMixin(OAuthMixin): access_token=self.current_user["access_token"]) if not new_entry: # Call failed; perhaps missing permission? - self.authorize_redirect() + yield self.authorize_redirect() return self.finish("Posted a message!") @@ -683,7 +723,7 @@ class TwitterMixin(OAuthMixin): @gen.coroutine def _oauth_get_user_future(self, access_token): user = yield self.twitter_request( - "/users/show/" + escape.native_str(access_token["screen_name"]), + "/account/verify_credentials", access_token=access_token) if user: user["username"] = user["screen_name"] @@ -712,7 +752,7 @@ class FriendFeedMixin(OAuthMixin): user = yield self.get_authenticated_user() # Save the user using e.g. set_secure_cookie() else: - self.authorize_redirect() + yield self.authorize_redirect() The user object returned by `~OAuthMixin.get_authenticated_user()` includes the attributes ``username``, ``name``, and ``description`` in addition to @@ -760,7 +800,7 @@ class FriendFeedMixin(OAuthMixin): if not new_entry: # Call failed; perhaps missing permission? - self.authorize_redirect() + yield self.authorize_redirect() return self.finish("Posted a message!") @@ -841,13 +881,15 @@ class GoogleMixin(OpenIdMixin, OAuthMixin): user = yield self.get_authenticated_user() # Save the user with e.g. 
set_secure_cookie() else: - self.authenticate_redirect() + yield self.authenticate_redirect() """ _OPENID_ENDPOINT = "https://www.google.com/accounts/o8/ud" _OAUTH_ACCESS_TOKEN_URL = "https://www.google.com/accounts/OAuthGetAccessToken" + @return_future def authorize_redirect(self, oauth_scope, callback_uri=None, - ax_attrs=["name", "email", "language", "username"]): + ax_attrs=["name", "email", "language", "username"], + callback=None): """Authenticates and authorizes for the given Google resource. Some of the available resources which can be used in the ``oauth_scope`` @@ -859,11 +901,18 @@ class GoogleMixin(OpenIdMixin, OAuthMixin): You can authorize multiple resources by separating the resource URLs with a space. + + .. versionchanged:: 3.1 + Returns a `.Future` and takes an optional callback. These are + not strictly necessary as this method is synchronous, + but they are supplied for consistency with + `OAuthMixin.authorize_redirect`. """ callback_uri = callback_uri or self.request.uri args = self._openid_args(callback_uri, ax_attrs=ax_attrs, oauth_scope=oauth_scope) self.redirect(self._OPENID_ENDPOINT + "?" + urllib_parse.urlencode(args)) + callback() @_auth_return_future def get_authenticated_user(self, callback): @@ -918,7 +967,7 @@ class FacebookMixin(object): if self.get_argument("session", None): self.get_authenticated_user(self.async_callback(self._on_auth)) return - self.authenticate_redirect() + yield self.authenticate_redirect() def _on_auth(self, user): if not user: @@ -931,9 +980,17 @@ class FacebookMixin(object): required to make requests on behalf of the user later with `facebook_request`. """ + @return_future def authenticate_redirect(self, callback_uri=None, cancel_uri=None, - extended_permissions=None): - """Authenticates/installs this app for the current user.""" + extended_permissions=None, callback=None): + """Authenticates/installs this app for the current user. + + .. versionchanged:: 3.1 + Returns a `.Future` and takes an optional callback. These are + not strictly necessary as this method is synchronous, + but they are supplied for consistency with + `OAuthMixin.authorize_redirect`. + """ self.require_setting("facebook_api_key", "Facebook Connect") callback_uri = callback_uri or self.request.uri args = { @@ -953,9 +1010,10 @@ class FacebookMixin(object): args["req_perms"] = ",".join(extended_permissions) self.redirect("http://www.facebook.com/login.php?" + urllib_parse.urlencode(args)) + callback() def authorize_redirect(self, extended_permissions, callback_uri=None, - cancel_uri=None): + cancel_uri=None, callback=None): """Redirects to an authorization request for the given FB resource. The available resource names are listed at @@ -971,9 +1029,16 @@ class FacebookMixin(object): names. To get the session secret and session key, call get_authenticated_user() just as you would with authenticate_redirect(). + + .. versionchanged:: 3.1 + Returns a `.Future` and takes an optional callback. These are + not strictly necessary as this method is synchronous, + but they are supplied for consistency with + `OAuthMixin.authorize_redirect`. """ - self.authenticate_redirect(callback_uri, cancel_uri, - extended_permissions) + return self.authenticate_redirect(callback_uri, cancel_uri, + extended_permissions, + callback=callback) def get_authenticated_user(self, callback): """Fetches the authenticated Facebook user. @@ -1095,6 +1160,7 @@ class FacebookGraphMixin(OAuth2Mixin): _OAUTH_ACCESS_TOKEN_URL = "https://graph.facebook.com/oauth/access_token?" 
_OAUTH_AUTHORIZE_URL = "https://graph.facebook.com/oauth/authorize?" _OAUTH_NO_CALLBACKS = False + _FACEBOOK_BASE_URL = "https://graph.facebook.com" @_auth_return_future def get_authenticated_user(self, redirect_uri, client_id, client_secret, @@ -1115,7 +1181,7 @@ class FacebookGraphMixin(OAuth2Mixin): code=self.get_argument("code")) # Save the user with e.g. set_secure_cookie else: - self.authorize_redirect( + yield self.authorize_redirect( redirect_uri='/auth/facebookgraph/', client_id=self.settings["facebook_api_key"], extra_params={"scope": "read_stream,offline_access"}) @@ -1201,11 +1267,17 @@ class FacebookGraphMixin(OAuth2Mixin): if not new_entry: # Call failed; perhaps missing permission? - self.authorize_redirect() + yield self.authorize_redirect() return self.finish("Posted a message!") + + The given path is relative to ``self._FACEBOOK_BASE_URL``, + by default "https://graph.facebook.com". + + .. versionchanged:: 3.1 + Added the ability to override ``self._FACEBOOK_BASE_URL``. """ - url = "https://graph.facebook.com" + path + url = self._FACEBOOK_BASE_URL + path all_args = {} if access_token: all_args["access_token"] = access_token @@ -1223,8 +1295,8 @@ class FacebookGraphMixin(OAuth2Mixin): def _on_facebook_request(self, future, response): if response.error: - future.set_exception(AuthError("Error response %s fetching %s", - response.error, response.request.url)) + future.set_exception(AuthError("Error response %s fetching %s" % + (response.error, response.request.url))) return future.set_result(escape.json_decode(response.body)) diff --git a/libs/tornado/concurrent.py b/libs/tornado/concurrent.py index 15a039c..8a4f228 100755 --- a/libs/tornado/concurrent.py +++ b/libs/tornado/concurrent.py @@ -140,6 +140,9 @@ class DummyExecutor(object): future.set_exc_info(sys.exc_info()) return future + def shutdown(self, wait=True): + pass + dummy_executor = DummyExecutor() diff --git a/libs/tornado/curl_httpclient.py b/libs/tornado/curl_httpclient.py index adc2314..e090056 100755 --- a/libs/tornado/curl_httpclient.py +++ b/libs/tornado/curl_httpclient.py @@ -111,13 +111,19 @@ class CurlAsyncHTTPClient(AsyncHTTPClient): del self._fds[fd] else: ioloop_event = event_map[event] - if fd not in self._fds: - self.io_loop.add_handler(fd, self._handle_events, - ioloop_event) - self._fds[fd] = ioloop_event - else: - self.io_loop.update_handler(fd, ioloop_event) - self._fds[fd] = ioloop_event + # libcurl sometimes closes a socket and then opens a new + # one using the same FD without giving us a POLL_NONE in + # between. This is a problem with the epoll IOLoop, + # because the kernel can tell when a socket is closed and + # removes it from the epoll automatically, causing future + # update_handler calls to fail. Since we can't tell when + # this has happened, always use remove and re-add + # instead of update. 
+ if fd in self._fds: + self.io_loop.remove_handler(fd) + self.io_loop.add_handler(fd, self._handle_events, + ioloop_event) + self._fds[fd] = ioloop_event def _set_timeout(self, msecs): """Called by libcurl to schedule a timeout.""" @@ -385,7 +391,7 @@ def _curl_setup_request(curl, request, buffer, headers): "PUT": pycurl.UPLOAD, "HEAD": pycurl.NOBODY, } - custom_methods = set(["DELETE"]) + custom_methods = set(["DELETE", "OPTIONS", "PATCH"]) for o in curl_options.values(): curl.setopt(o, False) if request.method in curl_options: diff --git a/libs/tornado/escape.py b/libs/tornado/escape.py index 016fdad..91c3e74 100755 --- a/libs/tornado/escape.py +++ b/libs/tornado/escape.py @@ -64,6 +64,9 @@ def xhtml_unescape(value): return re.sub(r"&(#?)(\w+?);", _convert_entity, _unicode(value)) +# The fact that json_encode wraps json.dumps is an implementation detail. +# Please see https://github.com/facebook/tornado/pull/706 +# before sending a pull request that adds **kwargs to this function. def json_encode(value): """JSON-encodes the given Python object.""" # JSON permits but does not require forward slashes to be escaped. @@ -85,41 +88,76 @@ def squeeze(value): return re.sub(r"[\x00-\x20]+", " ", value).strip() -def url_escape(value): - """Returns a URL-encoded version of the given value.""" - return urllib_parse.quote_plus(utf8(value)) +def url_escape(value, plus=True): + """Returns a URL-encoded version of the given value. + + If ``plus`` is true (the default), spaces will be represented + as "+" instead of "%20". This is appropriate for query strings + but not for the path component of a URL. Note that this default + is the reverse of Python's urllib module. + + .. versionadded:: 3.1 + The ``plus`` argument + """ + quote = urllib_parse.quote_plus if plus else urllib_parse.quote + return quote(utf8(value)) + # python 3 changed things around enough that we need two separate # implementations of url_unescape. We also need our own implementation # of parse_qs since python 3's version insists on decoding everything. if sys.version_info[0] < 3: - def url_unescape(value, encoding='utf-8'): + def url_unescape(value, encoding='utf-8', plus=True): """Decodes the given value from a URL. The argument may be either a byte or unicode string. If encoding is None, the result will be a byte string. Otherwise, the result is a unicode string in the specified encoding. + + If ``plus`` is true (the default), plus signs will be interpreted + as spaces (literal plus signs must be represented as "%2B"). This + is appropriate for query strings and form-encoded values but not + for the path component of a URL. Note that this default is the + reverse of Python's urllib module. + + .. versionadded:: 3.1 + The ``plus`` argument """ + unquote = (urllib_parse.unquote_plus if plus else urllib_parse.unquote) if encoding is None: - return urllib_parse.unquote_plus(utf8(value)) + return unquote(utf8(value)) else: - return unicode_type(urllib_parse.unquote_plus(utf8(value)), encoding) + return unicode_type(unquote(utf8(value)), encoding) parse_qs_bytes = _parse_qs else: - def url_unescape(value, encoding='utf-8'): + def url_unescape(value, encoding='utf-8', plus=True): """Decodes the given value from a URL. The argument may be either a byte or unicode string. If encoding is None, the result will be a byte string. Otherwise, the result is a unicode string in the specified encoding. + + If ``plus`` is true (the default), plus signs will be interpreted + as spaces (literal plus signs must be represented as "%2B"). 
This + is appropriate for query strings and form-encoded values but not + for the path component of a URL. Note that this default is the + reverse of Python's urllib module. + + .. versionadded:: 3.1 + The ``plus`` argument """ if encoding is None: + if plus: + # unquote_to_bytes doesn't have a _plus variant + value = to_basestring(value).replace('+', ' ') return urllib_parse.unquote_to_bytes(value) else: - return urllib_parse.unquote_plus(to_basestring(value), encoding=encoding) + unquote = (urllib_parse.unquote_plus if plus + else urllib_parse.unquote) + return unquote(to_basestring(value), encoding=encoding) def parse_qs_bytes(qs, keep_blank_values=False, strict_parsing=False): """Parses a query string like urlparse.parse_qs, but returns the @@ -150,7 +188,8 @@ def utf8(value): """ if isinstance(value, _UTF8_TYPES): return value - assert isinstance(value, unicode_type) + assert isinstance(value, unicode_type), \ + "Expected bytes, unicode, or None; got %r" % type(value) return value.encode("utf-8") _TO_UNICODE_TYPES = (unicode_type, type(None)) @@ -164,7 +203,8 @@ def to_unicode(value): """ if isinstance(value, _TO_UNICODE_TYPES): return value - assert isinstance(value, bytes_type) + assert isinstance(value, bytes_type), \ + "Expected bytes, unicode, or None; got %r" % type(value) return value.decode("utf-8") # to_unicode was previously named _unicode not because it was private, @@ -192,7 +232,8 @@ def to_basestring(value): """ if isinstance(value, _BASESTRING_TYPES): return value - assert isinstance(value, bytes_type) + assert isinstance(value, bytes_type), \ + "Expected bytes, unicode, or None; got %r" % type(value) return value.decode("utf-8") diff --git a/libs/tornado/gen.py b/libs/tornado/gen.py index 938fef6..92b7458 100755 --- a/libs/tornado/gen.py +++ b/libs/tornado/gen.py @@ -19,7 +19,6 @@ For example, the following asynchronous handler:: could be written with ``gen`` as:: class GenAsyncHandler(RequestHandler): - @asynchronous @gen.coroutine def get(self): http_client = AsyncHTTPClient() @@ -136,7 +135,7 @@ def engine(func): if runner is not None: return runner.handle_exception(typ, value, tb) return False - with ExceptionStackContext(handle_exception): + with ExceptionStackContext(handle_exception) as deactivate: try: result = func(*args, **kwargs) except (Return, StopIteration) as e: @@ -149,6 +148,7 @@ def engine(func): "@gen.engine functions cannot return values: " "%r" % (value,)) assert value is None + deactivate() runner = Runner(result, final_callback) runner.run() return @@ -156,6 +156,7 @@ def engine(func): raise ReturnValueIgnoredError( "@gen.engine functions cannot return values: %r" % (result,)) + deactivate() # no yield, so we're done return wrapper @@ -164,13 +165,7 @@ def coroutine(func): """Decorator for asynchronous generators. Any generator that yields objects from this module must be wrapped - in either this decorator or `engine`. These decorators only work - on functions that are already asynchronous. For - `~tornado.web.RequestHandler` :ref:`HTTP verb methods ` methods, this - means that both the `tornado.web.asynchronous` and - `tornado.gen.coroutine` decorators must be used (for proper - exception handling, ``asynchronous`` should come before - ``gen.coroutine``). + in either this decorator or `engine`. Coroutines may "return" by raising the special exception `Return(value) `. 
In Python 3.3+, it is also possible for @@ -208,21 +203,24 @@ def coroutine(func): typ, value, tb = sys.exc_info() future.set_exc_info((typ, value, tb)) return True - with ExceptionStackContext(handle_exception): + with ExceptionStackContext(handle_exception) as deactivate: try: result = func(*args, **kwargs) except (Return, StopIteration) as e: result = getattr(e, 'value', None) except Exception: + deactivate() future.set_exc_info(sys.exc_info()) return future else: if isinstance(result, types.GeneratorType): def final_callback(value): + deactivate() future.set_result(value) runner = Runner(result, final_callback) runner.run() return future + deactivate() future.set_result(result) return future return wrapper @@ -439,6 +437,9 @@ class _NullYieldPoint(YieldPoint): return None +_null_yield_point = _NullYieldPoint() + + class Runner(object): """Internal implementation of `tornado.gen.engine`. @@ -449,7 +450,7 @@ class Runner(object): def __init__(self, gen, final_callback): self.gen = gen self.final_callback = final_callback - self.yield_point = _NullYieldPoint() + self.yield_point = _null_yield_point self.pending_callbacks = set() self.results = {} self.running = False @@ -493,6 +494,7 @@ class Runner(object): if not self.yield_point.is_ready(): return next = self.yield_point.get_result() + self.yield_point = None except Exception: self.exc_info = sys.exc_info() try: @@ -505,6 +507,7 @@ class Runner(object): yielded = self.gen.send(next) except (StopIteration, Return) as e: self.finished = True + self.yield_point = _null_yield_point if self.pending_callbacks and not self.had_exception: # If we ran cleanly without waiting on all callbacks # raise an error (really more of a warning). If we @@ -518,6 +521,7 @@ class Runner(object): return except Exception: self.finished = True + self.yield_point = _null_yield_point raise if isinstance(yielded, list): yielded = Multi(yielded) @@ -531,7 +535,7 @@ class Runner(object): self.exc_info = sys.exc_info() else: self.exc_info = (BadYieldError( - "yielded unknown object %r" % (yielded,)),) + "yielded unknown object %r" % (yielded,)),) finally: self.running = False diff --git a/libs/tornado/httpclient.py b/libs/tornado/httpclient.py index 551fd0b..a34eb66 100755 --- a/libs/tornado/httpclient.py +++ b/libs/tornado/httpclient.py @@ -53,7 +53,7 @@ class HTTPClient(object): print response.body except httpclient.HTTPError as e: print "Error:", e - httpclient.close() + http_client.close() """ def __init__(self, async_client_class=None, **kwargs): self._io_loop = IOLoop() @@ -258,6 +258,7 @@ class HTTPRequest(object): :arg string url: URL to fetch :arg string method: HTTP method, e.g. "GET" or "POST" :arg headers: Additional HTTP headers to pass on the request + :arg body: HTTP body to pass on the request :type headers: `~tornado.httputil.HTTPHeaders` or `dict` :arg string auth_username: Username for HTTP authentication :arg string auth_password: Password for HTTP authentication @@ -310,6 +311,9 @@ class HTTPRequest(object): ``simple_httpclient`` and true in ``curl_httpclient`` :arg string client_key: Filename for client SSL key, if any :arg string client_cert: Filename for client SSL certificate, if any + + .. versionadded:: 3.1 + The ``auth_mode`` argument. 
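+
+        A sketch of the new argument (the URL and credentials here are
+        placeholders, and digest support is assumed to depend on the
+        HTTP client implementation in use)::
+
+            request = HTTPRequest("http://example.com/protected",
+                                  auth_username="user",
+                                  auth_password="secret",
+                                  auth_mode="digest")
+            response = HTTPClient().fetch(request)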
""" if headers is None: headers = httputil.HTTPHeaders() @@ -326,7 +330,7 @@ class HTTPRequest(object): self.body = utf8(body) self.auth_username = auth_username self.auth_password = auth_password - self.auth_mode = auth_mode + self.auth_mode = auth_mode self.connect_timeout = connect_timeout self.request_timeout = request_timeout self.follow_redirects = follow_redirects diff --git a/libs/tornado/httpserver.py b/libs/tornado/httpserver.py index ef36e6b..d005545 100755 --- a/libs/tornado/httpserver.py +++ b/libs/tornado/httpserver.py @@ -40,9 +40,9 @@ from tornado import stack_context from tornado.util import bytes_type try: - import Cookie # py2 + import Cookie # py2 except ImportError: - import http.cookies as Cookie # py3 + import http.cookies as Cookie # py3 class HTTPServer(TCPServer): @@ -143,13 +143,13 @@ class HTTPServer(TCPServer): way other than `tornado.netutil.bind_sockets`. """ - def __init__(self, request_callback, no_keep_alive = False, io_loop = None, - xheaders = False, ssl_options = None, protocol = None, **kwargs): + def __init__(self, request_callback, no_keep_alive=False, io_loop=None, + xheaders=False, ssl_options=None, protocol=None, **kwargs): self.request_callback = request_callback self.no_keep_alive = no_keep_alive self.xheaders = xheaders self.protocol = protocol - TCPServer.__init__(self, io_loop = io_loop, ssl_options = ssl_options, + TCPServer.__init__(self, io_loop=io_loop, ssl_options=ssl_options, **kwargs) def handle_stream(self, stream, address): @@ -168,8 +168,8 @@ class HTTPConnection(object): We parse HTTP headers and bodies, and execute the request callback until the HTTP conection is closed. """ - def __init__(self, stream, address, request_callback, no_keep_alive = False, - xheaders = False, protocol = None): + def __init__(self, stream, address, request_callback, no_keep_alive=False, + xheaders=False, protocol=None): self.stream = stream self.address = address # Save the socket's address family now so we know how to @@ -180,23 +180,23 @@ class HTTPConnection(object): self.no_keep_alive = no_keep_alive self.xheaders = xheaders self.protocol = protocol - self._request = None - self._request_finished = False - self._write_callback = None - self._close_callback = None + self._clear_request_state() # Save stack context here, outside of any request. This keeps # contexts from one request from leaking into the next. self._header_callback = stack_context.wrap(self._on_headers) + self.stream.set_close_callback(self._on_connection_close) self.stream.read_until(b"\r\n\r\n", self._header_callback) - def _clear_callbacks(self): - """Clears the per-request callbacks. + def _clear_request_state(self): + """Clears the per-request state. This is run in between requests to allow the previous handler to be garbage collected (and prevent spurious close callbacks), and when the connection is closed (to break up cycles and facilitate garbage collection in cpython). """ + self._request = None + self._request_finished = False self._write_callback = None self._close_callback = None @@ -209,34 +209,35 @@ class HTTPConnection(object): recommended approach prior to Tornado 3.0). 
""" self._close_callback = stack_context.wrap(callback) - self.stream.set_close_callback(self._on_connection_close) def _on_connection_close(self): - callback = self._close_callback - self._close_callback = None - if callback: callback() + if self._close_callback is not None: + callback = self._close_callback + self._close_callback = None + callback() # Delete any unfinished callbacks to break up reference cycles. self._header_callback = None - self._clear_callbacks() + self._clear_request_state() def close(self): self.stream.close() # Remove this reference to self, which would otherwise cause a # cycle and delay garbage collection of this connection. self._header_callback = None - self._clear_callbacks() + self._clear_request_state() - def write(self, chunk, callback = None): + def write(self, chunk, callback=None): """Writes a chunk of output to the stream.""" - assert self._request, "Request closed" if not self.stream.closed(): self._write_callback = stack_context.wrap(callback) self.stream.write(chunk, self._on_write_complete) def finish(self): """Finishes the request.""" - assert self._request, "Request closed" self._request_finished = True + # No more data is coming, so instruct TCP to send any remaining + # data immediately instead of waiting for a full packet or ack. + self.stream.set_nodelay(True) if not self.stream.writing(): self._finish_request() @@ -256,7 +257,7 @@ class HTTPConnection(object): self._finish_request() def _finish_request(self): - if self.no_keep_alive: + if self.no_keep_alive or self._request is None: disconnect = True else: connection_header = self._request.headers.get("Connection") @@ -269,9 +270,7 @@ class HTTPConnection(object): disconnect = connection_header != "keep-alive" else: disconnect = True - self._request = None - self._request_finished = False - self._clear_callbacks() + self._clear_request_state() if disconnect: self.close() return @@ -280,6 +279,10 @@ class HTTPConnection(object): # directly, because in some cases the stream doesn't discover # that it's closed until you try to read from it. self.stream.read_until(b"\r\n\r\n", self._header_callback) + + # Turn Nagle's algorithm back on, leaving the stream in its + # default state for the next request. + self.stream.set_nodelay(False) except iostream.StreamClosedError: self.close() @@ -308,8 +311,8 @@ class HTTPConnection(object): remote_ip = '0.0.0.0' self._request = HTTPRequest( - connection = self, method = method, uri = uri, version = version, - headers = headers, remote_ip = remote_ip, protocol = self.protocol) + connection=self, method=method, uri=uri, version=version, + headers=headers, remote_ip=remote_ip, protocol=self.protocol) content_length = headers.get("Content-Length") if content_length: @@ -376,7 +379,10 @@ class HTTPRequest(object): Client's IP address as a string. If ``HTTPServer.xheaders`` is set, will pass along the real IP address provided by a load balancer - in the ``X-Real-Ip`` header + in the ``X-Real-Ip`` or ``X-Forwarded-For`` header. + + .. versionchanged:: 3.1 + The list format of ``X-Forwarded-For`` is now supported. .. attribute:: protocol @@ -409,9 +415,9 @@ class HTTPRequest(object): are typically kept open in HTTP/1.1, multiple requests can be handled sequentially on a single connection. 
""" - def __init__(self, method, uri, version = "HTTP/1.0", headers = None, - body = None, remote_ip = None, protocol = None, host = None, - files = None, connection = None): + def __init__(self, method, uri, version="HTTP/1.0", headers=None, + body=None, remote_ip=None, protocol=None, host=None, + files=None, connection=None): self.method = method self.uri = uri self.version = version @@ -431,8 +437,10 @@ class HTTPRequest(object): # xheaders can override the defaults if connection and connection.xheaders: # Squid uses X-Forwarded-For, others use X-Real-Ip + ip = self.headers.get("X-Forwarded-For", self.remote_ip) + ip = ip.split(',')[-1].strip() ip = self.headers.get( - "X-Real-Ip", self.headers.get("X-Forwarded-For", self.remote_ip)) + "X-Real-Ip", ip) if netutil.is_valid_ip(ip): self.remote_ip = ip # AWS uses X-Forwarded-Proto @@ -441,7 +449,6 @@ class HTTPRequest(object): if proto in ("http", "https"): self.protocol = proto - self.host = host or self.headers.get("Host") or "127.0.0.1" self.files = files or {} self.connection = connection @@ -449,7 +456,7 @@ class HTTPRequest(object): self._finish_time = None self.path, sep, self.query = uri.partition('?') - self.arguments = parse_qs_bytes(self.query, keep_blank_values = True) + self.arguments = parse_qs_bytes(self.query, keep_blank_values=True) def supports_http_1_1(self): """Returns True if this request supports HTTP/1.1 semantics""" @@ -468,10 +475,10 @@ class HTTPRequest(object): self._cookies = {} return self._cookies - def write(self, chunk, callback = None): + def write(self, chunk, callback=None): """Writes the given chunk to the response stream.""" assert isinstance(chunk, bytes_type) - self.connection.write(chunk, callback = callback) + self.connection.write(chunk, callback=callback) def finish(self): """Finishes this HTTP request on the open connection.""" @@ -489,7 +496,7 @@ class HTTPRequest(object): else: return self._finish_time - self._start_time - def get_ssl_certificate(self, binary_form = False): + def get_ssl_certificate(self, binary_form=False): """Returns the client's SSL certificate, if any. To use client certificates, the HTTPServer must have been constructed @@ -511,13 +518,12 @@ class HTTPRequest(object): """ try: return self.connection.stream.socket.getpeercert( - binary_form = binary_form) + binary_form=binary_form) except ssl.SSLError: return None def __repr__(self): - attrs = ("protocol", "host", "method", "uri", "version", "remote_ip", - "body") + attrs = ("protocol", "host", "method", "uri", "version", "remote_ip") args = ", ".join(["%s=%r" % (n, getattr(self, n)) for n in attrs]) return "%s(%s, headers=%s)" % ( self.__class__.__name__, args, dict(self.headers)) diff --git a/libs/tornado/httputil.py b/libs/tornado/httputil.py index a09aeab..3e7337d 100755 --- a/libs/tornado/httputil.py +++ b/libs/tornado/httputil.py @@ -18,9 +18,11 @@ from __future__ import absolute_import, division, print_function, with_statement +import calendar +import collections import datetime +import email.utils import numbers -import re import time from tornado.escape import native_str, parse_qs_bytes, utf8 @@ -42,6 +44,37 @@ except ImportError: from urllib.parse import urlencode # py3 +class _NormalizedHeaderCache(dict): + """Dynamic cached mapping of header names to Http-Header-Case. + + Implemented as a dict subclass so that cache hits are as fast as a + normal dict lookup, without the overhead of a python function + call. 
+ + >>> normalized_headers = _NormalizedHeaderCache(10) + >>> normalized_headers["coNtent-TYPE"] + 'Content-Type' + """ + def __init__(self, size): + super(_NormalizedHeaderCache, self).__init__() + self.size = size + self.queue = collections.deque() + + def __missing__(self, key): + normalized = "-".join([w.capitalize() for w in key.split("-")]) + self[key] = normalized + self.queue.append(key) + if len(self.queue) > self.size: + # Limit the size of the cache. LRU would be better, but this + # simpler approach should be fine. In Python 2.7+ we could + # use OrderedDict (or in 3.2+, @functools.lru_cache). + old_key = self.queue.popleft() + del self[old_key] + return normalized + +_normalized_headers = _NormalizedHeaderCache(1000) + + class HTTPHeaders(dict): """A dictionary that maintains ``Http-Header-Case`` for all keys. @@ -89,7 +122,7 @@ class HTTPHeaders(dict): def add(self, name, value): """Adds a new value for the given key.""" - norm_name = HTTPHeaders._normalize_name(name) + norm_name = _normalized_headers[name] self._last_key = norm_name if norm_name in self: # bypass our override of __setitem__ since it modifies _as_list @@ -102,7 +135,7 @@ class HTTPHeaders(dict): def get_list(self, name): """Returns all values for the given header as a list.""" - norm_name = HTTPHeaders._normalize_name(name) + norm_name = _normalized_headers[name] return self._as_list.get(norm_name, []) def get_all(self): @@ -150,24 +183,24 @@ class HTTPHeaders(dict): # dict implementation overrides def __setitem__(self, name, value): - norm_name = HTTPHeaders._normalize_name(name) + norm_name = _normalized_headers[name] dict.__setitem__(self, norm_name, value) self._as_list[norm_name] = [value] def __getitem__(self, name): - return dict.__getitem__(self, HTTPHeaders._normalize_name(name)) + return dict.__getitem__(self, _normalized_headers[name]) def __delitem__(self, name): - norm_name = HTTPHeaders._normalize_name(name) + norm_name = _normalized_headers[name] dict.__delitem__(self, norm_name) del self._as_list[norm_name] def __contains__(self, name): - norm_name = HTTPHeaders._normalize_name(name) + norm_name = _normalized_headers[name] return dict.__contains__(self, norm_name) def get(self, name, default=None): - return dict.get(self, HTTPHeaders._normalize_name(name), default) + return dict.get(self, _normalized_headers[name], default) def update(self, *args, **kwargs): # dict.update bypasses our __setitem__ @@ -178,26 +211,6 @@ class HTTPHeaders(dict): # default implementation returns dict(self), not the subclass return HTTPHeaders(self) - _NORMALIZED_HEADER_RE = re.compile(r'^[A-Z0-9][a-z0-9]*(-[A-Z0-9][a-z0-9]*)*$') - _normalized_headers = {} - - @staticmethod - def _normalize_name(name): - """Converts a name to Http-Header-Case. - - >>> HTTPHeaders._normalize_name("coNtent-TYPE") - 'Content-Type' - """ - try: - return HTTPHeaders._normalized_headers[name] - except KeyError: - if HTTPHeaders._NORMALIZED_HEADER_RE.match(name): - normalized = name - else: - normalized = "-".join([w.capitalize() for w in name.split("-")]) - HTTPHeaders._normalized_headers[name] = normalized - return normalized - def url_concat(url, args): """Concatenate url and argument dictionary regardless of whether @@ -226,6 +239,77 @@ class HTTPFile(ObjectDict): pass +def _parse_request_range(range_header): + """Parses a Range header. + + Returns either ``None`` or tuple ``(start, end)``. + Note that while the HTTP headers use inclusive byte positions, + this method returns indexes suitable for use in slices. 
+ + >>> start, end = _parse_request_range("bytes=1-2") + >>> start, end + (1, 3) + >>> [0, 1, 2, 3, 4][start:end] + [1, 2] + >>> _parse_request_range("bytes=6-") + (6, None) + >>> _parse_request_range("bytes=-6") + (-6, None) + >>> _parse_request_range("bytes=-0") + (None, 0) + >>> _parse_request_range("bytes=") + (None, None) + >>> _parse_request_range("foo=42") + >>> _parse_request_range("bytes=1-2,6-10") + + Note: only supports one range (ex, ``bytes=1-2,6-10`` is not allowed). + + See [0] for the details of the range header. + + [0]: http://greenbytes.de/tech/webdav/draft-ietf-httpbis-p5-range-latest.html#byte.ranges + """ + unit, _, value = range_header.partition("=") + unit, value = unit.strip(), value.strip() + if unit != "bytes": + return None + start_b, _, end_b = value.partition("-") + try: + start = _int_or_none(start_b) + end = _int_or_none(end_b) + except ValueError: + return None + if end is not None: + if start is None: + if end != 0: + start = -end + end = None + else: + end += 1 + return (start, end) + + +def _get_content_range(start, end, total): + """Returns a suitable Content-Range header: + + >>> print(_get_content_range(None, 1, 4)) + bytes 0-0/4 + >>> print(_get_content_range(1, 3, 4)) + bytes 1-2/4 + >>> print(_get_content_range(None, None, 4)) + bytes 0-3/4 + """ + start = start or 0 + end = (end or total) - 1 + return "bytes %s-%s/%s" % (start, end, total) + + +def _int_or_none(val): + val = val.strip() + if val == "": + return None + return int(val) + + def parse_body_arguments(content_type, body, arguments, files): """Parses a form request body. @@ -307,15 +391,15 @@ def format_timestamp(ts): >>> format_timestamp(1359312200) 'Sun, 27 Jan 2013 18:43:20 GMT' """ - if isinstance(ts, (tuple, time.struct_time)): + if isinstance(ts, numbers.Real): pass + elif isinstance(ts, (tuple, time.struct_time)): + ts = calendar.timegm(ts) elif isinstance(ts, datetime.datetime): - ts = ts.utctimetuple() - elif isinstance(ts, numbers.Real): - ts = time.gmtime(ts) + ts = calendar.timegm(ts.utctimetuple()) else: raise TypeError("unknown timestamp type: %r" % ts) - return time.strftime("%a, %d %b %Y %H:%M:%S GMT", ts) + return email.utils.formatdate(ts, usegmt=True) # _parseparam and _parse_header are copied and modified from python2.7's cgi.py # The original 2.7 version of this code did not correctly support some diff --git a/libs/tornado/ioloop.py b/libs/tornado/ioloop.py index dd9639c..5f37032 100755 --- a/libs/tornado/ioloop.py +++ b/libs/tornado/ioloop.py @@ -232,6 +232,11 @@ class IOLoop(Configurable): be allowed to return before attempting to call `IOLoop.close()`. Therefore the call to `close` will usually appear just after the call to `start` rather than near the call to `stop`. + + .. versionchanged:: 3.1 + If the `IOLoop` implementation supports non-integer objects + for "file descriptors", those objects will have their + ``close`` method when ``all_fds`` is true. 
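+
+        A sketch of that ordering (the server setup is illustrative)::
+
+            io_loop = IOLoop()
+            server = HTTPServer(app, io_loop=io_loop)  # app: your application
+            server.listen(8888)
+            io_loop.start()   # returns after io_loop.stop() is called
+            io_loop.close(all_fds=True)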
""" raise NotImplementedError() @@ -485,6 +490,7 @@ class PollIOLoop(IOLoop): self._callbacks = [] self._callback_lock = threading.Lock() self._timeouts = [] + self._cancellations = 0 self._running = False self._stopped = False self._closing = False @@ -606,6 +612,7 @@ class PollIOLoop(IOLoop): if self._timeouts[0].callback is None: # the timeout was cancelled heapq.heappop(self._timeouts) + self._cancellations -= 1 elif self._timeouts[0].deadline <= now: timeout = heapq.heappop(self._timeouts) self._run_callback(timeout.callback) @@ -613,6 +620,14 @@ class PollIOLoop(IOLoop): seconds = self._timeouts[0].deadline - now poll_timeout = min(seconds, poll_timeout) break + if (self._cancellations > 512 + and self._cancellations > (len(self._timeouts) >> 1)): + # Clean up the timeout queue when it gets large and it's + # more than half cancellations. + self._cancellations = 0 + self._timeouts = [x for x in self._timeouts + if x.callback is not None] + heapq.heapify(self._timeouts) if self._callbacks: # If any callbacks or timeouts called add_callback, @@ -693,6 +708,7 @@ class PollIOLoop(IOLoop): # If this turns out to be a problem, we could add a garbage # collection pass whenever there are too many dead timeouts. timeout.callback = None + self._cancellations += 1 def add_callback(self, callback, *args, **kwargs): with self._callback_lock: diff --git a/libs/tornado/iostream.py b/libs/tornado/iostream.py index 16b0fac..079012c 100755 --- a/libs/tornado/iostream.py +++ b/libs/tornado/iostream.py @@ -48,6 +48,12 @@ except ImportError: class StreamClosedError(IOError): + """Exception raised by `IOStream` methods when the stream is closed. + + Note that the close callback is scheduled to run *after* other + callbacks on the stream (to allow for buffered data to be processed), + so you may see this error before you see the close callback. + """ pass @@ -64,10 +70,10 @@ class BaseIOStream(object): Subclasses must implement `fileno`, `close_fd`, `write_to_fd`, `read_from_fd`, and optionally `get_fd_error`. """ - def __init__(self, io_loop=None, max_buffer_size=104857600, + def __init__(self, io_loop=None, max_buffer_size=None, read_chunk_size=4096): self.io_loop = io_loop or ioloop.IOLoop.current() - self.max_buffer_size = max_buffer_size + self.max_buffer_size = max_buffer_size or 104857600 self.read_chunk_size = read_chunk_size self.error = None self._read_buffer = collections.deque() @@ -234,6 +240,10 @@ class BaseIOStream(object): if any(exc_info): self.error = exc_info[1] if self._read_until_close: + if (self._streaming_callback is not None and + self._read_buffer_size): + self._run_callback(self._streaming_callback, + self._consume(self._read_buffer_size)) callback = self._read_callback self._read_callback = None self._read_until_close = False @@ -269,6 +279,21 @@ class BaseIOStream(object): """Returns true if the stream has been closed.""" return self._closed + def set_nodelay(self, value): + """Sets the no-delay flag for this stream. + + By default, data written to TCP streams may be held for a time + to make the most efficient use of bandwidth (according to + Nagle's algorithm). The no-delay flag requests that data be + written as soon as possible, even if doing so would consume + additional bandwidth. + + This flag is currently defined only for TCP-based ``IOStreams``. + + .. 
versionadded:: 3.1 + """ + pass + def _handle_events(self, fd, events): if self.closed(): gen_log.warning("Got events for closed stream %d", fd) @@ -392,13 +417,21 @@ return self._check_closed() try: - # See comments in _handle_read about incrementing _pending_callbacks - self._pending_callbacks += 1 - while not self.closed(): - if self._read_to_buffer() == 0: - break - finally: - self._pending_callbacks -= 1 + try: + # See comments in _handle_read about incrementing _pending_callbacks + self._pending_callbacks += 1 + while not self.closed(): + if self._read_to_buffer() == 0: + break + finally: + self._pending_callbacks -= 1 + except Exception: + # If there was an error in _read_to_buffer, we called close() already, + # but couldn't run the close callback because of _pending_callbacks. + # Before we escape from this function, run the close callback if + # applicable. + self._maybe_run_close_callback() + raise if self._read_from_buffer(): return self._maybe_add_error_listener() @@ -522,8 +555,12 @@ self._write_buffer_frozen = True break else: - gen_log.warning("Write error on %d: %s", - self.fileno(), e) + if e.args[0] not in (errno.EPIPE, errno.ECONNRESET): + # Broken pipe errors are usually caused by connection + # reset, and it's better to not log EPIPE errors to + # minimize log spam + gen_log.warning("Write error on %d: %s", + self.fileno(), e) self.close(exc_info=True) return if not self._write_buffer and self._write_callback: @@ -714,6 +751,19 @@ self._run_callback(callback) self._connecting = False + def set_nodelay(self, value): + if (self.socket is not None and + self.socket.family in (socket.AF_INET, socket.AF_INET6)): + try: + self.socket.setsockopt(socket.IPPROTO_TCP, + socket.TCP_NODELAY, 1 if value else 0) + except socket.error as e: + # Sometimes setsockopt will fail if the socket is closed + # at the wrong time. This can happen with HTTPServer + # resetting the value to false between requests. + if e.errno != errno.EINVAL: + raise + class SSLIOStream(IOStream): """A utility class to write to and read from a non-blocking SSL socket. @@ -764,7 +814,7 @@ elif err.args[0] == ssl.SSL_ERROR_SSL: try: peer = self.socket.getpeername() - except: + except Exception: peer = '(not connected)' gen_log.warning("SSL Error on %d %s: %s", self.socket.fileno(), peer, err) @@ -773,6 +823,11 @@ except socket.error as err: if err.args[0] in (errno.ECONNABORTED, errno.ECONNRESET): return self.close(exc_info=True) + except AttributeError: + # On Linux, if the connection was reset before the call to + # wrap_socket, do_handshake will fail with an + # AttributeError. + return self.close(exc_info=True) else: self._ssl_accepting = False if not self._verify_cert(self.socket.getpeercert()): @@ -825,7 +880,7 @@ def connect(self, address, callback=None, server_hostname=None): # Save the user's callback and run it after the ssl handshake # has completed. 
- self._ssl_connect_callback = callback + self._ssl_connect_callback = stack_context.wrap(callback) self._server_hostname = server_hostname super(SSLIOStream, self).connect(address, callback=None) diff --git a/libs/tornado/locale.py b/libs/tornado/locale.py index 66e9ff6..310a517 100755 --- a/libs/tornado/locale.py +++ b/libs/tornado/locale.py @@ -43,6 +43,7 @@ from __future__ import absolute_import, division, print_function, with_statement import csv import datetime +import numbers import os import re @@ -287,7 +288,7 @@ class Locale(object): """ if self.code.startswith("ru"): relative = False - if type(date) in (int, long, float): + if isinstance(date, numbers.Real): date = datetime.datetime.utcfromtimestamp(date) now = datetime.datetime.utcnow() if date > now: diff --git a/libs/tornado/netutil.py b/libs/tornado/netutil.py index 7b7d48d..3703718 100755 --- a/libs/tornado/netutil.py +++ b/libs/tornado/netutil.py @@ -66,7 +66,12 @@ def bind_sockets(port, address=None, family=socket.AF_UNSPEC, backlog=128, flags for res in set(socket.getaddrinfo(address, port, family, socket.SOCK_STREAM, 0, flags)): af, socktype, proto, canonname, sockaddr = res - sock = socket.socket(af, socktype, proto) + try: + sock = socket.socket(af, socktype, proto) + except socket.error as e: + if e.args[0] == errno.EAFNOSUPPORT: + continue + raise set_close_exec(sock.fileno()) if os.name != 'nt': sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) @@ -135,8 +140,15 @@ def add_accept_handler(sock, callback, io_loop=None): try: connection, address = sock.accept() except socket.error as e: + # EWOULDBLOCK and EAGAIN indicate we have accepted every + # connection that is available. if e.args[0] in (errno.EWOULDBLOCK, errno.EAGAIN): return + # ECONNABORTED indicates that there was a connection + # but it was closed while still in the accept queue. + # (observed on FreeBSD). + if e.args[0] == errno.ECONNABORTED: + continue raise callback(connection, address) io_loop.add_handler(sock.fileno(), accept_handler, IOLoop.READ) @@ -200,15 +212,47 @@ class Resolver(Configurable): """ raise NotImplementedError() + def close(self): + """Closes the `Resolver`, freeing any resources used. + + .. versionadded:: 3.1 + + """ + pass + class ExecutorResolver(Resolver): - def initialize(self, io_loop=None, executor=None): + """Resolver implementation using a `concurrent.futures.Executor`. + + Use this instead of `ThreadedResolver` when you require additional + control over the executor being used. + + The executor will be shut down when the resolver is closed unless + ``close_executor=False``; use this if you want to reuse the same + executor elsewhere. + """ + def initialize(self, io_loop=None, executor=None, close_executor=True): self.io_loop = io_loop or IOLoop.current() - self.executor = executor or dummy_executor + if executor is not None: + self.executor = executor + self.close_executor = close_executor + else: + self.executor = dummy_executor + self.close_executor = False + + def close(self): + if self.close_executor: + self.executor.shutdown() + self.executor = None @run_on_executor def resolve(self, host, port, family=socket.AF_UNSPEC): - addrinfo = socket.getaddrinfo(host, port, family) + # On Solaris, getaddrinfo fails if the given port is not found + # in /etc/services and no socket type is given, so we must pass + # one here. The socket type used here doesn't seem to actually + # matter (we discard the one we get back in the results), + # so the addresses we return should still be usable with SOCK_DGRAM. 
+ addrinfo = socket.getaddrinfo(host, port, family, socket.SOCK_STREAM) results = [] for family, socktype, proto, canonname, address in addrinfo: results.append((family, address)) @@ -236,11 +280,31 @@ class ThreadedResolver(ExecutorResolver): Resolver.configure('tornado.netutil.ThreadedResolver', num_threads=10) + + .. versionchanged:: 3.1 + All ``ThreadedResolvers`` share a single thread pool, whose + size is set by the first one to be created. """ + _threadpool = None + _threadpool_pid = None + def initialize(self, io_loop=None, num_threads=10): - from concurrent.futures import ThreadPoolExecutor + threadpool = ThreadedResolver._create_threadpool(num_threads) super(ThreadedResolver, self).initialize( - io_loop=io_loop, executor=ThreadPoolExecutor(num_threads)) + io_loop=io_loop, executor=threadpool, close_executor=False) + + @classmethod + def _create_threadpool(cls, num_threads): + pid = os.getpid() + if cls._threadpool_pid != pid: + # Threads cannot survive after a fork, so if our pid isn't what it + # was when we created the pool then delete it. + cls._threadpool = None + if cls._threadpool is None: + from concurrent.futures import ThreadPoolExecutor + cls._threadpool = ThreadPoolExecutor(num_threads) + cls._threadpool_pid = pid + return cls._threadpool class OverrideResolver(Resolver): @@ -255,6 +319,9 @@ self.resolver = resolver self.mapping = mapping + def close(self): + self.resolver.close() + def resolve(self, host, port, *args, **kwargs): if (host, port) in self.mapping: host, port = self.mapping[(host, port)] @@ -331,9 +398,16 @@ else: class SSLCertificateError(ValueError): pass - def _dnsname_to_pat(dn): + def _dnsname_to_pat(dn, max_wildcards=1): pats = [] for frag in dn.split(r'.'): + if frag.count('*') > max_wildcards: + # Issue #17980: avoid denials of service by refusing more + # than one wildcard per fragment. A survey of established + # policy among SSL implementations showed it to be a + # reasonable choice. + raise SSLCertificateError( + "too many wildcards in certificate DNS name: " + repr(dn)) if frag == '*': # When '*' is a fragment by itself, it matches a non-empty dotless # fragment. @@ -361,8 +435,9 @@ if _dnsname_to_pat(value).match(hostname): return dnsnames.append(value) - if not san: - # The subject is only checked when subjectAltName is empty + if not dnsnames: + # The subject is only checked when there is no dNSName entry + # in subjectAltName for sub in cert.get('subject', ()): for key, value in sub: # XXX according to RFC 2818, the most specific Common Name diff --git a/libs/tornado/options.py b/libs/tornado/options.py index b96f815..1105c0e 100755 --- a/libs/tornado/options.py +++ b/libs/tornado/options.py @@ -61,6 +61,7 @@ instances to define isolated sets of options, such as for subcommands. from __future__ import absolute_import, division, print_function, with_statement import datetime +import numbers import re import sys import os @@ -100,6 +101,55 @@ class OptionParser(object): return self._options[name].set(value) raise AttributeError("Unrecognized option %r" % name) + def __iter__(self): + return iter(self._options) + + def __getitem__(self, item): + return self._options[item].value() + + def items(self): + """A sequence of (name, value) pairs. + + .. versionadded:: 3.1 + """ + return [(name, opt.value()) for name, opt in self._options.items()] + + def groups(self): + """The set of option-groups created by ``define``. + + .. 
versionadded:: 3.1 + """ + return set(opt.group_name for opt in self._options.values()) + + def group_dict(self, group): + """The names and values of options in a group. + + Useful for copying options into Application settings:: + + from tornado.options import define, parse_command_line, options + + define('template_path', group='application') + define('static_path', group='application') + + parse_command_line() + + application = Application( + handlers, **options.group_dict('application')) + + .. versionadded:: 3.1 + """ + return dict( + (name, opt.value()) for name, opt in self._options.items() + if not group or group == opt.group_name) + + def as_dict(self): + """The names and values of all options. + + .. versionadded:: 3.1 + """ + return dict( + (name, opt.value()) for name, opt in self._options.items()) + def define(self, name, default=None, type=None, help=None, metavar=None, multiple=False, group=None, callback=None): """Defines a new command line option. @@ -138,8 +188,8 @@ class OptionParser(object): by later flags. """ if name in self._options: - raise Error("Option %r already defined in %s", name, - self._options[name].file_name) + raise Error("Option %r already defined in %s" % + (name, self._options[name].file_name)) frame = sys._getframe(0) options_file = frame.f_code.co_filename file_name = frame.f_back.f_code.co_filename @@ -339,7 +389,7 @@ class _Option(object): if self.multiple: self._value = [] for part in value.split(","): - if self.type in (int, long): + if issubclass(self.type, numbers.Integral): # allow ranges of the form X:Y (inclusive at both ends) lo, _, hi = part.partition(":") lo = _parse(lo) @@ -359,11 +409,11 @@ class _Option(object): raise Error("Option %r is required to be a list of %s" % (self.name, self.type.__name__)) for item in value: - if item != None and not isinstance(item, self.type): + if item is not None and not isinstance(item, self.type): raise Error("Option %r is required to be a list of %s" % (self.name, self.type.__name__)) else: - if value != None and not isinstance(value, self.type): + if value is not None and not isinstance(value, self.type): raise Error("Option %r is required to be a %s (%s given)" % (self.name, self.type.__name__, type(value))) self._value = value diff --git a/libs/tornado/platform/twisted.py b/libs/tornado/platform/twisted.py index 910e46a..c0ee595 100755 --- a/libs/tornado/platform/twisted.py +++ b/libs/tornado/platform/twisted.py @@ -83,7 +83,6 @@ import twisted.names.resolve from zope.interface import implementer -from tornado.concurrent import return_future from tornado.escape import utf8 from tornado import gen import tornado.ioloop diff --git a/libs/tornado/process.py b/libs/tornado/process.py index 438db66..9bc193c 100755 --- a/libs/tornado/process.py +++ b/libs/tornado/process.py @@ -33,6 +33,7 @@ from binascii import hexlify from tornado import ioloop from tornado.iostream import PipeIOStream from tornado.log import gen_log +from tornado.platform.auto import set_close_exec from tornado import stack_context try: @@ -69,6 +70,13 @@ def _reseed_random(): random.seed(seed) +def _pipe_cloexec(): + r, w = os.pipe() + set_close_exec(r) + set_close_exec(w) + return r, w + + _task_id = None @@ -181,20 +189,20 @@ class Subprocess(object): _waiting = {} def __init__(self, *args, **kwargs): - self.io_loop = kwargs.pop('io_loop', None) + self.io_loop = kwargs.pop('io_loop', None) or ioloop.IOLoop.current() to_close = [] if kwargs.get('stdin') is Subprocess.STREAM: - in_r, in_w = os.pipe() + in_r, in_w = _pipe_cloexec() 
kwargs['stdin'] = in_r to_close.append(in_r) self.stdin = PipeIOStream(in_w, io_loop=self.io_loop) if kwargs.get('stdout') is Subprocess.STREAM: - out_r, out_w = os.pipe() + out_r, out_w = _pipe_cloexec() kwargs['stdout'] = out_w to_close.append(out_w) self.stdout = PipeIOStream(out_r, io_loop=self.io_loop) if kwargs.get('stderr') is Subprocess.STREAM: - err_r, err_w = os.pipe() + err_r, err_w = _pipe_cloexec() kwargs['stderr'] = err_w to_close.append(err_w) self.stderr = PipeIOStream(err_r, io_loop=self.io_loop) diff --git a/libs/tornado/simple_httpclient.py b/libs/tornado/simple_httpclient.py index 117ce75..d8dbb27 100755 --- a/libs/tornado/simple_httpclient.py +++ b/libs/tornado/simple_httpclient.py @@ -73,11 +73,21 @@ class SimpleAsyncHTTPClient(AsyncHTTPClient): self.queue = collections.deque() self.active = {} self.max_buffer_size = max_buffer_size - self.resolver = resolver or Resolver(io_loop=io_loop) + if resolver: + self.resolver = resolver + self.own_resolver = False + else: + self.resolver = Resolver(io_loop=io_loop) + self.own_resolver = True if hostname_mapping is not None: self.resolver = OverrideResolver(resolver=self.resolver, mapping=hostname_mapping) + def close(self): + super(SimpleAsyncHTTPClient, self).close() + if self.own_resolver: + self.resolver.close() + def fetch_impl(self, request, callback): self.queue.append((request, callback)) self._process_queue() @@ -279,9 +289,11 @@ class _HTTPConnection(object): if b'\n' in line: raise ValueError('Newline in header: ' + repr(line)) request_lines.append(line) - self.stream.write(b"\r\n".join(request_lines) + b"\r\n\r\n") + request_str = b"\r\n".join(request_lines) + b"\r\n\r\n" if self.request.body is not None: - self.stream.write(self.request.body) + request_str += self.request.body + self.stream.set_nodelay(True) + self.stream.write(request_str) self.stream.read_until_regex(b"\r?\n\r?\n", self._on_headers) def _release(self): @@ -300,7 +312,6 @@ class _HTTPConnection(object): def _handle_exception(self, typ, value, tb): if self.final_callback: self._remove_timeout() - gen_log.warning("uncaught exception", exc_info=(typ, value, tb)) self._run_callback(HTTPResponse(self.request, 599, error=value, request_time=self.io_loop.time() - self.start_time, )) diff --git a/libs/tornado/stack_context.py b/libs/tornado/stack_context.py index 8804d42..b1e82b0 100755 --- a/libs/tornado/stack_context.py +++ b/libs/tornado/stack_context.py @@ -108,6 +108,10 @@ class StackContext(object): def __init__(self, context_factory): self.context_factory = context_factory self.contexts = [] + self.active = True + + def _deactivate(self): + self.active = False # StackContext protocol def enter(self): @@ -133,6 +137,8 @@ class StackContext(object): _state.contexts = self.old_contexts raise + return self._deactivate + def __exit__(self, type, value, traceback): try: self.exit(type, value, traceback) @@ -151,6 +157,9 @@ class StackContext(object): 'stack_context inconsistency (may be caused by yield ' 'within a "with StackContext" block)') + # Break up a reference to itself to allow for faster GC on CPython. + self.new_contexts = None + class ExceptionStackContext(object): """Specialization of StackContext for exception handling. 
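The ``active`` flag and ``_deactivate`` hooks added above let ``wrap`` later drop contexts that have already exited. As a minimal usage sketch of the class this patch is extending (the handler and callback names here are illustrative, not part of the patch), an ``ExceptionStackContext`` routes exceptions raised from deferred callbacks back to a single handler::

    from tornado.ioloop import IOLoop
    from tornado.stack_context import ExceptionStackContext

    def handle_exception(typ, value, tb):
        print("caught: %r" % value)
        return True  # returning True marks the exception as handled

    def fail_later():
        raise ValueError("error raised from a callback")

    io_loop = IOLoop.instance()
    with ExceptionStackContext(handle_exception):
        # The callback is wrapped with the current context, so the
        # ValueError it raises is delivered to handle_exception.
        io_loop.add_callback(fail_later)
    io_loop.add_timeout(io_loop.time() + 0.1, io_loop.stop)
    io_loop.start()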
@@ -167,6 +176,10 @@ class ExceptionStackContext(object): """ def __init__(self, exception_handler): self.exception_handler = exception_handler + self.active = True + + def _deactivate(self): + self.active = False def exit(self, type, value, traceback): if type is not None: @@ -177,6 +190,8 @@ class ExceptionStackContext(object): self.new_contexts = (self.old_contexts[0], self) _state.contexts = self.new_contexts + return self._deactivate + def __exit__(self, type, value, traceback): try: if type is not None: @@ -190,6 +205,9 @@ class ExceptionStackContext(object): 'stack_context inconsistency (may be caused by yield ' 'within a "with StackContext" block)') + # Break up a reference to itself to allow for faster GC on CPython. + self.new_contexts = None + class NullContext(object): """Resets the `StackContext`. @@ -206,6 +224,32 @@ class NullContext(object): _state.contexts = self.old_contexts +def _remove_deactivated(contexts): + """Remove deactivated handlers from the chain""" + # Clean ctx handlers + stack_contexts = tuple([h for h in contexts[0] if h.active]) + + # Find new head + head = contexts[1] + while head is not None and not head.active: + head = head.old_contexts[1] + + # Process chain + ctx = head + while ctx is not None: + parent = ctx.old_contexts[1] + + while parent is not None: + if parent.active: + break + ctx.old_contexts = parent.old_contexts + parent = parent.old_contexts[1] + + ctx = parent + + return (stack_contexts, head) + + def wrap(fn): """Returns a callable object that will restore the current `StackContext` when executed. @@ -219,13 +263,19 @@ def wrap(fn): return fn # Capture current stack head - contexts = _state.contexts + # TODO: Any other better way to store contexts and update them in wrapped function? + cap_contexts = [_state.contexts] - #@functools.wraps def wrapped(*args, **kwargs): + ret = None try: - # Force local state - switch to new stack chain + # Capture old state current_state = _state.contexts + + # Remove deactivated items + cap_contexts[0] = contexts = _remove_deactivated(cap_contexts[0]) + + # Force new state _state.contexts = contexts # Current exception @@ -249,7 +299,7 @@ def wrap(fn): # Execute callback if no exception happened while restoring state if top is None: try: - fn(*args, **kwargs) + ret = fn(*args, **kwargs) except: exc = sys.exc_info() top = contexts[1] @@ -281,6 +331,7 @@ def wrap(fn): raise_exc_info(exc) finally: _state.contexts = current_state + return ret wrapped._wrapped = True return wrapped @@ -297,3 +348,29 @@ def _handle_exception(tail, exc): tail = tail.old_contexts[1] return exc + + +def run_with_stack_context(context, func): + """Run a coroutine ``func`` in the given `StackContext`. + + It is not safe to have a ``yield`` statement within a ``with StackContext`` + block, so it is difficult to use stack context with `.gen.coroutine`. + This helper function runs the function in the correct context while + keeping the ``yield`` and ``with`` statements syntactically separate. + + Example:: + + @gen.coroutine + def incorrect(): + with StackContext(ctx): + # ERROR: this will raise StackContextInconsistentError + yield other_coroutine() + + @gen.coroutine + def correct(): + yield run_with_stack_context(StackContext(ctx), other_coroutine) + + .. 
versionadded:: 3.1 + """ + with context: + return func() diff --git a/libs/tornado/tcpserver.py b/libs/tornado/tcpserver.py index fbd9c63..8473a21 100755 --- a/libs/tornado/tcpserver.py +++ b/libs/tornado/tcpserver.py @@ -77,13 +77,17 @@ class TCPServer(object): also be used in single-process servers if you want to create your listening sockets in some way other than `~tornado.netutil.bind_sockets`. + + .. versionadded:: 3.1 + The ``max_buffer_size`` argument. """ - def __init__(self, io_loop=None, ssl_options=None): + def __init__(self, io_loop=None, ssl_options=None, max_buffer_size=None): self.io_loop = io_loop self.ssl_options = ssl_options self._sockets = {} # fd -> socket object self._pending_sockets = [] self._started = False + self.max_buffer_size = max_buffer_size # Verify the SSL options. Otherwise we don't get errors until clients # connect. This doesn't verify that the keys are legitimate, but @@ -216,15 +220,25 @@ class TCPServer(object): else: raise except socket.error as err: - if err.args[0] == errno.ECONNABORTED: + # If the connection is closed immediately after it is created + # (as in a port scan), we can get one of several errors. + # wrap_socket makes an internal call to getpeername, + # which may return either EINVAL (Mac OS X) or ENOTCONN + # (Linux). If it returns ENOTCONN, this error is + # silently swallowed by the ssl module, so we need to + # catch another error later on (AttributeError in + # SSLIOStream._do_ssl_handshake). + # To test this behavior, try nmap with the -sT flag. + # https://github.com/facebook/tornado/pull/750 + if err.args[0] in (errno.ECONNABORTED, errno.EINVAL): return connection.close() else: raise try: if self.ssl_options is not None: - stream = SSLIOStream(connection, io_loop=self.io_loop) + stream = SSLIOStream(connection, io_loop=self.io_loop, max_buffer_size=self.max_buffer_size) else: - stream = IOStream(connection, io_loop=self.io_loop) + stream = IOStream(connection, io_loop=self.io_loop, max_buffer_size=self.max_buffer_size) self.handle_stream(stream, address) except Exception: app_log.error("Error in connection callback", exc_info=True) diff --git a/libs/tornado/template.py b/libs/tornado/template.py index 8e1bfba..341e07c 100755 --- a/libs/tornado/template.py +++ b/libs/tornado/template.py @@ -84,6 +84,9 @@ hand, but instead use the `~.RequestHandler.render` and `tornado.web.RequestHandler`, which load templates automatically based on the ``template_path`` `.Application` setting. +Variable names beginning with ``_tt_`` are reserved by the template +system and should not be used by application code. + Syntax Reference ---------------- @@ -252,8 +255,8 @@ class Template(object): "squeeze": escape.squeeze, "linkify": escape.linkify, "datetime": datetime, - "_utf8": escape.utf8, # for internal use - "_string_types": (unicode_type, bytes_type), + "_tt_utf8": escape.utf8, # for internal use + "_tt_string_types": (unicode_type, bytes_type), # __name__ and __loader__ allow the traceback mechanism to find # the generated source code. "__name__": self.name.replace('.', '_'), @@ -262,7 +265,7 @@ class Template(object): namespace.update(self.namespace) namespace.update(kwargs) exec_in(self.compiled, namespace) - execute = namespace["_execute"] + execute = namespace["_tt_execute"] # Clear the traceback module's cache of source data now that # we've generated a new template (mainly for this module's # unittests, where different tests reuse the same name). 
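The renames above reserve a ``_tt_`` prefix for the template system's internal names, so user variables such as ``_buffer`` or ``_append`` no longer collide with the generated code. A minimal sketch of the behavior this enables (the variable name is illustrative)::

    from tornado.template import Template

    # Previously the generated _execute() shadowed _buffer with its
    # internal output list; with the _tt_ prefix the user-supplied
    # value is visible to the template expression.
    t = Template("{{ _buffer }} items")
    print(t.generate(_buffer=3))  # b'3 items'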
@@ -278,7 +281,6 @@ class Template(object): ancestors.reverse() for ancestor in ancestors: ancestor.find_named_blocks(loader, named_blocks) - self.file.find_named_blocks(loader, named_blocks) writer = _CodeWriter(buffer, named_blocks, loader, ancestors[0].template, compress_whitespace) ancestors[0].generate(writer) @@ -403,12 +405,12 @@ class _File(_Node): self.line = 0 def generate(self, writer): - writer.write_line("def _execute():", self.line) + writer.write_line("def _tt_execute():", self.line) with writer.indent(): - writer.write_line("_buffer = []", self.line) - writer.write_line("_append = _buffer.append", self.line) + writer.write_line("_tt_buffer = []", self.line) + writer.write_line("_tt_append = _tt_buffer.append", self.line) self.body.generate(writer) - writer.write_line("return _utf8('').join(_buffer)", self.line) + writer.write_line("return _tt_utf8('').join(_tt_buffer)", self.line) def each_child(self): return (self.body,) @@ -477,15 +479,15 @@ class _ApplyBlock(_Node): return (self.body,) def generate(self, writer): - method_name = "apply%d" % writer.apply_counter + method_name = "_tt_apply%d" % writer.apply_counter writer.apply_counter += 1 writer.write_line("def %s():" % method_name, self.line) with writer.indent(): - writer.write_line("_buffer = []", self.line) - writer.write_line("_append = _buffer.append", self.line) + writer.write_line("_tt_buffer = []", self.line) + writer.write_line("_tt_append = _tt_buffer.append", self.line) self.body.generate(writer) - writer.write_line("return _utf8('').join(_buffer)", self.line) - writer.write_line("_append(_utf8(%s(%s())))" % ( + writer.write_line("return _tt_utf8('').join(_tt_buffer)", self.line) + writer.write_line("_tt_append(_tt_utf8(%s(%s())))" % ( self.method, method_name), self.line) @@ -533,21 +535,21 @@ class _Expression(_Node): self.raw = raw def generate(self, writer): - writer.write_line("_tmp = %s" % self.expression, self.line) - writer.write_line("if isinstance(_tmp, _string_types):" - " _tmp = _utf8(_tmp)", self.line) - writer.write_line("else: _tmp = _utf8(str(_tmp))", self.line) + writer.write_line("_tt_tmp = %s" % self.expression, self.line) + writer.write_line("if isinstance(_tt_tmp, _tt_string_types):" + " _tt_tmp = _tt_utf8(_tt_tmp)", self.line) + writer.write_line("else: _tt_tmp = _tt_utf8(str(_tt_tmp))", self.line) if not self.raw and writer.current_template.autoescape is not None: # In python3 functions like xhtml_escape return unicode, # so we have to convert to utf8 again. - writer.write_line("_tmp = _utf8(%s(_tmp))" % + writer.write_line("_tt_tmp = _tt_utf8(%s(_tt_tmp))" % writer.current_template.autoescape, self.line) - writer.write_line("_append(_tmp)", self.line) + writer.write_line("_tt_append(_tt_tmp)", self.line) class _Module(_Expression): def __init__(self, expression, line): - super(_Module, self).__init__("_modules." + expression, line, + super(_Module, self).__init__("_tt_modules." 
+ expression, line, raw=True) @@ -567,7 +569,7 @@ class _Text(_Node): value = re.sub(r"(\s*\n\s*)", "\n", value) if value: - writer.write_line('_append(%r)' % escape.utf8(value), self.line) + writer.write_line('_tt_append(%r)' % escape.utf8(value), self.line) class ParseError(Exception): @@ -616,7 +618,7 @@ class _CodeWriter(object): return IncludeTemplate() def write_line(self, line, line_number, indent=None): - if indent == None: + if indent is None: indent = self._indent line_comment = ' # %s:%d' % (self.current_template.name, line_number) if self.include_stack: diff --git a/libs/tornado/testing.py b/libs/tornado/testing.py index 51663a4..8355dcf 100755 --- a/libs/tornado/testing.py +++ b/libs/tornado/testing.py @@ -77,7 +77,7 @@ def bind_unused_port(): Returns a tuple (socket, port). """ - [sock] = netutil.bind_sockets(0, 'localhost', family=socket.AF_INET) + [sock] = netutil.bind_sockets(None, 'localhost', family=socket.AF_INET) port = sock.getsockname()[1] return sock, port @@ -86,6 +86,8 @@ def get_async_test_timeout(): """Get the global timeout setting for async tests. Returns a float, the timeout in seconds. + + .. versionadded:: 3.1 """ try: return float(os.environ.get('ASYNC_TEST_TIMEOUT')) @@ -98,12 +100,15 @@ class AsyncTestCase(unittest.TestCase): asynchronous code. The unittest framework is synchronous, so the test must be - complete by the time the test method returns. This class provides - the `stop()` and `wait()` methods for this purpose. The test + complete by the time the test method returns. This means that + asynchronous code cannot be used in quite the same way as usual. + To write test functions that use the same ``yield``-based patterns + used with the `tornado.gen` module, decorate your test methods + with `tornado.testing.gen_test` instead of + `tornado.gen.coroutine`. This class also provides the `stop()` + and `wait()` methods for a more manual style of testing. The test method itself must call ``self.wait()``, and asynchronous callbacks should call ``self.stop()`` to signal completion. - Alternately, the `gen_test` decorator can be used to use yield points - from the `tornado.gen` module. By default, a new `.IOLoop` is constructed for each test and is available as ``self.io_loop``. This `.IOLoop` should be used in the construction of @@ -118,8 +123,17 @@ class AsyncTestCase(unittest.TestCase): Example:: - # This test uses argument passing between self.stop and self.wait. + # This test uses coroutine style. class MyTestCase(AsyncTestCase): + @tornado.testing.gen_test + def test_http_fetch(self): + client = AsyncHTTPClient(self.io_loop) + response = yield client.fetch("http://www.tornadoweb.org") + # Test contents of response + self.assertIn("FriendFeed", response.body) + + # This test uses argument passing between self.stop and self.wait. + class MyTestCase2(AsyncTestCase): def test_http_fetch(self): client = AsyncHTTPClient(self.io_loop) client.fetch("http://www.tornadoweb.org/", self.stop) @@ -128,7 +142,7 @@ class AsyncTestCase(unittest.TestCase): self.assertIn("FriendFeed", response.body) # This test uses an explicit callback-based style. - class MyTestCase2(AsyncTestCase): + class MyTestCase3(AsyncTestCase): def test_http_fetch(self): client = AsyncHTTPClient(self.io_loop) client.fetch("http://www.tornadoweb.org/", self.handle_fetch) @@ -216,12 +230,16 @@ class AsyncTestCase(unittest.TestCase): def wait(self, condition=None, timeout=None): """Runs the `.IOLoop` until stop is called or timeout has passed. 
- In the event of a timeout, an exception will be thrown. The default - timeout is 5 seconds; it may be overridden with a ``timeout`` keyword - argument or globally with the ASYNC_TEST_TIMEOUT environment variable. + In the event of a timeout, an exception will be thrown. The + default timeout is 5 seconds; it may be overridden with a + ``timeout`` keyword argument or globally with the + ``ASYNC_TEST_TIMEOUT`` environment variable. If ``condition`` is not None, the `.IOLoop` will be restarted after `stop()` until ``condition()`` returns true. + + .. versionchanged:: 3.1 + Added the ``ASYNC_TEST_TIMEOUT`` environment variable. """ if timeout is None: timeout = get_async_test_timeout() @@ -385,7 +403,7 @@ def gen_test(func=None, timeout=None): response = yield gen.Task(self.fetch('/')) By default, ``@gen_test`` times out after 5 seconds. The timeout may be - overridden globally with the ASYNC_TEST_TIMEOUT environment variable, + overridden globally with the ``ASYNC_TEST_TIMEOUT`` environment variable, or for each test with the ``timeout`` keyword argument:: class MyTest(AsyncHTTPTestCase): @@ -393,8 +411,9 @@ def gen_test(func=None, timeout=None): def test_something_slow(self): response = yield gen.Task(self.fetch('/')) - If both the environment variable and the parameter are set, ``gen_test`` - uses the maximum of the two. + .. versionadded:: 3.1 + The ``timeout`` argument and ``ASYNC_TEST_TIMEOUT`` environment + variable. """ if timeout is None: timeout = get_async_test_timeout() diff --git a/libs/tornado/web.py b/libs/tornado/web.py index 6abf42c..eade230 100755 --- a/libs/tornado/web.py +++ b/libs/tornado/web.py @@ -52,6 +52,7 @@ request. from __future__ import absolute_import, division, print_function, with_statement + import base64 import binascii import datetime @@ -131,14 +132,14 @@ class RequestHandler(object): self.path_kwargs = None self.ui = ObjectDict((n, self._ui_method(m)) for n, m in application.ui_methods.items()) - # UIModules are available as both `modules` and `_modules` in the + # UIModules are available as both `modules` and `_tt_modules` in the # template namespace. Historically only `modules` was available # but could be clobbered by user additions to the namespace. - # The template {% module %} directive looks in `_modules` to avoid + # The template {% module %} directive looks in `_tt_modules` to avoid # possible conflicts. - self.ui["_modules"] = ObjectDict((n, self._ui_module(n, m)) for n, m in - application.ui_modules.items()) - self.ui["modules"] = self.ui["_modules"] + self.ui["_tt_modules"] = _UIModuleNamespace(self, + application.ui_modules) + self.ui["modules"] = self.ui["_tt_modules"] self.clear() # Check since connection is not available in WSGI if getattr(self.request, "connection", None): @@ -198,6 +199,15 @@ class RequestHandler(object): Override this method to perform common initialization regardless of the request method. + + Asynchronous support: Decorate this method with `.gen.coroutine` + or `.return_future` to make it asynchronous (the + `asynchronous` decorator cannot be used on `prepare`). + If this method returns a `.Future` execution will not proceed + until the `.Future` is done. + + .. versionadded:: 3.1 + Asynchronous support. 
""" pass @@ -232,11 +242,14 @@ class RequestHandler(object): self._headers = httputil.HTTPHeaders({ "Server": "TornadoServer/%s" % tornado.version, "Content-Type": "text/html; charset=UTF-8", - "Date": httputil.format_timestamp(time.gmtime()), + "Date": httputil.format_timestamp(time.time()), }) self.set_default_headers() - if not self.request.supports_http_1_1(): - if self.request.headers.get("Connection") == "Keep-Alive": + if (not self.request.supports_http_1_1() and + getattr(self.request, 'connection', None) and + not self.request.connection.no_keep_alive): + conn_header = self.request.headers.get("Connection") + if conn_header and (conn_header.lower() == "keep-alive"): self.set_header("Connection", "Keep-Alive") self._write_buffer = [] self._status_code = 200 @@ -300,6 +313,8 @@ class RequestHandler(object): if name in self._headers: del self._headers[name] + _INVALID_HEADER_CHAR_RE = re.compile(br"[\x00-\x1f]") + def _convert_header_value(self, value): if isinstance(value, bytes_type): pass @@ -315,7 +330,8 @@ class RequestHandler(object): # If \n is allowed into the header, it is possible to inject # additional headers or split the request. Also cap length to # prevent obviously erroneous values. - if len(value) > 4000 or re.search(br"[\x00-\x1f]", value): + if (len(value) > 4000 or + RequestHandler._INVALID_HEADER_CHAR_RE.search(value)): raise ValueError("Unsafe header value %r", value) return value @@ -325,7 +341,7 @@ class RequestHandler(object): """Returns the value of the argument with the given name. If default is not provided, the argument is considered to be - required, and we throw an HTTP 400 exception if it is missing. + required, and we raise a `MissingArgumentError` if it is missing. If the argument appears in the url more than once, we return the last value. @@ -335,7 +351,7 @@ class RequestHandler(object): args = self.get_arguments(name, strip=strip) if not args: if default is self._ARG_DEFAULT: - raise HTTPError(400, "Missing argument %s" % name) + raise MissingArgumentError(name) return default return args[-1] @@ -488,10 +504,8 @@ class RequestHandler(object): else: assert isinstance(status, int) and 300 <= status <= 399 self.set_status(status) - # Remove whitespace - url = re.sub(br"[\x00-\x20]+", "", utf8(url)) self.set_header("Location", urlparse.urljoin(utf8(self.request.uri), - url)) + utf8(url))) self.finish() def write(self, chunk): @@ -680,7 +694,11 @@ class RequestHandler(object): has been run, the previous callback will be discarded. """ if self.application._wsgi: - raise Exception("WSGI applications do not support flush()") + # WSGI applications cannot usefully support flush, so just make + # it a no-op (and run the callback immediately). 
+ if callback is not None: + callback() + return + chunk = b"".join(self._write_buffer) self._write_buffer = [] @@ -720,13 +738,10 @@ class RequestHandler(object): if (self._status_code == 200 and self.request.method in ("GET", "HEAD") and "Etag" not in self._headers): - etag = self.compute_etag() - if etag is not None: - self.set_header("Etag", etag) - inm = self.request.headers.get("If-None-Match") - if inm and inm.find(etag) != -1: - self._write_buffer = [] - self.set_status(304) + self.set_etag_header() + if self.check_etag_header(): + self._write_buffer = [] + self.set_status(304) if self._status_code == 304: assert not self._write_buffer, "Cannot send body with 304" self._clear_headers_for_304() @@ -891,6 +906,10 @@ class RequestHandler(object): self._current_user = self.get_current_user() return self._current_user + @current_user.setter + def current_user(self, value): + self._current_user = value + def get_current_user(self): """Override to determine the current user from, e.g., a cookie.""" return None @@ -976,27 +995,30 @@ class RequestHandler(object): return '' - def static_url(self, path, include_host=None): + def static_url(self, path, include_host=None, **kwargs): """Returns a static URL for the given relative static file path. This method requires you set the ``static_path`` setting in your application (which specifies the root directory of your static files). - We append ``?v=<signature>`` to the returned URL, which makes our - static file handler set an infinite expiration header on the - returned content. The signature is based on the content of the - file. + This method returns a versioned url (by default appending + ``?v=<signature>``), which allows the static files to be + cached indefinitely. This can be disabled by passing + ``include_version=False`` (in the default implementation; + other static file implementations are not required to support + this, but they may support other options). By default this method returns URLs relative to the current host, but if ``include_host`` is true the URL returned will be absolute. If this handler has an ``include_host`` attribute, that value will be used as the default for all `static_url` calls that do not pass ``include_host`` as a keyword argument. + """ self.require_setting("static_path", "static_url") - static_handler_class = self.settings.get( - "static_handler_class", StaticFileHandler) + get_url = self.settings.get("static_handler_class", + StaticFileHandler).make_static_url if include_host is None: include_host = getattr(self, "include_host", False) @@ -1005,7 +1027,8 @@ class RequestHandler(object): base = self.request.protocol + "://" + self.request.host else: base = "" - return base + static_handler_class.make_static_url(self.settings, path) + + return base + get_url(self.settings, path, **kwargs) def async_callback(self, callback, *args, **kwargs): """Obsolete - catches exceptions from the wrapped function. @@ -1041,6 +1064,8 @@ class RequestHandler(object): def compute_etag(self): """Computes the etag header to be used for this request. + By default uses a hash of the content written so far. + May be overridden to provide custom etag implementations, or may return None to disable tornado's default etag support. """ @@ -1049,6 +1074,38 @@ class RequestHandler(object): hasher.update(part) return '"%s"' % hasher.hexdigest() + def set_etag_header(self): + """Sets the response's Etag header using ``self.compute_etag()``. + + Note: no header will be set if ``compute_etag()`` returns ``None``.
+ + This method is called automatically when the request is finished. + """ + etag = self.compute_etag() + if etag is not None: + self.set_header("Etag", etag) + + def check_etag_header(self): + """Checks the ``Etag`` header against the request's ``If-None-Match``. + + Returns ``True`` if the request's Etag matches and a 304 should be + returned. For example:: + + self.set_etag_header() + if self.check_etag_header(): + self.set_status(304) + return + + This method is called automatically when the request is finished, + but may be called earlier for applications that override + `compute_etag` and want to do an early check for ``If-None-Match`` + before completing the request. The ``Etag`` header should be set + (perhaps with `set_etag_header`) before calling this method. + """ + etag = self._headers.get("Etag") + inm = utf8(self.request.headers.get("If-None-Match", "")) + return bool(etag and inm and inm.find(etag) >= 0) + def _stack_context_handle_exception(self, type, value, traceback): try: # For historical reasons _handle_request_exception only takes @@ -1074,15 +1131,41 @@ class RequestHandler(object): if self.request.method not in ("GET", "HEAD", "OPTIONS") and \ self.application.settings.get("xsrf_cookies"): self.check_xsrf_cookie() - self.prepare() - if not self._finished: - getattr(self, self.request.method.lower())( - *self.path_args, **self.path_kwargs) - if self._auto_finish and not self._finished: - self.finish() + self._when_complete(self.prepare(), self._execute_method) except Exception as e: self._handle_request_exception(e) + def _when_complete(self, result, callback): + try: + if result is None: + callback() + elif isinstance(result, Future): + if result.done(): + if result.result() is not None: + raise ValueError('Expected None, got %r' % result) + callback() + else: + # Delayed import of IOLoop because it's not available + # on app engine + from tornado.ioloop import IOLoop + IOLoop.current().add_future( + result, functools.partial(self._when_complete, + callback=callback)) + else: + raise ValueError("Expected Future or None, got %r" % result) + except Exception as e: + self._handle_request_exception(e) + + def _execute_method(self): + if not self._finished: + method = getattr(self, self.request.method.lower()) + self._when_complete(method(*self.path_args, **self.path_kwargs), + self._execute_finish) + + def _execute_finish(self): + if self._auto_finish and not self._finished: + self.finish() + def _generate_headers(self): reason = self._reason lines = [utf8(self.request.version + " " + @@ -1109,21 +1192,41 @@ class RequestHandler(object): " (" + self.request.remote_ip + ")" def _handle_request_exception(self, e): + self.log_exception(*sys.exc_info()) + if self._finished: + # Extra errors after the request has been finished should + # be logged, but there is no reason to continue to try and + # send a response. + return if isinstance(e, HTTPError): - if e.log_message: - format = "%d %s: " + e.log_message - args = [e.status_code, self._request_summary()] + list(e.args) - gen_log.warning(format, *args) if e.status_code not in httputil.responses and not e.reason: gen_log.error("Bad HTTP status code: %d", e.status_code) self.send_error(500, exc_info=sys.exc_info()) else: self.send_error(e.status_code, exc_info=sys.exc_info()) else: - app_log.error("Uncaught exception %s\n%r", self._request_summary(), - self.request, exc_info=True) self.send_error(500, exc_info=sys.exc_info()) + def log_exception(self, typ, value, tb): + """Override to customize logging of uncaught exceptions.
+ + By default logs instances of `HTTPError` as warnings without + stack traces (on the ``tornado.general`` logger), and all + other exceptions as errors with stack traces (on the + ``tornado.application`` logger). + + .. versionadded:: 3.1 + """ + if isinstance(value, HTTPError): + if value.log_message: + format = "%d %s: " + value.log_message + args = ([value.status_code, self._request_summary()] + + list(value.args)) + gen_log.warning(format, *args) + else: + app_log.error("Uncaught exception %s\n%r", self._request_summary(), + self.request, exc_info=(typ, value, tb)) + def _ui_module(self, name, module): def render(*args, **kwargs): if not hasattr(self, "_active_modules"): @@ -1152,6 +1255,11 @@ class RequestHandler(object): def asynchronous(method): """Wrap request handler methods with this if they are asynchronous. + This decorator is unnecessary if the method is also decorated with + ``@gen.coroutine`` (it is legal but unnecessary to use the two + decorators together, in which case ``@asynchronous`` must be + first). + This decorator should only be applied to the :ref:`HTTP verb methods <verbs>`; its behavior is undefined for any other method. This decorator does not *make* a method asynchronous; it tells @@ -1175,6 +1283,8 @@ def asynchronous(method): self.write("Downloaded!") self.finish() + .. versionadded:: 3.1 + The ability to use ``@gen.coroutine`` without ``@asynchronous``. """ # Delay the IOLoop import because it's not available on app engine. from tornado.ioloop import IOLoop @@ -1461,7 +1571,8 @@ class Application(object): def unquote(s): if s is None: return s - return escape.url_unescape(s, encoding=None) + return escape.url_unescape(s, encoding=None, + plus=False) # Pass matched groups to the handler. Since # match.groups() includes both named and unnamed groups, # we want to use either groups or groupdict but not both. @@ -1559,6 +1670,20 @@ class HTTPError(Exception): return message +class MissingArgumentError(HTTPError): + """Exception raised by `RequestHandler.get_argument`. + + This is a subclass of `HTTPError`, so if it is uncaught a 400 response + code will be used instead of 500 (and a stack trace will not be logged). + + .. versionadded:: 3.1 + """ + def __init__(self, arg_name): + super(MissingArgumentError, self).__init__( + 400, 'Missing argument %s' % arg_name) + self.arg_name = arg_name + + class ErrorHandler(RequestHandler): """Generates an error response with ``status_code`` for all requests.""" def initialize(self, status_code): @@ -1594,21 +1719,59 @@ class RedirectHandler(RequestHandler): class StaticFileHandler(RequestHandler): """A simple handler that can serve static content from a directory. - To map a path to this handler for a static data directory ``/var/www``, + A `StaticFileHandler` is configured automatically if you pass the + ``static_path`` keyword argument to `Application`. This handler + can be customized with the ``static_url_prefix``, ``static_handler_class``, + and ``static_handler_args`` settings. + + To map an additional path to this handler for a static data directory you would add a line to your application like:: application = web.Application([ - (r"/static/(.*)", web.StaticFileHandler, {"path": "/var/www"}), + (r"/content/(.*)", web.StaticFileHandler, {"path": "/var/www"}), ]) - The local root directory of the content should be passed as the ``path`` - argument to the handler. - - To support aggressive browser caching, if the argument ``v`` is given - with the path, we set an infinite HTTP expiration header.
So, if you - want browsers to cache a file indefinitely, send them to, e.g., - ``/static/images/myimage.png?v=xxx``. Override `get_cache_time` method for - more fine-grained cache control. + The handler constructor requires a ``path`` argument, which specifies the + local root directory of the content to be served. + + Note that a capture group in the regex is required to parse the value for + the ``path`` argument to the get() method (different from the constructor + argument above); see `URLSpec` for details. + + To maximize the effectiveness of browser caching, this class supports + versioned urls (by default using the argument ``?v=``). If a version + is given, we instruct the browser to cache this file indefinitely. + `make_static_url` (also available as `RequestHandler.static_url`) can + be used to construct a versioned url. + + This handler is intended primarily for use in development and light-duty + file serving; for heavy traffic it will be more efficient to use + a dedicated static file server (such as nginx or Apache). We support + the HTTP ``Accept-Ranges`` mechanism to return partial content (because + some browsers require this functionality to be present to seek in + HTML5 audio or video), but this handler should not be used with + files that are too large to fit comfortably in memory. + + **Subclassing notes** + + This class is designed to be extensible by subclassing, but because + of the way static urls are generated with class methods rather than + instance methods, the inheritance patterns are somewhat unusual. + Be sure to use the ``@classmethod`` decorator when overriding a + class method. Instance methods may use the attributes ``self.path``, + ``self.absolute_path``, and ``self.modified``. + + To change the way static urls are generated (e.g. to match the behavior + of another server or CDN), override `make_static_url`, `parse_url_path`, + `get_cache_time`, and/or `get_version`. + + To replace all interaction with the filesystem (e.g. to serve + static content from a database), override `get_content`, + `get_content_size`, `get_modified_time`, `get_absolute_path`, and + `validate_absolute_path`. + + .. versionchanged:: 3.1 + Many of the methods for subclasses were added in Tornado 3.1. """ CACHE_MAX_AGE = 86400 * 365 * 10 # 10 years @@ -1616,7 +1779,7 @@ class StaticFileHandler(RequestHandler): _lock = threading.Lock() # protects _static_hashes def initialize(self, path, default_filename=None): - self.root = os.path.abspath(path) + os.path.sep + self.root = path self.default_filename = default_filename @classmethod @@ -1628,60 +1791,270 @@ class StaticFileHandler(RequestHandler): self.get(path, include_body=False) def get(self, path, include_body=True): - path = self.parse_url_path(path) - abspath = os.path.abspath(os.path.join(self.root, path)) - # os.path.abspath strips a trailing / - # it needs to be temporarily added back for requests to root/ - if not (abspath + os.path.sep).startswith(self.root): - raise HTTPError(403, "%s is not in root static directory", path) - if os.path.isdir(abspath) and self.default_filename is not None: - # need to look at the request.path here for when path is empty - # but there is some prefix to the path that was already - # trimmed by the routing - if not self.request.path.endswith("/"): - self.redirect(self.request.path + "/") + # Set up our path instance variables.
+ self.path = self.parse_url_path(path) + del path # make sure we don't refer to path instead of self.path again + absolute_path = self.get_absolute_path(self.root, self.path) + self.absolute_path = self.validate_absolute_path( + self.root, absolute_path) + if self.absolute_path is None: + return + + self.modified = self.get_modified_time() + self.set_headers() + + if self.should_return_304(): + self.set_status(304) + return + + request_range = None + range_header = self.request.headers.get("Range") + if range_header: + # As per RFC 2616 14.16, if an invalid Range header is specified, + # the request will be treated as if the header didn't exist. + request_range = httputil._parse_request_range(range_header) + + if request_range: + start, end = request_range + size = self.get_content_size() + if (start is not None and start >= size) or end == 0: + # As per RFC 2616 14.35.1, a range is not satisfiable only: if + # the first requested byte is equal to or greater than the + # content, or when a suffix with length 0 is specified + self.set_status(416) # Range Not Satisfiable + self.set_header("Content-Type", "text/plain") + self.set_header("Content-Range", "bytes */%s" %(size, )) return - abspath = os.path.join(abspath, self.default_filename) - if not os.path.exists(abspath): - raise HTTPError(404) - if not os.path.isfile(abspath): - raise HTTPError(403, "%s is not a file", path) + if start is not None and start < 0: + start += size + # Note: only return HTTP 206 if less than the entire range has been + # requested. Not only is this semantically correct, but Chrome + # refuses to play audio if it gets an HTTP 206 in response to + # ``Range: bytes=0-``. + if size != (end or size) - (start or 0): + self.set_status(206) # Partial Content + self.set_header("Content-Range", + httputil._get_content_range(start, end, size)) + else: + start = end = None + content = self.get_content(self.absolute_path, start, end) + if isinstance(content, bytes_type): + content = [content] + content_length = 0 + for chunk in content: + if include_body: + self.write(chunk) + else: + content_length += len(chunk) + if not include_body: + assert self.request.method == "HEAD" + self.set_header("Content-Length", content_length) - stat_result = os.stat(abspath) - modified = datetime.datetime.utcfromtimestamp(stat_result[stat.ST_MTIME]) + def compute_etag(self): + """Sets the ``Etag`` header based on static url version. + + This allows efficient ``If-None-Match`` checks against cached + versions, and sends the correct ``Etag`` for a partial response + (i.e. the same ``Etag`` as the full file). + + .. versionadded:: 3.1 + """ + version_hash = self._get_cached_version(self.absolute_path) + if not version_hash: + return None + return '"%s"' % (version_hash, ) + + def set_headers(self): + """Sets the content and caching headers on the response. - self.set_header("Last-Modified", modified) + .. 
versionadded:: 3.1 + """ + self.set_header("Accept-Ranges", "bytes") + self.set_etag_header() - mime_type, encoding = mimetypes.guess_type(abspath) - if mime_type: - self.set_header("Content-Type", mime_type) + if self.modified is not None: + self.set_header("Last-Modified", self.modified) - cache_time = self.get_cache_time(path, modified, mime_type) + content_type = self.get_content_type() + if content_type: + self.set_header("Content-Type", content_type) + cache_time = self.get_cache_time(self.path, self.modified, content_type) if cache_time > 0: self.set_header("Expires", datetime.datetime.utcnow() + datetime.timedelta(seconds=cache_time)) self.set_header("Cache-Control", "max-age=" + str(cache_time)) - self.set_extra_headers(path) + self.set_extra_headers(self.path) + + def should_return_304(self): + """Returns True if the headers indicate that we should return 304. + + .. versionadded:: 3.1 + """ + if self.check_etag_header(): + return True # Check the If-Modified-Since, and don't send the result if the # content has not been modified ims_value = self.request.headers.get("If-Modified-Since") if ims_value is not None: date_tuple = email.utils.parsedate(ims_value) - if_since = datetime.datetime(*date_tuple[:6]) - if if_since >= modified: - self.set_status(304) + if date_tuple is not None: + if_since = datetime.datetime(*date_tuple[:6]) + if if_since >= self.modified: + return True + + return False + + @classmethod + def get_absolute_path(cls, root, path): + """Returns the absolute location of ``path`` relative to ``root``. + + ``root`` is the path configured for this `StaticFileHandler` + (in most cases the ``static_path`` `Application` setting). + + This class method may be overridden in subclasses. By default + it returns a filesystem path, but other strings may be used + as long as they are unique and understood by the subclass's + overridden `get_content`. + + .. versionadded:: 3.1 + """ + abspath = os.path.abspath(os.path.join(root, path)) + return abspath + + def validate_absolute_path(self, root, absolute_path): + """Validate and return the absolute path. + + ``root`` is the configured path for the `StaticFileHandler`, + and ``path`` is the result of `get_absolute_path` + + This is an instance method called during request processing, + so it may raise `HTTPError` or use methods like + `RequestHandler.redirect` (return None after redirecting to + halt further processing). This is where 404 errors for missing files + are generated. + + This method may modify the path before returning it, but note that + any such modifications will not be understood by `make_static_url`. + + In instance methods, this method's result is available as + ``self.absolute_path``. + + .. 
versionadded:: 3.1 + """ + root = os.path.abspath(root) + # os.path.abspath strips a trailing / + # it needs to be temporarily added back for requests to root/ + if not (absolute_path + os.path.sep).startswith(root): + raise HTTPError(403, "%s is not in root static directory", + self.path) + if (os.path.isdir(absolute_path) and + self.default_filename is not None): + # need to look at the request.path here for when path is empty + # but there is some prefix to the path that was already + # trimmed by the routing + if not self.request.path.endswith("/"): + self.redirect(self.request.path + "/", permanent=True) return + absolute_path = os.path.join(absolute_path, self.default_filename) + if not os.path.exists(absolute_path): + raise HTTPError(404) + if not os.path.isfile(absolute_path): + raise HTTPError(403, "%s is not a file", self.path) + return absolute_path + @classmethod + def get_content(cls, abspath, start=None, end=None): + """Retrieve the content of the requested resource which is located + at the given absolute path. + + This class method may be overridden by subclasses. Note that its + signature is different from other overridable class methods + (no ``settings`` argument); this is deliberate to ensure that + ``abspath`` is able to stand on its own as a cache key. + + This method should either return a byte string or an iterator + of byte strings. The latter is preferred for large files + as it helps reduce memory fragmentation. + + .. versionadded:: 3.1 + """ with open(abspath, "rb") as file: - data = file.read() - if include_body: - self.write(data) + if start is not None: + file.seek(start) + if end is not None: + remaining = end - (start or 0) else: - assert self.request.method == "HEAD" - self.set_header("Content-Length", len(data)) + remaining = None + while True: + chunk_size = 64 * 1024 + if remaining is not None and remaining < chunk_size: + chunk_size = remaining + chunk = file.read(chunk_size) + if chunk: + if remaining is not None: + remaining -= len(chunk) + yield chunk + else: + if remaining is not None: + assert remaining == 0 + return + + @classmethod + def get_content_version(cls, abspath): + """Returns a version string for the resource at the given path. + + This class method may be overridden by subclasses. The + default implementation is a hash of the file's contents. + + .. versionadded:: 3.1 + """ + data = cls.get_content(abspath) + hasher = hashlib.md5() + if isinstance(data, bytes_type): + hasher.update(data) + else: + for chunk in data: + hasher.update(chunk) + return hasher.hexdigest() + + def _stat(self): + if not hasattr(self, '_stat_result'): + self._stat_result = os.stat(self.absolute_path) + return self._stat_result + + def get_content_size(self): + """Retrieve the total size of the resource at the given path. + + This method may be overridden by subclasses. It will only + be called if a partial result is requested from `get_content` + + .. versionadded:: 3.1 + """ + stat_result = self._stat() + return stat_result[stat.ST_SIZE] + + def get_modified_time(self): + """Returns the time that ``self.absolute_path`` was last modified. + + May be overridden in subclasses. Should return a `~datetime.datetime` + object or None. + + .. versionadded:: 3.1 + """ + stat_result = self._stat() + modified = datetime.datetime.utcfromtimestamp(stat_result[stat.ST_MTIME]) + return modified + + def get_content_type(self): + """Returns the ``Content-Type`` header to be used for this request. + + .. 
versionadded:: 3.1 + """ + mime_type, encoding = mimetypes.guess_type(self.absolute_path) + return mime_type def set_extra_headers(self, path): """For subclass to add extra headers to the response""" @@ -1701,62 +2074,80 @@ return self.CACHE_MAX_AGE if "v" in self.request.arguments else 0 @classmethod - def make_static_url(cls, settings, path): + def make_static_url(cls, settings, path, include_version=True): """Constructs a versioned url for the given path. - This method may be overridden in subclasses (but note that it is - a class method rather than an instance method). + This method may be overridden in subclasses (but note that it + is a class method rather than an instance method). Subclasses + are only required to implement the signature + ``make_static_url(cls, settings, path)``; other keyword + arguments may be passed through `~RequestHandler.static_url` + but are not standard. ``settings`` is the `Application.settings` dictionary. ``path`` is the static path being requested. The url returned should be relative to the current host. + + ``include_version`` determines whether the generated URL should + include the query string containing the version hash of the + file corresponding to the given ``path``. + """ - static_url_prefix = settings.get('static_url_prefix', '/static/') + url = settings.get('static_url_prefix', '/static/') + path + if not include_version: + return url + version_hash = cls.get_version(settings, path) - if version_hash: - return static_url_prefix + path + "?v=" + version_hash - return static_url_prefix + path + if not version_hash: + return url + + return '%s?v=%s' % (url, version_hash) + + def parse_url_path(self, url_path): + """Converts a static URL path into a filesystem path. + + ``url_path`` is the path component of the URL with + ``static_url_prefix`` removed. The return value should be + a filesystem path relative to ``static_path``. + + This is the inverse of `make_static_url`. + """ + if os.path.sep != "/": + url_path = url_path.replace("/", os.path.sep) + return url_path @classmethod def get_version(cls, settings, path): """Generate the version string to be used in static URLs. - This method may be overridden in subclasses (but note that it - is a class method rather than a static method). The default - implementation uses a hash of the file's contents. - ``settings`` is the `Application.settings` dictionary and ``path`` is the relative location of the requested asset on the filesystem. The returned value should be a string, or ``None`` if no version could be determined. + + .. versionchanged:: 3.1 + This method was previously recommended for subclasses to override; + `get_content_version` is now preferred as it allows the base + class to handle caching of the result.
""" - abs_path = os.path.join(settings["static_path"], path) + abs_path = cls.get_absolute_path(settings['static_path'], path) + return cls._get_cached_version(abs_path) + + @classmethod + def _get_cached_version(cls, abs_path): with cls._lock: hashes = cls._static_hashes if abs_path not in hashes: try: - f = open(abs_path, "rb") - hashes[abs_path] = hashlib.md5(f.read()).hexdigest() - f.close() + hashes[abs_path] = cls.get_content_version(abs_path) except Exception: - gen_log.error("Could not open static file %r", path) + gen_log.error("Could not open static file %r", abs_path) hashes[abs_path] = None hsh = hashes.get(abs_path) if hsh: - return hsh[:5] + return hsh return None - def parse_url_path(self, url_path): - """Converts a static URL path into a filesystem path. - - ``url_path`` is the path component of the URL with - ``static_url_prefix`` removed. The return value should be - filesystem path relative to ``static_path``. - """ - if os.path.sep != "/": - url_path = url_path.replace("/", os.path.sep) - return url_path - class FallbackHandler(RequestHandler): """A `RequestHandler` that wraps another HTTP server callback. @@ -2028,6 +2419,22 @@ class TemplateModule(UIModule): return "".join(self._get_resources("html_body")) +class _UIModuleNamespace(object): + """Lazy namespace which creates UIModule proxies bound to a handler.""" + def __init__(self, handler, ui_modules): + self.handler = handler + self.ui_modules = ui_modules + + def __getitem__(self, key): + return self.handler._ui_module(key, self.ui_modules[key]) + + def __getattr__(self, key): + try: + return self[key] + except KeyError as e: + raise AttributeError(str(e)) + + class URLSpec(object): """Specifies mappings between URLs and handlers.""" def __init__(self, pattern, handler_class, kwargs=None, name=None): @@ -2100,7 +2507,7 @@ class URLSpec(object): for a in args: if not isinstance(a, (unicode_type, bytes_type)): a = str(a) - converted_args.append(escape.url_escape(utf8(a))) + converted_args.append(escape.url_escape(utf8(a), plus=False)) return self._path % tuple(converted_args) url = URLSpec diff --git a/libs/tornado/websocket.py b/libs/tornado/websocket.py index 7bc6513..1eef401 100755 --- a/libs/tornado/websocket.py +++ b/libs/tornado/websocket.py @@ -35,6 +35,7 @@ from tornado.concurrent import Future from tornado.escape import utf8, native_str from tornado import httpclient from tornado.ioloop import IOLoop +from tornado.iostream import StreamClosedError from tornado.log import gen_log, app_log from tornado.netutil import Resolver from tornado import simple_httpclient @@ -227,6 +228,22 @@ class WebSocketHandler(tornado.web.RequestHandler): """ return False + def set_nodelay(self, value): + """Set the no-delay flag for this stream. + + By default, small messages may be delayed and/or combined to minimize + the number of packets sent. This can sometimes cause 200-500ms delays + due to the interaction between Nagle's algorithm and TCP delayed + ACKs. To reduce this delay (at the expense of possibly increasing + bandwidth usage), call ``self.set_nodelay(True)`` once the websocket + connection is established. + + See `.BaseIOStream.set_nodelay` for additional details. + + .. versionadded:: 3.1 + """ + self.stream.set_nodelay(value) + def get_websocket_scheme(self): """Return the url scheme used for this request, either "ws" or "wss". 
@@ -572,7 +589,10 @@ class WebSocketProtocol13(WebSocketProtocol): opcode = 0x1 message = tornado.escape.utf8(message) assert isinstance(message, bytes_type) - self._write_frame(True, opcode, message) + try: + self._write_frame(True, opcode, message) + except StreamClosedError: + self._abort() def write_ping(self, data): """Send ping frame.""" @@ -580,7 +600,10 @@ class WebSocketProtocol13(WebSocketProtocol): self._write_frame(True, 0x9, data) def _receive_frame(self): - self.stream.read_bytes(2, self._on_frame_start) + try: + self.stream.read_bytes(2, self._on_frame_start) + except StreamClosedError: + self._abort() def _on_frame_start(self, data): header, payloadlen = struct.unpack("BB", data) @@ -598,34 +621,46 @@ class WebSocketProtocol13(WebSocketProtocol): # control frames must have payload < 126 self._abort() return - if payloadlen < 126: - self._frame_length = payloadlen + try: + if payloadlen < 126: + self._frame_length = payloadlen + if self._masked_frame: + self.stream.read_bytes(4, self._on_masking_key) + else: + self.stream.read_bytes(self._frame_length, self._on_frame_data) + elif payloadlen == 126: + self.stream.read_bytes(2, self._on_frame_length_16) + elif payloadlen == 127: + self.stream.read_bytes(8, self._on_frame_length_64) + except StreamClosedError: + self._abort() + + def _on_frame_length_16(self, data): + self._frame_length = struct.unpack("!H", data)[0] + try: if self._masked_frame: self.stream.read_bytes(4, self._on_masking_key) else: self.stream.read_bytes(self._frame_length, self._on_frame_data) - elif payloadlen == 126: - self.stream.read_bytes(2, self._on_frame_length_16) - elif payloadlen == 127: - self.stream.read_bytes(8, self._on_frame_length_64) - - def _on_frame_length_16(self, data): - self._frame_length = struct.unpack("!H", data)[0] - if self._masked_frame: - self.stream.read_bytes(4, self._on_masking_key) - else: - self.stream.read_bytes(self._frame_length, self._on_frame_data) + except StreamClosedError: + self._abort() def _on_frame_length_64(self, data): self._frame_length = struct.unpack("!Q", data)[0] - if self._masked_frame: - self.stream.read_bytes(4, self._on_masking_key) - else: - self.stream.read_bytes(self._frame_length, self._on_frame_data) + try: + if self._masked_frame: + self.stream.read_bytes(4, self._on_masking_key) + else: + self.stream.read_bytes(self._frame_length, self._on_frame_data) + except StreamClosedError: + self._abort() def _on_masking_key(self, data): self._frame_mask = data - self.stream.read_bytes(self._frame_length, self._on_masked_frame_data) + try: + self.stream.read_bytes(self._frame_length, self._on_masked_frame_data) + except StreamClosedError: + self._abort() def _apply_mask(self, mask, data): mask = array.array("B", mask) @@ -744,12 +779,14 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): 'Sec-WebSocket-Version': '13', }) + self.resolver = Resolver(io_loop=io_loop) super(WebSocketClientConnection, self).__init__( io_loop, None, request, lambda: None, self._on_http_response, - 104857600, Resolver(io_loop=io_loop)) + 104857600, self.resolver) def _on_close(self): self.on_message(None) + self.resolver.close() def _on_http_response(self, response): if not self.connect_future.done(): @@ -757,7 +794,7 @@ class WebSocketClientConnection(simple_httpclient._HTTPConnection): self.connect_future.set_exception(response.error) else: self.connect_future.set_exception(WebSocketError( - "Non-websocket response")) + "Non-websocket response")) def _handle_1xx(self, code): assert code == 101 diff --git 
a/libs/tornado/wsgi.py b/libs/tornado/wsgi.py index 62cff59..6b5bfb8 100755 --- a/libs/tornado/wsgi.py +++ b/libs/tornado/wsgi.py @@ -284,7 +284,8 @@ class WSGIContainer(object): environ = { "REQUEST_METHOD": request.method, "SCRIPT_NAME": "", - "PATH_INFO": to_wsgi_str(escape.url_unescape(request.path, encoding=None)), + "PATH_INFO": to_wsgi_str(escape.url_unescape( + request.path, encoding=None, plus=False)), "QUERY_STRING": request.query, "REMOTE_ADDR": request.remote_ip, "SERVER_NAME": host,
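The wsgi.py change above decodes ``PATH_INFO`` with ``plus=False``, so a literal ``+`` in the request path is no longer turned into a space. A minimal container sketch to exercise it (the app and port are illustrative, not part of this patch)::

    import tornado.httpserver
    import tornado.ioloop
    import tornado.wsgi

    def simple_app(environ, start_response):
        start_response("200 OK", [("Content-Type", "text/plain")])
        # A request for /a+b now arrives with PATH_INFO == "/a+b".
        return [("Hello from %s\n" % environ["PATH_INFO"]).encode("utf-8")]

    container = tornado.wsgi.WSGIContainer(simple_app)
    tornado.httpserver.HTTPServer(container).listen(8888)
    tornado.ioloop.IOLoop.instance().start()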