diff options
Diffstat (limited to 'google_appengine/google/appengine/api/urlfetch.py')
-rwxr-xr-x | google_appengine/google/appengine/api/urlfetch.py | 361 |
1 files changed, 361 insertions, 0 deletions
diff --git a/google_appengine/google/appengine/api/urlfetch.py b/google_appengine/google/appengine/api/urlfetch.py new file mode 100755 index 0000000..8d9e836 --- /dev/null +++ b/google_appengine/google/appengine/api/urlfetch.py @@ -0,0 +1,361 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""URL downloading API. + +Methods defined in this module: + Fetch(): fetchs a given URL using an HTTP GET or POST +""" + + + + + +import os +import UserDict +import urllib2 +import urlparse + +from google.appengine.api import apiproxy_stub_map +from google.appengine.api import urlfetch_service_pb +from google.appengine.api.urlfetch_errors import * +from google.appengine.runtime import apiproxy_errors + +MAX_REDIRECTS = 5 + +GET = 1 +POST = 2 +HEAD = 3 +PUT = 4 +DELETE = 5 + + +_URL_STRING_MAP = { + 'GET': GET, + 'POST': POST, + 'HEAD': HEAD, + 'PUT': PUT, + 'DELETE': DELETE, +} + + +_VALID_METHODS = frozenset(_URL_STRING_MAP.values()) + + +class _CaselessDict(UserDict.IterableUserDict): + """Case insensitive dictionary. + + This class was lifted from os.py and slightly modified. + """ + + def __init__(self): + UserDict.IterableUserDict.__init__(self) + self.caseless_keys = {} + + def __setitem__(self, key, item): + """Set dictionary item. + + Args: + key: Key of new item. Key is case insensitive, so "d['Key'] = value " + will replace previous values set by "d['key'] = old_value". + item: Item to store. + """ + caseless_key = key.lower() + if caseless_key in self.caseless_keys: + del self.data[self.caseless_keys[caseless_key]] + self.caseless_keys[caseless_key] = key + self.data[key] = item + + def __getitem__(self, key): + """Get dictionary item. + + Args: + key: Key of item to get. Key is case insensitive, so "d['Key']" is the + same as "d['key']". + + Returns: + Item associated with key. + """ + return self.data[self.caseless_keys[key.lower()]] + + def __delitem__(self, key): + """Remove item from dictionary. + + Args: + key: Key of item to remove. Key is case insensitive, so "del d['Key']" is + the same as "del d['key']" + """ + caseless_key = key.lower() + del self.data[self.caseless_keys[caseless_key]] + del self.caseless_keys[caseless_key] + + def has_key(self, key): + """Determine if dictionary has item with specific key. + + Args: + key: Key to check for presence. Key is case insensitive, so + "d.has_key('Key')" evaluates to the same value as "d.has_key('key')". + + Returns: + True if dictionary contains key, else False. + """ + return key.lower() in self.caseless_keys + + def __contains__(self, key): + """Same as 'has_key', but used for 'in' operator.'""" + return self.has_key(key) + + def get(self, key, failobj=None): + """Get dictionary item, defaulting to another value if it does not exist. + + Args: + key: Key of item to get. Key is case insensitive, so "d['Key']" is the + same as "d['key']". + failobj: Value to return if key not in dictionary. + """ + try: + cased_key = self.caseless_keys[key.lower()] + except KeyError: + return failobj + return self.data[cased_key] + + def update(self, dict=None, **kwargs): + """Update dictionary using values from another dictionary and keywords. + + Args: + dict: Dictionary to update from. + kwargs: Keyword arguments to update from. + """ + if dict: + try: + keys = dict.keys() + except AttributeError: + for k, v in dict: + self[k] = v + else: + for k in keys: + self[k] = dict[k] + if kwargs: + self.update(kwargs) + + def copy(self): + """Make a shallow, case sensitive copy of self.""" + return dict(self) + + +def _is_fetching_self(url, method): + """Checks if the fetch is for the same URL from which it originated. + + Args: + url: str, The URL being fetched. + method: value from _VALID_METHODS. + + Returns: + boolean indicating whether or not it seems that the app is trying to fetch + itself. + """ + if (method != GET or + "HTTP_HOST" not in os.environ or + "PATH_INFO" not in os.environ): + return False + + scheme, host_port, path, query, fragment = urlparse.urlsplit(url) + + if host_port == os.environ['HTTP_HOST']: + current_path = urllib2.unquote(os.environ['PATH_INFO']) + desired_path = urllib2.unquote(path) + + if (current_path == desired_path or + (current_path in ('', '/') and desired_path in ('', '/'))): + return True + + return False + + +def create_rpc(deadline=None, callback=None): + """Creates an RPC object for use with the urlfetch API. + + Args: + deadline: Optional deadline in seconds for the operation; the default + is a system-specific deadline (typically 5 seconds). + callback: Optional callable to invoke on completion. + + Returns: + An apiproxy_stub_map.UserRPC object specialized for this service. + """ + return apiproxy_stub_map.UserRPC('urlfetch', deadline, callback) + + +def fetch(url, payload=None, method=GET, headers={}, + allow_truncated=False, follow_redirects=True, + deadline=None): + """Fetches the given HTTP URL, blocking until the result is returned. + + Other optional parameters are: + method: GET, POST, HEAD, PUT, or DELETE + payload: POST or PUT payload (implies method is not GET, HEAD, or DELETE). + this is ignored if the method is not POST or PUT. + headers: dictionary of HTTP headers to send with the request + allow_truncated: if true, truncate large responses and return them without + error. Otherwise, ResponseTooLargeError is raised when a response is + truncated. + follow_redirects: if true (the default), redirects are + transparently followed and the response (if less than 5 + redirects) contains the final destination's payload and the + response status is 200. You lose, however, the redirect chain + information. If false, you see the HTTP response yourself, + including the 'Location' header, and redirects are not + followed. + deadline: deadline in seconds for the operation. + + We use a HTTP/1.1 compliant proxy to fetch the result. + + The returned data structure has the following fields: + content: string containing the response from the server + status_code: HTTP status code returned by the server + headers: dictionary of headers returned by the server + + If the URL is an empty string or obviously invalid, we throw an + urlfetch.InvalidURLError. If the server cannot be contacted, we throw a + urlfetch.DownloadError. Note that HTTP errors are returned as a part + of the returned structure, so HTTP errors like 404 do not result in an + exception. + """ + rpc = create_rpc(deadline=deadline) + make_fetch_call(rpc, url, payload, method, headers, + allow_truncated, follow_redirects) + return rpc.get_result() + + +def make_fetch_call(rpc, url, payload=None, method=GET, headers={}, + allow_truncated=False, follow_redirects=True): + """Executes the RPC call to fetch a given HTTP URL. + + The first argument is a UserRPC instance. See urlfetch.fetch for a + thorough description of remaining arguments. + """ + assert rpc.service == 'urlfetch', repr(rpc.service) + if isinstance(method, basestring): + method = method.upper() + method = _URL_STRING_MAP.get(method, method) + if method not in _VALID_METHODS: + raise InvalidMethodError('Invalid method %s.' % str(method)) + + if _is_fetching_self(url, method): + raise InvalidURLError("App cannot fetch the same URL as the one used for " + "the request.") + + request = urlfetch_service_pb.URLFetchRequest() + response = urlfetch_service_pb.URLFetchResponse() + request.set_url(url) + + if method == GET: + request.set_method(urlfetch_service_pb.URLFetchRequest.GET) + elif method == POST: + request.set_method(urlfetch_service_pb.URLFetchRequest.POST) + elif method == HEAD: + request.set_method(urlfetch_service_pb.URLFetchRequest.HEAD) + elif method == PUT: + request.set_method(urlfetch_service_pb.URLFetchRequest.PUT) + elif method == DELETE: + request.set_method(urlfetch_service_pb.URLFetchRequest.DELETE) + + if payload and (method == POST or method == PUT): + request.set_payload(payload) + + for key, value in headers.iteritems(): + header_proto = request.add_header() + header_proto.set_key(key) + header_proto.set_value(str(value)) + + request.set_followredirects(follow_redirects) + + if rpc.deadline is not None: + request.set_deadline(rpc.deadline) + + rpc.make_call('Fetch', request, response, _get_fetch_result, allow_truncated) + + +def _get_fetch_result(rpc): + """Check success, handle exceptions, and return converted RPC result. + + This method waits for the RPC if it has not yet finished, and calls the + post-call hooks on the first invocation. + + Args: + rpc: A UserRPC object. + + Raises: + InvalidURLError if the url was invalid. + DownloadError if there was a problem fetching the url. + ResponseTooLargeError if the response was either truncated (and + allow_truncated=False was passed to make_fetch_call()), or if it + was too big for us to download. + + Returns: + A _URLFetchResult object. + """ + assert rpc.service == 'urlfetch', repr(rpc.service) + assert rpc.method == 'Fetch', repr(rpc.method) + try: + rpc.check_success() + except apiproxy_errors.ApplicationError, err: + if (err.application_error == + urlfetch_service_pb.URLFetchServiceError.INVALID_URL): + raise InvalidURLError(str(err)) + if (err.application_error == + urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR): + raise DownloadError(str(err)) + if (err.application_error == + urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR): + raise DownloadError(str(err)) + if (err.application_error == + urlfetch_service_pb.URLFetchServiceError.RESPONSE_TOO_LARGE): + raise ResponseTooLargeError(None) + if (err.application_error == + urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED): + raise DownloadError(str(err)) + raise err + + response = rpc.response + allow_truncated = rpc.user_data + result = _URLFetchResult(response) + if response.contentwastruncated() and not allow_truncated: + raise ResponseTooLargeError(result) + return result + + +Fetch = fetch + + +class _URLFetchResult(object): + """A Pythonic representation of our fetch response protocol buffer. + """ + + def __init__(self, response_proto): + """Constructor. + + Args: + response_proto: the URLFetchResponse proto buffer to wrap. + """ + self.__pb = response_proto + self.content = response_proto.content() + self.status_code = response_proto.statuscode() + self.content_was_truncated = response_proto.contentwastruncated() + self.headers = _CaselessDict() + for header_proto in response_proto.header_list(): + self.headers[header_proto.key()] = header_proto.value() |