1 files changed, 361 insertions, 0 deletions
diff --git a/google_appengine/google/appengine/api/urlfetch.py b/google_appengine/google/appengine/api/urlfetch.py
new file mode 100755
index 0000000..8d9e836
--- /dev/null
+++ b/google_appengine/google/appengine/api/urlfetch.py
@@ -0,0 +1,361 @@
+#!/usr/bin/env python
+#
+# Copyright 2007 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+"""URL downloading API.
+
+Methods defined in this module:
+   Fetch(): fetchs a given URL using an HTTP GET or POST
+"""
+
+
+
+
+
+import os
+import UserDict
+import urllib2
+import urlparse
+
+from google.appengine.api import apiproxy_stub_map
+from google.appengine.api import urlfetch_service_pb
+from google.appengine.api.urlfetch_errors import *
+from google.appengine.runtime import apiproxy_errors
+
+MAX_REDIRECTS = 5
+
+GET = 1
+POST = 2
+HEAD = 3
+PUT = 4
+DELETE = 5
+
+
+_URL_STRING_MAP = {
+    'GET': GET,
+    'POST': POST,
+    'HEAD': HEAD,
+    'PUT': PUT,
+    'DELETE': DELETE,
+}
+
+
+_VALID_METHODS = frozenset(_URL_STRING_MAP.values())
+
+
+class _CaselessDict(UserDict.IterableUserDict):
+  """Case insensitive dictionary.
+
+  This class was lifted from os.py and slightly modified.
+  """
+
+  def __init__(self):
+    UserDict.IterableUserDict.__init__(self)
+    self.caseless_keys = {}
+
+  def __setitem__(self, key, item):
+    """Set dictionary item.
+
+    Args:
+      key: Key of new item.  Key is case insensitive, so "d['Key'] = value "
+        will replace previous values set by "d['key'] = old_value".
+      item: Item to store.
+    """
+    caseless_key = key.lower()
+    if caseless_key in self.caseless_keys:
+      del self.data[self.caseless_keys[caseless_key]]
+    self.caseless_keys[caseless_key] = key
+    self.data[key] = item
+
+  def __getitem__(self, key):
+    """Get dictionary item.
+
+    Args:
+      key: Key of item to get.  Key is case insensitive, so "d['Key']" is the
+        same as "d['key']".
+
+    Returns:
+      Item associated with key.
+    """
+    return self.data[self.caseless_keys[key.lower()]]
+
+  def __delitem__(self, key):
+    """Remove item from dictionary.
+
+    Args:
+      key: Key of item to remove.  Key is case insensitive, so "del d['Key']" is
+        the same as "del d['key']"
+    """
+    caseless_key = key.lower()
+    del self.data[self.caseless_keys[caseless_key]]
+    del self.caseless_keys[caseless_key]
+
+  def has_key(self, key):
+    """Determine if dictionary has item with specific key.
+
+    Args:
+      key: Key to check for presence.  Key is case insensitive, so
+        "d.has_key('Key')" evaluates to the same value as "d.has_key('key')".
+
+    Returns:
+      True if dictionary contains key, else False.
+    """
+    return key.lower() in self.caseless_keys
+
+  def __contains__(self, key):
+    """Same as 'has_key', but used for 'in' operator.'"""
+    return self.has_key(key)
+
+  def get(self, key, failobj=None):
+    """Get dictionary item, defaulting to another value if it does not exist.
+
+    Args:
+      key: Key of item to get.  Key is case insensitive, so "d['Key']" is the
+        same as "d['key']".
+      failobj: Value to return if key not in dictionary.
+    """
+    try:
+      cased_key = self.caseless_keys[key.lower()]
+    except KeyError:
+      return failobj
+    return self.data[cased_key]
+
+  def update(self, dict=None, **kwargs):
+    """Update dictionary using values from another dictionary and keywords.
+
+    Args:
+      dict: Dictionary to update from.
+      kwargs: Keyword arguments to update from.
+    """
+    if dict:
+      try:
+        keys = dict.keys()
+      except AttributeError:
+        for k, v in dict:
+          self[k] = v
+      else:
+        for k in keys:
+          self[k] = dict[k]
+    if kwargs:
+      self.update(kwargs)
+
+  def copy(self):
+    """Make a shallow, case sensitive copy of self."""
+    return dict(self)
+
+
+def _is_fetching_self(url, method):
+  """Checks if the fetch is for the same URL from which it originated.
+
+  Args:
+    url: str, The URL being fetched.
+    method: value from _VALID_METHODS.
+
+  Returns:
+    boolean indicating whether or not it seems that the app is trying to fetch
+      itself.
+  """
+  if (method != GET or
+      "HTTP_HOST" not in os.environ or
+      "PATH_INFO" not in os.environ):
+    return False
+
+  scheme, host_port, path, query, fragment = urlparse.urlsplit(url)
+
+  if host_port == os.environ['HTTP_HOST']:
+    current_path = urllib2.unquote(os.environ['PATH_INFO'])
+    desired_path = urllib2.unquote(path)
+
+    if (current_path == desired_path or
+        (current_path in ('', '/') and desired_path in ('', '/'))):
+      return True
+
+  return False
+
+
+def create_rpc(deadline=None, callback=None):
+  """Creates an RPC object for use with the urlfetch API.
+
+  Args:
+    deadline: Optional deadline in seconds for the operation; the default
+      is a system-specific deadline (typically 5 seconds).
+    callback: Optional callable to invoke on completion.
+
+  Returns:
+    An apiproxy_stub_map.UserRPC object specialized for this service.
+  """
+  return apiproxy_stub_map.UserRPC('urlfetch', deadline, callback)
+
+
+def fetch(url, payload=None, method=GET, headers={},
+          allow_truncated=False, follow_redirects=True,
+          deadline=None):
+  """Fetches the given HTTP URL, blocking until the result is returned.
+
+  Other optional parameters are:
+     method: GET, POST, HEAD, PUT, or DELETE
+     payload: POST or PUT payload (implies method is not GET, HEAD, or DELETE).
+       this is ignored if the method is not POST or PUT.
+     headers: dictionary of HTTP headers to send with the request
+     allow_truncated: if true, truncate large responses and return them without
+       error. Otherwise, ResponseTooLargeError is raised when a response is
+       truncated.
+     follow_redirects: if true (the default), redirects are
+       transparently followed and the response (if less than 5
+       redirects) contains the final destination's payload and the
+       response status is 200.  You lose, however, the redirect chain
+       information.  If false, you see the HTTP response yourself,
+       including the 'Location' header, and redirects are not
+       followed.
+     deadline: deadline in seconds for the operation.
+
+  We use a HTTP/1.1 compliant proxy to fetch the result.
+
+  The returned data structure has the following fields:
+     content: string containing the response from the server
+     status_code: HTTP status code returned by the server
+     headers: dictionary of headers returned by the server
+
+  If the URL is an empty string or obviously invalid, we throw an
+  urlfetch.InvalidURLError. If the server cannot be contacted, we throw a
+  urlfetch.DownloadError.  Note that HTTP errors are returned as a part
+  of the returned structure, so HTTP errors like 404 do not result in an
+  exception.
+  """
+  rpc = create_rpc(deadline=deadline)
+  make_fetch_call(rpc, url, payload, method, headers,
+                  allow_truncated, follow_redirects)
+  return rpc.get_result()
+
+
+def make_fetch_call(rpc, url, payload=None, method=GET, headers={},
+                    allow_truncated=False, follow_redirects=True):
+  """Executes the RPC call to fetch a given HTTP URL.
+
+  The first argument is a UserRPC instance.  See urlfetch.fetch for a
+  thorough description of remaining arguments.
+  """
+  assert rpc.service == 'urlfetch', repr(rpc.service)
+  if isinstance(method, basestring):
+    method = method.upper()
+  method = _URL_STRING_MAP.get(method, method)
+  if method not in _VALID_METHODS:
+    raise InvalidMethodError('Invalid method %s.' % str(method))
+
+  if _is_fetching_self(url, method):
+    raise InvalidURLError("App cannot fetch the same URL as the one used for "
+                          "the request.")
+
+  request = urlfetch_service_pb.URLFetchRequest()
+  response = urlfetch_service_pb.URLFetchResponse()
+  request.set_url(url)
+
+  if method == GET:
+    request.set_method(urlfetch_service_pb.URLFetchRequest.GET)
+  elif method == POST:
+    request.set_method(urlfetch_service_pb.URLFetchRequest.POST)
+  elif method == HEAD:
+    request.set_method(urlfetch_service_pb.URLFetchRequest.HEAD)
+  elif method == PUT:
+    request.set_method(urlfetch_service_pb.URLFetchRequest.PUT)
+  elif method == DELETE:
+    request.set_method(urlfetch_service_pb.URLFetchRequest.DELETE)
+
+  if payload and (method == POST or method == PUT):
+    request.set_payload(payload)
+
+  for key, value in headers.iteritems():
+    header_proto = request.add_header()
+    header_proto.set_key(key)
+    header_proto.set_value(str(value))
+
+  request.set_followredirects(follow_redirects)
+
+  if rpc.deadline is not None:
+    request.set_deadline(rpc.deadline)
+
+  rpc.make_call('Fetch', request, response, _get_fetch_result, allow_truncated)
+
+
+def _get_fetch_result(rpc):
+  """Check success, handle exceptions, and return converted RPC result.
+
+  This method waits for the RPC if it has not yet finished, and calls the
+  post-call hooks on the first invocation.
+
+  Args:
+    rpc: A UserRPC object.
+
+  Raises:
+    InvalidURLError if the url was invalid.
+    DownloadError if there was a problem fetching the url.
+    ResponseTooLargeError if the response was either truncated (and
+      allow_truncated=False was passed to make_fetch_call()), or if it
+      was too big for us to download.
+
+  Returns:
+    A _URLFetchResult object.
+  """
+  assert rpc.service == 'urlfetch', repr(rpc.service)
+  assert rpc.method == 'Fetch', repr(rpc.method)
+  try:
+    rpc.check_success()
+  except apiproxy_errors.ApplicationError, err:
+    if (err.application_error ==
+        urlfetch_service_pb.URLFetchServiceError.INVALID_URL):
+      raise InvalidURLError(str(err))
+    if (err.application_error ==
+        urlfetch_service_pb.URLFetchServiceError.UNSPECIFIED_ERROR):
+      raise DownloadError(str(err))
+    if (err.application_error ==
+        urlfetch_service_pb.URLFetchServiceError.FETCH_ERROR):
+      raise DownloadError(str(err))
+    if (err.application_error ==
+        urlfetch_service_pb.URLFetchServiceError.RESPONSE_TOO_LARGE):
+      raise ResponseTooLargeError(None)
+    if (err.application_error ==
+        urlfetch_service_pb.URLFetchServiceError.DEADLINE_EXCEEDED):
+      raise DownloadError(str(err))
+    raise err
+
+  response = rpc.response
+  allow_truncated = rpc.user_data
+  result = _URLFetchResult(response)
+  if response.contentwastruncated() and not allow_truncated:
+    raise ResponseTooLargeError(result)
+  return result
+
+
+Fetch = fetch
+
+
+class _URLFetchResult(object):
+  """A Pythonic representation of our fetch response protocol buffer.
+  """
+
+  def __init__(self, response_proto):
+    """Constructor.
+
+    Args:
+      response_proto: the URLFetchResponse proto buffer to wrap.
+    """
+    self.__pb = response_proto
+    self.content = response_proto.content()
+    self.status_code = response_proto.statuscode()
+    self.content_was_truncated = response_proto.contentwastruncated()
+    self.headers = _CaselessDict()
+    for header_proto in response_proto.header_list():
+      self.headers[header_proto.key()] = header_proto.value()