diff options
Diffstat (limited to 'google-appengine/google/appengine/ext/bulkload/bulkload_deprecated.py')
-rwxr-xr-x | google-appengine/google/appengine/ext/bulkload/bulkload_deprecated.py | 359 |
1 files changed, 359 insertions, 0 deletions
diff --git a/google-appengine/google/appengine/ext/bulkload/bulkload_deprecated.py b/google-appengine/google/appengine/ext/bulkload/bulkload_deprecated.py new file mode 100755 index 0000000..5eeacae --- /dev/null +++ b/google-appengine/google/appengine/ext/bulkload/bulkload_deprecated.py @@ -0,0 +1,359 @@ +#!/usr/bin/env python +# +# Copyright 2007 Google Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""DEPRECATED mix-in handler for bulk loading data into an application. + +Please use the new bulkloader. +""" + + + + + +import Cookie +import StringIO +import csv +import httplib +import os +import traceback + +import google +import wsgiref.handlers + +from google.appengine.api import datastore +from google.appengine.ext import webapp +from google.appengine.ext.bulkload import constants + + +def Validate(value, type): + """ Checks that value is non-empty and of the right type. + + Raises ValueError if value is None or empty, TypeError if it's not the given + type. + + Args: + value: any value + type: a type or tuple of types + """ + if not value: + raise ValueError('Value should not be empty; received %s.' % value) + elif not isinstance(value, type): + raise TypeError('Expected a %s, but received %s (a %s).' % + (type, value, value.__class__)) + + +class Loader(object): + """A base class for creating datastore entities from input data. + + To add a handler for bulk loading a new entity kind into your datastore, + write a subclass of this class that calls Loader.__init__ from your + class's __init__. + + If you need to run extra code to convert entities from the input + data, create new properties, or otherwise modify the entities before + they're inserted, override HandleEntity. + + See the CreateEntity method for the creation of entities from the + (parsed) input data. + """ + + __loaders = {} + __kind = None + __properties = None + + def __init__(self, kind, properties): + """ Constructor. + + Populates this Loader's kind and properties map. Also registers it with + the bulk loader, so that all you need to do is instantiate your Loader, + and the bulkload handler will automatically use it. + + Args: + kind: a string containing the entity kind that this loader handles + + properties: list of (name, converter) tuples. + + This is used to automatically convert the CSV columns into properties. + The converter should be a function that takes one argument, a string + value from the CSV file, and returns a correctly typed property value + that should be inserted. The tuples in this list should match the + columns in your CSV file, in order. + + For example: + [('name', str), + ('id_number', int), + ('email', datastore_types.Email), + ('user', users.User), + ('birthdate', lambda x: datetime.datetime.fromtimestamp(float(x))), + ('description', datastore_types.Text), + ] + """ + Validate(kind, basestring) + self.__kind = kind + + Validate(properties, list) + for name, fn in properties: + Validate(name, basestring) + assert callable(fn), ( + 'Conversion function %s for property %s is not callable.' % (fn, name)) + + self.__properties = properties + + Loader.__loaders[kind] = self + + + def kind(self): + """ Return the entity kind that this Loader handes. + """ + return self.__kind + + def CreateEntity(self, values, key_name=None): + """ Creates an entity from a list of property values. + + Args: + values: list/tuple of str + key_name: if provided, the name for the (single) resulting Entity + + Returns: + list of datastore.Entity + + The returned entities are populated with the property values from the + argument, converted to native types using the properties map given in + the constructor, and passed through HandleEntity. They're ready to be + inserted. + + Raises: + AssertionError if the number of values doesn't match the number + of properties in the properties map. + """ + Validate(values, (list, tuple)) + assert len(values) == len(self.__properties), ( + 'Expected %d CSV columns, found %d.' % + (len(self.__properties), len(values))) + + entity = datastore.Entity(self.__kind, name=key_name) + for (name, converter), val in zip(self.__properties, values): + if converter is bool and val.lower() in ('0', 'false', 'no'): + val = False + entity[name] = converter(val) + + entities = self.HandleEntity(entity) + + if entities is not None: + if not isinstance(entities, (list, tuple)): + entities = [entities] + + for entity in entities: + if not isinstance(entity, datastore.Entity): + raise TypeError('Expected a datastore.Entity, received %s (a %s).' % + (entity, entity.__class__)) + + return entities + + + def HandleEntity(self, entity): + """ Subclasses can override this to add custom entity conversion code. + + This is called for each entity, after its properties are populated from + CSV but before it is stored. Subclasses can override this to add custom + entity handling code. + + The entity to be inserted should be returned. If multiple entities should + be inserted, return a list of entities. If no entities should be inserted, + return None or []. + + Args: + entity: datastore.Entity + + Returns: + datastore.Entity or list of datastore.Entity + """ + return entity + + + @staticmethod + def RegisteredLoaders(): + """ Returns a list of the Loader instances that have been created. + """ + return dict(Loader.__loaders) + + +class BulkLoad(webapp.RequestHandler): + """A handler for bulk load requests. + + This class contains handlers for the bulkloading process. One for + GET to provide cookie information for the upload script, and one + handler for a POST request to upload the entities. + + In the POST request, the body contains the data representing the + entities' property values. The original format was a sequences of + lines of comma-separated values (and is handled by the Load + method). The current (version 1) format is a binary format described + in the Tools and Libraries section of the documentation, and is + handled by the LoadV1 method). + """ + + def get(self): + """ Handle a GET. Just show an info page. + """ + page = self.InfoPage(self.request.uri) + self.response.out.write(page) + + + def post(self): + """ Handle a POST. Reads CSV data, converts to entities, and stores them. + """ + self.response.headers['Content-Type'] = 'text/plain' + response, output = self.Load(self.request.get(constants.KIND_PARAM), + self.request.get(constants.CSV_PARAM)) + self.response.set_status(response) + self.response.out.write(output) + + + def InfoPage(self, uri): + """ Renders an information page with the POST endpoint and cookie flag. + + Args: + uri: a string containing the request URI + Returns: + A string with the contents of the info page to be displayed + """ + page = """ +<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html><head> +<title>Bulk Loader</title> +</head><body>""" + + page += ('The bulk load endpoint is: <a href="%s">%s</a><br />\n' % + (uri, uri)) + + cookies = os.environ.get('HTTP_COOKIE', None) + if cookies: + cookie = Cookie.BaseCookie(cookies) + for param in ['ACSID', 'dev_appserver_login']: + value = cookie.get(param) + if value: + page += ("Pass this flag to the client: --cookie='%s=%s'\n" % + (param, value.value)) + break + + else: + page += 'No cookie found!\n' + + page += '</body></html>' + return page + + def IterRows(self, reader): + """ Yields a tuple of a line number and row for each row of the CSV data. + + Args: + reader: a csv reader for the input data. + """ + line_num = 1 + for columns in reader: + yield (line_num, columns) + line_num += 1 + + def LoadEntities(self, iter, loader, key_format=None): + """Generates entities and loads them into the datastore. Returns + a tuple of HTTP code and string reply. + + Args: + iter: an iterator yielding pairs of a line number and row contents. + key_format: a format string to convert a line number into an + entity id. If None, then entity ID's are automatically generated. + """ + entities = [] + output = [] + for line_num, columns in iter: + key_name = None + if key_format is not None: + key_name = key_format % line_num + if columns: + try: + output.append('\nLoading from line %d...' % line_num) + new_entities = loader.CreateEntity(columns, key_name=key_name) + if new_entities: + entities.extend(new_entities) + output.append('done.') + except: + stacktrace = traceback.format_exc() + output.append('error:\n%s' % stacktrace) + return (httplib.BAD_REQUEST, ''.join(output)) + + datastore.Put(entities) + + return (httplib.OK, ''.join(output)) + + def Load(self, kind, data): + """Parses CSV data, uses a Loader to convert to entities, and stores them. + + On error, fails fast. Returns a "bad request" HTTP response code and + includes the traceback in the output. + + Args: + kind: a string containing the entity kind that this loader handles + data: a string containing the CSV data to load + + Returns: + tuple (response code, output) where: + response code: integer HTTP response code to return + output: string containing the HTTP response body + """ + data = data.encode('utf-8') + Validate(kind, basestring) + Validate(data, basestring) + output = [] + + try: + loader = Loader.RegisteredLoaders()[kind] + except KeyError: + output.append('Error: no Loader defined for kind %s.' % kind) + return (httplib.BAD_REQUEST, ''.join(output)) + + buffer = StringIO.StringIO(data) + reader = csv.reader(buffer, skipinitialspace=True) + + try: + csv.field_size_limit(800000) + except AttributeError: + pass + + return self.LoadEntities(self.IterRows(reader), loader) + + +def main(*loaders): + """Starts bulk upload. + + Raises TypeError if not, at least one Loader instance is given. + + Args: + loaders: One or more Loader instance. + """ + if not loaders: + raise TypeError('Expected at least one argument.') + + for loader in loaders: + if not isinstance(loader, Loader): + raise TypeError('Expected a Loader instance; received %r' % loader) + + application = webapp.WSGIApplication([('.*', BulkLoad)]) + wsgiref.handlers.CGIHandler().run(application) + +if __name__ == '__main__': + main() |