diff options
Diffstat (limited to 'google_appengine/lib/webob/docs/comment-example.txt')
-rw-r--r-- | google_appengine/lib/webob/docs/comment-example.txt | 414 |
1 files changed, 414 insertions, 0 deletions
diff --git a/google_appengine/lib/webob/docs/comment-example.txt b/google_appengine/lib/webob/docs/comment-example.txt new file mode 100644 index 0000000..2ea47ca --- /dev/null +++ b/google_appengine/lib/webob/docs/comment-example.txt @@ -0,0 +1,414 @@ +Comment Example +=============== + +.. contents:: + +Introduction +------------ + +This is an example of how to write WSGI middleware with WebOb. The +specific example adds a simple comment form to HTML web pages; any +page served through the middleware that is HTML gets a comment form +added to it, and shows any existing comments. + +Code +---- + +The finished code for this is available in +`docs/comment-example-code/example.py +<http://svn.pythonpaste.org/Paste/WebOb/trunk/docs/comment-example-code/example.py>`_ +-- you can run that file as a script to try it out. + +Instantiating Middleware +------------------------ + +Middleware of any complexity at all is usually best created as a +class with its configuration as arguments to that class. + +Every middleware needs an application (``app``) that it wraps. This +middleware also needs a location to store the comments; we'll put them +all in a single directory. + +.. code-block:: + + import os + + class Commenter(object): + def __init__(self, app, storage_dir): + self.app = app + self.storage_dir = storage_dir + if not os.path.exists(storage_dir): + os.makedirs(storage_dir) + +When you use this middleware, you'll use it like: + +.. code-block:: + + app = ... make the application ... + app = Commenter(app, storage_dir='./comments') + +For our application we'll use a simple static file server that is +included with `Paste <http://pythonpaste.org>`_ (use ``easy_install +Paste`` to install this). The setup is all at the bottom of +``example.py``, and looks like this: + +.. code-block:: + + if __name__ == '__main__': + import optparse + parser = optparse.OptionParser( + usage='%prog --port=PORT BASE_DIRECTORY' + ) + parser.add_option( + '-p', '--port', + default='8080', + dest='port', + type='int', + help='Port to serve on (default 8080)') + parser.add_option( + '--comment-data', + default='./comments', + dest='comment_data', + help='Place to put comment data into (default ./comments/)') + options, args = parser.parse_args() + if not args: + parser.error('You must give a BASE_DIRECTORY') + base_dir = args[0] + from paste.urlparser import StaticURLParser + app = StaticURLParser(base_dir) + app = Commenter(app, options.comment_data) + from wsgiref.simple_server import make_server + httpd = make_server('localhost', options.port, app) + print 'Serving on http://localhost:%s' % options.port + try: + httpd.serve_forever() + except KeyboardInterrupt: + print '^C' + +I won't explain it here, but basically it takes some options, creates +an application that serves static files +(``StaticURLParser(base_dir)``), wraps it with ``Commenter(app, +options.comment_data)`` then serves that. + +The Middleware +-------------- + +While we've created the class structure for the middleware, it doesn't +actually do anything. Here's a kind of minimal version of the +middleware (using WebOb): + +.. code-block:: + + from webob import Request + + class Commenter(object): + + def __init__(self, app, storage_dir): + self.app = app + self.storage_dir = storage_dir + if not os.path.exists(storage_dir): + os.makedirs(storage_dir) + + def __call__(self, environ, start_response): + req = Request(environ) + resp = req.get_response(self.app) + return resp(environ, start_response) + +This doesn't modify the response it any way. You could write it like +this without WebOb: + +.. code-block:: + + class Commenter(object): + ... + def __call__(self, environ, start_response): + return self.app(environ, start_response) + +But it won't be as convenient later. First, lets create a little bit +of infrastructure for our middleware. We need to save and load +per-url data (the comments themselves). We'll keep them in pickles, +where each url has a pickle named after the url (but double-quoted, so +``http://localhost:8080/index.html`` becomes +``http%3A%2F%2Flocalhost%3A8080%2Findex.html``). + +.. code-block:: + + from cPickle import load, dump + + class Commenter(object): + ... + + def get_data(self, url): + filename = self.url_filename(url) + if not os.path.exists(filename): + return [] + else: + f = open(filename, 'rb') + data = load(f) + f.close() + return data + + def save_data(self, url, data): + filename = self.url_filename(url) + f = open(filename, 'wb') + dump(data, f) + f.close() + + def url_filename(self, url): + # Double-quoting makes the filename safe + return os.path.join(self.storage_dir, urllib.quote(url, '')) + +You can get the full request URL with ``req.url``, so to get the +comment data with these methods you do ``data = +self.get_data(req.url)``. + +Now we'll update the ``__call__`` method to filter *some* responses, +and get the comment data for those. We don't want to change responses +that were error responses (anything but ``200``), nor do we want to +filter responses that aren't HTML. So we get: + +.. code-block:: + + class Commenter(object): + ... + + def __call__(self, environ, start_response): + req = Request(environ) + resp = req.get_response(self.app) + if resp.content_type != 'text/html' or resp.status_int != 200: + return resp(environ, start_response) + data = self.get_data(req.url) + ... do stuff with data, update resp ... + return resp(environ, start_response) + +So far we're punting on actually adding the comments to the page. We +also haven't defined what ``data`` will hold. Let's say it's a list +of dictionaries, where each dictionary looks like ``{'name': 'John +Doe', 'homepage': 'http://blog.johndoe.com', 'comments': 'Great +site!'}``. + +We'll also need a simple method to add stuff to the page. We'll use a +regular expression to find the end of the page and put text in: + +.. code-block:: + + import re + + class Commenter(object): + ... + + _end_body_re = re.compile(r'</body.*?>', re.I|re.S) + + def add_to_end(self, html, extra_html): + """ + Adds extra_html to the end of the html page (before </body>) + """ + match = self._end_body_re.search(html) + if not match: + return html + extra_html + else: + return html[:match.start()] + extra_html + html[match.start():] + +And then we'll use it like: + +.. code-block:: + + data = self.get_data(req.url) + body = resp.body + body = self.add_to_end(body, self.format_comments(data)) + resp.body = body + return resp(environ, start_response) + +We get the body, update it, and put it back in the response. This +also updates ``Content-Length``. Then we define: + +.. code-block:: + + from webob import html_escape + + class Commenter(object): + ... + + def format_comments(self, comments): + if not comments: + return '' + text = [] + text.append('<hr>') + text.append('<h2><a name="comment-area"></a>Comments (%s):</h2>' % len(comments)) + for comment in comments: + text.append('<h3><a href="%s">%s</a> at %s:</h3>' % ( + html_escape(comment['homepage']), html_escape(comment['name']), + time.strftime('%c', comment['time']))) + # Susceptible to XSS attacks!: + text.append(comment['comments']) + return ''.join(text) + +We put in a header (with an anchor we'll use later), and a section for +each comment. Note that ``html_escape`` is the same as ``cgi.escape`` +and just turns ``&`` into ``&``, etc. + +Because we put in some text without quoting it is susceptible to a +`Cross-Site Scripting +<http://en.wikipedia.org/wiki/Cross-site_scripting>`_ attack. Fixing +that is beyond the scope of this tutorial; you could quote it or clean +it with something like `lxml.html.clean +<http://codespeak.net/lxml/lxmlhtml.html#cleaning-up-html>`_. + +Accepting Comments +------------------ + +All of those pieces *display* comments, but still no one can actually +make comments. To handle this we'll take a little piece of the URL +space for our own, everything under ``/.comments``, so when someone +POSTs there it will add a comment. + +When the request comes in there are two parts to the path: +``SCRIPT_NAME`` and ``PATH_INFO``. Everything in ``SCRIPT_NAME`` has +already been parsed, and everything in ``PATH_INFO`` has yet to be +parsed. That means that the URL *without* ``PATH_INFO`` is the path +to the middleware; we can intercept anything else below +``SCRIPT_NAME`` but nothing above it. The name for the URL without +``PATH_INFO`` is ``req.application_url``. We have to capture it early +to make sure it doesn't change (since the WSGI application we are +wrapping may update ``SCRIPT_NAME`` and ``PATH_INFO``). + +So here's what this all looks like: + +.. code-block:: + + class Commenter(object): + ... + + def __call__(self, environ, start_response): + req = Request(environ) + if req.path_info_peek() == '.comments': + return self.process_comment(req)(environ, start_response) + # This is the base path of *this* middleware: + base_url = req.application_url + resp = req.get_response(self.app) + if resp.content_type != 'text/html' or resp.status_int != 200: + # Not an HTML response, we don't want to + # do anything to it + return resp(environ, start_response) + # Make sure the content isn't gzipped: + resp.decode_content() + comments = self.get_data(req.url) + body = resp.body + body = self.add_to_end(body, self.format_comments(comments)) + body = self.add_to_end(body, self.submit_form(base_url, req)) + resp.body = body + return resp(environ, start_response) + +``base_url`` is the path where the middleware is located (if you run +the example server, it will be ``http://localhost:PORT/``). We use +``req.path_info_peek()`` to look at the next segment of the URL -- +what comes after base_url. If it is ``.comments`` then we handle it +internally and don't pass the request on. + +We also put in a little guard, ``resp.decode_content()`` in case the +application returns a gzipped response. + +Then we get the data, add the comments, add the *form* to make new +comments, and return the result. + +submit_form +~~~~~~~~~~~ + +Here's what the form looks like: + +.. code-block:: + + class Commenter(object): + ... + + def submit_form(self, base_path, req): + return '''<h2>Leave a comment:</h2> + <form action="%s/.comments" method="POST"> + <input type="hidden" name="url" value="%s"> + <table width="100%%"> + <tr><td>Name:</td> + <td><input type="text" name="name" style="width: 100%%"></td></tr> + <tr><td>URL:</td> + <td><input type="text" name="homepage" style="width: 100%%"></td></tr> + </table> + Comments:<br> + <textarea name="comments" rows=10 style="width: 100%%"></textarea><br> + <input type="submit" value="Submit comment"> + </form> + ''' % (base_path, html_escape(req.url)) + +Nothing too exciting. It submits a form with the keys ``url`` (the +URL being commented on), ``name``, ``homepage``, and ``comments``. + +process_comment +~~~~~~~~~~~~~~~ + +If you look at the method call, what we do is call the method then +treat the result as a WSGI application: + +.. code-block:: + + return self.process_comment(req)(environ, start_response) + +You could write this as: + +.. code-block:: + + response = self.process_comment(req) + return response(environ, start_response) + +A common pattern in WSGI middleware that *doesn't* use WebOb is to +just do: + +.. code-block:: + + return self.process_comment(environ, start_response) + +But the WebOb style makes it easier to modify the response if you want +to; modifying a traditional WSGI response/application output requires +changing your logic flow considerably. + +Here's the actual processing code: + +.. code-block:: + + from webob import exc + from webob import Response + + class Commenter(object): + ... + + def process_comment(self, req): + try: + url = req.params['url'] + name = req.params['name'] + homepage = req.params['homepage'] + comments = req.params['comments'] + except KeyError, e: + resp = exc.HTTPBadRequest('Missing parameter: %s' % e) + return resp + data = self.get_data(url) + data.append(dict( + name=name, + homepage=homepage, + comments=comments, + time=time.gmtime())) + self.save_data(url, data) + resp = exc.HTTPSeeOther(location=url+'#comment-area') + return resp + +We either give a Bad Request response (if the form submission is +somehow malformed), or a redirect back to the original page. + +The classes in ``webob.exc`` (like ``HTTPBadRequest`` and +``HTTPSeeOther``) are Response subclasses that can be used to quickly +create responses for these non-200 cases where the response body +usually doesn't matter much. + +Conclusion +---------- + +This shows how to make response modifying middleware, which is +probably the most difficult kind of middleware to write with WSGI -- +modifying the request is quite simple in comparison, as you simply +update ``environ``. |