diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2013-04-25 22:13:23 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2013-04-26 05:45:33 +0200 |
commit | a8bcdefded5314ab8c3ea9beef27d4440672915a (patch) | |
tree | d95542856424b8fd6a1c4d0cee0fbb3cb16666c5 | |
download | server-execute-phantom-a8bcdefded5314ab8c3ea9beef27d4440672915a.tar.xz server-execute-phantom-a8bcdefded5314ab8c3ea9beef27d4440672915a.zip |
Initial commit.
-rw-r--r-- | README.md | 14 | ||||
-rw-r--r-- | __init__.py | 26 | ||||
-rw-r--r-- | driver.js | 11 | ||||
-rw-r--r-- | process.py | 36 |
4 files changed, 87 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7a6ed5b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# Server Execute Phantom
+
+This renders pages according to the [AJAX crawl specification](https://developers.google.com/webmasters/ajax-crawling/).
+
+## Nginx Configuration
+
+    location / {
+        include uwsgi_params;
+        uwsgi_param HTTP_X_SE_ORIGINAL_URL $scheme://$host$request_uri;
+        if ($args ~* _escaped_fragment_) {
+            uwsgi_pass unix:/var/run/uwsgi-apps/server-execute-phantom.socket;
+        }
+        alias /var/www;
+    }
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e080959
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,26 @@
+from flask import Flask, request, abort
+from urlparse import urlsplit, urlunsplit, parse_qs
+from urllib import urlencode
+from process import send_process
+import os.path
+
+app = Flask(__name__)
+
+@app.route("/", defaults={"path": ""})
+@app.route("/<path:path>")
+def render(path):
+    url = request.headers.get("x-se-original-url")
+    if url is None:
+        abort(404)
+    url = urlsplit(url, allow_fragments=False)
+    query = parse_qs(url.query)
+    fragment = ""
+    if "_escaped_fragment_" in query:
+        fragment = "!" + query["_escaped_fragment_"][0]
+        del query["_escaped_fragment_"]
+    url = urlunsplit((url.scheme, url.netloc, url.path, urlencode(query), fragment))
+
+    return send_process([ "phantomjs", "--load-images=false", os.path.join(os.path.dirname(os.path.abspath(__file__)), "driver.js"), url ])
+
+if __name__ == '__main__':
+    app.run()
diff --git a/driver.js b/driver.js
new file mode 100644
index 0000000..3221da2
--- /dev/null
+++ b/driver.js
@@ -0,0 +1,11 @@
+var page = require("webpage").create();
+var args = require("system").args;
+
+if (args.length != 2)
+    phantom.exit();
+
+page.settings.userAgent = "Server Execute Phantom";
+page.open(args[1], function() {
+    console.log(page.content);
+    phantom.exit();
+});
diff --git a/process.py b/process.py
new file mode 100644
index 0000000..1dfc7ed
--- /dev/null
+++ b/process.py
@@ -0,0 +1,36 @@
+from flask import Response
+import subprocess
+import os
+
+class ProcessWrapper(object):
+    def __init__(self, process, buffer_size=8192):
+        self.process = process
+        self.buffer_size = buffer_size
+    def close(self):
+        if self.process.returncode is not None:
+            return
+        self.process.stdout.close()
+        self.process.terminate()
+        self.process.wait()
+    def __iter__(self):
+        return self
+    def __del__(self):
+        self.close()
+    def next(self):
+        try:
+            data = self.process.stdout.read(self.buffer_size)
+        except:
+            self.close()
+            raise StopIteration()
+        if data:
+            return data
+        self.close()
+        raise StopIteration()
+
+def send_process(args):
+    def close_fds():
+        os.close(0)
+        os.close(2)
+    process = subprocess.Popen(args, close_fds=True, stdout=subprocess.PIPE, preexec_fn=close_fds)
+    response = ProcessWrapper(process)
+    return Response(response, direct_passthrough=True)