aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJason A. Donenfeld <Jason@zx2c4.com>2013-04-25 22:13:23 +0200
committerJason A. Donenfeld <Jason@zx2c4.com>2013-04-26 05:45:33 +0200
commita8bcdefded5314ab8c3ea9beef27d4440672915a (patch)
treed95542856424b8fd6a1c4d0cee0fbb3cb16666c5
downloadserver-execute-phantom-a8bcdefded5314ab8c3ea9beef27d4440672915a.tar.xz
server-execute-phantom-a8bcdefded5314ab8c3ea9beef27d4440672915a.zip
Initial commit.
-rw-r--r--README.md14
-rw-r--r--__init__.py26
-rw-r--r--driver.js11
-rw-r--r--process.py36
4 files changed, 87 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..7a6ed5b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,14 @@
+# Server Execute Phantom
+
+This renders pages according to the [AJAX crawl specification](https://developers.google.com/webmasters/ajax-crawling/).
+
+## Nginx Configuration
+
+    location / {
+        include uwsgi_params;
+        uwsgi_param HTTP_X_SE_ORIGINAL_URL $scheme://$host$request_uri;
+        if ($args ~* _escaped_fragment_) {
+            uwsgi_pass unix:/var/run/uwsgi-apps/server-execute-phantom.socket;
+        }
+        alias /var/www;
+    }
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e080959
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1,26 @@
+from flask import Flask, request, abort
+from urlparse import urlsplit, urlunsplit, parse_qs
+from urllib import urlencode
+from process import send_process
+import os.path
+
+app = Flask(__name__)
+
+@app.route("/", defaults={"path": ""})
+@app.route("/<path:path>")
+def render(path):
+    """Render the originally requested page with PhantomJS.
+
+    The URL the client asked for is read from the ``X-SE-Original-URL``
+    request header.  Its ``_escaped_fragment_`` query parameter is turned
+    back into a ``#!`` fragment, and the rebuilt URL is handed to
+    ``driver.js``; the response is whatever ``send_process`` returns for
+    that PhantomJS invocation.
+
+    ``path`` only exists to satisfy the catch-all routes and is not used.
+    Aborts with 404 when the header is missing and with 400 when the URL
+    is not an http(s) URL.
+    """
+    original = request.headers.get("x-se-original-url")
+    if original is None:
+        abort(404)
+    # allow_fragments=False keeps a "#" in the header value inside the
+    # path/query instead of splitting it off as a fragment.
+    parts = urlsplit(original, allow_fragments=False)
+    # The header is client-controlled whenever the app is reachable without
+    # the front-end proxy; do not let PhantomJS open file:// and friends.
+    if parts.scheme not in ("http", "https"):
+        abort(400)
+    # keep_blank_values so parameters such as "?flag=" survive the round trip.
+    query = parse_qs(parts.query, keep_blank_values=True)
+    # Blank _escaped_fragment_ values are ignored, so "?_escaped_fragment_="
+    # still maps to the URL without any fragment.
+    escaped = [value for value in query.pop("_escaped_fragment_", []) if value]
+    fragment = "!" + escaped[0] if escaped else ""
+    # doseq=True because parse_qs maps every key to a *list* of values;
+    # without it urlencode would emit the repr of each list.
+    url = urlunsplit((
+        parts.scheme,
+        parts.netloc,
+        parts.path,
+        urlencode(query, doseq=True),
+        fragment,
+    ))
+
+    driver = os.path.join(os.path.dirname(os.path.abspath(__file__)), "driver.js")
+    return send_process(["phantomjs", "--load-images=false", driver, url])
+
+# Start Flask's built-in development server when this file is run directly.
+if __name__ == "__main__":
+    app.run()
diff --git a/driver.js b/driver.js
new file mode 100644
index 0000000..3221da2
--- /dev/null
+++ b/driver.js
@@ -0,0 +1,11 @@
+// PhantomJS driver: load the URL given as the only argument and print the
+// resulting document markup to stdout.
+var page = require("webpage").create();
+var args = require("system").args;
+
+// args[0] is the script name, so exactly one real argument is expected.
+if (args.length !== 2) {
+    // phantom.exit() does not stop the script right away, so the page load
+    // lives in the else branch instead of simply following this call.
+    phantom.exit(1);
+} else {
+    page.settings.userAgent = "Server Execute Phantom";
+    page.open(args[1], function(status) {
+        // On a failed load page.content is only an empty placeholder
+        // document; report the failure through the exit code instead.
+        if (status !== "success") {
+            phantom.exit(1);
+            return;
+        }
+        console.log(page.content);
+        phantom.exit();
+    });
+}
diff --git a/process.py b/process.py
new file mode 100644
index 0000000..1dfc7ed
--- /dev/null
+++ b/process.py
@@ -0,0 +1,36 @@
+from flask import Response
+import subprocess
+import os
+
+class ProcessWrapper(object):
+    """Iterator over the stdout of a child process, read in chunks.
+
+    ``process`` is a ``subprocess.Popen``-like object whose ``stdout`` is a
+    readable pipe.  Each step returns up to ``buffer_size`` bytes; once the
+    pipe is exhausted or a read fails, the child is terminated and reaped
+    and iteration stops.
+    """
+
+    def __init__(self, process, buffer_size=8192):
+        self.process = process
+        self.buffer_size = buffer_size
+
+    def close(self):
+        """Close the pipe, then terminate and reap the child."""
+        # returncode is only set once wait() has run, so this guard makes
+        # repeated calls (iteration end, then __del__) harmless.
+        if self.process.returncode is not None:
+            return
+        self.process.stdout.close()
+        self.process.terminate()
+        self.process.wait()
+
+    def __iter__(self):
+        return self
+
+    def __del__(self):
+        self.close()
+
+    def next(self):
+        """Return the next chunk of output; raise StopIteration at the end."""
+        try:
+            data = self.process.stdout.read(self.buffer_size)
+        except (IOError, OSError, ValueError):
+            # Broken or already-closed pipe: treat it like end-of-stream.
+            # KeyboardInterrupt, SystemExit and real bugs now propagate
+            # instead of being swallowed by a bare except.
+            data = None
+        if data:
+            return data
+        self.close()
+        raise StopIteration()
+
+    # Python 3 spells the iterator protocol method __next__.
+    __next__ = next
+
+def send_process(args):
+    """Run ``args`` as a child process and stream its stdout as the response.
+
+    ``args`` is the argument list passed to ``subprocess.Popen`` (no shell).
+    Returns a Flask ``Response`` whose body is a ``ProcessWrapper`` around
+    the child, created with ``direct_passthrough=True``.
+    """
+    # Point stdin and stderr at the null device instead of closing
+    # descriptors 0 and 2 in a preexec_fn: closed standard descriptors are
+    # reused by the next files the child opens (so stray stderr output would
+    # land in one of them), and os.close() raises if the parent had already
+    # closed them.
+    devnull = open(os.devnull, "r+b")
+    try:
+        process = subprocess.Popen(
+            args,
+            close_fds=True,
+            stdin=devnull,
+            stdout=subprocess.PIPE,
+            stderr=devnull,
+        )
+    finally:
+        # The child holds its own copies; the parent's handle is not needed.
+        devnull.close()
+    return Response(ProcessWrapper(process), direct_passthrough=True)