aboutsummaryrefslogtreecommitdiffstats
path: root/wiki-philosophy.js
blob: 1772ec3e47995163a5e70578b7ce7e88aa865f11 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
/*
 * wiki-philosophy.js
 * How many clicks does it take to get from any word to Philosophy on Wikipedia?
 *
 * Copyright 2011 Jason A. Donenfeld <Jason@zx2c4.com>
 *
 */

var jsdom = require("jsdom");
if (process.argv.length <= 2)
	return console.error("Usage: " + process.argv[0] + " " + process.argv[1] + " word [target]");


// Keep track of links we've followed to prevent infinite loops.
var searchedWords = [];
// We use a queue so that we can simulate a BFS of the tree.
var linkQueue = []
var found = false;
var maxParallelRequests = 30;
var currentRequests = 0;
var jQuery = require("fs").readFileSync("./jquery-1.6.2.min.js").toString();
var targetWord = "Philosophy"


function search(page)
{
	if (searchedWords.indexOf(page.word) != -1)
		return; // Already searched!
	searchedWords.push(page.word);
	++currentRequests;
	jsdom.env({ html: "http://en.wikipedia.org/wiki/" + stringToWikiurl(page.word), src: [jQuery], done: function (errors, window) {
		--currentRequests;
		if (found)
			return;
		console.log("Fetched " + page.word);
		var $ = window.$;
		if ($ === undefined || $ == null) {
			console.error("jQuery failed to load");
			return;
		}
		if ($("#bodyContent .redirectMsg").length > 0) {
			// This page is a redirect page, follow the redirect
			var components = $("#bodyContent .redirectText > a").attr("href").split('/');
			appendToQueue({ word: wikiurlToString(components[components.length - 1]), parent: page });
		} else {
			// Find all links in the content and add them to stack.
			var found = false;
			var process = function (i, ele) {
				var components = $(ele).attr("href");
				if (components.indexOf("/wiki/") != 0)
					return true;
				components = components.split('/');
				components = components[components.length - 1];
				if (components.indexOf("Wikipedia") == 0 ||
				    components.indexOf("File:") == 0 ||
				    components.indexOf("Category:") == 0 ||
				    components.indexOf("Help:") == 0 ||
				    components.indexOf("Special:") == 0)
					return true;
				found = true;
				appendToQueue({ word: wikiurlToString(components), parent: page });
			};
			$.each($("#bodyContent > p a"), process);

			// Disambiguation page
			if (!found)
				$.each($("#bodyContent a"), process);
		}

		// Keep queuing pages.
		runQueue();

		// Free up jsdom's memory leak:
		// https://github.com/mikeal/spider/issues/5#issuecomment-1830243
		window.close();
	}});
}
function appendToQueue(page)
{
	if (page.word == targetWord) {
		found = true;
		done(page);
		return;
	}
	linkQueue.push(page);
}
function runQueue()
{
	while (linkQueue.length > 0 && !found && currentRequests < maxParallelRequests)
		search(linkQueue.shift());
}
function done(page)
{
	console.log("\n== " + targetWord + " found, sophia achieved! ==\n");

	// Walk tree backwards to make list.
	var list = []
	while (page != null) {
		list.unshift(page);
		page = page.parent;
	}
	for (var i = 0; i < list.length; ++i) {
		var indent = "";
		for (var j = 0; j < i; ++j)
			indent += '\t';
		console.log(indent + list[i].word);
	}
	process.exit(0);
}
function wikiurlToString(wiki)
{
	return wiki.replace(/_/g, " ");
}
function stringToWikiurl(string)
{
	return string.replace(/ /g, "_");
}

// Grab the starting word.
if (process.argv.length == 4)
	targetWord = process.argv[3][0].toUpperCase() + process.argv[3].slice(1);
appendToQueue({ word: process.argv[2][0].toUpperCase() + process.argv[2].slice(1), parent: null });
runQueue();