summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2010-05-21 19:15:22 +0200
committer Jason A. Donenfeld <Jason@zx2c4.com> 2010-05-21 19:15:22 +0200
commit aae451c3e97067eb1d9362dd3754448a7a8f871f (patch)
tree 9388f176698f5590f95a09ead7f0d77f431a69d3
parent Add banner, remove specific upload script in favor of generic one. (diff)
download MovieStatistics-aae451c3e97067eb1d9362dd3754448a7a8f871f.tar.xz
MovieStatistics-aae451c3e97067eb1d9362dd3754448a7a8f871f.zip
Added blog search, added try 10 times to main stats, fixed progress logic.
-rw-r--r-- BlogSearchStats.py 64
-rw-r--r-- GoogleStats.py 14
-rw-r--r-- MovieSearchStats.py 4
-rw-r--r-- index.php 14
4 files changed, 92 insertions, 4 deletions
diff --git a/BlogSearchStats.py b/BlogSearchStats.py
new file mode 100644
index 0000000..c214e7e
--- /dev/null
+++ b/BlogSearchStats.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+from sys import argv
+import csv
+import string
+from GoogleStats import blogSearchMonthResultCount
+from time import sleep
+from Emailer import sendFile
+from Imdb import titleCode
+
+
+def sanitizeString(dirty):
+    """Return dirty with disallowed characters blanked and spaces collapsed.
+
+    Only ASCII letters, digits, space and - . ' " are kept; every other
+    character becomes a space, then runs of spaces shrink to one and the
+    ends are trimmed.
+    """
+    allowed = string.ascii_letters + string.digits + " -.'\""
+    spaced = "".join([c if c in allowed else " " for c in map(str, dirty)])
+    # split()/join collapses every run in one pass and cannot loop forever.
+    return " ".join(spaced.split())
+
+def tryTenTimes(function):
+    # Call function up to ten times; give up with None if every call raises.
+    for _attempt in range(10):
+        try:
+            return function()
+        except Exception as ex:
+            # Best-effort retry: report the failure, pause, then go again.
+            print "Error:", ex, "... sleeping 5."
+            sleep(5)
+    return None
+
+# Usage: BlogSearchStats.py <movie list> <output csv> <recipient e-mail>
+# partition() keeps an address without '@' whole; find() gave -1 and cut its last char.
+emailMessage = "Hello %s,\n\nThis is an automated message. Attached are the requested blog statistics for these movies:\n" % argv[3].partition('@')[0]
+header = ["Rank", "Movie", "Google Blog Search Result Count (past month)", "IMDB Title Code"]
+# with-blocks close both files even when a lookup or CSV write raises.
+with open(argv[1]) as listFile:
+    with open(argv[2], "wb") as outFile:
+        output = csv.writer(outFile)
+        output.writerow(header)
+        print header
+        for rank, line in enumerate(listFile, 1):
+            movie = line.strip()
+            emailMessage = emailMessage + "\n * %s" % movie
+            searchString = sanitizeString(movie)
+            blogSearch = tryTenTimes(lambda: blogSearchMonthResultCount(searchString))
+            if blogSearch is None:
+                blogSearch = '???'
+            imdbTitleCode = tryTenTimes(lambda: titleCode(searchString))
+            if imdbTitleCode is None:
+                imdbTitleCode = '???'
+            row = [str(rank), movie, str(blogSearch), imdbTitleCode]
+            output.writerow(row)
+            # Flushed per row, presumably so a progress poll sees each finished line.
+            outFile.flush()
+            print row
+emailMessage += "\n\nEnjoy,\nJason"
+print "Sending email...",
+sendFile(argv[3], argv[2], emailMessage)
+print "sent!"
\ No newline at end of file
diff --git a/GoogleStats.py b/GoogleStats.py
new file mode 100644
index 0000000..1cf889e
--- /dev/null
+++ b/GoogleStats.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+from urllib2 import build_opener
+from urllib import quote_plus
+from re import findall
+
+def blogSearchMonthResultCount(keyword):
+    # Scrape Google's past-month blog search page for keyword; 0 when no count is shown.
+    opener = build_opener()
+    opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)')]
+    page = opener.open("http://www.google.com/search?q=%s&safe=off&tbs=blg:1,qdr:m" % quote_plus(keyword)).read()
+    hits = findall('<div id=resultStats>(?:About ){0,1}([0-9,]+) result[s]{0,1}<nobr>', page)
+    return int(hits[0].replace(",", "")) if hits else 0
\ No newline at end of file
diff --git a/MovieSearchStats.py b/MovieSearchStats.py
index be00764..97792b9 100644
--- a/MovieSearchStats.py
+++ b/MovieSearchStats.py
@@ -69,7 +69,9 @@ for line in listFile:
yvc = tryTenTimes(lambda: YouTubeStats.viewCount(searchString))
if yvc is None:
yvc = '???'
- imdbTitleCode = titleCode(searchString)
+ imdbTitleCode = tryTenTimes(lambda: titleCode(searchString))
+ if imdbTitleCode is None:
+ imdbTitleCode = '???'
row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc), imdbTitleCode]
output.writerow(row)
outFile.flush()
diff --git a/index.php b/index.php
index c5eae4c..4dccc42 100644
--- a/index.php
+++ b/index.php
@@ -25,12 +25,14 @@ if ($_GET["action"] == "progress") {
echo "Waiting to begin...";
exit();
}
- $lines = -1;
+ $lines = -2;
while (!@feof($statsFile)) {
@fgets($statsFile);
$lines++;
}
- if ($lines <= $totalLines)
+ if ($lines < 0)
+ $lines = 0;
+ if ($lines < $totalLines)
echo "Progress: $lines movies out of $totalLines completed.";
else
echo "Finished... sending e-mail.";
@@ -45,7 +47,11 @@ if (!$running && $_GET["action"] == "start" && $_POST["forreal"] == "yeah") {
file_put_contents("input.txt", $_POST['list']);
file_put_contents("totalLines", (string)count(split("\n", trim($_POST['list']))));
@unlink("MovieStats.csv");
- runInBackground("/usr/bin/python MovieSearchStats.py input.txt MovieStats.csv '".$_POST['email']."'");
+    // Whitelist the script: an unknown type must never leave $script empty,
+    // or python would be handed the user-supplied input.txt as its script.
+    $blog = isset($_POST["type"]) && $_POST["type"] == "blog";
+    $script = $blog ? "BlogSearchStats.py" : "MovieSearchStats.py";
+    runInBackground("/usr/bin/python $script input.txt MovieStats.csv ".escapeshellarg($_POST['email']));
$running = true;
}
?>
@@ -94,6 +100,8 @@ Bambi
Jaws
...fill me up...
</textarea></label></p>
+<p><label><input type="radio" name="type" value="full" checked>YouTube &amp; Google Adwords Statistics</label><br>
+<label><input type="radio" name="type" value="blog">Google Blog Search Statistics</label></p>
<input type="submit" value="Start">
</form>
<?php endif; ?>