diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2010-05-21 19:15:22 +0200 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2010-05-21 19:15:22 +0200 |
commit | aae451c3e97067eb1d9362dd3754448a7a8f871f (patch) | |
tree | 9388f176698f5590f95a09ead7f0d77f431a69d3 | |
parent | Add banner, remove specific upload script in favor of generic one. (diff) | |
download | MovieStatistics-aae451c3e97067eb1d9362dd3754448a7a8f871f.tar.xz MovieStatistics-aae451c3e97067eb1d9362dd3754448a7a8f871f.zip |
Added blog search, added try 10 times to main stats, fixed progress logic.
-rw-r--r-- | BlogSearchStats.py | 64 |
-rw-r--r-- | GoogleStats.py | 14 |
-rw-r--r-- | MovieSearchStats.py | 4 |
-rw-r--r-- | index.php | 14 |
4 files changed, 92 insertions, 4 deletions
diff --git a/BlogSearchStats.py b/BlogSearchStats.py new file mode 100644 index 0000000..c214e7e --- /dev/null +++ b/BlogSearchStats.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python +# -*- coding: iso-8859-1 -*- + +from sys import argv +import csv +import string +from GoogleStats import blogSearchMonthResultCount +from time import sleep +from Emailer import sendFile +from Imdb import titleCode + + +def sanitizeString(dirty): + clean = "" + allowed = string.ascii_letters + string.digits + " -.'\"" + for i, v in enumerate(dirty): + if allowed.find(str(v)) != -1: + clean = clean + str(v) + else: + clean = clean + " " + while clean.find(" ") != -1: + clean = clean.replace(" ", " ") + return clean.strip() + +def tryTenTimes(function): + count = 10 + while count > 0: + count = count - 1 + try: + return function() + except Exception as ex: + print "Error:", ex, "... sleeping 5." + sleep(5) + return None + +emailMessage = "Hello %s,\n\nThis is an automated message. Attached are the requested blog statistics for these movies:\n" % argv[3][0:argv[3].find('@')] +listFile = open(argv[1]) +outFile = open(argv[2], "wb") +output = csv.writer(outFile) +header = ["Rank", "Movie", "Google Blog Search Result Count (past month)", "IMDB Title Code"] +output.writerow(header) +print header +i = 0 +for line in listFile: + i = i + 1 + movie = line.strip() + emailMessage = emailMessage + "\n * %s" % movie + searchString = sanitizeString(movie) + blogSearch = tryTenTimes(lambda: blogSearchMonthResultCount(searchString)) + if blogSearch is None: + blogSearch = '???' + imdbTitleCode = tryTenTimes(lambda: titleCode(searchString)) + if imdbTitleCode is None: + imdbTitleCode = '???' + row = [str(i), movie, str(blogSearch), imdbTitleCode] + output.writerow(row) + outFile.flush() + print row +outFile.close() +listFile.close() +emailMessage += "\n\nEnjoy,\nJason" +print "Sending email...", +sendFile(argv[3], argv[2], emailMessage) +print "sent!"
\ No newline at end of file diff --git a/GoogleStats.py b/GoogleStats.py new file mode 100644 index 0000000..1cf889e --- /dev/null +++ b/GoogleStats.py @@ -0,0 +1,14 @@ +# -*- coding: iso-8859-1 -*- +#!/usr/bin/env python + +from urllib2 import build_opener +from urllib import quote_plus +from re import findall + +def blogSearchMonthResultCount(keyword): + opener = build_opener() + opener.addheaders = [('User-agent', 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)')] + resultsScrape = findall('<div id=resultStats>(?:About ){0,1}([0-9,]+) result[s]{0,1}<nobr>', opener.open("http://www.google.com/search?q=%s&safe=off&tbs=blg:1,qdr:m" % quote_plus(keyword)).read()) + if len(resultsScrape) == 0: + return 0 + return int(resultsScrape[0].replace(",", ""))
\ No newline at end of file diff --git a/MovieSearchStats.py b/MovieSearchStats.py index be00764..97792b9 100644 --- a/MovieSearchStats.py +++ b/MovieSearchStats.py @@ -69,7 +69,9 @@ for line in listFile: yvc = tryTenTimes(lambda: YouTubeStats.viewCount(searchString)) if yvc is None: yvc = '???' - imdbTitleCode = titleCode(searchString) + imdbTitleCode = tryTenTimes(lambda: titleCode(searchString)) + if imdbTitleCode is None: + imdbTitleCode = '???' row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc), imdbTitleCode] output.writerow(row) outFile.flush() @@ -25,12 +25,14 @@ if ($_GET["action"] == "progress") { echo "Waiting to begin..."; exit(); } - $lines = -1; + $lines = -2; while (!@feof($statsFile)) { @fgets($statsFile); $lines++; } - if ($lines <= $totalLines) + if ($lines < 0) + $lines = 0; + if ($lines < $totalLines) echo "Progress: $lines movies out of $totalLines completed."; else echo "Finished... sending e-mail."; @@ -45,7 +47,11 @@ if (!$running && $_GET["action"] == "start" && $_POST["forreal"] == "yeah") { file_put_contents("input.txt", $_POST['list']); file_put_contents("totalLines", (string)count(split("\n", trim($_POST['list'])))); @unlink("MovieStats.csv"); - runInBackground("/usr/bin/python MovieSearchStats.py input.txt MovieStats.csv '".$_POST['email']."'"); + if ($_POST["type"] == "full") + $script = "MovieSearchStats.py"; + elseif ($_POST["type"] == "blog") + $script = "BlogSearchStats.py"; + runInBackground("/usr/bin/python $script input.txt MovieStats.csv '".$_POST['email']."'"); $running = true; } ?> @@ -94,6 +100,8 @@ Bambi Jaws ...fill me up... 
</textarea></label></p> +<p><label><input type="radio" name="type" value="full" checked>YouTube & Google Adwords Statistics</label><br> +<label><input type="radio" name="type" value="blog">Google Blog Search Statistics</label></p> <input type="submit" value="Start"> </form> <?php endif; ?> |