diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2010-05-05 15:30:35 -0400 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2010-05-05 15:30:35 -0400 |
commit | e87f07024bbb724e74c3b5de98c9635f6ffaae84 (patch) | |
tree | 073fad0c3204914b04214fb456c2f639f86168fa | |
parent | Get 1000 youtube results like before. (diff) | |
download | MovieStatistics-e87f07024bbb724e74c3b5de98c9635f6ffaae84.tar.xz MovieStatistics-e87f07024bbb724e74c3b5de98c9635f6ffaae84.zip |
Added imdb title code lookup.
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | Imdb.py | 18 | ||||
-rw-r--r-- | MovieSearchStats.py | 6 |
3 files changed, 23 insertions, 2 deletions
@@ -1 +1,2 @@
 .emailpassword
+*.pyc
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+from urllib2 import urlopen
+from json import loads
+from urllib import quote_plus
+from re import findall
+
+def titleCode(title):
+    try:
+        html = urlopen("http://www.imdb.com/find?s=tt&q=" + quote_plus(title)).read()
+    except:
+        return ""
+    titleCodes = findall("/title/tt([0-9]{7})/", html)
+    if len(titleCodes) > 0:
+        return titleCodes[0]
+    else:
+        return ""
\ No newline at end of file
diff --git a/MovieSearchStats.py b/MovieSearchStats.py
index 3ea37a7..be00764 100644
--- a/MovieSearchStats.py
+++ b/MovieSearchStats.py
@@ -8,6 +8,7 @@ from AdwordsStats import adwordsStats
 import YouTubeStats
 from time import sleep
 from Emailer import sendFile
+from Imdb import titleCode
 
 
 def sanitizeString(dirty):
@@ -38,7 +39,7 @@ adwords = adwordsStats()
 listFile = open(argv[1])
 outFile = open(argv[2], "wb")
 output = csv.writer(outFile)
-header = ["Rank", "Movie", "Google LSV (broad)", "Google LSV (exact)", "Google GSV (broad)", "Google GSV (exact)", "YouTube Search Volume (broad)", "YouTube Search Volume (exact)", "YouTube View Count"]
+header = ["Rank", "Movie", "Google LSV (broad)", "Google LSV (exact)", "Google GSV (broad)", "Google GSV (exact)", "YouTube Search Volume (broad)", "YouTube Search Volume (exact)", "YouTube View Count", "IMDB Title Code"]
 output.writerow(header)
 print header
 i = 0
@@ -68,7 +69,8 @@ for line in listFile:
     yvc = tryTenTimes(lambda: YouTubeStats.viewCount(searchString))
     if yvc is None:
         yvc = '???'
-    row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc)]
+    imdbTitleCode = titleCode(searchString)
+    row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc), imdbTitleCode]
     output.writerow(row)
     outFile.flush()
     print row