summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2010-05-05 15:30:35 -0400
committer Jason A. Donenfeld <Jason@zx2c4.com> 2010-05-05 15:30:35 -0400
commit e87f07024bbb724e74c3b5de98c9635f6ffaae84 (patch)
tree 073fad0c3204914b04214fb456c2f639f86168fa
parent Get 1000 youtube results like before. (diff)
download MovieStatistics-e87f07024bbb724e74c3b5de98c9635f6ffaae84.tar.xz
MovieStatistics-e87f07024bbb724e74c3b5de98c9635f6ffaae84.zip
Added imdb title code lookup.
-rw-r--r-- .gitignore 1
-rw-r--r-- Imdb.py 18
-rw-r--r-- MovieSearchStats.py 6
3 files changed, 23 insertions, 2 deletions
diff --git a/.gitignore b/.gitignore
index b71ffa1..fd56561 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
.emailpassword
+*.pyc
diff --git a/Imdb.py b/Imdb.py
new file mode 100644
index 0000000..25df175
--- /dev/null
+++ b/Imdb.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+
+from urllib2 import urlopen
+from json import loads
+from urllib import quote_plus
+from re import findall
+
+def titleCode(title):
+ try:
+ html = urlopen("http://www.imdb.com/find?s=tt&q=" + quote_plus(title)).read()
+ except:
+ return ""
+ titleCodes = findall("/title/tt([0-9]{7})/", html)
+ if len(titleCodes) > 0:
+ return titleCodes[0]
+ else:
+ return ""
\ No newline at end of file
diff --git a/MovieSearchStats.py b/MovieSearchStats.py
index 3ea37a7..be00764 100644
--- a/MovieSearchStats.py
+++ b/MovieSearchStats.py
@@ -8,6 +8,7 @@ from AdwordsStats import adwordsStats
import YouTubeStats
from time import sleep
from Emailer import sendFile
+from Imdb import titleCode
def sanitizeString(dirty):
@@ -38,7 +39,7 @@ adwords = adwordsStats()
listFile = open(argv[1])
outFile = open(argv[2], "wb")
output = csv.writer(outFile)
-header = ["Rank", "Movie", "Google LSV (broad)", "Google LSV (exact)", "Google GSV (broad)", "Google GSV (exact)", "YouTube Search Volume (broad)", "YouTube Search Volume (exact)", "YouTube View Count"]
+header = ["Rank", "Movie", "Google LSV (broad)", "Google LSV (exact)", "Google GSV (broad)", "Google GSV (exact)", "YouTube Search Volume (broad)", "YouTube Search Volume (exact)", "YouTube View Count", "IMDB Title Code"]
output.writerow(header)
print header
i = 0
@@ -68,7 +69,8 @@ for line in listFile:
yvc = tryTenTimes(lambda: YouTubeStats.viewCount(searchString))
if yvc is None:
yvc = '???'
- row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc)]
+ imdbTitleCode = titleCode(searchString)
+ row = [str(i), movie, str(google['broad']['lsv']), str(google['exact']['lsv']), str(google['broad']['gsv']), str(google['exact']['gsv']), str(youtube['broad']), str(youtube['exact']), str(yvc), imdbTitleCode]
output.writerow(row)
outFile.flush()
print row