summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2009-07-25 12:39:59 -0400
committer Jason A. Donenfeld <Jason@zx2c4.com> 2009-07-25 12:39:59 -0400
commit 6efcb78aabee12c14a00df65c29b548ae9ee5fbd (patch)
tree 95bfad45d25462460c27fdf8423a003a9e16f47a
parent Removed bad enron dataset. (diff)
download geoemail-6efcb78aabee12c14a00df65c29b548ae9ee5fbd.tar.xz
geoemail-6efcb78aabee12c14a00df65c29b548ae9ee5fbd.zip
Gmail header downloader.
l--------- gmailheaderdownloader 1
-rwxr-xr-x src/gmailheaderdownloader.py 44
2 files changed, 45 insertions, 0 deletions
diff --git a/gmailheaderdownloader b/gmailheaderdownloader
new file mode 120000
index 00000000..874e6a60
--- /dev/null
+++ b/gmailheaderdownloader
@@ -0,0 +1 @@
+src/gmailheaderdownloader.py
\ No newline at end of file
diff --git a/src/gmailheaderdownloader.py b/src/gmailheaderdownloader.py
new file mode 100755
index 00000000..cf48c83a
--- /dev/null
+++ b/src/gmailheaderdownloader.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+from imaplib import IMAP4_SSL
+from optparse import OptionParser
+import sys
+import os.path
+import os
+
+def main():
+ parser = OptionParser(usage="%prog --username/-u USERNAME --password/-p PASSWORD --fromsearch/-f FROM_SEARCH [DESTINATION_DIRECTORY]", description="Downloads gmail message headers to DESTINATION_DIRECTORY based on from whom the message was received")
+ parser.add_option("-u", "--username", action="store", type="string", metavar="USERNAME", help="Gmail username")
+ parser.add_option("-p", "--password", action="store", type="string", metavar="PASSWORD", help="Gmail password")
+ parser.add_option("-f", "--fromsearch", action="store", type="string", metavar="FROM_SEARCH", help="Specifies whose emails should be downloaded")
+ (options, args) = parser.parse_args()
+ if options.username == None or options.password == None or options.fromsearch == None:
+ parser.error("Username, password, and from are all required.")
+ if not options.username.lower().endswith("@gmail.com") and not options.username.lower().endswith("@googlemail.com"):
+ options.username += "@gmail.com"
+ if len(args) == 0:
+ destinationDir = "%s - %s" % (options.fromsearch, options.username)
+ else:
+ destinationDir = args[0]
+ if not os.path.exists(destinationDir):
+ os.mkdir(destinationDir)
+ imap = IMAP4_SSL("imap.gmail.com")
+ imap.login(options.username, options.password)
+ imap.select("[Gmail]/All Mail", True)
+ typ, data = imap.search(None, 'FROM', options.fromsearch)
+ if typ != "OK":
+ sys.exit(("Could not search properly: %s" % typ))
+ for num in data[0].split():
+ typ, data = imap.fetch(num, '(RFC822.HEADER)')
+ if typ != "OK":
+ sys.stderr.write("Could not fetch message %s: %s" % (num, typ))
+ else:
+ print "Writing headers for email %s" % num
+ f = open(os.path.join(destinationDir, str(num)), "w")
+ f.write(data[0][1])
+ f.close()
+ imap.close()
+ imap.logout()
+
+if __name__ == '__main__':
+ main() \ No newline at end of file