diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2009-07-25 12:39:59 -0400 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2009-07-25 12:39:59 -0400 |
commit | 6efcb78aabee12c14a00df65c29b548ae9ee5fbd (patch) | |
tree | 95bfad45d25462460c27fdf8423a003a9e16f47a | |
parent | Removed bad enron dataset. (diff) | |
download | geoemail-6efcb78aabee12c14a00df65c29b548ae9ee5fbd.tar.xz geoemail-6efcb78aabee12c14a00df65c29b548ae9ee5fbd.zip |
Gmail header downloader.
l--------- | gmailheaderdownloader | 1 | ||||
-rwxr-xr-x | src/gmailheaderdownloader.py | 44 |
2 files changed, 45 insertions, 0 deletions
diff --git a/gmailheaderdownloader b/gmailheaderdownloader
new file mode 120000
index 00000000..874e6a60
--- /dev/null
+++ b/gmailheaderdownloader
@@ -0,0 +1 @@
+src/gmailheaderdownloader.py
\ No newline at end of file
diff --git a/src/gmailheaderdownloader.py b/src/gmailheaderdownloader.py
new file mode 100755
index 00000000..cf48c83a
--- /dev/null
+++ b/src/gmailheaderdownloader.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+from imaplib import IMAP4_SSL
+from optparse import OptionParser
+import sys
+import os.path
+import os
+
+def main():
+    parser = OptionParser(usage="%prog --username/-u USERNAME --password/-p PASSWORD --fromsearch/-f FROM_SEARCH [DESTINATION_DIRECTORY]", description="Downloads gmail message headers to DESTINATION_DIRECTORY based on from whom the message was received")
+    parser.add_option("-u", "--username", action="store", type="string", metavar="USERNAME", help="Gmail username")
+    parser.add_option("-p", "--password", action="store", type="string", metavar="PASSWORD", help="Gmail password")
+    parser.add_option("-f", "--fromsearch", action="store", type="string", metavar="FROM_SEARCH", help="Specifies whose emails should be downloaded")
+    (options, args) = parser.parse_args()
+    if options.username == None or options.password == None or options.fromsearch == None:
+        parser.error("Username, password, and from are all required.")
+    if not options.username.lower().endswith("@gmail.com") and not options.username.lower().endswith("@googlemail.com"):
+        options.username += "@gmail.com"
+    if len(args) == 0:
+        destinationDir = "%s - %s" % (options.fromsearch, options.username)
+    else:
+        destinationDir = args[0]
+    if not os.path.exists(destinationDir):
+        os.mkdir(destinationDir)
+    imap = IMAP4_SSL("imap.gmail.com")
+    imap.login(options.username, options.password)
+    imap.select("[Gmail]/All Mail", True)
+    typ, data = imap.search(None, 'FROM', options.fromsearch)
+    if typ != "OK":
+        sys.exit(("Could not search properly: %s" % typ))
+    for num in data[0].split():
+        typ, data = imap.fetch(num, '(RFC822.HEADER)')
+        if typ != "OK":
+            sys.stderr.write("Could not fetch message %s: %s" % (num, typ))
+        else:
+            print "Writing headers for email %s" % num
+            f = open(os.path.join(destinationDir, str(num)), "w")
+            f.write(data[0][1])
+            f.close()
+    imap.close()
+    imap.logout()
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file |