diff options
Diffstat (limited to 'uniqueemails.py')
-rwxr-xr-x | uniqueemails.py | 106 |
1 files changed, 106 insertions, 0 deletions
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Origin: diff --git a/uniqueemails.py b/uniqueemails.py
#         (new file mode 100755, index 0000000..f94992f)
"""List the unique e-mail addresses found in a Gmail account's headers.

The script logs in to imap.gmail.com, walks every message in
"[Gmail]/All Mail" (opened read-only), reads the To/Cc/Resent-To/Resent-Cc
and From headers, and prints the collected addresses according to --mode.
Fetched headers can optionally be cached on disk so later runs skip the
download. Progress and warnings go to stderr; the report goes to stdout.
"""
from imaplib import IMAP4_SSL
from optparse import OptionParser
from email.parser import HeaderParser
from email.message import Message
from email.utils import getaddresses
import sys
import os.path
import os

# Modes that key the result by recipient; the other two key it by sender.
_RECIPIENT_KEYED_MODES = ("to", "tobyfrom")
_SENDER_KEYED_MODES = ("from", "frombyto")
_VALID_MODES = _RECIPIENT_KEYED_MODES + _SENDER_KEYED_MODES
# Modes whose report lists the associated addresses under each key.
_GROUPED_MODES = ("tobyfrom", "frombyto")

_RECIPIENT_HEADERS = ("to", "cc", "resent-to", "resent-cc")
_SENDER_HEADERS = ("from",)


def _as_text(value):
    """Return value as a native str.

    imaplib and binary file reads yield bytes on Python 3 and str on
    Python 2. iso-8859-1 maps every byte to a character, so decoding can
    never fail on malformed headers.
    """
    if isinstance(value, str):
        return value
    return value.decode("iso-8859-1")


def _parse_options(argv):
    """Parse and validate the command line; return the optparse values.

    argv is the argument list without the program name, or None to use
    sys.argv[1:]. Invalid input ends the process through parser.error()
    (exit status 2). If the cache directory cannot be created, caching is
    disabled by resetting options.cachedir to None.
    """
    parser = OptionParser(
        usage="%prog --username/-u USERNAME --password/-p PASSWORD "
              "--mode/-m MODE [--domain/-d DOMAIN] [--cachedir/-c CACHEDIR]",
        description="Downloads gmail message headers and determines the set "
                    "of all e-mail addresses on DOMAIN at which people have "
                    "emailed you.")
    parser.add_option("-u", "--username", action="store", type="string",
                      metavar="USERNAME", help="Gmail username")
    parser.add_option("-p", "--password", action="store", type="string",
                      metavar="PASSWORD", help="Gmail password")
    parser.add_option("-d", "--domain", action="store", type="string",
                      metavar="DOMAIN", help="Domain name")
    parser.add_option("-c", "--cachedir", action="store", type="string",
                      metavar="CACHEDIR",
                      help="The directory to cache fetched headers for "
                           "subsequent runs")
    parser.add_option("-m", "--mode", action="store", type="string",
                      metavar="MODE",
                      help="If the mode is \"to\", this prints a list of all "
                           "the emails you've received email on. If the mode "
                           "is \"from\" this prints a list of everyone who "
                           "has sent you email. If the mode is \"frombyto\" "
                           "this prints a list of all the addresses that "
                           "have emailed you sorted by the address at which "
                           "you received email. If the mode is \"tobyfrom\" "
                           "this prints a list of all the addresses you have "
                           "received e-mail from sorted and duplicated by "
                           "who sent the e-mail.")
    options, _ = parser.parse_args(argv)

    if options.username is None or options.password is None:
        parser.error("Username and password are all required.")
    if options.mode not in _VALID_MODES:
        parser.error("You must specify a mode.")

    # A bare account name is treated as a gmail.com address.
    if not options.username.lower().endswith(("@gmail.com",
                                              "@googlemail.com")):
        options.username += "@gmail.com"

    if options.cachedir is not None and not os.path.exists(options.cachedir):
        try:
            os.makedirs(options.cachedir)
        except OSError:
            sys.stderr.write("Could not make cache dir. Skipping cache.\n")
            options.cachedir = None
    return options


def _load_message(imap, header_parser, num, cachedir):
    """Return the parsed headers of message num, or None if unavailable.

    A cached copy in cachedir is preferred. Otherwise the headers are
    fetched over IMAP and, when caching is enabled, written to the cache.
    Failures are reported on stderr and never raised, so one bad message
    does not abort the whole run.
    """
    cache_path = None
    if cachedir is not None:
        cache_path = os.path.join(cachedir, num)

    if cache_path is not None and os.path.exists(cache_path):
        try:
            with open(cache_path, "rb") as cached:
                return header_parser.parsestr(_as_text(cached.read()), True)
        except EnvironmentError:
            # Unreadable cache entry: fall through and fetch it again.
            sys.stderr.write("Could not read cache for %s\n" % num)

    try:
        typ, data = imap.fetch(num, '(RFC822.HEADER)')
    except (IMAP4_SSL.error, EnvironmentError):
        sys.stderr.write("Failed to fetch ID %s\n" % num)
        return None
    if typ != "OK":
        sys.stderr.write("Failed to fetch ID %s: %s\n" % (num, typ))
        return None
    # A successful fetch carries an (envelope, payload) tuple first; anything
    # else (for example [None]) means no header data came back.
    if not data or not isinstance(data[0], tuple):
        sys.stderr.write("Failed to fetch ID %s: no header data\n" % num)
        return None
    raw_header = data[0][1]

    if cache_path is not None:
        try:
            # Binary mode stores the server's bytes unchanged on 2 and 3.
            with open(cache_path, "wb") as cache_file:
                cache_file.write(raw_header)
        except EnvironmentError:
            sys.stderr.write("Could not write cache for %s\n" % num)
    return header_parser.parsestr(_as_text(raw_header), True)


def _header_addresses(message, header_names):
    """Return the non-empty addresses found in the named headers.

    getaddresses() yields ('', '') for values it cannot parse; those are
    dropped so blank entries never reach the report.
    """
    values = []
    for name in header_names:
        values.extend(message.get_all(name, []))
    return [address for _, address in getaddresses(values) if address]


def _record(email_addresses, mode, recipient, senders):
    """Fold one recipient and its senders into email_addresses.

    Recipient-keyed modes map recipient -> set of senders; sender-keyed
    modes map sender -> set of recipients.
    NOTE(review): the --mode help text can be read as grouping the two
    "by" modes the other way round; this keeps the original code's
    grouping - confirm the intent.
    """
    if mode in _RECIPIENT_KEYED_MODES:
        email_addresses.setdefault(recipient, set()).update(senders)
    else:
        for sender in senders:
            email_addresses.setdefault(sender, set()).add(recipient)


def _scan_mailbox(imap, options):
    """Walk every message of the selected mailbox and collect addresses.

    Returns a dict mapping address -> set of associated addresses (see
    _record). Exits the process if the IMAP search fails.
    """
    typ, data = imap.search(None, 'ALL')
    if typ != "OK":
        sys.exit("Could not search properly: %s" % typ)
    id_blob = data[0] if data and data[0] else ""
    message_ids = [_as_text(num) for num in id_blob.split()]
    total = len(message_ids)

    # Compare case-insensitively: addresses are lower-cased before matching.
    domain = None
    if options.domain is not None:
        domain = options.domain.lower()

    header_parser = HeaderParser()
    email_addresses = {}
    for counter, num in enumerate(message_ids, 1):
        message = _load_message(imap, header_parser, num, options.cachedir)
        if message is None:
            continue

        recipients = _header_addresses(message, _RECIPIENT_HEADERS)
        if not recipients:
            sys.stderr.write("[%s of %s]: Message has empty To header.\n"
                             % (counter, total))
            continue

        # The sender set is the same for every recipient of this message.
        senders = set(address.lower() for address
                      in _header_addresses(message, _SENDER_HEADERS))
        for recipient in recipients:
            lowered = recipient.lower()
            # Plain suffix match, as before: "example.com" and
            # "@example.com" both work as DOMAIN.
            if domain is not None and not lowered.endswith(domain):
                continue
            _record(email_addresses, options.mode, lowered, senders)
            sys.stderr.write("[%s of %s]: Message to %s from %s.\n"
                             % (counter, total, recipient,
                                ", ".join(sorted(senders))))
    return email_addresses


def _print_report(email_addresses, mode):
    """Write the collected addresses to stdout in sorted order.

    The "by" modes additionally list each key's associated addresses,
    tab-indented, underneath it.
    """
    grouped = mode in _GROUPED_MODES
    for key in sorted(email_addresses):
        sys.stdout.write("%s\n" % key)
        if grouped:
            for related in sorted(email_addresses[key]):
                sys.stdout.write("\t%s\n" % related)


def main(argv=None):
    """Run the command-line tool.

    argv is the argument list without the program name; None (the default)
    means sys.argv[1:]. Raises SystemExit on invalid arguments or when the
    mailbox cannot be selected or searched. Connection and login errors
    from imaplib propagate to the caller.
    """
    options = _parse_options(argv)

    imap = IMAP4_SSL("imap.gmail.com")
    try:
        imap.login(options.username, options.password)
        # Second argument opens the mailbox read-only.
        typ, _ = imap.select("[Gmail]/All Mail", True)
        if typ != "OK":
            sys.exit("Could not select mailbox: %s" % typ)
        email_addresses = _scan_mailbox(imap, options)
        imap.close()
    finally:
        # Always end the session, including on sys.exit() and errors.
        imap.logout()

    _print_report(email_addresses, options.mode)


if __name__ == '__main__':
    main()
\ No newline at end of file |