summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2010-04-18 05:49:18 -0400
committer Jason A. Donenfeld <Jason@zx2c4.com> 2010-04-18 05:49:18 -0400
commit 34f7d00bacce365530135bd2a5b290e19da1ecfc (patch)
tree 0b252fd3023625849d242ee46eaa5aebabaa768a
download CatchAllCatcher-34f7d00bacce365530135bd2a5b290e19da1ecfc.tar.xz
CatchAllCatcher-34f7d00bacce365530135bd2a5b290e19da1ecfc.zip
Initial commit. HEAD master
-rwxr-xr-x uniqueemails.py 106
1 files changed, 106 insertions, 0 deletions
diff --git a/uniqueemails.py b/uniqueemails.py
new file mode 100755
index 0000000..f94992f
--- /dev/null
+++ b/uniqueemails.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+# -*- coding: iso-8859-1 -*-
+from imaplib import IMAP4_SSL
+from optparse import OptionParser
+from email.parser import HeaderParser
+from email.message import Message
+from email.utils import getaddresses
+import sys
+import os.path
+import os
+
+def main():
+    """Print the unique e-mail addresses found in a Gmail account's headers.
+
+    Reads the headers of every message in "[Gmail]/All Mail" over IMAP (using
+    the optional on-disk cache) and prints the addresses selected by --mode
+    to stdout; progress and per-message failures go to stderr.
+    """
+    parser = OptionParser(usage="%prog --username/-u USERNAME --password/-p PASSWORD --mode/-m MODE [--domain/-d DOMAIN] [--cachedir/-c CACHEDIR]", description="Downloads gmail message headers and determines the set of all e-mail addresses on DOMAIN at which people have emailed you.")
+    parser.add_option("-u", "--username", action="store", type="string", metavar="USERNAME", help="Gmail username")
+    parser.add_option("-p", "--password", action="store", type="string", metavar="PASSWORD", help="Gmail password")
+    parser.add_option("-d", "--domain", action="store", type="string", metavar="DOMAIN", help="Domain name")
+    parser.add_option("-c", "--cachedir", action="store", type="string", metavar="CACHEDIR", help="The directory to cache fetched headers for subsequent runs")
+    parser.add_option("-m", "--mode", action="store", type="string", metavar="MODE", help="If the mode is \"to\", this prints a list of all the emails you've received email on. If the mode is \"from\" this prints a list of everyone who has sent you email. If the mode is \"frombyto\" this prints a list of all the addresses that have emailed you sorted by the address at which you received email. If the mode is \"tobyfrom\" this prints a list of all the addresses you have received e-mail from sorted and duplicated by who sent the e-mail.")
+    (options, args) = parser.parse_args()
+    if options.username is None or options.password is None:
+        parser.error("Username and password are all required.")
+    if options.mode not in ("from", "to", "frombyto", "tobyfrom"):
+        parser.error("You must specify a mode.")
+    if not options.username.lower().endswith(("@gmail.com", "@googlemail.com")):
+        options.username += "@gmail.com"
+    if options.cachedir is not None and not os.path.exists(options.cachedir):
+        try:
+            os.makedirs(options.cachedir)
+        except OSError:
+            sys.stderr.write("Could not make cache dir. Skipping cache.\n")
+            options.cachedir = None
+    # Addresses are compared lower-cased, so normalise the domain filter too.
+    domain = options.domain.lower() if options.domain is not None else None
+
+    imap = IMAP4_SSL("imap.gmail.com")
+    imap.login(options.username, options.password)
+    imap.select("[Gmail]/All Mail", True)  # second argument: open read-only
+    typ, found = imap.search(None, 'ALL')
+    if typ != "OK":
+        sys.exit("Could not search properly: %s" % typ)
+    emailAddresses = {}
+    message_ids = found[0].split()
+    length = len(message_ids)
+    header_parser = HeaderParser()
+    for counter, num in enumerate(message_ids, 1):
+        cachePath = None
+        if options.cachedir is not None:
+            cachePath = os.path.join(options.cachedir, num)
+        if cachePath is not None and os.path.exists(cachePath):
+            with open(cachePath, "r") as cached:
+                message = header_parser.parse(cached)
+        else:
+            try:
+                typ, fetched = imap.fetch(num, '(RFC822.HEADER)')
+            except Exception:
+                # Still best-effort, but a bare "except" would also swallow Ctrl-C.
+                sys.stderr.write("Failed to fetch ID %s\n" % num)
+                continue
+            if typ != "OK":
+                sys.stderr.write("Failed to fetch ID %s: %s\n" % (num, typ))
+                continue
+            raw_headers = fetched[0][1]
+            if cachePath is not None:
+                try:
+                    with open(cachePath, "w") as cache_file:
+                        cache_file.write(raw_headers)
+                except (IOError, OSError):
+                    sys.stderr.write("Could not write cache for %s\n" % num)
+            message = header_parser.parsestr(raw_headers, True)
+        recipient_headers = []
+        for header in ('to', 'cc', 'resent-to', 'resent-cc'):
+            recipient_headers.extend(message.get_all(header, []))
+        all_recipients = getaddresses(recipient_headers)
+        # The senders are the same for every recipient, so parse them once.
+        fro_addresses = set(a[1].lower() for a in getaddresses(message.get_all('from', [])))
+        for address in all_recipients:
+            to = address[1].lower()
+            if domain is not None and not to.endswith(domain):
+                continue
+            if options.mode in ("to", "tobyfrom"):
+                emailAddresses.setdefault(to, set()).update(fro_addresses)
+            else:
+                for fro in fro_addresses:
+                    emailAddresses.setdefault(fro, set()).add(to)
+            sys.stderr.write("[%s of %s]: Message to %s from %s.\n" % (counter, length, address[1], fro_addresses))
+        if len(all_recipients) == 0:
+            sys.stderr.write("[%s of %s]: Message has empty To header.\n" % (counter, length))
+    imap.close()
+    imap.logout()
+    if options.mode == "to" or options.mode == "from":
+        for addr in emailAddresses:
+            print(addr)
+    else:
+        for to, fro in emailAddresses.items():
+            print(to)
+            for f in fro:
+                print("\t%s" % f)
+
+if __name__ == '__main__':  # run only when executed as a script
+    main()
\ No newline at end of file