summaryrefslogtreecommitdiffstats
path: root/uniqueemails.py
blob: f94992f1805e38fa31d37ed3bae9192f817a72ef (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
from imaplib import IMAP4_SSL
from optparse import OptionParser
from email.parser import HeaderParser
from email.message import Message
from email.utils import getaddresses
import sys
import os.path
import os

def _parse_options():
	"""Parse and validate the command line.

	Exits through ``parser.error`` (exit status 2) when the username, the
	password or a recognised mode is missing. A username that does not end
	in ``@gmail.com`` or ``@googlemail.com`` gets ``@gmail.com`` appended.

	Returns the optparse options object.
	"""
	# NOTE(review): the help text for "frombyto"/"tobyfrom" appears to describe
	# the opposite grouping from what the code emits ("frombyto" prints each
	# sender followed by the addresses it wrote to) -- confirm which is intended.
	parser = OptionParser(usage="%prog --username/-u USERNAME --password/-p PASSWORD --mode/-m MODE [--domain/-d DOMAIN] [--cachedir/-c CACHEDIR]", description="Downloads gmail message headers and determines the set of all e-mail addresses on DOMAIN at which people have emailed you.")
	parser.add_option("-u", "--username", action="store", type="string", metavar="USERNAME", help="Gmail username")
	parser.add_option("-p", "--password", action="store", type="string", metavar="PASSWORD", help="Gmail password")
	parser.add_option("-d", "--domain", action="store", type="string", metavar="DOMAIN", help="Domain name")
	parser.add_option("-c", "--cachedir", action="store", type="string", metavar="CACHEDIR", help="The directory to cache fetched headers for subsequent runs")
	parser.add_option("-m", "--mode", action="store", type="string", metavar="MODE", help="If the mode is \"to\", this prints a list of all the emails you've received email on. If the mode is \"from\" this prints a list of everyone who has sent you email. If the mode is \"frombyto\" this prints a list of all the addresses that have emailed you sorted by the address at which you received email. If the mode is \"tobyfrom\" this prints a list of all the addresses you have received e-mail from sorted and duplicated by who sent the e-mail.")
	(options, args) = parser.parse_args()
	if options.username is None or options.password is None:
		parser.error("Username and password are all required.")
	if options.mode not in ("from", "to", "frombyto", "tobyfrom"):
		parser.error("You must specify a mode.")
	if not options.username.lower().endswith(("@gmail.com", "@googlemail.com")):
		options.username += "@gmail.com"
	return options


def _prepare_cache_dir(cache_dir):
	"""Return *cache_dir* once it exists, or None when caching is unavailable."""
	if cache_dir is None or os.path.exists(cache_dir):
		return cache_dir
	try:
		os.makedirs(cache_dir)
	except OSError:
		sys.stderr.write("Could not make cache dir. Skipping cache.\n")
		return None
	return cache_dir


def _load_headers(imap, header_parser, num, cache_dir):
	"""Return the parsed headers of message *num*, or None if unavailable.

	Headers are read from ``cache_dir/num`` when that file exists; otherwise
	they are fetched over IMAP and, if caching is enabled, stored there.
	Fetch failures are reported on stderr and yield None so the caller can
	skip the message.
	"""
	cache_path = None
	if cache_dir is not None:
		cache_path = os.path.join(cache_dir, num)
		if os.path.exists(cache_path):
			with open(cache_path, "r") as cached:
				return header_parser.parse(cached)
	try:
		typ, data = imap.fetch(num, '(RFC822.HEADER)')
	except (IMAP4_SSL.error, IOError, OSError):
		# Protocol and socket failures only: Ctrl-C must still abort the run.
		sys.stderr.write("Failed to fetch ID %s\n" % num)
		return None
	if typ != "OK":
		sys.stderr.write("Failed to fetch ID %s: %s\n" % (num, typ))
		return None
	if not data or not isinstance(data[0], tuple):
		# imaplib hands back [None] when the server sent no payload for the id.
		sys.stderr.write("Failed to fetch ID %s: empty response\n" % num)
		return None
	raw_headers = data[0][1]
	if cache_path is not None:
		try:
			with open(cache_path, "w") as cache_file:
				cache_file.write(raw_headers)
		except (IOError, OSError):
			sys.stderr.write("Could not write cache for %s\n" % num)
			# A truncated file would be trusted as a valid cache entry on the
			# next run, so try not to leave one behind.
			try:
				os.remove(cache_path)
			except OSError:
				pass
	return header_parser.parsestr(raw_headers, True)


def _record_message(message, domain, mode, email_addresses, progress):
	"""Fold one message's sender/recipient pairs into *email_addresses*.

	Recipients are taken from the To, Cc, Resent-To and Resent-Cc headers and
	senders from From; all addresses are lower-cased. When *domain* (already
	lower-cased by the caller) is not None, only recipients whose address
	ends with it are considered. For modes "to"/"tobyfrom" the dict maps
	recipient -> set of senders; for "from"/"frombyto" it maps
	sender -> set of recipients. *progress* is the "[i of n]" prefix used
	for the stderr log lines.
	"""
	recipient_headers = []
	for header in ('to', 'cc', 'resent-to', 'resent-cc'):
		recipient_headers.extend(message.get_all(header, []))
	recipients = getaddresses(recipient_headers)
	if not recipients:
		sys.stderr.write("%s: Message has empty To header.\n" % progress)
		return
	# getaddresses yields (realname, address) pairs; parse From once per
	# message rather than once per recipient, and drop unparsable (empty) entries.
	senders = set(address.lower() for _, address in getaddresses(message.get_all('from', [])) if address)
	for _, address in recipients:
		to = address.lower()
		if not to:
			continue
		# Compare in lower case: domain names are case-insensitive.
		if domain is not None and not to.endswith(domain):
			continue
		if mode in ("to", "tobyfrom"):
			email_addresses.setdefault(to, set()).update(senders)
		else:
			for sender in senders:
				email_addresses.setdefault(sender, set()).add(to)
		sys.stderr.write("%s: Message to %s from %s.\n" % (progress, address, senders))


def _print_results(mode, email_addresses):
	"""Write the collected addresses to stdout, sorted for stable output.

	"to"/"from" print only the keys; "tobyfrom"/"frombyto" additionally print
	each key's related addresses on tab-indented lines.
	"""
	grouped = mode in ("tobyfrom", "frombyto")
	for key in sorted(email_addresses):
		sys.stdout.write("%s\n" % key)
		if grouped:
			for related in sorted(email_addresses[key]):
				sys.stdout.write("\t%s\n" % related)


def main():
	"""Scan the headers of every message in Gmail's "All Mail" and print addresses.

	Reads its configuration from the command line (see ``_parse_options``),
	logs in to imap.gmail.com over SSL, opens "[Gmail]/All Mail" read-only,
	and for each message records which sender wrote to which recipient
	address. Progress and per-message failures go to stderr; the final
	address list goes to stdout.

	Exits with a message if the mailbox cannot be selected or searched.

	NOTE(review): written against Python 2's imaplib, which returns native
	strings; on Python 3 imaplib returns bytes for message ids and payloads,
	which this code does not decode -- confirm before porting.
	"""
	options = _parse_options()
	cache_dir = _prepare_cache_dir(options.cachedir)
	domain = options.domain.lower() if options.domain is not None else None

	email_addresses = {}
	header_parser = HeaderParser()
	imap = IMAP4_SSL("imap.gmail.com")
	try:
		imap.login(options.username, options.password)
		typ, data = imap.select("[Gmail]/All Mail", True)
		if typ != "OK":
			# Without a selected mailbox SEARCH would fail with an obscure state error.
			sys.exit("Could not select mailbox: %s" % typ)
		typ, data = imap.search(None, 'ALL')
		if typ != "OK":
			sys.exit(("Could not search properly: %s" % typ))
		message_ids = data[0].split() if data and data[0] else []
		total = len(message_ids)
		for counter, num in enumerate(message_ids, 1):
			message = _load_headers(imap, header_parser, num, cache_dir)
			if message is None:
				continue
			_record_message(message, domain, options.mode, email_addresses, "[%s of %s]" % (counter, total))
		imap.close()
	finally:
		# Always end the session, including on error and sys.exit paths.
		imap.logout()
	_print_results(options.mode, email_addresses)

# Run the address scan only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
	main()