diff options
author | Jason A. Donenfeld <Jason@zx2c4.com> | 2009-07-24 01:00:37 -0400 |
---|---|---|
committer | Jason A. Donenfeld <Jason@zx2c4.com> | 2009-07-24 01:00:37 -0400 |
commit | ed485e8f46894e40c30024dda6f58882a2211a5a (patch) | |
tree | ec260c44f90576c7dc3348e92da8dfcdf6d041a3 | |
parent | Only plot verified points on map. (diff) | |
download | geoemail-ed485e8f46894e40c30024dda6f58882a2211a5a.tar.xz geoemail-ed485e8f46894e40c30024dda6f58882a2211a5a.zip |
Use Python's email parser instead of bad self-parsing.
-rw-r--r-- | src/emailinfo.py | 62 |
1 files changed, 31 insertions, 31 deletions
diff --git a/src/emailinfo.py b/src/emailinfo.py index 95fc1fc3..deaa285f 100644 --- a/src/emailinfo.py +++ b/src/emailinfo.py @@ -6,12 +6,14 @@ import ipaddr import email.utils import datetime from weblookuptools import * +from email.parser import Parser class EmailInfo: __location = None ipRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)") ipInHostRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)-){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)") hostRegex = re.compile(generateHostRegex()) + parser = Parser() def __init__(self, ip = "", date = 0, debug = "", path = ""): self.ip = ip self.date = date @@ -31,39 +33,37 @@ class EmailInfo: @classmethod def parseFile(cls, fileName): file = open(fileName) + msg = EmailInfo.parser.parse(file, True) receiveHosts = [] date = False - for line in file: - line = line.strip() - if line.startswith("Received:"): #Only look at Received lines - filteredIps = [] - #After the "by" or "for" there is no useful information - byIndex = line.find("by") - forIndex = line.find("for") - if forIndex != -1 and forIndex < byIndex: - byIndex = forIndex - if byIndex != -1: - line = line[:byIndex] - ips = EmailInfo.ipRegex.findall(line) - hosts = EmailInfo.hostRegex.findall(line) - #Some host names have IPs embedded in them, like dyn-23.54.128.44.columbia.edu - for host in hosts: - ipInHosts = EmailInfo.ipInHostRegex.findall(host) - if len(ipInHosts) > 0: - ips.insert(0, ipInHosts[0].replace("-", ".")) - hosts.remove(host) - #Filter out non-internet IPs using Google's library - for ip in ips: - if EmailInfo.isInternetIP(ip): - filteredIps.append(ip) - #We seperate the hosts from the ips because we give more importance to IPs, since host resolution may have changed over the years. 
- if len(hosts) > 0 or len(filteredIps) > 0: - receiveHosts.append([hosts, filteredIps]) - elif line.startswith("Date:") and not date: - try: - date = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(line[6:]))) - except: - date = False + for line in msg.get_all("Received"): + filteredIps = [] + #After the "by" or "for" there is no useful information + byIndex = line.find("by") + forIndex = line.find("for") + if forIndex != -1 and forIndex < byIndex: + byIndex = forIndex + if byIndex != -1: + line = line[:byIndex] + ips = EmailInfo.ipRegex.findall(line) + hosts = EmailInfo.hostRegex.findall(line) + #Some host names have IPs embedded in them, like dyn-23.54.128.44.columbia.edu + for host in hosts: + ipInHosts = EmailInfo.ipInHostRegex.findall(host) + if len(ipInHosts) > 0: + ips.insert(0, ipInHosts[0].replace("-", ".")) + hosts.remove(host) + #Filter out non-internet IPs using Google's library + for ip in ips: + if EmailInfo.isInternetIP(ip): + filteredIps.append(ip) + #We seperate the hosts from the ips because we give more importance to IPs, since host resolution may have changed over the years. + if len(hosts) > 0 or len(filteredIps) > 0: + receiveHosts.append([hosts, filteredIps]) + try: + date = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(msg.get("Date")))) + except: + date = False file.close() #Earlier lines come later in the file, so we look at the file reversed receiveHosts.reverse() |