summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Jason A. Donenfeld <Jason@zx2c4.com> 2009-07-24 01:00:37 -0400
committer Jason A. Donenfeld <Jason@zx2c4.com> 2009-07-24 01:00:37 -0400
commit ed485e8f46894e40c30024dda6f58882a2211a5a (patch)
tree ec260c44f90576c7dc3348e92da8dfcdf6d041a3
parent Only plot verified points on map. (diff)
download geoemail-ed485e8f46894e40c30024dda6f58882a2211a5a.tar.xz
geoemail-ed485e8f46894e40c30024dda6f58882a2211a5a.zip
Use python's email parser instead of bad self-parsing.
-rw-r--r-- src/emailinfo.py 62
1 files changed, 31 insertions, 31 deletions
diff --git a/src/emailinfo.py b/src/emailinfo.py
index 95fc1fc3..deaa285f 100644
--- a/src/emailinfo.py
+++ b/src/emailinfo.py
@@ -6,12 +6,14 @@ import ipaddr
import email.utils
import datetime
from weblookuptools import *
+from email.parser import Parser
class EmailInfo:
__location = None
ipRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)")
ipInHostRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)-){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)")
hostRegex = re.compile(generateHostRegex())
+ parser = Parser()
def __init__(self, ip = "", date = 0, debug = "", path = ""):
self.ip = ip
self.date = date
@@ -31,39 +33,37 @@ class EmailInfo:
@classmethod
def parseFile(cls, fileName):
file = open(fileName)
+ msg = EmailInfo.parser.parse(file, True)
receiveHosts = []
date = False
- for line in file:
- line = line.strip()
- if line.startswith("Received:"): #Only look at Received lines
- filteredIps = []
- #After the "by" or "for" there is no useful information
- byIndex = line.find("by")
- forIndex = line.find("for")
- if forIndex != -1 and forIndex < byIndex:
- byIndex = forIndex
- if byIndex != -1:
- line = line[:byIndex]
- ips = EmailInfo.ipRegex.findall(line)
- hosts = EmailInfo.hostRegex.findall(line)
- #Some host names have IPs embedded in them, like dyn-23.54.128.44.columbia.edu
- for host in hosts:
- ipInHosts = EmailInfo.ipInHostRegex.findall(host)
- if len(ipInHosts) > 0:
- ips.insert(0, ipInHosts[0].replace("-", "."))
- hosts.remove(host)
- #Filter out non-internet IPs using Google's library
- for ip in ips:
- if EmailInfo.isInternetIP(ip):
- filteredIps.append(ip)
- #We seperate the hosts from the ips because we give more importance to IPs, since host resolution may have changed over the years.
- if len(hosts) > 0 or len(filteredIps) > 0:
- receiveHosts.append([hosts, filteredIps])
- elif line.startswith("Date:") and not date:
- try:
- date = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(line[6:])))
- except:
- date = False
+ for line in msg.get_all("Received"):
+ filteredIps = []
+ #After the "by" or "for" there is no useful information
+ byIndex = line.find("by")
+ forIndex = line.find("for")
+ if forIndex != -1 and forIndex < byIndex:
+ byIndex = forIndex
+ if byIndex != -1:
+ line = line[:byIndex]
+ ips = EmailInfo.ipRegex.findall(line)
+ hosts = EmailInfo.hostRegex.findall(line)
+ #Some host names have IPs embedded in them, like dyn-23.54.128.44.columbia.edu
+ for host in hosts:
+ ipInHosts = EmailInfo.ipInHostRegex.findall(host)
+ if len(ipInHosts) > 0:
+ ips.insert(0, ipInHosts[0].replace("-", "."))
+ hosts.remove(host)
+ #Filter out non-internet IPs using Google's library
+ for ip in ips:
+ if EmailInfo.isInternetIP(ip):
+ filteredIps.append(ip)
+ #We seperate the hosts from the ips because we give more importance to IPs, since host resolution may have changed over the years.
+ if len(hosts) > 0 or len(filteredIps) > 0:
+ receiveHosts.append([hosts, filteredIps])
+ try:
+ date = datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(msg.get("Date"))))
+ except:
+ date = False
file.close()
#Earlier lines come later in the file, so we look at the file reversed
receiveHosts.reverse()