1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
import re
import ipaddr
import email.utils
import datetime
from weblookuptools import *
class EmailInfo:
    """Origin information extracted from the headers of a saved e-mail.

    Attributes:
        ip:    IP address chosen as the message's origin (string).
        date:  value parsed from the "Date:" header as a naive UTC
               datetime, or False when no Date header could be parsed
               (the constructor default is 0).
        debug: False, or a short note describing an ambiguous/late match.
        path:  name of the file the information was parsed from.

    Instances order by ``date`` through ``__cmp__``; note that only Python 2
    consults ``__cmp__``.
    """

    # Cached result of iplocation(self.ip); filled in lazily by location().
    __location = None
    # Dotted-quad IPv4 address, e.g. 23.54.128.44
    ipRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)")
    # IPv4 address written with dashes inside a host name, e.g. 23-54-128-44
    ipInHostRegex = re.compile(r"(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)-){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)")
    # Host-name pattern; the expression itself comes from weblookuptools.
    hostRegex = re.compile(generateHostRegex())
    # Stand-alone "by"/"for" keyword of a Received line. Requiring surrounding
    # whitespace keeps host names such as "derby" or "salesforce" from being
    # mistaken for the keyword.
    _clauseEndRegex = re.compile(r"\s(?:by|for)(?=\s|$)")

    def __init__(self, ip = "", date = 0, debug = "", path = ""):
        """Store the origin IP, message date, debug note and source path."""
        self.ip = ip
        self.date = date
        self.debug = debug
        self.path = path

    def location(self):
        """Return iplocation(self.ip), caching the first non-None result."""
        if self.__location is None:
            self.__location = iplocation(self.ip)
        return self.__location

    def __cmp__(self, other):
        """Three-way comparison by date: 1 if later, -1 if earlier, else 0."""
        if self.date > other.date:
            return 1
        if self.date < other.date:
            return -1
        return 0

    @classmethod
    def parseFile(cls, fileName):
        """Build an EmailInfo from the e-mail stored in ``fileName``.

        Every line starting with "Received:" is scanned for host names and
        IP addresses, and the first parseable "Date:" line supplies the date.
        The Received lines are then examined from the bottom of the file
        upwards (earlier lines come later in the file) and the first one that
        yields an address wins. Literal IPs are preferred over resolved host
        names, since host resolution may have changed over the years.

        Returns:
            A new instance of ``cls`` built as cls(ip, date, debug, fileName).

        Raises:
            RuntimeError: if no Received line yields a usable address.
        """
        receiveHosts = []
        date = False
        # "with" guarantees the file is closed even if parsing raises.
        with open(fileName) as headerFile:
            for line in headerFile:
                line = line.strip()
                if line.startswith("Received:"): #Only look at Received lines
                    hosts, ips = cls._parseReceivedLine(line)
                    if hosts or ips:
                        receiveHosts.append([hosts, ips])
                elif line.startswith("Date:") and not date:
                    date = cls._parseDate(line[6:])

        #Earlier lines come later in the file, so we look at the file reversed
        receiveHosts.reverse()
        for i, (hosts, ips) in enumerate(receiveHosts):
            # Host names are only resolved for the entries actually visited.
            hosts = cls._dedupeKeepLast(cls._resolveHosts(hosts))
            ips = cls._dedupeKeepLast(ips)

            debug = False
            if i == 0 and (len(hosts) == 1 or len(ips) == 1):
                pass #Unambiguous hit on the first entry examined: nothing to report
            elif len(ips) > 1:
                debug = "MANY IP: %s, hit %i" % (ips, (i + 1))
            elif len(ips) == 0 and len(hosts) > 1:
                debug = "MANY DOMAIN: %s, hit %i" % (hosts, (i + 1))
            elif len(ips) == 1 and i > 0:
                debug = "IP HIT ON %i: %s" % ((i + 1), ips[0])
            elif len(hosts) == 1 and i > 0:
                debug = "HOST HIT ON %i: %s" % ((i + 1), hosts[0])

            #Choose last IP, preferring IPs to resolved hosts
            ip = False
            if ips:
                ip = ips[-1]
            elif hosts:
                ip = hosts[-1]
            if ip:
                return cls(ip, date, debug, fileName)
        raise RuntimeError("Unable to parse %s" % fileName)

    @classmethod
    def _parseReceivedLine(cls, line):
        """Return (hosts, internetIps) found in one stripped Received line."""
        #After the "by" or "for" there is no useful information
        clauseEnd = cls._clauseEndRegex.search(line)
        if clauseEnd:
            line = line[:clauseEnd.start()]
        ips = cls.ipRegex.findall(line)
        hosts = []
        #Some host names have IPs embedded in them, like dyn-23-54-128-44.columbia.edu
        for host in cls.hostRegex.findall(line):
            embedded = cls.ipInHostRegex.findall(host)
            if embedded:
                # Inserted at the front so literal IPs stay last (the last
                # entry is the one parseFile prefers).
                ips.insert(0, embedded[0].replace("-", "."))
            else:
                hosts.append(host)
        #Filter out non-internet IPs using Google's library
        return hosts, [ip for ip in ips if cls.isInternetIP(ip)]

    @staticmethod
    def _parseDate(value):
        """Parse a Date header value into a naive UTC datetime, or False."""
        try:
            return datetime.datetime.utcfromtimestamp(email.utils.mktime_tz(email.utils.parsedate_tz(value)))
        except Exception:
            # Best effort: an unparseable header leaves the date unset so a
            # later "Date:" line can still be tried.
            return False

    @classmethod
    def _resolveHosts(cls, hosts):
        """Resolve host names with ipFromHost, keeping only internet IPs."""
        resolved = []
        for host in hosts:
            try:
                ip = ipFromHost(host)
                if cls.isInternetIP(ip):
                    resolved.append(ip)
            except Exception:
                #Non-resolving (or unparseable) host names are dropped
                continue
        return resolved

    @staticmethod
    def _dedupeKeepLast(items):
        """Return ``items`` without duplicates, keeping each last occurrence."""
        seen = set()
        kept = []
        for item in reversed(items):
            if item not in seen:
                seen.add(item)
                kept.append(item)
        kept.reverse()
        return kept

    @staticmethod
    def isInternetIP(ip):
        """Return True unless ``ip`` is 0.0.0.0, loopback, link-local,
        multicast or an RFC 1918 private address.

        ``ip`` must be an IPv4 string accepted by ipaddr.IPv4; whatever that
        constructor raises for malformed input propagates to the caller.
        """
        ipObj = ipaddr.IPv4(ip)
        return ip != "0.0.0.0" and not ipObj.IsLoopback() and not ipObj.IsLinkLocal() and not ipObj.IsMulticast() and not ipObj.IsRFC1918()
|