1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
# Based on "python-http-pipelining" by Markus J @ ActiveState Code / Recipes
# http://code.activestate.com/recipes/576673-python-http-pipelining/
# Rewritten by: 2013 Laurent Ghigonis <laurent@p1sec.com>
from httplib import HTTPConnection, _CS_IDLE
import urlparse
class HTTP_pipeline(object):
    """Send pipelined HTTP GET requests for one domain over one connection.

    Pages (request paths) are pulled from ``queue``; up to ``max_out_bound``
    requests are written to the connection before their responses are read
    back in the order the requests were sent.  Every response with a status
    of 200 or above is reported through ``cb_response(resp, data)`` where
    ``data`` is:

    * the response body for a 2xx status,
    * ``(status, reason)`` for a 3xx status without a usable Location header
      and for any status >= 400,
    * ``(status, reason, location)`` for a redirect to another host,
    * ``(status, reason, path)`` for a redirect that stays on this host.
    """

    def __init__(self, domain, queue, cb_response,
                 max_out_bound=4, debuglevel=0):
        """Prepare the connection object; no request is sent yet.

        domain        -- host passed to HTTPConnection and sent as Host header
        queue         -- object providing get() and empty(); yields page paths
        cb_response   -- callable invoked as cb_response(resp, data)
        max_out_bound -- maximum number of requests awaiting a response
        debuglevel    -- > 0 enables progress output and httplib debugging
        """
        print("XXX HTTP_pipeline.__init__")
        # Kept so redirects can be compared against the host we talk to.
        self.domain = domain
        self.queue = queue
        self.cb_response = cb_response
        self.max_out_bound = max_out_bound
        self.debuglevel = debuglevel
        self.conn = HTTPConnection(domain)
        self.conn.set_debuglevel(debuglevel)
        # Response objects of requests already sent, oldest first.
        self.respobjs = list()
        # NOTE(review): one None is appended per request sent and nothing in
        # this class reads it; kept only because the attribute is public.
        self.data = list()
        self.headers = {'Host': domain,
                        'Content-Length': 0,
                        'Connection': 'Keep-Alive'}
        # Page of each entry in respobjs (same order), needed for resending.
        self._pages = []
        # Pages whose requests were lost when the connection was closed.
        self._resend = []

    def run(self):
        """Pipeline pages from the queue until there is nothing left to do.

        Blocks on queue.get() while idle, then alternates between sending
        requests and reading responses.  Returns once every sent request has
        been answered and the queue reports no further pages.
        """
        print("XXX HTTP_pipeline.run")
        while True:
            self._fill_window()
            if not self.respobjs:
                # Nothing could be sent (max_out_bound <= 0): nothing to read.
                break
            self._read_responses()
            if not (self.respobjs or self._resend) and self.queue.empty():
                break

    def _fill_window(self):
        """Send requests until max_out_bound of them await a response."""
        while len(self.respobjs) < self.max_out_bound:
            if self._resend:
                page = self._resend.pop(0)
            elif self.respobjs and self.queue.empty():
                # Never block for new work while responses are still unread.
                # empty() is only a hint if other threads consume the queue
                # too; get() below may then still block.
                break
            else:
                page = self.queue.get()
            self._send_request(page)

    def _send_request(self, page):
        """Write one GET request and register a response object for it."""
        if self.debuglevel > 0:
            print('Sending request for %r...' % (page,))
        # Resetting the state lets request() be called again before the
        # previous response has been read.
        self.conn._HTTPConnection__state = _CS_IDLE  # FU private variable!
        self.conn.request("GET", page, None, self.headers)
        res = self.conn.response_class(self.conn.sock,
                                       strict=self.conn.strict,
                                       method=self.conn._method)
        self.respobjs.append(res)
        self._pages.append(page)
        self.data.append(None)

    def _read_responses(self):
        """Read pending responses oldest-first and report each of them.

        Stops early when the connection gets closed or when more pages are
        waiting to be sent, so the caller can refill the request window.
        """
        while self.respobjs:
            # Pop instead of removing during iteration: mutating the list
            # while looping over it skipped every other response.
            resp = self.respobjs.pop(0)
            self._pages.pop(0)
            if self.debuglevel > 0:
                print('Retrieving %r...' % (resp,))
            resp.begin()
            if self.debuglevel > 0:
                print(' %d %s' % (resp.status, resp.reason))
            body_read = self._report(resp)
            if resp.will_close:
                self.conn.close()
                if self.debuglevel > 0:
                    print(' Connection closed')
                # Requests still unanswered cannot be read from the closed
                # connection any more; queue their pages for resending.
                self._resend.extend(self._pages)
                del self.respobjs[:]
                del self._pages[:]
                break
            if not body_read:
                resp.read()  # drain the body so the next response can be read
            if self._resend or not self.queue.empty():
                # Send another pending request
                break

    def _report(self, resp):
        """Invoke the callback for resp; return True if its body was read.

        A status below 200 matches no branch: no callback is made and the
        caller drains and drops the response.
        """
        if 200 <= resp.status < 300:
            # Ok
            data = resp.read()
            cookie = resp.getheader('Set-Cookie')
            if cookie is not None:
                self.headers['Cookie'] = cookie
            self.cb_response(resp, data)
            return True
        if 300 <= resp.status < 400:
            # Redirect
            self.cb_response(resp, self._redirect_data(resp))
        elif resp.status >= 400:
            # Failed
            self.cb_response(resp, (resp.status, resp.reason))
        return False

    def _redirect_data(self, resp):
        """Build the callback tuple describing a 3xx response."""
        loc = resp.getheader('Location')
        parsed = loc and urlparse.urlparse(loc)
        if not parsed:
            # Missing or empty location header
            return (resp.status, resp.reason)
        if parsed.netloc != '' and parsed.netloc != self.domain:
            # Redirect to another host
            return (resp.status, resp.reason, loc)
        # Redirect on this host: report the target without scheme, host and
        # fragment.
        path = urlparse.urlunparse(
            parsed._replace(scheme='', netloc='', fragment=''))
        return (resp.status, resp.reason, path)
|