# toys/brhute-py/httplib_pipelining.py

# Based on "python-http-pipelining" by Markus J @ ActiveState Code / Recipes
# http://code.activestate.com/recipes/576673-python-http-pipelining/
# Rewritten by: 2013 Laurent Ghigonis <laurent@p1sec.com>

from httplib import HTTPConnection, _CS_IDLE
import urlparse

class HTTP_pipeline():
    """Send pipelined HTTP GET requests to one host over a single connection.

    Pages to fetch are pulled from ``queue`` (any object with a ``get()``
    method returning a request path).  Requests are written back-to-back
    on one ``HTTPConnection`` and the responses are then read in the same
    order; each one is handed to ``cb_response(resp, data)`` where ``data``
    is:

    * the response body for a 2xx status,
    * ``(status, reason)`` for a status >= 400 or a 3xx without a usable
      ``Location`` header,
    * ``(status, reason, location)`` for a 3xx pointing at another host,
    * ``(status, reason, path)`` for a 3xx pointing at the same host, with
      scheme, host and fragment stripped from the location.
    """

    def __init__(self, domain, queue, cb_response,
                 max_out_bound=4, debuglevel=0):
        """Prepare the connection object and the default request headers.

        domain        -- host passed to HTTPConnection, also sent as the
                         Host header and used to recognise same-host
                         redirects
        queue         -- source of pages; only its get() method is used
        cb_response   -- callable invoked as cb_response(resp, data)
        max_out_bound -- number of requests sent before responses are read
        debuglevel    -- > 0 enables progress output here and in httplib
        """
        # Single-argument print(...) behaves the same as the Python 2
        # print statement while remaining parseable by Python 3 tooling.
        print("XXX HTTP_pipeline.__init__")
        self.domain = domain
        self.queue = queue
        self.cb_response = cb_response
        self.max_out_bound = max_out_bound
        self.debuglevel = debuglevel
        self.conn = HTTPConnection(domain)
        self.conn.set_debuglevel(debuglevel)
        # Response objects for requests that were sent but not yet
        # delivered to cb_response, in the order the requests were sent.
        self.respobjs = list()
        # One placeholder is appended per request sent; run() never reads
        # it back.  Kept so the attribute still exists for outside readers.
        self.data = list()
        self.headers = {'Host': domain,
                        'Content-Length': 0,
                        'Connection': 'Keep-Alive'}

    def _redirect_data(self, resp):
        """Return the callback payload describing a 3xx response."""
        loc = resp.getheader('Location')
        if not loc:
            # Missing or empty location header
            return (resp.status, resp.reason)
        parsed = urlparse.urlparse(loc)
        if parsed.netloc != '' and parsed.netloc != self.domain:
            # Redirect to another host: report the location untouched
            return (resp.status, resp.reason, loc)
        # Same host (or relative): reduce the location to a request path
        path = urlparse.urlunparse(
            parsed._replace(scheme='', netloc='', fragment=''))
        return (resp.status, resp.reason, path)

    def run(self):
        """Send a batch of requests, then deliver the responses in order.

        Each round pulls ``max_out_bound`` pages from the queue and sends
        them, then reads every pending response and passes it to
        ``cb_response``.  The method returns once a round's responses have
        all been read without the server announcing a close.  If a response
        has ``will_close`` set, the connection is closed and a new round is
        started.

        Known limitation: requests still pending when the server closes
        the connection are not re-sent; their response objects stay in
        ``self.respobjs``.
        """
        print("XXX HTTP_pipeline.run")
        while True:
            # Send
            for _ in range(self.max_out_bound):
                page = self.queue.get()
                if self.debuglevel > 0:
                    print('Sending request for %r...' % (page,))
                # httplib refuses a new request while it believes a
                # response is outstanding; force the private state back to
                # idle so requests can be written back-to-back.
                self.conn._HTTPConnection__state = _CS_IDLE
                self.conn.request("GET", page, None, self.headers)
                res = self.conn.response_class(self.conn.sock,
                                               strict=self.conn.strict,
                                               method=self.conn._method)
                self.respobjs.append(res)
                self.data.append(None)
            # Read the responses in the order the requests were sent.
            # Iterate over a snapshot: entries are removed from
            # self.respobjs as they complete, and removing from the list
            # being iterated would skip every other response.
            for resp in list(self.respobjs):
                if self.debuglevel > 0:
                    print('Retrieving %r...' % (resp,))
                body_read = False
                resp.begin()
                if self.debuglevel > 0:
                    print('    %d %s' % (resp.status, resp.reason))
                if 200 <= resp.status < 300:
                    # Ok
                    data = resp.read()
                    cookie = resp.getheader('Set-Cookie')
                    if cookie is not None:
                        self.headers['Cookie'] = cookie
                    body_read = True
                elif 300 <= resp.status < 400:
                    # Redirect
                    data = self._redirect_data(resp)
                elif resp.status >= 400:
                    # Failed
                    data = (resp.status, resp.reason)
                if resp.status >= 200:
                    # Statuses below 200 are neither reported nor removed.
                    self.respobjs.remove(resp)
                    self.cb_response(resp, data)
                if resp.will_close:
                    # Connection (will be) closed, need to resend
                    self.conn.close()
                    if self.debuglevel > 0:
                        print('  Connection closed')
                    # TODO: re-send the requests that were still pending
                    break
                if not body_read:
                    # Drain the body so the next response starts at its
                    # own status line on the shared socket.
                    resp.read()
            else:
                # Every pending response was read and the connection is
                # still open: this run is finished.
                break