# Based on "python-http-pipelining" by Markus J @ ActiveState Code / Recipes
# http://code.activestate.com/recipes/576673-python-http-pipelining/
# Rewrote by: 2013 Laurent Ghigonis

from httplib import HTTPConnection, _CS_IDLE
import urlparse


class HTTP_pipeline(object):
    """Issue pipelined HTTP GET requests to one host over a single connection.

    Pages (request paths) are pulled from ``queue`` with ``queue.get()``.
    Up to ``max_out_bound`` requests are written to the socket before their
    responses are read; every response is handed to
    ``cb_response(resp, data)`` where ``data`` is:

    * the body string for a 2xx status,
    * ``(status, reason, target)`` for a 3xx status with a Location header
      (``target`` is the raw Location when it names another host, otherwise
      the Location reduced to path/params/query),
    * ``(status, reason)`` for a 3xx without Location and for any other
      status.
    """

    def __init__(self, domain, queue, cb_response, max_out_bound=4, debuglevel=0):
        """Prepare the connection object; no socket is opened here.

        domain        -- host (optionally ``host:port``) given to HTTPConnection
                         and sent as the Host header.
        queue         -- object whose ``get()`` returns the next page to fetch.
                         If it also has ``empty()`` it is used to avoid
                         blocking while responses are still outstanding.
        cb_response   -- callable invoked as ``cb_response(resp, data)``.
        max_out_bound -- maximum number of requests in flight at once.
        debuglevel    -- > 0 enables progress prints and httplib debugging.
        """
        if debuglevel > 0:
            print("XXX HTTP_pipeline.__init__")
        self.domain = domain
        self.queue = queue
        self.cb_response = cb_response
        self.max_out_bound = max_out_bound
        self.debuglevel = debuglevel
        self.conn = HTTPConnection(domain)
        self.conn.set_debuglevel(debuglevel)
        # Responses still to be read, oldest first.
        self.respobjs = list()
        # Kept only so existing readers of the attribute do not break; it is
        # no longer grown by one entry per request.
        self.data = list()
        self.headers = {'Host': domain, 'Content-Length': 0, 'Connection': 'Keep-Alive'}
        # Page of each entry in self.respobjs (same order, same length).
        self._pages = list()
        # Pages whose request was lost when the connection closed.
        self._resend = list()

    def run(self):
        """Pump the pipeline: keep it full, deliver the oldest response, repeat.

        Loops until nothing is in flight after the send phase, which only
        happens when ``max_out_bound`` is smaller than 1. Exceptions raised
        by httplib or by ``cb_response`` propagate to the caller.
        """
        if self.debuglevel > 0:
            print("XXX HTTP_pipeline.run")
        while True:
            self._fill_pipeline()
            if not self.respobjs:
                break
            self._read_response()

    def _fill_pipeline(self):
        """Send requests until ``max_out_bound`` responses are outstanding."""
        while len(self.respobjs) < self.max_out_bound:
            if self._resend:
                # Requests lost to a closed connection go first, in order.
                page = self._resend.pop(0)
            elif self.respobjs and self._queue_looks_empty():
                # Nothing to send right now: go read what is already in
                # flight instead of blocking on the queue.
                break
            else:
                page = self.queue.get()
            self._send(page)

    def _queue_looks_empty(self):
        """Return True only if the queue offers ``empty()`` and reports empty."""
        empty = getattr(self.queue, 'empty', None)
        return bool(empty is not None and empty())

    def _send(self, page):
        """Write one GET request and remember a response object for it."""
        if self.debuglevel > 0:
            print('Sending request for %r...' % (page,))
        # httplib refuses a new request while a response is pending; forcing
        # its private state back to idle is what makes pipelining possible.
        self.conn._HTTPConnection__state = _CS_IDLE
        self.conn.request("GET", page, None, self.headers)
        res = self.conn.response_class(self.conn.sock,
                                       strict=self.conn.strict,
                                       method=self.conn._method)
        self.respobjs.append(res)
        self._pages.append(page)

    def _read_response(self):
        """Read the oldest outstanding response and pass it to the callback."""
        resp = self.respobjs.pop(0)
        page = self._pages.pop(0)
        if self.debuglevel > 0:
            print('Retrieving %r...' % (page,))
        resp.begin()
        if self.debuglevel > 0:
            print(' %d %s' % (resp.status, resp.reason))

        body_consumed = False
        if 200 <= resp.status < 300:
            # Ok
            data = resp.read()
            body_consumed = True
            cookie = resp.getheader('Set-Cookie')
            if cookie is not None:
                self.headers['Cookie'] = cookie
        elif 300 <= resp.status < 400:
            # Redirect
            data = self._redirect_data(resp)
        else:
            # Failed (>= 400) or any status not handled above
            data = (resp.status, resp.reason)
        self.cb_response(resp, data)

        if resp.will_close:
            self._connection_closed()
        elif not body_consumed:
            # Drain the body so the next response starts at its status line.
            resp.read()

    def _redirect_data(self, resp):
        """Build the callback payload for a 3xx response."""
        loc = resp.getheader('Location')
        if not loc:
            # Missing or empty location header
            return (resp.status, resp.reason)
        parsed = urlparse.urlparse(loc)
        if parsed.netloc != '' and parsed.netloc != self.domain:
            # Redirect to another host
            return (resp.status, resp.reason, loc)
        # Redirect on the same host: keep only path, params and query
        path = urlparse.urlunparse(parsed._replace(scheme='', netloc='', fragment=''))
        return (resp.status, resp.reason, path)

    def _connection_closed(self):
        """Close the connection and schedule unanswered requests for resend."""
        self.conn.close()
        if self.debuglevel > 0:
            print(' Connection closed')
        # The remaining response objects are bound to the dead socket and
        # will never be answered; release them and re-send their pages.
        # httplib opens a new socket on the next request().
        for stale in self.respobjs:
            stale.close()
        self._resend = self._pages + self._resend
        del self.respobjs[:]
        del self._pages[:]