curl-and-python
Curl object reuse
Date: Thu, 7 Jul 2011 17:51:22 -0700 (PDT)
I would like persistent connections. I'm trying to reuse the handles in my multi curl. I have some code hacked from retriever-multi. Basically I just do the initialisation then call a subroutine that runs perform and waits for the results.
It just gets stuck after the first download. What am I missing here?
...Daniel
import pycurl
import sys
import time
import datetime
if len(sys.argv) < 2:
sys.exit('Usage: %s <N>' % sys.argv[0])
URL = "http://cs.ucsb.edu"
HEADERS = ['Connection: Keep-Alive', 'Keep-Alive: 300']
N = int(sys.argv[1])
fileNum = 0
def doCurl(N):
global fileNum
num_handles=1
while num_handles:
begin = time.time()
while 1:
ret, num_handles = m.perform()
if ret != pycurl.E_CALL_MULTI_PERFORM:
break
num_q, ok_list, err_list = m.info_read()
for c in ok_list:
delta = time.time()-begin
m.remove_handle(c)
print "Success: ", delta, c.filename, c.getinfo(pycurl.EFFECTIVE_URL)
for c, errno, errmsg in err_list:
c.fp.close()
c.fp = None
m.remove_handle(c)
print "Failed: ", c.filename, c.url, errno, errmsg
m.select(1.0)
# Pre-allocate a list of curl objects
m = pycurl.CurlMulti()
m.handles = []
for i in range(N):
c = pycurl.Curl()
c.fp = None
c.setopt(pycurl.FOLLOWLOCATION, 1)
c.setopt(pycurl.MAXREDIRS, 5)
c.setopt(pycurl.CONNECTTIMEOUT, 30)
c.setopt(pycurl.TIMEOUT, 300)
c.setopt(pycurl.NOSIGNAL, 1)
c.setopt (pycurl.HTTPHEADER, HEADERS)
c.filename = "./data/doc_%03d.dat" % (fileNum)
fileNum = fileNum + 1
c.fp = open(c.filename, "wb")
c.url = URL
c.setopt(pycurl.URL, c.url)
c.setopt(pycurl.WRITEDATA, c.fp)
m.handles.append(c)
m.add_handle(c)
print m.handles[0]
while(1):
doCurl(N)
_______________________________________________
http://cool.haxx.se/cgi-bin/mailman/listinfo/curl-and-python
Received on 2011-07-08