curl-library
RE: LIBCURL couldn't start connections for some of EASY handle
Date: Thu, 24 Apr 2008 20:26:43 +0800
To be clear, my pseudo code is:
for(;;) {
Create hundreds of EASY handles
Set options for them, including the TIMEOUT options
Add them to a MULTI handle
Call curl_multi_socket_all() to start connections
epoll_wait();
Handle events
Call curl_multi_info_read() to find out which EASY handles have completed;
Remove these EASY handles from the MULTI handle
}
After several loops, more than 5000 EASY handles have been added to the MULTI
handle, but it seems that LIBCURL hasn't started connections for some of them,
and these handles never have a chance to be removed from the MULTI handle.
From: yifei.chen [mailto:yifei.chen_at_keygate-inc.com]
Sent: Thursday, April 24, 2008 5:02 PM
To: 'curl-library_at_cool.haxx.se'
Subject: LIBCURL couldn't start connections for some of EASY handle
Hi all,
I have recently been using LIBCURL (7.18.0) in my project. My project works
like a spider daemon that downloads tons of URLs. I use the MULTI
interface with epoll(), and I have run into some problems.
The major problem is:
Some of the EASY handles that have been added to the MULTI handle never seem to
complete. They are never reported by curl_multi_info_read(), so they
never have a chance to be removed from the MULTI handle. I ran 'netstat -tan'
and couldn't find any HTTP connections, so I think LIBCURL hasn't started
connections for these handles.
The following is a code snippet from my project. Can someone help me check
it? Thanks!
/* State bundle built by createNetIOContext() and used by the download loop
 * and by the libcurl socket callback (which receives it as its user pointer). */
typedef struct netio_context
{
/* libcurl multi handle obtained from curl_multi_init() */
CURLM *pMultiHandlers;
/* number of easy handles currently attached; read by the download loop.
 * NOTE(review): the code that updates it is not shown here - confirm. */
unsigned int total_handles;
/* running-handle count written back by curl_multi_socket_all() /
 * curl_multi_socket_action() */
int still_running;
/* epoll instance descriptor returned by epoll_create() */
int epfd;
} netio_context_t;
/*
 * Socket callback installed with CURLMOPT_SOCKETFUNCTION.
 *
 * libcurl calls this whenever the set of events it wants to be told about
 * on socket `s` changes.  The request is mirrored into the epoll set held
 * in the netio_context_t passed as `cbp`.
 *
 *   e     - easy handle the socket belongs to (unused)
 *   s     - socket to (un)register
 *   what  - CURL_POLL_NONE / CURL_POLL_IN / CURL_POLL_OUT / CURL_POLL_INOUT /
 *           CURL_POLL_REMOVE
 *   cbp   - netio_context_t* set through CURLMOPT_SOCKETDATA
 *   sockp - per-socket pointer (unused)
 *
 * Always returns 0.
 */
static int mySocketFunc(CURL *e, curl_socket_t s, int what, void *cbp, void *sockp)
{
    netio_context_t *pNetIOContext = (netio_context_t*)cbp;
    struct epoll_event ev;

    (void)e;
    (void)sockp;
    memset(&ev, 0, sizeof(ev));

    if (what == CURL_POLL_REMOVE) {
        /* The event argument is ignored for DEL, but kernels before 2.6.9
         * require it to be non-NULL. */
        epoll_ctl(pNetIOContext->epfd, EPOLL_CTL_DEL, s, &ev);
        return 0;
    }

    ev.data.fd = s;
    /* Honour the direction libcurl asked for.  Write interest matters:
     * completion of a non-blocking connect() is reported as writability,
     * so watching only for input leaves connecting transfers stalled. */
    if (what & CURL_POLL_IN)
        ev.events |= EPOLLIN | EPOLLPRI;
    if (what & CURL_POLL_OUT)
        ev.events |= EPOLLOUT;

    /* libcurl calls again for an already-registered socket when the wanted
     * direction changes; ADD then fails with EEXIST and MOD must be used. */
    if (-1 == epoll_ctl(pNetIOContext->epfd, EPOLL_CTL_ADD, s, &ev)) {
        if (errno != EEXIST ||
            -1 == epoll_ctl(pNetIOContext->epfd, EPOLL_CTL_MOD, s, &ev))
            g_warning("*********faled in epoll_ctl() 2, %d*************\n", errno);
    }
    return 0;
}
/*
 * Allocate and initialise a netio_context_t: a zeroed context, a libcurl
 * multi handle with mySocketFunc installed as its socket callback (the
 * context itself is the callback's user pointer), and an epoll instance
 * sized from g_configInfo.max_handlers.
 *
 * Returns the new context, or NULL if the allocation, curl_multi_init()
 * or epoll_create() fails; nothing is leaked on failure.
 */
static netio_context_t *createNetIOContext(void)
{
    netio_context_t *pContext = calloc(1, sizeof(*pContext));
    if (pContext == NULL)
        return NULL;

    pContext->pMultiHandlers = curl_multi_init();
    if (pContext->pMultiHandlers == NULL) {
        free(pContext);
        return NULL;
    }
    curl_multi_setopt(pContext->pMultiHandlers,
                      CURLMOPT_SOCKETFUNCTION, mySocketFunc);
    curl_multi_setopt(pContext->pMultiHandlers, CURLMOPT_SOCKETDATA,
                      pContext);

    /* epoll_create() returns -1 on failure (e.g. a size argument <= 0). */
    pContext->epfd = epoll_create(g_configInfo.max_handlers);
    if (pContext->epfd == -1) {
        curl_multi_cleanup(pContext->pMultiHandlers);
        free(pContext);
        return NULL;
    }
    return pContext;
}
/*
 * Apply the downloader's transfer options to one easy handle.
 *
 *   pHandle - easy handle to configure
 *   url     - URL the handle should fetch
 *
 * curl_easy_setopt() is variadic, so numeric option values must be passed
 * as `long`; a plain `int` argument is read with the wrong width on
 * platforms where long is wider than int.  Hence the L suffixes and casts.
 */
static void setHandleOptions(CURL *pHandle, const char *url)
{
    curl_easy_setopt(pHandle, CURLOPT_URL, url);
    curl_easy_setopt(pHandle, CURLOPT_FAILONERROR, 1L);
    curl_easy_setopt(pHandle, CURLOPT_NOSIGNAL, 1L);
    curl_easy_setopt(pHandle, CURLOPT_NOPROGRESS, 1L);
    curl_easy_setopt(pHandle, CURLOPT_VERBOSE, 0L);
    curl_easy_setopt(pHandle, CURLOPT_WRITEFUNCTION, myWriteFunc);
    curl_easy_setopt(pHandle, CURLOPT_HEADERFUNCTION, myHeaderFunc);
    curl_easy_setopt(pHandle, CURLOPT_DNS_CACHE_TIMEOUT,
                     (long)g_configInfo.dnscache_timeout);
    /* Empty string: let libcurl advertise every encoding it supports. */
    curl_easy_setopt(pHandle, CURLOPT_ENCODING, "");
    curl_easy_setopt(pHandle, CURLOPT_USERAGENT,
                     "libcurl/7.18.0");
    /* NOTE(review): CURLOPT_FOLLOWLOCATION is not enabled in this function,
     * so this limit only takes effect if redirects are turned on elsewhere. */
    curl_easy_setopt(pHandle, CURLOPT_MAXREDIRS, 5L);
    curl_easy_setopt(pHandle, CURLOPT_IPRESOLVE,
                     (long)CURL_IPRESOLVE_V4);
    curl_easy_setopt(pHandle, CURLOPT_TIMEOUT,
                     (long)g_configInfo.timeout);
    curl_easy_setopt(pHandle, CURLOPT_CONNECTTIMEOUT,
                     (long)g_configInfo.connect_timeout);
    /* One connection per transfer: never reuse, never keep alive. */
    curl_easy_setopt(pHandle, CURLOPT_FORBID_REUSE, 1L);
    curl_easy_setopt(pHandle, CURLOPT_FRESH_CONNECT, 1L);
}
// download thread entry
static void* downloadThreadEntry(void *arg)
{
curl_global_init(CURL_GLOBAL_ALL);
netio_context_t *pNetIOContext = createNetIOContext();
CURLMcode ret;
int nready;
int i;
int still_running;
unsigned int total_handlers;
struct epoll_event *ev_array =
(struct epoll_event*)malloc(g_configInfo.max_handlers *
sizeof(struct epoll_event));
int timeout = 1000;
while(1) {
total_handlers = pNetIOContext->total_handles;
========= Get URLs from a queue, create EASY handle, set
options, then add them to the MULTI interface, =====================
if(pNetIOContext->total_handles > total_handlers) {
// there has some new handles, call curl_multi_socket_all() to
start connections
while(CURLM_CALL_MULTI_PERFORM ==
curl_multi_socket_all(pNetIOContext->pMultiHandlers,
&pNetIOContext->still_running));
}
still_running = pNetIOContext->still_running;
nready = epoll_wait(pNetIOContext->epfd, ev_array,
g_configInfo.max_handlers, timeout);
if(nready > 0) {
int bitset;
for(i = 0; i < nready; i++) {
bitset = 0;
if(ev_array[i].events & (EPOLLIN |
EPOLLPRI))
bitset |= CURL_CSELECT_IN;
if(ev_array[i].events & (EPOLLERR |
EPOLLHUP)) {
bitset |= CURL_CSELECT_ERR;
}
do {
ret =
curl_multi_socket_action(pNetIOContext->pMultiHandlers, ev_array[i].data.fd,
bitset, &pNetIOContext->still_running);
} while(CURLM_CALL_MULTI_PERFORM ==
ret) ;
}
} else if(nready == 0) { ///< timeout
if(pNetIOContext->total_handles) {
while(CURLM_CALL_MULTI_PERFORM ==
curl_multi_socket_all(pNetIOContext->pMultiHandlers,
&pNetIOContext->still_running));
}
} else {
if(errno == EINTR)
continue;
g_warning("failed in epoll_wait, %d\n", errno);
break;
}
if(still_running != pNetIOContext->still_running)
============== Some of handles have finished, call
curl_multi_info_read() to get those handles infos and remove them from the
MULTI handle =================
}
free(ev_array);
curl_global_cleanup();
return NULL;
}
Received on 2008-04-24