Re: Memory leak with curl_multi_socket_action
Date: Mon, 25 May 2020 10:06:50 +0100
On Mon, May 25, 2020 at 7:56 AM Daniel Stenberg <daniel_at_haxx.se> wrote:
> On Sun, 24 May 2020, James Read via curl-library wrote:
>
> > ==78076== by 0x48BBEE0: curl_dbg_calloc (memdebug.c:205)
> > ==78076== by 0x490A1D0: Curl_ssl_initsessions (vtls.c:608)
>
> This is the TLS session ID cache. Do you cleanup this multi handle
> correctly?
>
>
I call curl_multi_cleanup here:
/*
 * Crawler main loop.
 *
 * Copies the CrawlerConfig pointed to by arg into a local GlobalInfo, sets up
 * an epoll instance with a 1-second periodic timerfd, creates a curl multi
 * handle wired to sock_cb/multi_timer_cb, and then loops until should_exit
 * is set: wait for events, queue newly popped URLs with new_conn(), and
 * dispatch each ready descriptor to timer_cb() or event_cb().
 *
 * The void *(void *) signature suggests this is used as a pthread start
 * routine -- NOTE(review): confirm against the caller.
 *
 * Returns NULL in all cases. Unrecoverable setup failures (epoll, timerfd,
 * libcurl) terminate the whole process with exit(1).
 */
void *
crawler_init(void *arg)
{
	GlobalInfo g;
	struct itimerspec its;
	struct epoll_event ev;
	struct epoll_event events[10000];

	signal(SIGUSR1, thread_sighandler);

	memset(&g, 0, sizeof(GlobalInfo));
	memcpy(&g.config, arg, sizeof(CrawlerConfig));

	if (pthread_mutex_init(&g.lock, NULL) != 0) {
		fprintf(stderr, "mutex init has failed\n");
		return (NULL);
	}
	if (pthread_mutex_init(&g.parsed_lock, NULL) != 0) {
		fprintf(stderr, "mutex init has failed\n");
		/* Do not leak the mutex that was already initialised. */
		pthread_mutex_destroy(&g.lock);
		return (NULL);
	}

	/* Give the resolver a chance to resolve as many hosts as it can. */
	sleep(2);

	g.epfd = epoll_create1(EPOLL_CLOEXEC);
	if (g.epfd == -1) {
		/* perror() appends ": <reason>\n" itself; no newline here. */
		perror("epoll_create1 failed");
		exit(1);
	}

	g.tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
	if (g.tfd == -1) {
		perror("timerfd_create failed");
		exit(1);
	}

	/* Periodic timer: first expiry after 1s, then every 1s. */
	memset(&its, 0, sizeof(its));
	its.it_interval.tv_sec = 1;
	its.it_value.tv_sec = 1;
	if (timerfd_settime(g.tfd, 0, &its, NULL) == -1) {
		perror("timerfd_settime failed");
		exit(1);
	}

	/* Zero the whole struct: ev.data is a union wider than an int. */
	memset(&ev, 0, sizeof(ev));
	ev.events = EPOLLIN;
	ev.data.fd = g.tfd;
	if (epoll_ctl(g.epfd, EPOLL_CTL_ADD, g.tfd, &ev) == -1) {
		perror("epoll_ctl failed");
		exit(1);
	}

	/*
	 * NOTE(review): if several crawler threads run this function,
	 * curl_global_init()/curl_global_cleanup() are called once per thread.
	 * libcurl documents these as process-wide; consider moving them to the
	 * program's main thread.
	 */
	if (curl_global_init(CURL_GLOBAL_DEFAULT) != 0) {
		fprintf(stderr, "curl_global_init has failed\n");
		exit(1);
	}
	g.multi = curl_multi_init();
	if (g.multi == NULL) {
		fprintf(stderr, "curl_multi_init has failed\n");
		exit(1);
	}

	/* Set up the generic multi interface options we want. */
	curl_multi_setopt(g.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
	curl_multi_setopt(g.multi, CURLMOPT_SOCKETDATA, &g);
	curl_multi_setopt(g.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
	curl_multi_setopt(g.multi, CURLMOPT_TIMERDATA, &g);

	/*
	 * We don't call any curl_multi_socket*() function yet as we have
	 * no handles added!
	 */

	while (!should_exit) {
		int idx;
		int err = epoll_wait(g.epfd, events,
		    (int)(sizeof(events) / sizeof(events[0])), 10000);
		/*
		 * Capture errno right away: the queue pop and new_conn() calls
		 * below may overwrite it before the EINTR test is reached.
		 */
		int wait_errno = errno;
		struct link *link =
		    redis_url_pending_pop_range(g.config.queue_length);

		/* Hand every popped URL to libcurl, releasing the list as we go. */
		while (link) {
			struct link *next = link->next;

			new_conn(link->url, &g);
			free(link->url);
			free(link->host);
			free(link);
			link = next;
		}

		if (err == -1) {
			if (wait_errno == EINTR) {
				fprintf(MSG_OUT, "note: wait interrupted\n");
				continue;
			}
			errno = wait_errno;
			perror("epoll_wait");
			exit(1);
		}

		for (idx = 0; idx < err; ++idx) {
			if (events[idx].data.fd == g.tfd) {
				timer_cb(&g, events[idx].events);
			} else {
				event_cb(&g, events[idx].data.fd,
				    events[idx].events);
			}
		}
	}

	fprintf(MSG_OUT, "Exiting normally.\n");
	fflush(MSG_OUT);

	/*
	 * NOTE(review): curl_multi_cleanup() does not free easy handles that
	 * are still attached to the multi handle. Any transfer still in flight
	 * when should_exit becomes true keeps its easy handle and whatever
	 * new_conn() allocated for it, which would match the curl_easy_init /
	 * write_cb leaks valgrind reports. Fixing that requires tracking the
	 * live handles (in new_conn()/check_multi_info()) and, for each one,
	 * calling curl_multi_remove_handle() + curl_easy_cleanup() here.
	 */
	curl_multi_cleanup(g.multi);
	curl_global_cleanup();

	/*
	 * Release the descriptors only after the multi handle is gone, in case
	 * its teardown still invokes the socket/timer callbacks that use them.
	 */
	close(g.tfd);
	close(g.epfd);
	pthread_mutex_destroy(&g.parsed_lock);
	pthread_mutex_destroy(&g.lock);

	return (NULL);
}
> > ==78076== by 0x489E601: allocate_conn (url.c:1562)
> > ==78076== by 0x48A28CA: create_conn (url.c:3378)
>
> This is a connection struct used for holding on to everything that is
> related to a single connection. This is possibly due to not having cleaned
> up the multi handle too, or perhaps an easy handle.
>
> > ==78076== by 0x489C739: Curl_open (url.c:588)
> > ==78076== by 0x488DCF4: curl_easy_init (easy.c:301)
>
> This is an easy handle. Missing a call to curl_easy_cleanup() ?
>
I call curl_easy_cleanup here:
/*
 * Drain the multi handle's message queue.
 *
 * For every completed transfer (CURLMSG_DONE): look up the ConnInfo stored as
 * the easy handle's private pointer, feed the downloaded body to html_parse()
 * when the response qualifies, then detach and destroy the easy handle and
 * free the ConnInfo together with its data buffer.
 *
 * g: crawler state; g->multi is the multi handle whose messages are read.
 */
static void
check_multi_info(GlobalInfo *g)
{
	CURLMsg *msg;
	int msgs_left;

	while ((msg = curl_multi_info_read(g->multi, &msgs_left))) {
		CURL *easy;
		/* Pre-set to NULL so a failing getinfo leaves a defined value. */
		ConnInfo *conn = NULL;
		char *eff_url = NULL;
		char *ct = NULL;

		if (msg->msg != CURLMSG_DONE)
			continue;

		easy = msg->easy_handle;
		curl_easy_getinfo(easy, CURLINFO_PRIVATE, &conn);
		curl_easy_getinfo(easy, CURLINFO_EFFECTIVE_URL, &eff_url);
		curl_easy_getinfo(easy, CURLINFO_CONTENT_TYPE, &ct);

		/*
		 * libcurl reports a NULL content type when the server sent no
		 * valid Content-Type header, so guard before comparing.
		 * NOTE(review): assumes starts_with() does not accept NULL and
		 * returns 0 on a match -- confirm against its definition.
		 */
		if (conn != NULL && conn->data && eff_url != NULL &&
		    eff_url[0] != '\0' && ct != NULL &&
		    starts_with(ct, "text/html") == 0) {
			parsed_sites_inc(g);
			/* eff_url is owned by the easy handle: use it before cleanup. */
			html_parse(eff_url, conn->data);
		}

		curl_multi_remove_handle(g->multi, easy);
		/*
		 * NOTE(review): conn->url is not freed here; if new_conn()
		 * duplicates the URL into conn->url this leaks one string per
		 * transfer -- confirm against new_conn().
		 */
		if (conn != NULL)
			free(conn->data);
		curl_easy_cleanup(easy);
		free(conn);
	}
}
What am I missing?
James Read
> > ==78076== at 0x483B723: malloc (in
> > /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==78076== by 0x483E017: realloc (in
> > /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==78076== by 0x10CA22: write_cb (crawler.c:277)
> > ==78076== by 0x48D707B: chop_write (sendf.c:606)
>
> This is your callback code doing the allocation (realloc really).
>
> --
>
> / daniel.haxx.se | Commercial curl support up to 24x7 is available!
> | Private help, bug fixes, support, ports, new features
> | https://www.wolfssl.com/contact/
>
-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette: https://curl.haxx.se/mail/etiquette.html
Received on 2020-05-25