curl / Mailing Lists / curl-library / Single Mail
Buy commercial curl support from WolfSSL. We help you work out your issues, debug your libcurl applications, use the API, port to new platforms, add new features and more. With a team lead by the curl founder himself.

Re: Memory leak with curl_multi_socket_action

From: James Read via curl-library <curl-library_at_cool.haxx.se>
Date: Mon, 25 May 2020 23:24:02 +0100

On Mon, May 25, 2020 at 11:15 PM James Read <jamesread5737_at_gmail.com> wrote:

>
>
> On Mon, May 25, 2020 at 10:37 PM Daniel Stenberg <daniel_at_haxx.se> wrote:
>
>> On Mon, 25 May 2020, James Read wrote:
>>
>> > I call curl_multi_cleanup here:
>>
>> > I call curl_easy_cleanup here:
>>
>> > What am I missing?
>>
>> I don't think we'll be able to tell you that. We can't reproduce this
>> problem.
>> We don't see any memory leak in any builds on any platforms.
>>
>> I suggest you write up a sample program based on your experiences that
>> reproduces the problem, and show that to us. Then we can help out more!
>>
>
> #include <errno.h>
> #include <fcntl.h>
> #include <signal.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
> #include <sys/epoll.h>
> #include <sys/stat.h>
> #include <sys/time.h>
> #include <sys/timerfd.h>
> #include <sys/types.h>
> #include <sys/resource.h>
> #include <sys/socket.h>
> #include <arpa/inet.h>
> #include <time.h>
> #include <unistd.h>
> #include <pthread.h>
> #include <netdb.h>
> #include <time.h>
> #include <netinet/in.h>
>
> #include <curl/curl.h>
>
> #define MSG_OUT stdout
> #define DEFAULT_QUEUE_LENGTH 10000
> #define mycase(code) \
> case code: s = __STRING(code)
>
> typedef struct _CrawlerConfig
> {
> char redis_address[1024];
> int redis_port;
> char mongo_address[1024];
> int mongo_port;
>
> char url_file[1024];
>
> int queue_length;
> } CrawlerConfig;
>
> /* Global information, common to all connections */
> typedef struct _GlobalInfo
> {
> int epfd; /* epoll filedescriptor */
> int tfd; /* timer filedescriptor */
> CURLM *multi;
> int still_running;
> pthread_mutex_t lock;
> int concurrent_connections;
> pthread_mutex_t parsed_lock;
> int parsed_sites;
>
> CrawlerConfig config;
> } GlobalInfo;
>
> /* Information associated with a specific easy handle */
> typedef struct _ConnInfo
> {
> CURL *easy;
> char *url;
> GlobalInfo *global;
> char error[CURL_ERROR_SIZE];
> size_t size;
> char *data;
> } ConnInfo;
>
> /* Information associated with a specific socket */
> typedef struct _SockInfo
> {
> curl_socket_t sockfd;
> CURL *easy;
> int action;
> long timeout;
> GlobalInfo *global;
> } SockInfo;
>
> /* Die if we get a bad CURLMcode somewhere */
> static void
> mcode_or_die(const char *where, CURLMcode code)
> {
> if (CURLM_OK != code) {
> const char *s;
>
> switch (code) {
> mycase(CURLM_BAD_HANDLE); break;
> mycase(CURLM_BAD_EASY_HANDLE); break;
> mycase(CURLM_OUT_OF_MEMORY); break;
> mycase(CURLM_INTERNAL_ERROR); break;
> mycase(CURLM_UNKNOWN_OPTION); break;
> mycase(CURLM_LAST); break;
> default: s = "CURLM_unknown"; break;
> mycase(CURLM_BAD_SOCKET);
> fprintf(MSG_OUT, "ERROR: %s returns %s\n", where, s);
> /* ignore this error */
> return;
> }
>
> fprintf(MSG_OUT, "ERROR: %s returns %s\n", where, s);
> exit(code);
> }
> }
>
> void
> concurrent_connections_inc(GlobalInfo *g)
> {
> g->concurrent_connections++;
>
> printf("\rParsed sites: %d, %d parallel connections, %d still running\t",
> g->parsed_sites, g->concurrent_connections, g->still_running);
> fflush(stdout);
> }
>
> void
> concurrent_connections_dec(GlobalInfo *g)
> {
> g->concurrent_connections--;
>
> printf("\rParsed sites: %d, %d parallel connections, %d still running\t",
> g->parsed_sites, g->concurrent_connections, g->still_running);
> fflush(stdout);
> }
>
> static void timer_cb(GlobalInfo* g, int revents);
>
> /* Update the timer after curl_multi library does it's thing. Curl will
> * inform us through this callback what it wants the new timeout to be,
> * after it does some work. */
> static int
> multi_timer_cb(CURLM *multi, long timeout_ms, GlobalInfo *g)
> {
> struct itimerspec its;
>
> //fprintf(MSG_OUT, "multi_timer_cb: Setting timeout to %ld ms\n",
> timeout_ms);
>
> if (timeout_ms > 0) {
> its.it_interval.tv_sec = 1;
> its.it_interval.tv_nsec = 0;
> its.it_value.tv_sec = timeout_ms / 1000;
> its.it_value.tv_nsec = (timeout_ms % 1000) * 1000 * 1000;
> } else if(timeout_ms == 0) {
> /* libcurl wants us to timeout now, however setting both fields of
> * new_value.it_value to zero disarms the timer. The closest we can
> * do is to schedule the timer to fire in 1 ns. */
> its.it_interval.tv_sec = 1;
> its.it_interval.tv_nsec = 0;
> its.it_value.tv_sec = 0;
> its.it_value.tv_nsec = 1;
> } else {
> memset(&its, 0, sizeof(struct itimerspec));
> }
>
> timerfd_settime(g->tfd, /*flags=*/ 0, &its, NULL);
>
> return (0);
> }
>
> /* Check for completed transfers, and remove their easy handles */
> static void
> check_multi_info(GlobalInfo *g)
> {
> char *eff_url;
> CURLMsg *msg;
> int msgs_left;
> ConnInfo *conn;
> CURL *easy;
> char *ct;
> double time;
> double dl;
> //CURLcode res;
>
> while ((msg = curl_multi_info_read(g->multi, &msgs_left))) {
> if (msg->msg == CURLMSG_DONE) {
> easy = msg->easy_handle;
> //res = msg->data.result;
> curl_easy_getinfo(easy, CURLINFO_PRIVATE, &conn);
> curl_easy_getinfo(easy, CURLINFO_EFFECTIVE_URL, &eff_url);
> curl_easy_getinfo(easy, CURLINFO_CONTENT_TYPE, &ct);
> curl_easy_getinfo(easy, CURLINFO_TOTAL_TIME, &time);
> curl_easy_getinfo(easy, CURLINFO_SIZE_DOWNLOAD, &dl);
>
> //fprintf(MSG_OUT, "DONE: %s => (%d) %s\n", eff_url, res, conn->error);
>
> curl_multi_remove_handle(g->multi, easy);
> //free(conn->url);
> free(conn->data);
> curl_easy_cleanup(easy);
> free(conn);
> }
> }
> }
>
> /* Called by libevent when we get action on a multi socket
> filedescriptor*/
> static void
> event_cb(GlobalInfo *g, int fd, int revents)
> {
> CURLMcode rc;
> struct itimerspec its;
>
> int action = ((revents & EPOLLIN) ? CURL_CSELECT_IN : 0) |
> ((revents & EPOLLOUT) ? CURL_CSELECT_OUT : 0);
>
> rc = curl_multi_socket_action(g->multi, fd, action, &g->still_running);
> mcode_or_die("event_cb: curl_multi_socket_action", rc);
>
> check_multi_info(g);
>
> if (g->still_running <= 0) {
> //fprintf(MSG_OUT, "last transfer done, kill timeout\n");
> memset(&its, 0, sizeof(struct itimerspec));
> timerfd_settime(g->tfd, 0, &its, NULL);
> }
> }
>
> /* Called by main loop when our timeout expires */
> static void
> timer_cb(GlobalInfo* g, int revents)
> {
> CURLMcode rc;
> uint64_t count = 0;
> ssize_t err = 0;
>
> err = read(g->tfd, &count, sizeof(uint64_t));
> if (err == -1) {
> /* Note that we may call the timer callback even if the timerfd isn't
> * readable. It's possible that there are multiple events stored in the
> * epoll buffer (i.e. the timer may have fired multiple times). The
> * event count is cleared after the first call so future events in the
> * epoll buffer will fail to read from the timer. */
> if (errno == EAGAIN) {
> //fprintf(MSG_OUT, "EAGAIN on tfd %d\n", g->tfd);
> return;
> }
> }
>
> if (err != sizeof(uint64_t)) {
> fprintf(stderr, "read(tfd) == %ld", err);
> perror("read(tfd)");
> }
>
> rc = curl_multi_socket_action(g->multi, CURL_SOCKET_TIMEOUT, 0,
> &g->still_running);
> mcode_or_die("timer_cb: curl_multi_socket_action", rc);
> check_multi_info(g);
> }
>
> /* Assign information to a SockInfo structure */
> static void
> setsock(SockInfo *f, curl_socket_t s, CURL *e, int act, GlobalInfo *g)
> {
> struct epoll_event ev;
> int kind = ((act & CURL_POLL_IN) ? EPOLLIN : 0) |
> ((act & CURL_POLL_OUT) ? EPOLLOUT : 0);
>
> if (f->sockfd) {
> concurrent_connections_dec(g);
> if (epoll_ctl(g->epfd, EPOLL_CTL_DEL, f->sockfd, NULL))
> fprintf(stderr, "EPOLL_CTL_DEL failed for fd: %d : %s\n",
> f->sockfd, strerror(errno));
> }
>
> f->sockfd = s;
> f->action = act;
> f->easy = e;
>
> ev.events = kind;
> ev.data.fd = s;
>
> concurrent_connections_inc(g);
> if (epoll_ctl(g->epfd, EPOLL_CTL_ADD, s, &ev)) {
> fprintf(stderr, "EPOLL_CTL_ADD failed for fd: %d : %s\n",
> s, strerror(errno));
> }
> }
>
> /* Initialize a new SockInfo structure */
> static void
> addsock(curl_socket_t s, CURL *easy, int action, GlobalInfo *g)
> {
> SockInfo *fdp = (SockInfo *)calloc(sizeof(SockInfo), 1);
>
> fdp->global = g;
> setsock(fdp, s, easy, action, g);
> curl_multi_assign(g->multi, s, fdp);
> }
>
> static size_t
> write_cb(void *contents, size_t size, size_t nmemb, void *p)
> {
> ConnInfo *conn = (ConnInfo *)p;
> size_t realsize = size * nmemb;
>
> conn->data = realloc(conn->data, conn->size + realsize + 1);
> if (conn->data == NULL) {
> /* out of memory! */
> printf("not enough memory (realloc returned NULL)\n");
> return 0;
> }
>
> memcpy(&(conn->data[conn->size]), contents, realsize);
> conn->size += realsize;
> conn->data[conn->size] = 0;
>
> return realsize;
> }
>
> /* Create a new easy handle, and add it to the global curl_multi */
> int
> new_conn(char *url, GlobalInfo *g)
> {
> ConnInfo *conn;
> CURLMcode rc;
> char buffer1[1024];
> char buffer2[1024];
>
> memset(buffer1, 0, sizeof(buffer1));
> memset(buffer2, 0, sizeof(buffer2));
>
> conn = (ConnInfo*)calloc(1, sizeof(ConnInfo));
> conn->error[0]='\0';
> conn->global = g;
>
> conn->easy = curl_easy_init();
> if (!conn->easy) {
> fprintf(MSG_OUT, "curl_easy_init() failed, exiting!\n");
> exit(2);
> }
>
> int autoresolve = 0;
>
> conn->global = g;
> conn->url = url;
> curl_easy_setopt(conn->easy, CURLOPT_URL, conn->url);
> curl_easy_setopt(conn->easy, CURLOPT_WRITEFUNCTION, write_cb);
> curl_easy_setopt(conn->easy, CURLOPT_WRITEDATA, conn);
> curl_easy_setopt(conn->easy, CURLOPT_FOLLOWLOCATION, 1L);
> curl_easy_setopt(conn->easy, CURLOPT_ERRORBUFFER, conn->error);
> curl_easy_setopt(conn->easy, CURLOPT_PRIVATE, conn);
> curl_easy_setopt(conn->easy, CURLOPT_NOPROGRESS, 1L);
> curl_easy_setopt(conn->easy, CURLOPT_PROGRESSDATA, conn);
> curl_easy_setopt(conn->easy, CURLOPT_FOLLOWLOCATION, 1L);
> curl_easy_setopt(conn->easy, CURLOPT_LOW_SPEED_TIME, 3L);
> curl_easy_setopt(conn->easy, CURLOPT_LOW_SPEED_LIMIT, 100L);
> curl_easy_setopt(conn->easy, CURLOPT_CONNECTTIMEOUT, 10L);
> curl_easy_setopt(conn->easy, CURLOPT_CLOSESOCKETDATA, g);
>
> rc = curl_multi_add_handle(g->multi, conn->easy);
> mcode_or_die("new_conn: curl_multi_add_handle", rc);
>
> /* note that the add_handle() will set a time-out to trigger very soon so
> that the necessary socket_action() call will be called by this app */
>
> return (0);
> }
>
> /* Clean up the SockInfo structure */
> static void
> remsock(SockInfo *f, GlobalInfo* g)
> {
> if (f) {
> if (f->sockfd) {
> g->concurrent_connections--;
> if (epoll_ctl(g->epfd, EPOLL_CTL_DEL, f->sockfd, NULL))
> fprintf(stderr, "EPOLL_CTL_DEL failed for fd: %d : %s\n",
> f->sockfd, strerror(errno));
> }
>
> free(f);
> }
> }
>
> /* CURLMOPT_SOCKETFUNCTION */
> static int
> sock_cb(CURL *e, curl_socket_t s, int what, void *cbp, void *sockp)
> {
> GlobalInfo *g = (GlobalInfo*) cbp;
> SockInfo *fdp = (SockInfo*) sockp;
>
> if (what == CURL_POLL_REMOVE) {
> remsock(fdp, g);
> } else {
> if (!fdp) {
> addsock(s, e, what, g);
> } else {
> setsock(fdp, s, e, what, g);
> }
> }
>
> return (0);
> }
>
> int should_exit = 0;
>
> void
> signal_handler(int signo)
> {
> should_exit = 1;
> }
>
> static void
> thread_sighandler(int signum)
> {
> printf("\nCrawler exiting.\n");
> pthread_exit(NULL);
> }
>
> void *
> crawler_init(void *arg)
> {
> GlobalInfo g;
> struct itimerspec its;
> struct epoll_event ev;
> struct epoll_event events[10000];
>
> signal(SIGUSR1, thread_sighandler);
>
> memset(&g, 0, sizeof(GlobalInfo));
>
> memcpy(&g.config, arg, sizeof(CrawlerConfig));
>
> g.epfd = epoll_create1(EPOLL_CLOEXEC);
> if (g.epfd == -1) {
> perror("epoll_create1 failed\n");
> exit(1);
> }
>
> g.tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
> if (g.tfd == -1) {
> perror("timerfd_create failed\n");
> exit(1);
> }
>
> memset(&its, 0, sizeof(struct itimerspec));
> its.it_interval.tv_sec = 1;
> its.it_value.tv_sec = 1;
> timerfd_settime(g.tfd, 0, &its, NULL);
>
> ev.events = EPOLLIN;
> ev.data.fd = g.tfd;
> epoll_ctl(g.epfd, EPOLL_CTL_ADD, g.tfd, &ev);
>
> curl_global_init(CURL_GLOBAL_DEFAULT);
> g.multi = curl_multi_init();
>
> /* setup the generic multi interface options we want */
> curl_multi_setopt(g.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
> curl_multi_setopt(g.multi, CURLMOPT_SOCKETDATA, &g);
> curl_multi_setopt(g.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
> curl_multi_setopt(g.multi, CURLMOPT_TIMERDATA, &g);
>
> /* we don't call any curl_multi_socket*() function yet as we have no
> handles added! */
>
> //printf("Starting crawler...\n");
>
> while (!should_exit) {
> int idx;
> int err = epoll_wait(g.epfd, events, sizeof(events)/sizeof(struct
> epoll_event), 10000);
>
>
> new_conn("www.google.com", &g);
> new_conn("www.yahoo.com", &g);
> new_conn("www.bing.com", &g);
>
> if (err == -1) {
> if (errno == EINTR) {
> fprintf(MSG_OUT, "note: wait interrupted\n");
> continue;
> } else {
> perror("epoll_wait");
> exit(1);
> }
> }
>
> for (idx = 0; idx < err; ++idx) {
> if (events[idx].data.fd == g.tfd) {
> timer_cb(&g, events[idx].events);
> } else {
> event_cb(&g, events[idx].data.fd, events[idx].events);
> }
> }
> }
>
> fprintf(MSG_OUT, "Exiting normally.\n");
> fflush(MSG_OUT);
>
> curl_multi_cleanup(g.multi);
> curl_global_cleanup();
>
> return (NULL);
> }
>
> int
> main(int argc, char **argv)
> {
> int cleanup = 0, opt, ret;
> CrawlerConfig config;
>
> memset(&config, 0, sizeof(config));
> config.queue_length = DEFAULT_QUEUE_LENGTH;
>
> should_exit = 0;
> signal(SIGINT, signal_handler);
> signal(SIGKILL, signal_handler);
>
> crawler_init( (void *)&config);
>
>
> printf("Exiting.\n");
>
> return (0);
> }
>
>
Gmail seems to have taken out all the formatting. Apologies. It should
still compile though.

James Read

-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette: https://curl.haxx.se/mail/etiquette.html
Received on 2020-05-26