curl / Mailing Lists / curl-library / Single Mail
Buy commercial curl support from WolfSSL. We help you work out your issues, debug your libcurl applications, use the API, port to new platforms, add new features and more. With a team led by the curl founder himself.

Re: Memory leak with curl_multi_socket_action

From: James Read via curl-library <curl-library_at_cool.haxx.se>
Date: Mon, 25 May 2020 10:06:50 +0100

On Mon, May 25, 2020 at 7:56 AM Daniel Stenberg <daniel_at_haxx.se> wrote:

> On Sun, 24 May 2020, James Read via curl-library wrote:
>
> > ==78076== by 0x48BBEE0: curl_dbg_calloc (memdebug.c:205)
> > ==78076== by 0x490A1D0: Curl_ssl_initsessions (vtls.c:608)
>
> This is the TLS session ID cache. Do you cleanup this multi handle
> correctly?
>
>
I call curl_multi_cleanup here:

void *
crawler_init(void *arg)
{
        GlobalInfo g;
        struct itimerspec its;
        struct epoll_event ev;
        struct epoll_event events[10000];

        signal(SIGUSR1, thread_sighandler);

        memset(&g, 0, sizeof(GlobalInfo));

        memcpy(&g.config, arg, sizeof(CrawlerConfig));

        if (pthread_mutex_init(&g.lock, NULL) != 0) {
                fprintf(stderr, "mutex init has failed\n");
                return (NULL);
        }

        if (pthread_mutex_init(&g.parsed_lock, NULL) != 0) {
                fprintf(stderr, "mutex init has failed\n");
                return (NULL);
        }

        /* Give chance to resolver to resolve as many hosts he is able to */
        sleep(2);

        g.epfd = epoll_create1(EPOLL_CLOEXEC);
        if (g.epfd == -1) {
                perror("epoll_create1 failed\n");
                exit(1);
        }

        g.tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK | TFD_CLOEXEC);
        if (g.tfd == -1) {
                perror("timerfd_create failed\n");
                exit(1);
        }

        memset(&its, 0, sizeof(struct itimerspec));
        its.it_interval.tv_sec = 1;
        its.it_value.tv_sec = 1;
        timerfd_settime(g.tfd, 0, &its, NULL);

        ev.events = EPOLLIN;
        ev.data.fd = g.tfd;
        epoll_ctl(g.epfd, EPOLL_CTL_ADD, g.tfd, &ev);

curl_global_init(CURL_GLOBAL_DEFAULT);
        g.multi = curl_multi_init();

        /* setup the generic multi interface options we want */
        curl_multi_setopt(g.multi, CURLMOPT_SOCKETFUNCTION, sock_cb);
        curl_multi_setopt(g.multi, CURLMOPT_SOCKETDATA, &g);
        curl_multi_setopt(g.multi, CURLMOPT_TIMERFUNCTION, multi_timer_cb);
        curl_multi_setopt(g.multi, CURLMOPT_TIMERDATA, &g);

        /* we don't call any curl_multi_socket*() function yet as we have
no handles added! */

        //printf("Starting crawler...\n");

        while (!should_exit) {
                int idx;
                int err = epoll_wait(g.epfd, events,
sizeof(events)/sizeof(struct epoll_event), 10000);

                struct link *link =
redis_url_pending_pop_range(g.config.queue_length);

                while (link) {
                        struct link *next = link->next;

                        new_conn(link->url, &g);

                        free(link->url);
                        free(link->host);
                        free(link);

                        link = next;
                }

                if (err == -1) {
                        if (errno == EINTR) {
                                fprintf(MSG_OUT, "note: wait
interrupted\n");
                                continue;
                        } else {
                                perror("epoll_wait");
                                exit(1);
                        }
                }

                for (idx = 0; idx < err; ++idx) {
                        if (events[idx].data.fd == g.tfd) {
                                timer_cb(&g, events[idx].events);
                        } else {
                                event_cb(&g, events[idx].data.fd,
events[idx].events);
                        }
                }
        }

        fprintf(MSG_OUT, "Exiting normally.\n");
        fflush(MSG_OUT);

        curl_multi_cleanup(g.multi);
        curl_global_cleanup();

        return (NULL);
}

> > ==78076== by 0x489E601: allocate_conn (url.c:1562)
> > ==78076== by 0x48A28CA: create_conn (url.c:3378)
>
> This is a connection struct used for holding on to everything that is
> related
> to a single connection. This is possibly due to not having cleaned up the
> multi handle too, or perhaps an easy handle.
>
> > ==78076== by 0x489C739: Curl_open (url.c:588)
> > ==78076== by 0x488DCF4: curl_easy_init (easy.c:301)
>
> This is an easy handle. Missing a call to curl_easy_cleanup() ?
>

I call curl_easy_cleanup here:

/*
 * Drain the message queue of g->multi and retire every finished transfer.
 *
 * For each CURLMSG_DONE message the easy handle's private ConnInfo, its
 * effective URL and its content type are looked up.  When all of them are
 * present and starts_with(ct, "text/html") yields 0, the parsed-sites
 * counter is bumped and the downloaded buffer is passed to html_parse().
 * The easy handle is then detached from the multi handle and destroyed,
 * and the ConnInfo together with its data buffer is freed.
 */
static void
check_multi_info(GlobalInfo *g)
{
        CURLMsg *msg;
        int msgs_left;

        while ((msg = curl_multi_info_read(g->multi, &msgs_left))) {
                /*
                 * Start from NULL so a failing curl_easy_getinfo() leaves a
                 * well-defined value instead of stack garbage.
                 */
                ConnInfo *conn = NULL;
                char *eff_url = NULL;
                char *ct = NULL;
                CURL *easy;

                if (msg->msg != CURLMSG_DONE)
                        continue;

                easy = msg->easy_handle;
                curl_easy_getinfo(easy, CURLINFO_PRIVATE, &conn);
                curl_easy_getinfo(easy, CURLINFO_EFFECTIVE_URL, &eff_url);
                curl_easy_getinfo(easy, CURLINFO_CONTENT_TYPE, &ct);

                /*
                 * libcurl reports a NULL content type when the server sent
                 * no valid Content-Type header, so guard before handing it
                 * to starts_with().
                 * NOTE(review): the "== 0" comparison is kept as written;
                 * confirm starts_with() returns 0 on a match.
                 */
                if (conn != NULL && eff_url != NULL && eff_url[0] != '\0' &&
                    conn->data && ct != NULL &&
                    starts_with(ct, "text/html") == 0) {
                        parsed_sites_inc(g);
                        html_parse(eff_url, conn->data);
                }

                /*
                 * eff_url and ct are owned by the easy handle and must not
                 * be used past curl_easy_cleanup().
                 */
                curl_multi_remove_handle(g->multi, easy);
                if (conn != NULL) {
                        /*
                         * NOTE(review): conn->url is not freed here; verify
                         * who owns it, it may be a leak.
                         */
                        free(conn->data);
                }
                curl_easy_cleanup(easy);
                free(conn);
        }
}

What am I missing?

James Read

> > ==78076== at 0x483B723: malloc (in
> > /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==78076== by 0x483E017: realloc (in
> > /usr/lib/x86_64-linux-gnu/valgrind/vgpreload_memcheck-amd64-linux.so)
> > ==78076== by 0x10CA22: write_cb (crawler.c:277)
> > ==78076== by 0x48D707B: chop_write (sendf.c:606)
>
> This is your callback code doing the allocation (realloc really).
>
> --
>
> / daniel.haxx.se | Commercial curl support up to 24x7 is available!
> | Private help, bug fixes, support, ports, new features
> | https://www.wolfssl.com/contact/
>

-------------------------------------------------------------------
Unsubscribe: https://cool.haxx.se/list/listinfo/curl-library
Etiquette: https://curl.haxx.se/mail/etiquette.html
Received on 2020-05-25