cURL / Mailing Lists / curl-users / Single Mail

curl-users

Problem in curl_multi_perform while storing html content into Mysql database

From: Hemachandran <hemachan83_at_gmail.com>
Date: Sat, 22 Aug 2009 12:41:50 +0530

Hi,

I am new to using curl from C++. I want to crawl multiple HTML pages and store
their content in a MySQL database from C++. I am able to crawl multiple web
pages using curl_multi_perform, but my problem is that when I store the content
in the database, the contents of the different pages are mingled together. I
want to store each page separately.

Source code of my program

/*****************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * $Id: 10-at-a-time.c,v 1.9 2008-09-22 17:27:24 danf Exp $
 *
 * Example application source code using the multi interface to download
 * many files, but with a capped maximum amount of simultaneous transfers.
 *
 * Written by Michael Wallner
 */

#include <iostream>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
# include <unistd.h>
#endif
#include <curl/multi.h>
#include <mysql.h>

using namespace std;

/* Pages to download.  Every entry is fetched once and stored in the
   database. */
static const char *urls[] = {
  "http://www.altavista.com",
  "http://www.cuil.com",
};

#define MAX 2 /* number of simultaneous transfers */

/* Total number of transfers to do.  The expansion is fully parenthesized so
   the macro can be used safely inside larger expressions (e.g. `x / CNT`),
   and it is derived from the array's own element type so it stays correct
   if the element type of urls[] ever changes. */
#define CNT (sizeof(urls) / sizeof(urls[0]))

static string contents;

static size_t cb(void *ptr, size_t size, size_t nmemb, void *stream) {

    int numbytes = size*nmemb;
    // The data is not null-terminated, so get the last character, and
replace
    // it with '\0'.
    char lastchar = *((char *) ptr + numbytes - 1);
    *((char *) ptr + numbytes - 1) = '\0';
    contents.append((char *)ptr);
    contents.append(1,lastchar);
    *((char *) ptr + numbytes - 1) = lastchar; // Might not be necessary.

    return size*nmemb;
}

static void init(CURLM *cm, int i)
{
  CURL *eh = curl_easy_init();

  curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, cb);
  curl_easy_setopt(eh, CURLOPT_WRITEDATA, stdout);
  curl_easy_setopt(eh, CURLOPT_HEADER, 0L);
  curl_easy_setopt(eh, CURLOPT_URL, urls[i]);
  curl_easy_setopt(eh, CURLOPT_PRIVATE, urls[i]);
  curl_easy_setopt(eh, CURLOPT_VERBOSE, 0L);

  curl_multi_add_handle(cm, eh);
}

int main(void)
{

    MYSQL *connection, mysql;
    mysql_init(&mysql);

    MYSQL_RES *result;

    connection =
mysql_real_connect(&mysql,"localhost","root","root","test",0,0,0);

    if(!connection) {
        return 0;
    }

  CURLM *cm;
  CURLMsg *msg;
  long L;
  unsigned int C=0;
  int M, Q, U = -1;
  fd_set R, W, E;
  struct timeval T;

  curl_global_init(CURL_GLOBAL_ALL);

  cm = curl_multi_init();

  /* we can optionally limit the total amount of connections this multi
handle
     uses */
  curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX);

  for (C = 0; C < MAX; ++C) {
    init(cm, C);
  }

  while (U) {
    while (CURLM_CALL_MULTI_PERFORM == curl_multi_perform(cm, &U));

    if (U) {
      FD_ZERO(&R);
      FD_ZERO(&W);
      FD_ZERO(&E);

      if (curl_multi_fdset(cm, &R, &W, &E, &M)) {
        fprintf(stderr, "E: curl_multi_fdset\n");
        return EXIT_FAILURE;
      }

      if (curl_multi_timeout(cm, &L)) {
        fprintf(stderr, "E: curl_multi_timeout\n");
        return EXIT_FAILURE;
      }
      if (L == -1)
        L = 100;

      if (M == -1) {
#ifdef WIN32
        Sleep(L);
#else
        sleep(L / 1000);
#endif
      } else {
        T.tv_sec = L/1000;
        T.tv_usec = (L%1000)*1000;

        if (0 > select(M+1, &R, &W, &E, &T)) {
          fprintf(stderr, "E: select(%i,,,,%li): %i: %s\n",
              M+1, L, errno, strerror(errno));
          return EXIT_FAILURE;
        }
      }
    }

    while ((msg = curl_multi_info_read(cm, &Q))) {
      if (msg->msg == CURLMSG_DONE) {
        char *url;
        CURL *e = msg->easy_handle;
        curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &url);

        //cout << contents << endl;

        //fprintf(stderr,"%s\n", url);

        char * S = new char[strlen(contents.c_str())*3 +1];
        mysql_real_escape_string(&mysql, S, contents.c_str(),
contents.length());
        contents = contents.assign(S);

        const char* temp = contents.c_str();
        string query;

        query = " INSERT INTO testing ( name ) VALUES ( ' ";
        query += temp;
        query += " ' ); ";

        const char* xx = query.c_str();

        //cout << xx << endl;

        if( mysql_real_query( &mysql, xx, strlen(xx) ) == 0 ) {

            cout << "inserted successfully" << endl;
            //contents.clear();
        }

        curl_multi_remove_handle(cm, e);
        curl_easy_cleanup(e);
      }
      else {
        fprintf(stderr, "E: CURLMsg (%d)\n", msg->msg);
      }
      if (C < CNT) {
        init(cm, C++);
        U++; /* just to prevent it from remaining at 0 if there are more
                URLs to get */
      }
    }
  }

  curl_multi_cleanup(cm);
  curl_global_cleanup();

  return EXIT_SUCCESS;
}

Please help me. I am eagerly awaiting your answers.

Thanks & Regards,

P. Hemachandran.

-------------------------------------------------------------------
List admin: http://cool.haxx.se/cgi-bin/mailman/listinfo/curl-users
FAQ: http://curl.haxx.se/docs/faq.html
Etiquette: http://curl.haxx.se/mail/etiquette.html
Received on 2009-08-22