curl-users
Problem in curl_multi_perform while storing html content into Mysql database
Date: Sat, 22 Aug 2009 12:41:50 +0530
Hi,
I am new to using curl from C++. I want to crawl multiple HTML pages and store
their content in a MySQL database from C++. I am able to crawl multiple web
pages using curl_multi_perform, but my problem is that when I store the content
in the database, the contents of the different pages are mingled together.
I want to store each page separately.
Source code of my program:
/*****************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * $Id: 10-at-a-time.c,v 1.9 2008-09-22 17:27:24 danf Exp $
 *
 * Example application source code using the multi interface to download many
 * files, but with a capped maximum amount of simultaneous transfers.
 *
 * Written by Michael Wallner
 */
#include <iostream>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#ifndef WIN32
# include <unistd.h>
#endif
#include <curl/multi.h>
#include <mysql.h>
using namespace std;
/* Pages to download; each entry becomes one transfer. */
static const char *urls[] = {
  "http://www.altavista.com",
  "http://www.cuil.com",
};

/* Number of simultaneous transfers. */
#define MAX 2

/*
 * Total number of transfers to do. The expansion is fully parenthesized so
 * the macro stays correct inside larger expressions (e.g. `x % CNT`), and it
 * is sized from urls[0] so it follows the array's element type.
 */
#define CNT (sizeof(urls) / sizeof(urls[0]))
static string contents;
static size_t cb(void *ptr, size_t size, size_t nmemb, void *stream) {
int numbytes = size*nmemb;
// The data is not null-terminated, so get the last character, and
replace
// it with '\0'.
char lastchar = *((char *) ptr + numbytes - 1);
*((char *) ptr + numbytes - 1) = '\0';
contents.append((char *)ptr);
contents.append(1,lastchar);
*((char *) ptr + numbytes - 1) = lastchar; // Might not be necessary.
return size*nmemb;
}
static void init(CURLM *cm, int i)
{
CURL *eh = curl_easy_init();
curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, cb);
curl_easy_setopt(eh, CURLOPT_WRITEDATA, stdout);
curl_easy_setopt(eh, CURLOPT_HEADER, 0L);
curl_easy_setopt(eh, CURLOPT_URL, urls[i]);
curl_easy_setopt(eh, CURLOPT_PRIVATE, urls[i]);
curl_easy_setopt(eh, CURLOPT_VERBOSE, 0L);
curl_multi_add_handle(cm, eh);
}
int main(void)
{
MYSQL *connection, mysql;
mysql_init(&mysql);
MYSQL_RES *result;
connection =
mysql_real_connect(&mysql,"localhost","root","root","test",0,0,0);
if(!connection) {
return 0;
}
CURLM *cm;
CURLMsg *msg;
long L;
unsigned int C=0;
int M, Q, U = -1;
fd_set R, W, E;
struct timeval T;
curl_global_init(CURL_GLOBAL_ALL);
cm = curl_multi_init();
/* we can optionally limit the total amount of connections this multi
handle
uses */
curl_multi_setopt(cm, CURLMOPT_MAXCONNECTS, (long)MAX);
for (C = 0; C < MAX; ++C) {
init(cm, C);
}
while (U) {
while (CURLM_CALL_MULTI_PERFORM == curl_multi_perform(cm, &U));
if (U) {
FD_ZERO(&R);
FD_ZERO(&W);
FD_ZERO(&E);
if (curl_multi_fdset(cm, &R, &W, &E, &M)) {
fprintf(stderr, "E: curl_multi_fdset\n");
return EXIT_FAILURE;
}
if (curl_multi_timeout(cm, &L)) {
fprintf(stderr, "E: curl_multi_timeout\n");
return EXIT_FAILURE;
}
if (L == -1)
L = 100;
if (M == -1) {
#ifdef WIN32
Sleep(L);
#else
sleep(L / 1000);
#endif
} else {
T.tv_sec = L/1000;
T.tv_usec = (L%1000)*1000;
if (0 > select(M+1, &R, &W, &E, &T)) {
fprintf(stderr, "E: select(%i,,,,%li): %i: %s\n",
M+1, L, errno, strerror(errno));
return EXIT_FAILURE;
}
}
}
while ((msg = curl_multi_info_read(cm, &Q))) {
if (msg->msg == CURLMSG_DONE) {
char *url;
CURL *e = msg->easy_handle;
curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &url);
//cout << contents << endl;
//fprintf(stderr,"%s\n", url);
char * S = new char[strlen(contents.c_str())*3 +1];
mysql_real_escape_string(&mysql, S, contents.c_str(),
contents.length());
contents = contents.assign(S);
const char* temp = contents.c_str();
string query;
query = " INSERT INTO testing ( name ) VALUES ( ' ";
query += temp;
query += " ' ); ";
const char* xx = query.c_str();
//cout << xx << endl;
if( mysql_real_query( &mysql, xx, strlen(xx) ) == 0 ) {
cout << "inserted successfully" << endl;
//contents.clear();
}
curl_multi_remove_handle(cm, e);
curl_easy_cleanup(e);
}
else {
fprintf(stderr, "E: CURLMsg (%d)\n", msg->msg);
}
if (C < CNT) {
init(cm, C++);
U++; /* just to prevent it from remaining at 0 if there are more
URLs to get */
}
}
}
curl_multi_cleanup(cm);
curl_global_cleanup();
return EXIT_SUCCESS;
}
Please help me. I am eagerly awaiting your answers.
Thanks & Regards,
P. Hemachandran.
-------------------------------------------------------------------
List admin: http://cool.haxx.se/cgi-bin/mailman/listinfo/curl-users
FAQ: http://curl.haxx.se/docs/faq.html
Etiquette: http://curl.haxx.se/mail/etiquette.html
Received on 2009-08-22