curl-library
Re: OK, I need help
Date: Thu, 01 Jul 2010 21:47:36 -0600
I also tried this; however, I think that I need to set the htmlbody in
the write_function, because the threads could be overwriting one
another's variables. However, I cannot for the life of me figure out how
to pass it an integer so I know which of the array elements to store it in.
I will be working on this later tonight and will keep you posted.
/*****************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
*/
/* A multi-threaded example that uses pthreads extensively to fetch
* X remote files at once */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <curl/curl.h>
using namespace std;
#define NUMT 4
/*
List of URLs to fetch.
If you intend to use a SSL-based protocol here you MUST setup the
OpenSSL
callback functions as described here:
http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION
*/
/* Per-URL result slot: one entry is filled in by each fetching thread.
 * NOTE(review): the array holds 47 slots but only NUMT (4) are ever
 * used; the two 64 KiB char buffers per entry make each slot very
 * large -- presumably oversized "just in case"; verify the limits. */
struct Url_Data
{
const char *UrlAddress;          /* URL to fetch (points into urls[]) */
char *UrlHtmlBody;               /* malloc'ed copy of the response body */
size_t UrlHtmlBodySize;          /* number of bytes in UrlHtmlBody */
bool UrlHtmlBodyMalloced;        /* true once UrlHtmlBody was allocated */
char RedirectAddress[65536];     /* CURLINFO_REDIRECT_URL, if any */
char IP[65536];                  /* CURLINFO_PRIMARY_IP of the server */
long HttpResponse;               /* CURLINFO_RESPONSE_CODE */
}Url_Data_Array[47];
int UrlIndex;                    /* NOTE(review): never used -- candidate for removal */
/* The NUMT URLs fetched by the example, one per thread. */
const char * const urls[NUMT]= {
"http://curl.haxx.se/",
"ftp://cool.haxx.se/",
"http://www.contactor.se/",
"www.haxx.se"
};
/* Accumulation buffer for the transfer currently being received.
 * NOTE(review): these are shared, unsynchronized globals, so several
 * threads downloading at once corrupt each other's data.  Each thread
 * should get its own buffer via CURLOPT_WRITEDATA (the `stream`
 * argument below) -- confirm before trusting the multi-threaded run. */
char* memory;
size_t UrlConnectionHtmlBody_size;

/* libcurl write callback: appends the received chunk to the global
 * `memory` buffer and bumps `UrlConnectionHtmlBody_size`.
 *
 * Returns the number of bytes consumed.  Returning anything short of
 * size*nmemb makes libcurl abort the transfer, which is how an
 * out-of-memory condition is reported from here.
 */
static size_t write_data(char *ptr, size_t size, size_t nmemb, void *stream)
{
size_t chunk = size * nmemb;
(void)stream; /* unused: data goes into the global buffer */

if (chunk == 0)
return 0;

/* realloc(NULL, n) behaves like malloc(n), so a single call covers
 * both the first chunk and every later one.  Keep the old pointer
 * until the call succeeds so a failure does not leak what we have. */
char *grown = (char*)realloc(memory, UrlConnectionHtmlBody_size + chunk);
if (grown == NULL)
return 0; /* tell libcurl to abort; `memory` is still valid/freeable */

memory = grown;
memcpy(memory + UrlConnectionHtmlBody_size, ptr, chunk);
UrlConnectionHtmlBody_size += chunk;
return chunk;
}
/* Thread entry point: fetches the URL whose array index is smuggled
 * inside the void* argument, then records the body, HTTP response
 * code, redirect URL and peer IP into Url_Data_Array[index].
 *
 * NOTE(review): the body is accumulated through the shared globals
 * `memory` / `UrlConnectionHtmlBody_size`, so threads running
 * concurrently overwrite each other's downloads.  Each thread needs a
 * private buffer (CURLOPT_WRITEDATA) for this to be correct.
 */
static void *pull_one_url(void *UrlIndex)
{
/* Round-trip the index through intptr_t: casting a void* straight to
 * int truncates on LP64 platforms. */
int idx = (int)(intptr_t)UrlIndex;
const char *url = Url_Data_Array[idx].UrlAddress;
memory = NULL;
UrlConnectionHtmlBody_size = 0;

/* init the curl session */
CURL *curl_handle = curl_easy_init();
if(curl_handle == NULL)
    return NULL;

/* set URL to get */
curl_easy_setopt(curl_handle, CURLOPT_URL, url);
/* no progress meter please */
curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
/* send all data to this function */
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);

CURLcode res = curl_easy_perform(curl_handle);
if(CURLE_OK == res && UrlConnectionHtmlBody_size > 0)
{
    /* keep a private copy of the body in this URL's slot */
    Url_Data_Array[idx].UrlHtmlBody = (char*)malloc(UrlConnectionHtmlBody_size);
    if(Url_Data_Array[idx].UrlHtmlBody != NULL)
    {
        Url_Data_Array[idx].UrlHtmlBodyMalloced = true;
        memcpy(Url_Data_Array[idx].UrlHtmlBody, memory,
               UrlConnectionHtmlBody_size);
        Url_Data_Array[idx].UrlHtmlBodySize = UrlConnectionHtmlBody_size;
    }
}

/* HTTP response code */
long HttpResponse = 0;
res = curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &HttpResponse);
if((CURLE_OK == res) && HttpResponse)
    Url_Data_Array[idx].HttpResponse = HttpResponse;

/* redirect target, if the server sent one.  snprintf bounds the copy
 * and always NUL-terminates; the old strcpy + manual '\0' at
 * strlen()+1 wrote past the terminator and could overflow the buffer
 * for a sufficiently long URL. */
char *Ra = NULL;
res = curl_easy_getinfo(curl_handle, CURLINFO_REDIRECT_URL, &Ra);
if((CURLE_OK == res) && Ra)
    snprintf(Url_Data_Array[idx].RedirectAddress,
             sizeof Url_Data_Array[idx].RedirectAddress, "%s", Ra);

/* IP address the connection went to */
char *IP = NULL;
res = curl_easy_getinfo(curl_handle, CURLINFO_PRIMARY_IP, &IP);
if((CURLE_OK == res) && IP)
    snprintf(Url_Data_Array[idx].IP,
             sizeof Url_Data_Array[idx].IP, "%s", IP);

curl_easy_cleanup(curl_handle);

/* the download buffer was either copied into the slot or the transfer
 * failed; in both cases it must be released to avoid a per-thread leak */
free(memory);
memory = NULL;
return NULL;
}
/*
int pthread_create(pthread_t *new_thread_ID,
const pthread_attr_t *attr,
void * (*start_func)(void *), void *arg);
*/
int main(int argc, char **argv)
{
pthread_t tid[NUMT];
int i;
int error;
/* Must initialize libcurl before any threads are started */
curl_global_init(CURL_GLOBAL_ALL);
for(i=0; i< NUMT; i++)
{
Url_Data_Array[i].UrlAddress = urls[i];
error = pthread_create(&tid[i],
NULL, /* default attributes please */
pull_one_url,
(void *)i);
if(0 != error)
{
fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i,
error);
}
else
{
fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
}
}
/* now wait for all threads to terminate */
for(i=0; i< NUMT; i++)
{
error = pthread_join(tid[i], NULL);
fprintf(stderr, "Thread %d terminated\n", i);
}
for (int z = 0; z < 3 ; z++)
{
// printf("%s\n",Url_Data_Array[z].UrlHtmlBody);
printf("HttpResponse = %lu\n",Url_Data_Array[z].HttpResponse);
printf("RedirectAddress = %s
\n",Url_Data_Array[z].RedirectAddress);
printf("IP = %s\n",Url_Data_Array[z].IP);
}
return 0;
}
On Thu, 2010-07-01 at 09:46 -0700, johansen_at_opensolaris.org wrote:
> On Thu, Jul 01, 2010 at 12:26:04AM -0600, Donald Boissonneault wrote:
> > I wrote the following function, however due to slow dns resolving I need
> > to make it so I can pull like 10 urls at a time or more. Would like to
> > be able to change that depending on connection speed. Below is the code.
> > I have been trying to do this with multi-urls for over 3 months. I am
> > very new to c programming, but am making very good progress. However,
> > this multi-tasking is really confusing me. How could I turn the below
> > code into something so it would load the urls IP, HTTP Response code,
> > URL re-direct address and the html body into a an array. I know the
> > purpose of programming is to write your own code, but I am totally lost
> > here. I do not start school till fall and would like to continue working
> > on my program. If you can help I would be very happy.
> > Thank you,
> > Don
> >
> >
> >
> > #include <curl/curl.h>
> > #include <curl/types.h>
> > #include <curl/easy.h>
> >
> > using namespace std;
> >
> > #include "MakeFile.h"
> >
> > char* memory;
> > size_t UrlConnectionHtmlBody_size;
> >
> > static size_t write_data(char *ptr, size_t size, size_t nmemb, void
> > *stream);
> >
> > static size_t write_data(char *ptr, size_t size, size_t nmemb, void
> > *stream)
> > {
> > size_t mem;
> > //increase the memory buffer size being held
> > mem = size * nmemb;
> > // set the sizt_t to know how long the char* is
> > UrlConnectionHtmlBody_size += mem;
> > if (mem>0)
> > {
> > memory = (char*)realloc(memory, UrlConnectionHtmlBody_size);
> > }
> > else
> > {
> > memory = (char*) malloc(UrlConnectionHtmlBody_size);
> > }
> > // store the data
> > if (mem)
> > {
> > memcpy(&(memory[UrlConnectionHtmlBody_size-mem]), ptr, mem);
> > };
> > return mem;
> > };
> >
> > void UrlGetInfo(char* VarUrlToGet)
> > {
> > const char *p = VarUrlToGet; // get const char * representation
> > printf("Get Url %s\n",VarUrlToGet);
> > //Reset string varable for getting data
> > memory = NULL;
> > UrlConnectionHtmlBody_size = 0;
> > CURL *curl_handle;
> > CURLcode res;
> > curl_global_init(CURL_GLOBAL_ALL);
> >
> > /* init the curl session */
> > curl_handle = curl_easy_init();
> >
> > /* set URL to get */
> > curl_easy_setopt(curl_handle, CURLOPT_URL, p);
> >
> > /* no progress meter please */
> > curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
> >
> > /* send all data to this function */
> > curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
> >
> > /*
> > * Notice here that if you want the actual data sent anywhere else
> > but
> > * stdout, you should consider using the CURLOPT_WRITEDATA option.
> > */
> >
> > /* get it! */
> > res = curl_easy_perform(curl_handle);
> > if(CURLE_OK == res)
> > {
> > //set the information for the body to the UrlInfo
> >
> > // pointer Redirect Site
> > char *ra;
> > char *ip;
> > long HttpResponse;
> > /* get the CURLINFO_HTTP_CONNECTCODE*/
> > res = curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE,
> > &HttpResponse);
> > /* ask for the ReDirectAddress*/
> > res = curl_easy_getinfo(curl_handle, CURLINFO_REDIRECT_URL,
> > &ra);
> > if((CURLE_OK == res) && ra)
> > {
> > };
> > // Get the IP address for the web site
> > res = curl_easy_getinfo(curl_handle, CURLINFO_PRIMARY_IP, &ip);
> > if((CURLE_OK == res) && ip)
> > {
> > };
> > }
> > free (memory);
> > /* cleanup curl stuff */
> > curl_easy_cleanup(curl_handle);
> > };
>
> I'm not certain what exactly you're trying to accomplish with this code,
> but there's a good example of how to download 10 requests at a time
> on curl's website:
>
> http://curl.haxx.se/libcurl/c/10-at-a-time.html
>
> You'll need to use the multi interface if you'd like to perform multiple
> downloads using a single thread of control.
>
> -j
>
>
> -------------------------------------------------------------------
> List admin: http://cool.haxx.se/list/listinfo/curl-library
> Etiquette: http://curl.haxx.se/mail/etiquette.html
-------------------------------------------------------------------
List admin: http://cool.haxx.se/list/listinfo/curl-library
Etiquette: http://curl.haxx.se/mail/etiquette.html
Received on 2010-07-02