cURL / Mailing Lists / curl-library / Single Mail

curl-library

Re: OK, I need help

From: Donald Boissonneault <donnyb_at_xplornet.ca>
Date: Thu, 01 Jul 2010 21:47:36 -0600

 I also tried this; however, I think that I need to set the htmlbody in
the write_function because the threads could be overwriting one
another's variables. However, I cannot for the life of me figure out how
to pass it an integer so I know which of the arrays to store it in. I
will be working on this later tonight and will keep you posted.

/*****************************************************************************
 * _ _ ____ _
 * Project ___| | | | _ \| |
 * / __| | | | |_) | |
 * | (__| |_| | _ <| |___
 * \___|\___/|_| \_\_____|
 *
 */

/* A multi-threaded example that uses pthreads extensively to fetch
 * X remote files at once */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <curl/curl.h>

using namespace std;
#define NUMT 4

/*
  List of URLs to fetch.

  If you intend to use a SSL-based protocol here you MUST setup the
OpenSSL
  callback functions as described here:

  http://www.openssl.org/docs/crypto/threads.html#DESCRIPTION

*/
/* Per-URL result slot, filled in by one worker thread each. */
struct Url_Data
{
    const char *UrlAddress;      /* URL to fetch (points into urls[]) */
    char *UrlHtmlBody;           /* heap copy of the downloaded body */
    size_t UrlHtmlBodySize;      /* length of UrlHtmlBody in bytes */
    bool UrlHtmlBodyMalloced;    /* set true once UrlHtmlBody is allocated */
    char RedirectAddress[65536]; /* redirect target from CURLINFO_REDIRECT_URL
                                    -- NOTE(review): 64 KiB is far larger than
                                    any realistic URL; confirm before shrinking */
    char IP[65536];              /* peer IP from CURLINFO_PRIMARY_IP
                                    -- NOTE(review): an IP string needs ~46
                                    bytes, not 64 KiB */
    long HttpResponse;           /* HTTP status from CURLINFO_RESPONSE_CODE */
}Url_Data_Array[47];             /* NOTE(review): capacity 47 vs NUMT == 4
                                    threads -- only slots 0..NUMT-1 are used
                                    in this file */

int UrlIndex;                    /* NOTE(review): never read or written here */

/* The NUMT URLs fetched by the worker threads (one array slot each). */
const char * const urls[NUMT]= {
  "http://curl.haxx.se/",
  "ftp://cool.haxx.se/",
  "http://www.contactor.se/",
  "www.haxx.se"
};
/* Shared receive buffer.
 * NOTE(review): every thread writes these same globals, so concurrent
 * transfers corrupt each other's data -- this is the race the poster is
 * asking about.  The proper fix is a per-transfer buffer handed to the
 * callback via CURLOPT_WRITEDATA. */
char* memory;
size_t UrlConnectionHtmlBody_size;

/* libcurl write callback: append the incoming chunk to the global
 * `memory` buffer and grow `UrlConnectionHtmlBody_size` to match.
 *
 * ptr/size/nmemb: chunk delivered by libcurl (size * nmemb bytes).
 * stream: CURLOPT_WRITEDATA pointer -- unused, data goes to the globals.
 * Returns the number of bytes consumed; returning less than the chunk
 * size makes libcurl abort the transfer (done here on out-of-memory).
 */
static size_t write_data(char *ptr, size_t size, size_t nmemb, void
*stream)
{
    size_t mem = size * nmemb;  /* bytes delivered in this call */
    (void)stream;

    if (mem > 0)
    {
        /* realloc(NULL, n) acts like malloc(n), so one call handles both
         * the first chunk and every later growth step.  Keep the old
         * pointer until the resize is known to have succeeded (the
         * original overwrote `memory` with an unchecked realloc, leaking
         * the buffer and then crashing in memcpy on failure). */
        char *grown = (char*)realloc(memory, UrlConnectionHtmlBody_size + mem);
        if (grown == NULL)
        {
            return 0; /* abort the transfer; `memory` is still valid */
        }
        memory = grown;
        memcpy(memory + UrlConnectionHtmlBody_size, ptr, mem);
        /* only count bytes that were actually stored */
        UrlConnectionHtmlBody_size += mem;
    }
    return mem;
}

static void *pull_one_url(void *UrlIndex)
{
    int i = (int)UrlIndex;
    const char *url = Url_Data_Array[(int)UrlIndex].UrlAddress;
    memory = NULL;
    UrlConnectionHtmlBody_size = 0;
    CURL *curl_handle;
    CURLcode res;

    /* init the curl session */
    curl_handle = curl_easy_init();

    /* set URL to get */
    curl_easy_setopt(curl_handle, CURLOPT_URL, url);

    /* no progress meter please */
    curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);

    /* send all data to this function */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);

    res = curl_easy_perform(curl_handle); /* ignores error */
    if(CURLE_OK == res)
    {
        Url_Data_Array[(int)UrlIndex].UrlHtmlBody = (char*)
malloc(UrlConnectionHtmlBody_size);
            if (Url_Data_Array[(int)UrlIndex].UrlHtmlBody != NULL)
            {
                Url_Data_Array[(int)UrlIndex].UrlHtmlBodyMalloced =
true;
                memcpy(&(Url_Data_Array[(int)UrlIndex].UrlHtmlBody[0]),
memory, UrlConnectionHtmlBody_size);
                Url_Data_Array[(int)UrlIndex].UrlHtmlBodySize =
UrlConnectionHtmlBody_size;
            }
    }
    char *Ra;
    char *IP;
    long HttpResponse;
    /* get the CURLINFO_HTTP_CONNECTCODE*/
    res = curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE,
&HttpResponse);
    if((CURLE_OK == res) && HttpResponse)
    {
        Url_Data_Array[(int)UrlIndex].HttpResponse = HttpResponse;
    };
    /* ask for the ReDirectAddress*/
    res = curl_easy_getinfo(curl_handle, CURLINFO_REDIRECT_URL, &Ra);
    if((CURLE_OK == res) && Ra)
    {
        strcpy(Url_Data_Array[(int)UrlIndex].RedirectAddress,Ra);

Url_Data_Array[(int)UrlIndex].RedirectAddress[(strlen(Url_Data_Array[(int)UrlIndex].RedirectAddress)+1)] = '\0';
    };
    // Get the IP address for the web site
    res = curl_easy_getinfo(curl_handle, CURLINFO_PRIMARY_IP, &IP);
    if((CURLE_OK == res) && IP)
    {
        strcpy(Url_Data_Array[(int)UrlIndex].IP,IP);

Url_Data_Array[(int)UrlIndex].IP[(strlen(Url_Data_Array[(int)UrlIndex].IP)+1)] = '\0';
    };
    curl_easy_cleanup(curl_handle);

    return NULL;
}

/*
   int pthread_create(pthread_t *new_thread_ID,
   const pthread_attr_t *attr,
   void * (*start_func)(void *), void *arg);
*/

int main(int argc, char **argv)
{
    pthread_t tid[NUMT];
    int i;
    int error;

    /* Must initialize libcurl before any threads are started */
    curl_global_init(CURL_GLOBAL_ALL);

    for(i=0; i< NUMT; i++)
    {
        Url_Data_Array[i].UrlAddress = urls[i];
        error = pthread_create(&tid[i],
                           NULL, /* default attributes please */
                           pull_one_url,
                           (void *)i);
        if(0 != error)
        {
        fprintf(stderr, "Couldn't run thread number %d, errno %d\n", i,
error);
        }
        else
        {
        fprintf(stderr, "Thread %d, gets %s\n", i, urls[i]);
        }
    }

    /* now wait for all threads to terminate */
    for(i=0; i< NUMT; i++)
    {
        error = pthread_join(tid[i], NULL);
        fprintf(stderr, "Thread %d terminated\n", i);
    }

  for (int z = 0; z < 3 ; z++)
  {
     // printf("%s\n",Url_Data_Array[z].UrlHtmlBody);
      printf("HttpResponse = %lu\n",Url_Data_Array[z].HttpResponse);
      printf("RedirectAddress = %s
\n",Url_Data_Array[z].RedirectAddress);
      printf("IP = %s\n",Url_Data_Array[z].IP);
  }
  return 0;
}

On Thu, 2010-07-01 at 09:46 -0700, johansen_at_opensolaris.org wrote:
> On Thu, Jul 01, 2010 at 12:26:04AM -0600, Donald Boissonneault wrote:
> > I wrote the following function, however due to slow dns resolving I need
> > to make it so I can pull like 10 urls at a time or more. Would like to
> > be able to change that depending on connection speed. Below is the code.
> > I have been trying to do this with multi-urls for over 3 months. I am
> > very new to c programming, but am making very good progress. However,
> > this multi-tasking is really confusing me. How could I turn the below
> > code into something so it would load the urls IP, HTTP Response code,
> > URL re-direct address and the html body into a an array. I know the
> > purpose of programming is to write your own code, but I am totally lost
> > here. I do not start school till fall and would like to continue working
> > on my program. If you can help I would be very happy.
> > Thank you,
> > Don
> >
> >
> >
> > #include <curl/curl.h>
> > #include <curl/types.h>
> > #include <curl/easy.h>
> >
> > using namespace std;
> >
> > #include "MakeFile.h"
> >
> > char* memory;
> > size_t UrlConnectionHtmlBody_size;
> >
> > static size_t write_data(char *ptr, size_t size, size_t nmemb, void
> > *stream);
> >
> > static size_t write_data(char *ptr, size_t size, size_t nmemb, void
> > *stream)
> > {
> > size_t mem;
> > //increase the memory buffer size being held
> > mem = size * nmemb;
> > // set the sizt_t to know how long the char* is
> > UrlConnectionHtmlBody_size += mem;
> > if (mem>0)
> > {
> > memory = (char*)realloc(memory, UrlConnectionHtmlBody_size);
> > }
> > else
> > {
> > memory = (char*) malloc(UrlConnectionHtmlBody_size);
> > }
> > // store the data
> > if (mem)
> > {
> > memcpy(&(memory[UrlConnectionHtmlBody_size-mem]), ptr, mem);
> > };
> > return mem;
> > };
> >
> > void UrlGetInfo(char* VarUrlToGet)
> > {
> > const char *p = VarUrlToGet; // get const char * representation
> > printf("Get Url %s\n",VarUrlToGet);
> > //Reset string varable for getting data
> > memory = NULL;
> > UrlConnectionHtmlBody_size = 0;
> > CURL *curl_handle;
> > CURLcode res;
> > curl_global_init(CURL_GLOBAL_ALL);
> >
> > /* init the curl session */
> > curl_handle = curl_easy_init();
> >
> > /* set URL to get */
> > curl_easy_setopt(curl_handle, CURLOPT_URL, p);
> >
> > /* no progress meter please */
> > curl_easy_setopt(curl_handle, CURLOPT_NOPROGRESS, 1L);
> >
> > /* send all data to this function */
> > curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, write_data);
> >
> > /*
> > * Notice here that if you want the actual data sent anywhere else
> > but
> > * stdout, you should consider using the CURLOPT_WRITEDATA option.
> > */
> >
> > /* get it! */
> > res = curl_easy_perform(curl_handle);
> > if(CURLE_OK == res)
> > {
> > //set the information for the body to the UrlInfo
> >
> > // pointer Redirect Site
> > char *ra;
> > char *ip;
> > long HttpResponse;
> > /* get the CURLINFO_HTTP_CONNECTCODE*/
> > res = curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE,
> > &HttpResponse);
> > /* ask for the ReDirectAddress*/
> > res = curl_easy_getinfo(curl_handle, CURLINFO_REDIRECT_URL,
> > &ra);
> > if((CURLE_OK == res) && ra)
> > {
> > };
> > // Get the IP address for the web site
> > res = curl_easy_getinfo(curl_handle, CURLINFO_PRIMARY_IP, &ip);
> > if((CURLE_OK == res) && ip)
> > {
> > };
> > }
> > free (memory);
> > /* cleanup curl stuff */
> > curl_easy_cleanup(curl_handle);
> > };
>
> I'm not certain what exactly you're trying to accomplish with this code,
> but there's a good example of how to download 10 requests at a time
> on curl's website:
>
> http://curl.haxx.se/libcurl/c/10-at-a-time.html
>
> You'll need to use the multi interface if you'd like to perform multiple
> downloads using a single thread of control.
>
> -j
>
>
> -------------------------------------------------------------------
> List admin: http://cool.haxx.se/list/listinfo/curl-library
> Etiquette: http://curl.haxx.se/mail/etiquette.html

-------------------------------------------------------------------
List admin: http://cool.haxx.se/list/listinfo/curl-library
Etiquette: http://curl.haxx.se/mail/etiquette.html
Received on 2010-07-02