cURL / Mailing Lists / curl-library / Single Mail

curl-library

Re: Info request about the zero copy interface (2) (zcopycli.c)

From: Legolas <legolas558_at_email.it>
Date: Thu, 08 Dec 2005 12:51:13 +0100

> Daniel Stenberg ha scritto:
>
> Some further comments/ideas:
>
>> Upload
>>
>> We also need to cater for uploads, where the current code calls a read
>> callback with a buffer pointer to have the application copy data to
>> it. To
>> turn that into zero-copy, it would need to be done in a way that
>> allows the
>> application to instead pass in (return?) a pointer and size with
>> data to
>> upload.
>
I have not yet supplied this example

>> (...) It would be much
>> more elegant to either have libcurl specify in the invoke if the
>> buffer is
>> user-provided or libcurl-provided, or just never call this callback
>> with
>> anything else than user-provided buffers.
>
I have changed the 'simple_buf' structure to an enhanced 'curl_buf'
structure (sorry for the inappropriate naming), with more attributes and
a 'flags' specification. You can take a look at the attached source code
to see what I am talking about. Please note that the example does not
cover all the possible usages of this new kind of API, which is very
flexible. Basic support for chained buffers has been implemented, but I
think it should be designed better.

--
  Giuseppe

/*
        zcopycli.c - Pseudo code for a theoretical client
                                 application able to handle zero-copy

        (c) Giuseppe M. legolas558 _at_ email.it

        Read more at:
        http://curl.haxx.se/mail/lib-2005-12/0000.html
          - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        This pseudo code has been modified to cover two general types of
        application (with APP_ONE or APP_TWO definitions).

        (1) applications that need to just 'look into' a buffer of
                downloaded data; in this example, a simple file download

    (2) applications that need to stream the entire data into a larger
                ordered buffer (most common case)
        
        Excuse me for general code chaos, I am exploiting the fact this is
        just pseudo code...

        In this example I introduce a possible use of an enhanced
        'curl_buf' structure to exchange buffers and related information
        between the application and the library. This lets both of them
        set its attributes instead of passing a lot of parameters.
        The two applications can now handle chains of buffers.
          - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
        Please use the mailing list to offer further advice, and also to
        point out corrections where needed!
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>

/* Descriptor for one buffer exchanged between the application and the
        library. Descriptors can be chained through 'next'.
        'curl_buf' is a real typedef name (rather than a macro expanding to
        'struct _curl_buf') so that it obeys normal scoping rules. */
typedef struct _curl_buf {
        char *base; /* first byte of the buffer */
        char *here; /* we expect this pointer to behave as a file
                        pointer, updated each time a write or read
                        happens */
        char *top; /* one past the last byte: top - base is the size */
        struct _curl_buf *next; /* next buffer of the chain, or NULL */
        int flags; /* used to specify ownership, write-access etc. */
        int *expired; /* pointer to a variable set to 0 when the library
                        buffer is no longer available to the application.
                        Other values should help the application in
                        predicting if the buffer is going to expire.
                        A micro-semaphore... */
} curl_buf;

/* Bit masks stored in the 'flags' member of a curl_buf. */
#define CBF_CAN_WRITE 0x000F
#define CBF_LIBRARY_OWNED 0x00F0

/* Geometry helpers; 'cb' is a pointer to a curl_buf. Every argument is
        parenthesized so that expressions such as '&buffer' can be passed. */
#define CB_SIZE(cb) ((int)((cb)->top - (cb)->base))  /* total capacity */
#define CB_AVAIL(cb) ((int)((cb)->top - (cb)->here)) /* bytes after 'here' */
#define CB_DELTA(cb) ((int)((cb)->here - (cb)->base)) /* bytes before 'here' */

/* Flag tests. These must use the bitwise '&': with the logical '&&' any
        non-zero 'flags' value would satisfy every test at once.
        Both evaluate to 1 or 0. */
#define CB_WRITEABLE(cb) ((((cb)->flags) & CBF_CAN_WRITE) != 0)
#define CB_LIBRARY_OWNED(cb) ((((cb)->flags) & CBF_LIBRARY_OWNED) != 0)

/* Round 'amount' up to a whole number of 'granularity'-sized units and
        return the rounded value. A result of zero units is bumped to one,
        so the smallest value returned for a non-negative 'amount' is
        'granularity' itself. */
int cb_granularity_fix(int amount, int granularity)
{
        int units = amount / granularity;

        /* a positive remainder means a partially filled last unit */
        if (amount % granularity > 0)
                units += 1;
        if (units == 0)
                units = 1;
        return units * granularity;
}

/* Make sure the buffer described by 'cb' spans at least 'desired_size'
        bytes, growing it with realloc() to a multiple of 1024 bytes when it
        is too small. The offset of 'here' from 'base' is preserved across
        the reallocation.
        Returns the resulting buffer size, or 0 if the allocation failed.
        On failure 'cb' is left untouched, so the previous storage is
        neither leaked nor left with dangling 'here'/'top' pointers. */
int cb_assert_size(curl_buf *cb, int desired_size) {
        int delta, new_size;
        char *grown;

        if (desired_size <= CB_SIZE(cb))
                return CB_SIZE(cb); /* already large enough */

        new_size = cb_granularity_fix(desired_size, 1024);
        delta = CB_DELTA(cb);
        /* keep the old pointer until realloc() is known to have succeeded */
        grown = realloc(cb->base, new_size);
        if (grown == NULL)
                return 0;
        cb->base = grown;
        cb->here = cb->base + delta;
        cb->top = cb->base + new_size;
        return new_size;
}

/* Append one buffer to another, inserting at the 'here' position of
        'main' and advancing that position past the copied bytes.
        The number of bytes copied is CB_DELTA(element).
        Returns the number of bytes copied, or 0 when there is nothing to
        copy or when 'main' could not be grown to hold the data (in which
        case 'main' is left unchanged).
        NOTE(review): the length is the distance from element->base to
        element->here, yet the copy starts at element->here; confirm which
        region of 'element' is meant to hold the payload. */
int cb_insert(curl_buf *main, curl_buf *element) {
        int len;

        len = CB_DELTA(element);
        if (len <= 0)
                return 0;
        /* never copy unless the destination is known to be big enough */
        if (!cb_assert_size(main, CB_DELTA(main) + len))
                return 0;
        memcpy(main->here, element->here, len);
        main->here += len;
        return len;
}

typedef curl_buf * (awb_prototype(void *custom_data, int desired_size));
/* From here on, AWB stands for Allocate Write Buffer.
        This function is defined by the application and specified to libcurl
        through a hypothetical CURLOPT_AWB option, forcing the library to
        use it instead of the internal one. Quoting J.Lokier:
        "The library will call it when data is not already available in a
        fixed location due to algorithms such as chunked decoding, zlib and
        SSL decryption."
        Basically, this function must return an allocated structure of type
        'curl_buf' containing a buffer sized AT LEAST as large as the value
        given in 'desired_size'. Any combination of 'curl_buf' attributes
        is accepted as long as it is valid.
        An application following behaviour (1) will try to re-use the same
        buffer for any single-use operation needed.
        In case (2) the application will instead reallocate its buffer and
        pass the new pointer to the library.
*/

typedef int (wcb_prototype(void *custom_data, curl_buf *real_buffer));
/* WCB stands for Write Call Back. This function is called when at least
        one buffer is prepared (further buffers may be chained through
        the 'next' pointer inside the struct).
*/

/* AWB handler of the example application.
        'custom_data' is the application's own curl_buf. It is grown so that
        the library can write 'desired_size' bytes into it:
        - APP_ONE reuses the buffer from its start, so only 'desired_size'
          bytes of total capacity are required;
        - APP_TWO appends, so the room is required after the bytes already
          stored before 'here'.
        Returns the buffer, or NULL when it could not be grown, so that a
        buffer smaller than requested is never handed out. */
curl_buf * allocate_write_buffer(void *custom_data, int desired_size) {
        curl_buf *cb;
        int needed;

        cb = (curl_buf *)custom_data;
#ifdef APP_ONE
        needed = desired_size;
#else /* APP_TWO */
        needed = CB_DELTA(cb) + desired_size;
#endif
        /* cb_assert_size() returns 0 on allocation failure; a non-positive
           request needs no growth and must not be mistaken for a failure */
        if (needed > 0 && !cb_assert_size(cb, needed))
                return NULL;
        return cb;
}

/* WCB handler of the example application: walks the chain starting at
        'real_buffer' and consumes every element.
        Returns the number of buffers processed rather than a byte count;
        in the APP_ONE variant the walk stops early on a file write error
        and the count reached so far is returned. */
int write_callback(void *custom_data, curl_buf *real_buffer) {
        int handled = 0;
        curl_buf *node;

        for (node = real_buffer; ; node = node->next) {
#ifdef APP_ONE
                if (fwrite(node->here, 1, CB_DELTA(node), output_file)
                        != CB_DELTA(node))
                        return handled; /* file write error */
#else
                if (CB_LIBRARY_OWNED(node)) {
                        /* The library is providing a private buffer, so all
                           we have to do is copy from it into our big
                           streamed one. Quoting J.Lokier:
                           "When receiver use its own buffer, and sender
                            already has the data in its own buffer, then and
                            only then do we have to memcpy()"
                           The memcpy() is done by cb_insert(). */
                        cb_insert((curl_buf *) custom_data, node);
                }
                /* Otherwise the library has written straight into our
                   buffer, which is fine. Note: both the application and the
                   library are expected to keep the 'here' pointer of the
                   'curl_buf' structure up to date. */
#endif
                handled++;
                if (!node->next)
                        break;
        }
        /* The library should have computed the chain depth itself, so it
           can use this count as a checksum. */
        return handled;
}

/* Select which example application is built: leave APP_ONE defined for
        the file-download client (1), or remove it to build the in-memory
        streaming client (2).
        NOTE(review): APP_ONE is defined here, after
        allocate_write_buffer() and write_callback(), so the
        '#ifdef APP_ONE' tests inside those two functions are evaluated
        before this definition and select their APP_TWO branch, while
        main() below selects APP_ONE. This block (together with
        'output_file') probably belongs above those functions -- confirm. */
#define APP_ONE
/* APP_TWO itself is never tested; it is simply "APP_ONE not defined". */
/*
#define APP_TWO
*/

/* see the main() code for an explanation of the following */
#ifdef APP_ONE
FILE *output_file; /* used in example app #1 */
#else /* APP_TWO */
#define BUFFER_SIZE (1024*32) /* used by app #2 */
#endif

int main(void)
{
  curl_buf buffer;
  CURL *curl;
  CURLcode res;

  curl = curl_easy_init();
  if(!curl) return -1;

  memset(&buffer, 0, sizeof(curl_buf));
#ifdef APP_ONE
  output_file = fopen("index.htm", "wb");
/* since output will be flushed to file system,
        we do not use any starting memory buffer.
        Please note that it if needed (when the library
        calls 'allocate_write_buffer') it will be anyway
        dynamically allocated. */
#else /* APP_TWO */
  cb_assert_size(&buffer, BUFFER_SIZE);
/* since we need the entire downloaded file into
        an ordered memory stream, we allocate the huge
        memory block before everything begins */
#endif
  
  curl_easy_setopt(curl, CURLOPT_URL, "curl.haxx.se");

/* set the new 'allocate_write_buffer' handler */
  curl_easy_setopt(curl, CURLOPT_AWB, &allocate_write_buffer);
/* */
  curl_easy_setopt(curl, CURLOPT_WCB, &write_callback);
/* CURLOPT_WRITEDATA would be used (as now) to set a custom parameter
        ('custom_data') for calls to 'allocate_write_buffer' & 'write_callback'
*/
  curl_easy_setopt(curl, CURLOPT_WRITEDATA, &buffer);

  res = curl_easy_perform(curl);

  if (!res) {
          fprintf(stderr, "Perform error: %s\n", curl_easy_strerror(res));
          curl_easy_cleanup(curl);
#ifdef APP_ONE
          fclose(output_file);
          if (buffer.base)
#endif
                  free(buffer.base);
          return -2;
  }

  /* now, if using application (1), we have a file called 'index.htm'
        with the downloaded content.
  */

  /* in the 2nd case a memory stream starting from 'buffer.base' and ending
        at 'buffer.here' is available for post-processing.
        Implementing a zero copy interface is a very complex problem and this
        example is just a draw of 'what should it look like'
  */

#ifdef APP_ONE
  if (buffer.base)
#endif
          free(buffer.base);

  curl_easy_cleanup(curl);
#ifdef APP_ONE
  fclose(output_file);
#endif
  return 0;
}
Received on 2005-12-08