Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

content_encoding: use writer struct subclasses for different encodings #9455

Closed
wants to merge 1 commit into from

Conversation

monnerat
Copy link
Contributor

@monnerat monnerat commented Sep 8, 2022

The variable-sized encoding-specific storage of a struct contenc_writer currently relies on void * alignment that may be insufficient with regards to the specific storage fields, although having not caused any problems yet.

In addition, gcc 11.3 issues a warning on access to fields of partially allocated structures that can occur when the specific storage size is 0:

  content_encoding.c: In function ‘Curl_build_unencoding_stack’:
  content_encoding.c:980:21: warning: array subscript ‘struct contenc_writer[0]’ is partly outside array bounds of ‘unsigned char[16]’ [-Warray-bounds]
    980 |     writer->handler = handler;
        |     ~~~~~~~~~~~~~~~~^~~~~~~~~
  In file included from content_encoding.c:49:
  memdebug.h:115:29: note: referencing an object of size 16 allocated by ‘curl_dbg_calloc’
    115 | #define calloc(nbelem,size) curl_dbg_calloc(nbelem, size, __LINE__, __FILE__)
        |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  content_encoding.c:977:60: note: in expansion of macro ‘calloc’
    977 |   struct contenc_writer *writer = (struct contenc_writer *)calloc(1, sz);

To solve both these problems, the current commit replaces the contenc_writer/params structure pairs by "subclasses" of struct contenc_writer. These are structures that contain a contenc_writer at offset 0. Proper field alignment is therefore handled by the compiler and full structure allocation is performed, silencing the warnings.

This replaces PR #9450 in a much simpler and more readable way.

@bagder
Copy link
Member

bagder commented Sep 9, 2022

I don't think the macros help, I think they make the code harder to read. I suggest this change;

diff --git a/lib/content_encoding.c b/lib/content_encoding.c
index 4d4dd2589..ac7cb6f8f 100644
--- a/lib/content_encoding.c
+++ b/lib/content_encoding.c
@@ -52,16 +52,10 @@
 
 #ifndef CURL_DISABLE_HTTP
 
 #define DSIZ CURL_MAX_WRITE_SIZE /* buffer size for decompressed data */
 
-/* Macros for emulating subclasses. */
-#define SUBCLASS_OF(t)  t super
-#define SUBCLASS(t, p)  ((t *) (DEBUGASSERT(offsetof(t, super) == 0),   \
-  DEBUGASSERT(sizeof(((t *) 0)->super) == sizeof(*p)), p))
-
-
 #ifdef HAVE_LIBZ
 
 /* Comment this out if zlib is always going to be at least ver. 1.2.0.4
    (doing so will reduce code size slightly). */
 #define OLD_ZLIB_SUPPORT 1
@@ -87,11 +81,11 @@ typedef enum {
   ZLIB_INIT_GZIP             /* initialized in transparent gzip mode */
 } zlibInitState;
 
 /* Deflate and gzip writer. */
 struct zlib_writer {
-  SUBCLASS_OF(struct contenc_writer);
+  struct contenc_writer super;
   zlibInitState zlib_init;   /* zlib init state */
   uInt trailerlen;           /* Remaining trailer byte count. */
   z_stream z;                /* State structure for zlib. */
 };
 
@@ -166,11 +160,11 @@ static CURLcode process_trailer(struct Curl_easy *data,
 
 static CURLcode inflate_stream(struct Curl_easy *data,
                                struct contenc_writer *writer,
                                zlibInitState started)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *)writer;
   z_stream *z = &zp->z;         /* zlib state structure */
   uInt nread = z->avail_in;
   Bytef *orig_in = z->next_in;
   bool done = FALSE;
   CURLcode result = CURLE_OK;   /* Curl_client_write status */
@@ -269,11 +263,11 @@ static CURLcode inflate_stream(struct Curl_easy *data,
 
 /* Deflate handler. */
 static CURLcode deflate_init_writer(struct Curl_easy *data,
                                     struct contenc_writer *writer)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *) writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   if(!writer->downstream)
     return CURLE_WRITE_ERROR;
 
@@ -289,11 +283,11 @@ static CURLcode deflate_init_writer(struct Curl_easy *data,
 
 static CURLcode deflate_unencode_write(struct Curl_easy *data,
                                        struct contenc_writer *writer,
                                        const char *buf, size_t nbytes)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *)writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   /* Set the compressed input when this function is called */
   z->next_in = (Bytef *) buf;
   z->avail_in = (uInt) nbytes;
@@ -306,11 +300,11 @@ static CURLcode deflate_unencode_write(struct Curl_easy *data,
 }
 
 static void deflate_close_writer(struct Curl_easy *data,
                                  struct contenc_writer *writer)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *) writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   exit_zlib(data, z, &zp->zlib_init, CURLE_OK);
 }
 
@@ -326,11 +320,11 @@ static const struct content_encoding deflate_encoding = {
 
 /* Gzip handler. */
 static CURLcode gzip_init_writer(struct Curl_easy *data,
                                  struct contenc_writer *writer)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *) writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   if(!writer->downstream)
     return CURLE_WRITE_ERROR;
 
@@ -443,11 +437,11 @@ static enum {
 
 static CURLcode gzip_unencode_write(struct Curl_easy *data,
                                     struct contenc_writer *writer,
                                     const char *buf, size_t nbytes)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *) writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   if(zp->zlib_init == ZLIB_INIT_GZIP) {
     /* Let zlib handle the gzip decompression entirely */
     z->next_in = (Bytef *) buf;
@@ -570,11 +564,11 @@ static CURLcode gzip_unencode_write(struct Curl_easy *data,
 }
 
 static void gzip_close_writer(struct Curl_easy *data,
                               struct contenc_writer *writer)
 {
-  struct zlib_writer *zp = SUBCLASS(struct zlib_writer, writer);
+  struct zlib_writer *zp = (struct zlib_writer *) writer;
   z_stream *z = &zp->z;     /* zlib state structure */
 
   exit_zlib(data, z, &zp->zlib_init, CURLE_OK);
 }
 
@@ -591,11 +585,11 @@ static const struct content_encoding gzip_encoding = {
 
 
 #ifdef HAVE_BROTLI
 /* Brotli writer. */
 struct brotli_writer {
-  SUBCLASS_OF(struct contenc_writer);
+  struct contenc_writer super;
   BrotliDecoderState *br;    /* State structure for brotli. */
 };
 
 static CURLcode brotli_map_error(BrotliDecoderErrorCode be)
 {
@@ -636,11 +630,11 @@ static CURLcode brotli_map_error(BrotliDecoderErrorCode be)
 }
 
 static CURLcode brotli_init_writer(struct Curl_easy *data,
                                    struct contenc_writer *writer)
 {
-  struct brotli_writer *bp = SUBCLASS(struct brotli_writer, writer);
+  struct brotli_writer *bp = (struct brotli_writer *) writer;
   (void) data;
 
   if(!writer->downstream)
     return CURLE_WRITE_ERROR;
 
@@ -650,11 +644,11 @@ static CURLcode brotli_init_writer(struct Curl_easy *data,
 
 static CURLcode brotli_unencode_write(struct Curl_easy *data,
                                       struct contenc_writer *writer,
                                       const char *buf, size_t nbytes)
 {
-  struct brotli_writer *bp = SUBCLASS(struct brotli_writer, writer);
+  struct brotli_writer *bp = (struct brotli_writer *) writer;
   const uint8_t *src = (const uint8_t *) buf;
   char *decomp;
   uint8_t *dst;
   size_t dstleft;
   CURLcode result = CURLE_OK;
@@ -697,11 +691,11 @@ static CURLcode brotli_unencode_write(struct Curl_easy *data,
 }
 
 static void brotli_close_writer(struct Curl_easy *data,
                                 struct contenc_writer *writer)
 {
-  struct brotli_writer *bp = SUBCLASS(struct brotli_writer, writer);
+  struct brotli_writer *bp = (struct brotli_writer *) writer;
 
   (void) data;
 
   if(bp->br) {
     BrotliDecoderDestroyInstance(bp->br);
@@ -721,19 +715,19 @@ static const struct content_encoding brotli_encoding = {
 
 
 #ifdef HAVE_ZSTD
 /* Zstd writer. */
 struct zstd_writer {
-  SUBCLASS_OF(struct contenc_writer);
+  struct contenc_writer super;
   ZSTD_DStream *zds;    /* State structure for zstd. */
   void *decomp;
 };
 
 static CURLcode zstd_init_writer(struct Curl_easy *data,
                                  struct contenc_writer *writer)
 {
-  struct zstd_writer *zp = SUBCLASS(struct zstd_writer, writer);
+  struct zstd_writer *zp = (struct zstd_writer *) writer;
 
   (void)data;
 
   if(!writer->downstream)
     return CURLE_WRITE_ERROR;
@@ -746,11 +740,11 @@ static CURLcode zstd_init_writer(struct Curl_easy *data,
 static CURLcode zstd_unencode_write(struct Curl_easy *data,
                                     struct contenc_writer *writer,
                                     const char *buf, size_t nbytes)
 {
   CURLcode result = CURLE_OK;
-  struct zstd_writer *zp = SUBCLASS(struct zstd_writer, writer);
+  struct zstd_writer *zp = (struct zstd_writer *) writer;
   ZSTD_inBuffer in;
   ZSTD_outBuffer out;
   size_t errorCode;
 
   if(!zp->decomp) {
@@ -785,11 +779,11 @@ static CURLcode zstd_unencode_write(struct Curl_easy *data,
 }
 
 static void zstd_close_writer(struct Curl_easy *data,
                               struct contenc_writer *writer)
 {
-  struct zstd_writer *zp = SUBCLASS(struct zstd_writer, writer);
+  struct zstd_writer *zp = (struct zstd_writer *) writer;
 
   (void)data;
 
   if(zp->decomp) {
     free(zp->decomp);

@monnerat
Copy link
Contributor Author

monnerat commented Sep 9, 2022

I don't think the macros help, I think they make the code harder to read.

The idea was to emphasize the object-oriented subclassing and implement limited checks on proper subtypes layout.
I'll change it anyway.

The variable-sized encoding-specific storage of a struct contenc_writer
currently relies on void * alignment that may be insufficient with
regards to the specific storage fields, although having not caused any
problems yet.

In addition, gcc 11.3 issues a warning on access to fields of partially
allocated structures that can occur when the specific storage size is 0:

  content_encoding.c: In function ‘Curl_build_unencoding_stack’:
  content_encoding.c:980:21: warning: array subscript ‘struct contenc_writer[0]’ is partly outside array bounds of ‘unsigned char[16]’ [-Warray-bounds]
    980 |     writer->handler = handler;
        |     ~~~~~~~~~~~~~~~~^~~~~~~~~
  In file included from content_encoding.c:49:
  memdebug.h:115:29: note: referencing an object of size 16 allocated by ‘curl_dbg_calloc’
    115 | #define calloc(nbelem,size) curl_dbg_calloc(nbelem, size, __LINE__, __FILE__)
        |                             ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  content_encoding.c:977:60: note: in expansion of macro ‘calloc’
    977 |   struct contenc_writer *writer = (struct contenc_writer *)calloc(1, sz);

To solve both these problems, the current commit replaces the
contenc_writer/params structure pairs by "subclasses" of struct
contenc_writer. These are structures that contain a contenc_writer at
offset 0. Proper field alignment is therefore handled by the compiler and
full structure allocation is performed, silencing the warnings.
@bagder
Copy link
Member

bagder commented Sep 11, 2022

Can you also double-check the commit message? It seems to explain the earlier fix, does it not?

@monnerat
Copy link
Contributor Author

It seems to explain the earlier fix, does it not?

No, it doesn't. The first part describes the problems and thus has not changed. The last paragraph documents the way they are fixed and is completely new.

@bagder
Copy link
Member

bagder commented Sep 11, 2022

Thanks!

@bagder bagder closed this in 4399b03 Sep 11, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Development

Successfully merging this pull request may close these issues.

None yet

2 participants