From dbcced8e32b50c068ac297106f0502ee200a1ebd Mon Sep 17 00:00:00 2001 From: Patrick Monnerat Date: Sun, 5 Nov 2017 15:09:48 +0100 Subject: [PATCH] HTTP: support multiple Content-Encodings This is implemented as an output streaming stack of unencoders, the last calling the client write procedure. New test 230 checks this feature. Bug: https://github.com/curl/curl/pull/2002 Reported-By: Daniel Bankhead --- docs/INTERNALS.md | 2 +- docs/TODO | 7 - lib/content_encoding.c | 574 +++++++++++++++++++++++++++++++++------- lib/content_encoding.h | 49 ++-- lib/http.c | 68 +---- lib/http_chunks.c | 42 +-- lib/transfer.c | 37 +-- lib/url.c | 14 +- lib/urldata.h | 24 +- tests/data/Makefile.inc | 2 +- tests/data/test230 | 200 ++++++++++++++ 11 files changed, 736 insertions(+), 283 deletions(-) create mode 100644 tests/data/test230 diff --git a/docs/INTERNALS.md b/docs/INTERNALS.md index 6c1d5baf2..fb9d50378 100644 --- a/docs/INTERNALS.md +++ b/docs/INTERNALS.md @@ -673,7 +673,7 @@ Content Encoding where string is the intended value of the Accept-Encoding header. - Currently, libcurl does not support multiple encodings and only + Currently, libcurl does support multiple encodings but only understands how to process responses that use the "deflate" or "gzip" Content-Encoding, so the only values for [`CURLOPT_ACCEPT_ENCODING`][5] that will work (besides "identity," which does nothing) are "deflate" diff --git a/docs/TODO b/docs/TODO index b10323a9d..3e5f8bd75 100644 --- a/docs/TODO +++ b/docs/TODO @@ -67,7 +67,6 @@ 5.7 Brotli compression 5.8 QUIC 5.9 Leave secure cookies alone - 5.10 Support Multiple Content-Encodings 6. TELNET 6.1 ditch stdin @@ -538,12 +537,6 @@ This is not detailed in any FTP specification. https://tools.ietf.org/html/draft-ietf-httpbis-cookie-alone-01 -5.10 Support Multiple Content-Encodings - - RFC 7231 Section 3.1.2.2 allows multiple encodings for a single request. Using - this may result in lower bandwidth and promotes a more resource-friendly web. - Currently, Chrome and Firefox support multiple encodings. - 6. TELNET diff --git a/lib/content_encoding.c b/lib/content_encoding.c index 110226034..76a9e6866 100644 --- a/lib/content_encoding.c +++ b/lib/content_encoding.c @@ -22,16 +22,23 @@ #include "curl_setup.h" -#ifdef HAVE_LIBZ - #include "urldata.h" #include +#include #include "sendf.h" +#include "http.h" #include "content_encoding.h" #include "strdup.h" +#include "strcase.h" #include "curl_memory.h" #include "memdebug.h" +#define CONTENT_ENCODING_DEFAULT "identity" + +#ifndef CURL_DISABLE_HTTP + +#ifdef HAVE_LIBZ + /* Comment this out if zlib is always going to be at least ver. 1.2.0.4 (doing so will reduce code size slightly). */ #define OLD_ZLIB_SUPPORT 1 @@ -49,6 +56,21 @@ #define COMMENT 0x10 /* bit 4 set: file comment present */ #define RESERVED 0xE0 /* bits 5..7: reserved */ +typedef enum { + ZLIB_UNINIT, /* uninitialized */ + ZLIB_INIT, /* initialized */ + ZLIB_GZIP_HEADER, /* reading gzip header */ + ZLIB_GZIP_INFLATING, /* inflating gzip stream */ + ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ +} zlibInitState; + +/* Writer parameters. */ +typedef struct { + zlibInitState zlib_init; /* zlib init state */ + z_stream z; /* State structure for zlib. */ +} zlib_params; + + static voidpf zalloc_cb(voidpf opaque, unsigned int items, unsigned int size) { @@ -79,19 +101,27 @@ process_zlib_error(struct connectdata *conn, z_stream *z) } static CURLcode -exit_zlib(z_stream *z, zlibInitState *zlib_init, CURLcode result) +exit_zlib(struct connectdata *conn, + z_stream *z, zlibInitState *zlib_init, CURLcode result) { - inflateEnd(z); - *zlib_init = ZLIB_UNINIT; + if(*zlib_init == ZLIB_GZIP_HEADER) + Curl_safefree(z->next_in); + + if(*zlib_init != ZLIB_UNINIT) { + if(inflateEnd(z) != Z_OK && result == CURLE_OK) + result = process_zlib_error(conn, z); + *zlib_init = ZLIB_UNINIT; + } + return result; } static CURLcode -inflate_stream(struct connectdata *conn, - struct SingleRequest *k) +inflate_stream(struct connectdata *conn, contenc_writer *writer) { + zlib_params *zp = (zlib_params *) &writer->params; int allow_restart = 1; - z_stream *z = &k->z; /* zlib state structure */ + z_stream *z = &zp->z; /* zlib state structure */ uInt nread = z->avail_in; Bytef *orig_in = z->next_in; int status; /* zlib status */ @@ -102,35 +132,31 @@ inflate_stream(struct connectdata *conn, large to hold on the stack */ decomp = malloc(DSIZ); if(decomp == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } /* because the buffer size is fixed, iteratively decompress and transfer to the client via client_write. */ for(;;) { /* (re)set buffer for decompressed output for every iteration */ - z->next_out = (Bytef *)decomp; + z->next_out = (Bytef *) decomp; z->avail_out = DSIZ; status = inflate(z, Z_SYNC_FLUSH); if(status == Z_OK || status == Z_STREAM_END) { allow_restart = 0; - if((DSIZ - z->avail_out) && (!k->ignorebody)) { - result = Curl_client_write(conn, CLIENTWRITE_BODY, decomp, + result = Curl_unencode_write(conn, writer->downstream, decomp, DSIZ - z->avail_out); - /* if !CURLE_OK, clean up, return */ - if(result) { - free(decomp); - return exit_zlib(z, &k->zlib_init, result); - } + /* if !CURLE_OK, clean up, return */ + if(result) { + free(decomp); + return exit_zlib(conn, z, &zp->zlib_init, result); } /* Done? clean up, return */ if(status == Z_STREAM_END) { free(decomp); - if(inflateEnd(z) == Z_OK) - return exit_zlib(z, &k->zlib_init, result); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, result); } /* Done with these bytes, exit */ @@ -148,7 +174,8 @@ inflate_stream(struct connectdata *conn, (void) inflateEnd(z); /* don't care about the return code */ if(inflateInit2(z, -MAX_WBITS) != Z_OK) { free(decomp); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + zp->zlib_init = ZLIB_UNINIT; /* inflateEnd() already called. */ + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } z->next_in = orig_in; z->avail_in = nread; @@ -157,36 +184,97 @@ inflate_stream(struct connectdata *conn, } else { /* Error; exit loop, handle below */ free(decomp); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } - /* Will never get here */ + /* UNREACHED */ } -CURLcode -Curl_unencode_deflate_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread) + +/* Deflate handler. */ +static CURLcode deflate_init_writer(struct connectdata *conn, + contenc_writer *writer) { - z_stream *z = &k->z; /* zlib state structure */ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ - /* Initialize zlib? */ - if(k->zlib_init == ZLIB_UNINIT) { - memset(z, 0, sizeof(z_stream)); - z->zalloc = (alloc_func)zalloc_cb; - z->zfree = (free_func)zfree_cb; + if(!writer->downstream) + return CURLE_WRITE_ERROR; - if(inflateInit(z) != Z_OK) - return process_zlib_error(conn, z); - k->zlib_init = ZLIB_INIT; - } + /* Initialize zlib */ + z->zalloc = (alloc_func) zalloc_cb; + z->zfree = (free_func) zfree_cb; + + if(inflateInit(z) != Z_OK) + return process_zlib_error(conn, z); + zp->zlib_init = ZLIB_INIT; + return CURLE_OK; +} + +static CURLcode deflate_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ /* Set the compressed input when this function is called */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; /* Now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); +} + +static void deflate_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + exit_zlib(conn, z, &zp->zlib_init, CURLE_OK); +} + +static const content_encoding deflate_encoding = { + "deflate", + NULL, + deflate_init_writer, + deflate_unencode_write, + deflate_close_writer, + sizeof(zlib_params) +}; + + +/* Gzip handler. */ +static CURLcode gzip_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + if(!writer->downstream) + return CURLE_WRITE_ERROR; + + /* Initialize zlib */ + z->zalloc = (alloc_func) zalloc_cb; + z->zfree = (free_func) zfree_cb; + + if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { + /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */ + if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { + return process_zlib_error(conn, z); + } + zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ + } + else { + /* we must parse the gzip header ourselves */ + if(inflateInit2(z, -MAX_WBITS) != Z_OK) { + return process_zlib_error(conn, z); + } + zp->zlib_init = ZLIB_INIT; /* Initial call state */ + } + + return CURLE_OK; } #ifdef OLD_ZLIB_SUPPORT @@ -273,47 +361,25 @@ static enum { } #endif -CURLcode -Curl_unencode_gzip_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread) +static CURLcode gzip_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) { - z_stream *z = &k->z; /* zlib state structure */ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ - /* Initialize zlib? */ - if(k->zlib_init == ZLIB_UNINIT) { - memset(z, 0, sizeof(z_stream)); - z->zalloc = (alloc_func)zalloc_cb; - z->zfree = (free_func)zfree_cb; - - if(strcmp(zlibVersion(), "1.2.0.4") >= 0) { - /* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */ - if(inflateInit2(z, MAX_WBITS + 32) != Z_OK) { - return process_zlib_error(conn, z); - } - k->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */ - } - else { - /* we must parse the gzip header ourselves */ - if(inflateInit2(z, -MAX_WBITS) != Z_OK) { - return process_zlib_error(conn, z); - } - k->zlib_init = ZLIB_INIT; /* Initial call state */ - } - } - - if(k->zlib_init == ZLIB_INIT_GZIP) { + if(zp->zlib_init == ZLIB_INIT_GZIP) { /* Let zlib handle the gzip decompression entirely */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; /* Now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); } #ifndef OLD_ZLIB_SUPPORT /* Support for old zlib versions is compiled away and we are running with an old version, so return an error. */ - return exit_zlib(z, &k->zlib_init, CURLE_WRITE_ERROR); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_WRITE_ERROR); #else /* This next mess is to get around the potential case where there isn't @@ -326,18 +392,18 @@ Curl_unencode_gzip_write(struct connectdata *conn, * can handle the gzip header themselves. */ - switch(k->zlib_init) { + switch(zp->zlib_init) { /* Skip over gzip header? */ case ZLIB_INIT: { /* Initial call state */ ssize_t hlen; - switch(check_gzip_header((unsigned char *)k->str, nread, &hlen)) { + switch(check_gzip_header((unsigned char *) buf, nbytes, &hlen)) { case GZIP_OK: - z->next_in = (Bytef *)k->str + hlen; - z->avail_in = (uInt)(nread - hlen); - k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ + z->next_in = (Bytef *) buf + hlen; + z->avail_in = (uInt) (nbytes - hlen); + zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -348,19 +414,19 @@ Curl_unencode_gzip_write(struct connectdata *conn, * the first place, and it's even more unlikely for a transfer to fail * immediately afterwards, it should seldom be a problem. */ - z->avail_in = (uInt)nread; + z->avail_in = (uInt) nbytes; z->next_in = malloc(z->avail_in); if(z->next_in == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } - memcpy(z->next_in, k->str, z->avail_in); - k->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ + memcpy(z->next_in, buf, z->avail_in); + zp->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */ /* We don't have any data to inflate yet */ return CURLE_OK; case GZIP_BAD: default: - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } @@ -370,22 +436,22 @@ Curl_unencode_gzip_write(struct connectdata *conn, { /* Need more gzip header data state */ ssize_t hlen; - z->avail_in += (uInt)nread; + z->avail_in += (uInt) nbytes; z->next_in = Curl_saferealloc(z->next_in, z->avail_in); if(z->next_in == NULL) { - return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY); + return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY); } /* Append the new block of data to the previous one */ - memcpy(z->next_in + z->avail_in - nread, k->str, nread); + memcpy(z->next_in + z->avail_in - nbytes, buf, nbytes); switch(check_gzip_header(z->next_in, z->avail_in, &hlen)) { case GZIP_OK: /* This is the zlib stream data */ free(z->next_in); /* Don't point into the malloced block since we just freed it */ - z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in; - z->avail_in = (uInt)(z->avail_in - hlen); - k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ + z->next_in = (Bytef *) buf + hlen + nbytes - z->avail_in; + z->avail_in = (uInt) (z->avail_in - hlen); + zp->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */ break; case GZIP_UNDERFLOW: @@ -394,8 +460,7 @@ Curl_unencode_gzip_write(struct connectdata *conn, case GZIP_BAD: default: - free(z->next_in); - return exit_zlib(z, &k->zlib_init, process_zlib_error(conn, z)); + return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z)); } } @@ -404,8 +469,8 @@ Curl_unencode_gzip_write(struct connectdata *conn, case ZLIB_GZIP_INFLATING: default: /* Inflating stream state */ - z->next_in = (Bytef *)k->str; - z->avail_in = (uInt)nread; + z->next_in = (Bytef *) buf; + z->avail_in = (uInt) nbytes; break; } @@ -415,17 +480,332 @@ Curl_unencode_gzip_write(struct connectdata *conn, } /* We've parsed the header, now uncompress the data */ - return inflate_stream(conn, k); + return inflate_stream(conn, writer); #endif } +static void gzip_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + zlib_params *zp = (zlib_params *) &writer->params; + z_stream *z = &zp->z; /* zlib state structure */ + + exit_zlib(conn, z, &zp->zlib_init, CURLE_OK); +} + +static const content_encoding gzip_encoding = { + "gzip", + "x-gzip", + gzip_init_writer, + gzip_unencode_write, + gzip_close_writer, + sizeof(zlib_params) +}; + +#endif /* HAVE_LIBZ */ + + +/* Identity handler. */ +static CURLcode identity_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; +} + +static CURLcode identity_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + return Curl_unencode_write(conn, writer->downstream, buf, nbytes); +} + +static void identity_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding identity_encoding = { + "identity", + NULL, + identity_init_writer, + identity_unencode_write, + identity_close_writer, + 0 +}; + + +/* supported content encodings table. */ +static const content_encoding * const encodings[] = { + &identity_encoding, +#ifdef HAVE_LIBZ + &deflate_encoding, + &gzip_encoding, +#endif + NULL +}; + + +/* Return a list of comma-separated names of supported encodings. */ +char *Curl_all_content_encodings(void) +{ + size_t len = 0; + const content_encoding * const *cep; + const content_encoding *ce; + char *ace; + char *p; + + for(cep = encodings; *cep; cep++) { + ce = *cep; + if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) + len += strlen(ce->name) + 2; + } + + if(!len) + return strdup(CONTENT_ENCODING_DEFAULT); + + ace = malloc(len); + if(ace) { + p = ace; + for(cep = encodings; *cep; cep++) { + ce = *cep; + if(!strcasecompare(ce->name, CONTENT_ENCODING_DEFAULT)) { + strcpy(p, ce->name); + p += strlen(p); + *p++ = ','; + *p++ = ' '; + } + } + p[-2] = '\0'; + } + + return ace; +} + + +/* Real client writer: no downstream. */ +static CURLcode client_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_WRITE_ERROR: CURLE_OK; +} + +static CURLcode client_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + struct Curl_easy *data = conn->data; + struct SingleRequest *k = &data->req; + + (void) writer; + + if(!nbytes || k->ignorebody) + return CURLE_OK; + + return Curl_client_write(conn, CLIENTWRITE_BODY, (char *) buf, nbytes); +} + +static void client_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding client_encoding = { + NULL, + NULL, + client_init_writer, + client_unencode_write, + client_close_writer, + 0 +}; + + +/* Deferred error dummy writer. */ +static CURLcode error_init_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + return writer->downstream? CURLE_OK: CURLE_WRITE_ERROR; +} + +static CURLcode error_unencode_write(struct connectdata *conn, + contenc_writer *writer, + const char *buf, size_t nbytes) +{ + char *all = Curl_all_content_encodings(); + + (void) writer; + (void) buf; + (void) nbytes; + + if(!all) + return CURLE_OUT_OF_MEMORY; + failf(conn->data, "Unrecognized content encoding type. " + "libcurl understands %s content encodings.", all); + free(all); + return CURLE_BAD_CONTENT_ENCODING; +} + +static void error_close_writer(struct connectdata *conn, + contenc_writer *writer) +{ + (void) conn; + (void) writer; +} + +static const content_encoding error_encoding = { + NULL, + NULL, + error_init_writer, + error_unencode_write, + error_close_writer, + 0 +}; + +/* Create an unencoding writer stage using the given handler. */ +static contenc_writer *new_unencoding_writer(struct connectdata *conn, + const content_encoding *handler, + contenc_writer *downstream) +{ + size_t sz = offsetof(contenc_writer, params) + handler->paramsize; + contenc_writer *writer = (contenc_writer *) malloc(sz); + + if(writer) { + memset(writer, 0, sz); + writer->handler = handler; + writer->downstream = downstream; + if(handler->init_writer(conn, writer)) { + free(writer); + writer = NULL; + } + } + + return writer; +} + +/* Write data using an unencoding writer stack. */ +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes) +{ + return writer->handler->unencode_write(conn, writer, buf, nbytes); +} + +/* Close and clean-up the connection's writer stack. */ void Curl_unencode_cleanup(struct connectdata *conn) { struct Curl_easy *data = conn->data; struct SingleRequest *k = &data->req; - z_stream *z = &k->z; - if(k->zlib_init != ZLIB_UNINIT) - (void) exit_zlib(z, &k->zlib_init, CURLE_OK); + contenc_writer *writer = k->writer_stack; + + while(writer) { + k->writer_stack = writer->downstream; + writer->handler->close_writer(conn, writer); + free(writer); + writer = k->writer_stack; + } } -#endif /* HAVE_LIBZ */ +/* Find the content encoding by name. */ +static const content_encoding *find_encoding(const char *name, size_t len) +{ + const content_encoding * const *cep; + const content_encoding *ce; + + for(cep = encodings; *cep; cep++) { + ce = *cep; + if((strncasecompare(name, ce->name, len) && !ce->name[len]) || + (ce->alias && strncasecompare(name, ce->alias, len) && !ce->alias[len])) + return ce; + } + return NULL; +} + +/* Set-up the unencoding stack from the Content-Encoding header value. + * See RFC 7231 section 3.1.2.2. */ +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked) +{ + struct Curl_easy *data = conn->data; + struct SingleRequest *k = &data->req; + + do { + const char *name; + size_t namelen; + + /* Parse a single encoding name. */ + while(ISSPACE(*enclist) || *enclist == ',') + enclist++; + + name = enclist; + + for(namelen = 0; *enclist && *enclist != ','; enclist++) + if(!ISSPACE(*enclist)) + namelen = enclist - name + 1; + + /* Special case: chunked encoding is handled at the reader level. */ + if(maybechunked && namelen == 7 && strncasecompare(name, "chunked", 7)) { + k->chunk = TRUE; /* chunks coming our way. */ + Curl_httpchunk_init(conn); /* init our chunky engine. */ + } + else if(namelen) { + const content_encoding *encoding = find_encoding(name, namelen); + contenc_writer *writer; + + if(!k->writer_stack) { + k->writer_stack = new_unencoding_writer(conn, &client_encoding, NULL); + + if(!k->writer_stack) + return CURLE_OUT_OF_MEMORY; + } + + if(!encoding) + encoding = &error_encoding; /* Defer error at stack use. */ + + /* Stack the unencoding stage. */ + writer = new_unencoding_writer(conn, encoding, k->writer_stack); + if(!writer) + return CURLE_OUT_OF_MEMORY; + k->writer_stack = writer; + } + } while(*enclist); + + return CURLE_OK; +} + +#else +/* Stubs for builds without HTTP. */ +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked) +{ + (void) conn; + (void) enclist; + (void) maybechunked; + return CURLE_NOT_BUILT_IN; +} + +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes) +{ + (void) conn; + (void) writer; + (void) buf; + (void) nbytes; + return CURLE_NOT_BUILT_IN; +} + +void Curl_unencode_cleanup(struct connectdata *conn) +{ + (void) conn; +} + +char *Curl_all_content_encodings(void) +{ + return strdup(CONTENT_ENCODING_DEFAULT); /* Satisfy caller. */ +} + +#endif /* CURL_DISABLE_HTTP */ diff --git a/lib/content_encoding.h b/lib/content_encoding.h index 3fadd2899..4cd52be62 100644 --- a/lib/content_encoding.h +++ b/lib/content_encoding.h @@ -7,7 +7,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2011, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -23,26 +23,33 @@ ***************************************************************************/ #include "curl_setup.h" -/* - * Comma-separated list all supported Content-Encodings ('identity' is implied) - */ -#ifdef HAVE_LIBZ -#define ALL_CONTENT_ENCODINGS "deflate, gzip" -/* force a cleanup */ +/* Decoding writer. */ +typedef struct contenc_writer_s contenc_writer; +typedef struct content_encoding_s content_encoding; + +struct contenc_writer_s { + const content_encoding *handler; /* Encoding handler. */ + contenc_writer *downstream; /* Downstream writer. */ + void *params; /* Encoding-specific storage (variable length). */ +}; + +/* Content encoding writer. */ +struct content_encoding_s { + const char *name; /* Encoding name. */ + const char *alias; /* Encoding name alias. */ + CURLcode (*init_writer)(struct connectdata *conn, contenc_writer *writer); + CURLcode (*unencode_write)(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes); + void (*close_writer)(struct connectdata *conn, contenc_writer *writer); + size_t paramsize; +}; + + +CURLcode Curl_build_unencoding_stack(struct connectdata *conn, + const char *enclist, int maybechunked); +CURLcode Curl_unencode_write(struct connectdata *conn, contenc_writer *writer, + const char *buf, size_t nbytes); void Curl_unencode_cleanup(struct connectdata *conn); -#else -#define ALL_CONTENT_ENCODINGS "identity" -#define Curl_unencode_cleanup(x) Curl_nop_stmt -#endif - -CURLcode Curl_unencode_deflate_write(struct connectdata *conn, - struct SingleRequest *req, - ssize_t nread); - -CURLcode -Curl_unencode_gzip_write(struct connectdata *conn, - struct SingleRequest *k, - ssize_t nread); - +char *Curl_all_content_encodings(void); #endif /* HEADER_CURL_CONTENT_ENCODING_H */ diff --git a/lib/http.c b/lib/http.c index 0716f8eea..def51abc3 100644 --- a/lib/http.c +++ b/lib/http.c @@ -3103,7 +3103,7 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, !(conn->handler->protocol & CURLPROTO_RTSP) && data->set.httpreq != HTTPREQ_HEAD) { /* On HTTP 1.1, when connection is not to get closed, but no - Content-Length nor Content-Encoding chunked have been + Content-Length nor Transfer-Encoding chunked have been received, according to RFC2616 section 4.4 point 5, we assume that the server will close the connection to signal the end of the document. */ @@ -3613,51 +3613,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, * of chunks, and a chunk-data set to zero signals the * end-of-chunks. */ - char *start; - - /* Find the first non-space letter */ - start = k->p + 18; - - for(;;) { - /* skip whitespaces and commas */ - while(*start && (ISSPACE(*start) || (*start == ','))) - start++; - - if(checkprefix("chunked", start)) { - k->chunk = TRUE; /* chunks coming our way */ - - /* init our chunky engine */ - Curl_httpchunk_init(conn); - - start += 7; - } - - if(k->auto_decoding) - /* TODO: we only support the first mentioned compression for now */ - break; - - if(checkprefix("identity", start)) { - k->auto_decoding = IDENTITY; - start += 8; - } - else if(checkprefix("deflate", start)) { - k->auto_decoding = DEFLATE; - start += 7; - } - else if(checkprefix("gzip", start)) { - k->auto_decoding = GZIP; - start += 4; - } - else if(checkprefix("x-gzip", start)) { - k->auto_decoding = GZIP; - start += 6; - } - else - /* unknown! */ - break; - - } - + result = Curl_build_unencoding_stack(conn, k->p + 18, TRUE); + if(result) + return result; } else if(checkprefix("Content-Encoding:", k->p) && data->set.str[STRING_ENCODING]) { @@ -3668,21 +3626,9 @@ CURLcode Curl_http_readwrite_headers(struct Curl_easy *data, * 2616). zlib cannot handle compress. However, errors are * handled further down when the response body is processed */ - char *start; - - /* Find the first non-space letter */ - start = k->p + 17; - while(*start && ISSPACE(*start)) - start++; - - /* Record the content-encoding for later use */ - if(checkprefix("identity", start)) - k->auto_decoding = IDENTITY; - else if(checkprefix("deflate", start)) - k->auto_decoding = DEFLATE; - else if(checkprefix("gzip", start) - || checkprefix("x-gzip", start)) - k->auto_decoding = GZIP; + result = Curl_build_unencoding_stack(conn, k->p + 17, FALSE); + if(result) + return result; } else if(checkprefix("Content-Range:", k->p)) { /* Content-Range: bytes [num]- diff --git a/lib/http_chunks.c b/lib/http_chunks.c index 92d773112..161642969 100644 --- a/lib/http_chunks.c +++ b/lib/http_chunks.c @@ -187,49 +187,17 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn, piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize); /* Write the data portion available */ -#ifdef HAVE_LIBZ - switch(conn->data->set.http_ce_skip? - IDENTITY : data->req.auto_decoding) { - case IDENTITY: -#endif - if(!k->ignorebody) { - if(!data->set.http_te_skip) - result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, - piece); - else - result = CURLE_OK; - } -#ifdef HAVE_LIBZ - break; - - case DEFLATE: - /* update data->req.keep.str to point to the chunk data. */ - data->req.str = datap; - result = Curl_unencode_deflate_write(conn, &data->req, - (ssize_t)piece); - break; - - case GZIP: - /* update data->req.keep.str to point to the chunk data. */ - data->req.str = datap; - result = Curl_unencode_gzip_write(conn, &data->req, - (ssize_t)piece); - break; - - default: - failf(conn->data, - "Unrecognized content encoding type. " - "libcurl understands `identity', `deflate' and `gzip' " - "content encodings."); - return CHUNKE_BAD_ENCODING; + if(conn->data->set.http_ce_skip || !k->writer_stack) { + if(!k->ignorebody) + result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece); } -#endif + else + result = Curl_unencode_write(conn, k->writer_stack, datap, piece); if(result) return CHUNKE_WRITE_ERROR; *wrote += piece; - ch->datasize -= piece; /* decrease amount left to expect */ datap += piece; /* move read pointer forward */ length -= piece; /* decrease space left in this round */ diff --git a/lib/transfer.c b/lib/transfer.c index 937477670..8f15b1a15 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -779,48 +779,19 @@ static CURLcode readwrite_data(struct Curl_easy *data, in http_chunks.c. Make sure that ALL_CONTENT_ENCODINGS contains all the encodings handled here. */ -#ifdef HAVE_LIBZ - switch(conn->data->set.http_ce_skip ? - IDENTITY : k->auto_decoding) { - case IDENTITY: -#endif - /* This is the default when the server sends no - Content-Encoding header. See Curl_readwrite_init; the - memset() call initializes k->auto_decoding to zero. */ + if(conn->data->set.http_ce_skip || !k->writer_stack) { if(!k->ignorebody) { - #ifndef CURL_DISABLE_POP3 - if(conn->handler->protocol&PROTO_FAMILY_POP3) + if(conn->handler->protocol & PROTO_FAMILY_POP3) result = Curl_pop3_write(conn, k->str, nread); else #endif /* CURL_DISABLE_POP3 */ - result = Curl_client_write(conn, CLIENTWRITE_BODY, k->str, nread); } -#ifdef HAVE_LIBZ - break; - - case DEFLATE: - /* Assume CLIENTWRITE_BODY; headers are not encoded. */ - if(!k->ignorebody) - result = Curl_unencode_deflate_write(conn, k, nread); - break; - - case GZIP: - /* Assume CLIENTWRITE_BODY; headers are not encoded. */ - if(!k->ignorebody) - result = Curl_unencode_gzip_write(conn, k, nread); - break; - - default: - failf(data, "Unrecognized content encoding type. " - "libcurl understands `identity', `deflate' and `gzip' " - "content encodings."); - result = CURLE_BAD_CONTENT_ENCODING; - break; } -#endif + else + result = Curl_unencode_write(conn, k->writer_stack, k->str, nread); } k->badheader = HEADER_NORMAL; /* taken care of now */ diff --git a/lib/url.c b/lib/url.c index 374ac6cfa..aeb0c9027 100644 --- a/lib/url.c +++ b/lib/url.c @@ -1011,9 +1011,17 @@ CURLcode Curl_setopt(struct Curl_easy *data, CURLoption option, * */ argptr = va_arg(param, char *); - result = setstropt(&data->set.str[STRING_ENCODING], - (argptr && !*argptr)? - ALL_CONTENT_ENCODINGS: argptr); + if(argptr && !*argptr) { + argptr = Curl_all_content_encodings(); + if(!argptr) + result = CURLE_OUT_OF_MEMORY; + else { + result = setstropt(&data->set.str[STRING_ENCODING], argptr); + free(argptr); + } + } + else + result = setstropt(&data->set.str[STRING_ENCODING], argptr); break; case CURLOPT_TRANSFER_ENCODING: diff --git a/lib/urldata.h b/lib/urldata.h index e5aae1430..cfdd8d028 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -464,16 +464,6 @@ struct hostname { #define KEEP_SENDBITS (KEEP_SEND | KEEP_SEND_HOLD | KEEP_SEND_PAUSE) -#ifdef HAVE_LIBZ -typedef enum { - ZLIB_UNINIT, /* uninitialized */ - ZLIB_INIT, /* initialized */ - ZLIB_GZIP_HEADER, /* reading gzip header */ - ZLIB_GZIP_INFLATING, /* inflating gzip stream */ - ZLIB_INIT_GZIP /* initialized in transparent gzip mode */ -} zlibInitState; -#endif - #ifdef CURLRES_ASYNCH struct Curl_async { char *hostname; @@ -561,18 +551,8 @@ struct SingleRequest { enum expect100 exp100; /* expect 100 continue state */ enum upgrade101 upgr101; /* 101 upgrade state */ - int auto_decoding; /* What content encoding. sec 3.5, RFC2616. */ - -#define IDENTITY 0 /* No encoding */ -#define DEFLATE 1 /* zlib deflate [RFC 1950 & 1951] */ -#define GZIP 2 /* gzip algorithm [RFC 1952] */ - -#ifdef HAVE_LIBZ - zlibInitState zlib_init; /* possible zlib init state; - undefined if Content-Encoding header. */ - z_stream z; /* State structure for zlib. */ -#endif - + struct contenc_writer_s *writer_stack; /* Content unencoding stack. */ + /* See sec 3.5, RFC2616. */ time_t timeofdoc; long bodywrites; diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc index 9104f34f5..305f6f318 100644 --- a/tests/data/Makefile.inc +++ b/tests/data/Makefile.inc @@ -45,7 +45,7 @@ test190 test191 test192 test193 test194 test195 test196 test197 test198 \ test199 test200 test201 test202 test203 test204 test205 test206 test207 \ test208 test209 test210 test211 test212 test213 test214 test215 test216 \ test217 test218 test219 test220 test221 test222 test223 test224 test225 \ -test226 test227 test228 test229 test231 test233 test234 \ +test226 test227 test228 test229 test230 test231 test233 test234 \ test235 test236 test237 test238 test239 test240 test241 test242 test243 \ test244 test245 test246 test247 test248 test249 test250 test251 test252 \ test253 test254 test255 test256 test257 test258 test259 test260 test261 \ diff --git a/tests/data/test230 b/tests/data/test230 new file mode 100644 index 000000000..cc166a3f1 --- /dev/null +++ b/tests/data/test230 @@ -0,0 +1,200 @@ + + + +HTTP +HTTP GET +compressed + + +# +# Server-side + + +SFRUUC8xLjEgMjAwIE9LDQpEYXRlOiBNb24sIDI5IE5vdiAyMDA0IDIxOjU2OjUzIEdNVA0KU2Vy +dmVyOiBBcGFjaGUvMS4zLjMxIChEZWJpYW4gR05VL0xpbnV4KSBtb2RfZ3ppcC8xLjMuMjYuMWEg +UEhQLzQuMy45LTEgbW9kX3NzbC8yLjguMjAgT3BlblNTTC8wLjkuN2QgbW9kX3BlcmwvMS4yOQ0K +VmFyeTogQWNjZXB0LUVuY29kaW5nDQpDb250ZW50LVR5cGU6IHRleHQvaHRtbDsgY2hhcnNldD1J +U08tODg1OS0xDQpDb250ZW50LUVuY29kaW5nOiBkZWZsYXRlLCBpZGVudGl0eSwgZ3ppcA0KQ29u +dGVudC1MZW5ndGg6IDEzMjgNCg0KH4sIAPpl+lkAAwEZBeb6eJzcWNtu4zYQfTeQf2D91AK2brYT +OXC0KHLZBM02wdoFuk8GLdE2G0kUSMq5PPTbO5QoibEcr3eDvgRIYnLmnMPhDEmTmXx6SmK0IVxQ +lp51XcvpIpKGLKLp6qx7M73r+/5o3He7n4LO5JeLu/PZt/tLlHH2DwllP6ZCAhBNv01nl19Qdy1l +dmrbS07EOiFYWimR9gKHDySN7GXS1zzRd6yhFcmoC6JbWkEHocqm2k1vTqPAPXa9iW0YSkSEJZnj +KCKAGI/9vuP3PRc5w1PHPfXGE9vwG4Q8Ux9R4DnOsO86fddHjnfqwc9AUyrEqzhSnJC5WDMugzDn +cR2OYW/jl3kcF3CE0wjFdNGmFhAdHhGhljpnSVJSUoIkY7UAx/wZLRlHYUxJKvuCRgT99fUWSY5T +sYSKWjCLRshQ3hkMogLhcgSlWqlwVd8ljYk46uRCddQY4jmV+MlCNxKJPMtAX6Dr2ey+V/yd9tAV +tI86V0X74uZ81kOzy9s/L+Hz9uJ3gF3d3F72iuE/391fX36FNgzwSGKIqZTqZ0zInm7m0AoZe6BE +FNooz2KGIxgCllqekKiZdQ9lWIhHxiPVhMjSPFkU9un09qgTEi7pkoZQVzD9QTj4mChDgWo8wQjF +tCAbGXsknERHncVzlaQekmvyZsarslhHndkaqAjD74KmajMJSG2dapVgBpsOec5RJ8bpKscrIooY +SLqhnKUJDCBAR5fQWBsbKnFM5fNchIyTYHTiD63RycTesm+BM8JDkAwGlntsYCvzFhrm8wB7bWwg +C5Ne1yzLY8ybsY5HY4hhCMt529MiVAO6A8t3XxFeh2I4ymCc0Su0EQ7HxbnhWyNnYuuO6ZmHLAdd +z6282vAKUw7iD2qMMYDIFyLkNJNwRIpgoE6H16YSBqVPw/Vc7eXggixxHsuJbRpLGNR/Xh1gGZQ9 +2HloVielrdaLPbFbrEZszRLythAsYMpLFXV42iZD69YCjaZcvRwuB2CtpGiNyOLFO1wEwFpE0RqR +F5odLgJgLaJojUi4hj1GYrY6XKqmaMFGopHlWXK4IIC1lKI1IhFZHC4CYC2iaI0IE0+HiwBYiyia +US8RqfPyB2pWEqq6abqxzHMOaRMk0Ou36hqF2YgfKMlGVMXYCENE3RwOV1FoLVMQG52Ecs744Uol +XmtpslnXhAVVraBZemIKhxyk4MvNzP4bncPpASmjeYJuS8fErhAar76n5JyTmNSZa5nn+v4WnFiu +Z8EF6Q33G2x1rzo5dvxRi1hdsNocdS/afXHaBSznYu+azATOUQITXjM5l2v4qoactUwlEucSbjKi +DqnsV93aoE9gnFISo6kkKXzDrya26WxRoEq76/7vAq8ioopsIFt0zmIS3D2mhNe4wlRFapuhVr1q +CasveE4TmmJpzk5yuCEUtYGC1p2W1/OO97kHe7n7nK7v7+W6e8eFpbE/6r1u93i4zz3eS/bHe73O +Xrc7+k7c3wlsf2SD1tjl/W67/LAmMngywUMMrqO1Tm18RvI5I2ddTkJ4HSibeknVi7LBmRvZUUPt +cuwk6nsLuE+Gqhg7XTuZxuOsRd1+uL3FlVSqDQV2uLOjX/Vt6redWiW23mkN4u28seLehuP/L2nO +T2dsOHhnxtT76uMnyvUGI/cdmXqBp9jHz9LAc4Yn78jSNaFJhOOPn6jhcDTw3pGosA9PffEzeTIs ++qyv/ysUdP4DAAD//4IzEaNjAAAAAP//AwDdOI7RbCh2MRkFAAA= + + + +HTTP/1.1 200 OK +Date: Mon, 29 Nov 2004 21:56:53 GMT +Server: Apache/1.3.31 (Debian GNU/Linux) mod_gzip/1.3.26.1a PHP/4.3.9-1 mod_ssl/2.8.20 OpenSSL/0.9.7d mod_perl/1.29 +Vary: Accept-Encoding +Content-Type: text/html; charset=ISO-8859-1 +Content-Encoding: deflate, identity, gzip +Content-Length: 1328 + + + + + + 1612 + 1998-08-21 04:01:29 + 2004-10-18 02:22:23 + curl + curl and libcurl + Command line tool and library for client-side URL transfers. + curl and libcurl is a tool for transferring files +using URL syntax. It supports HTTP, HTTPS, FTP, +FTPS, DICT, TELNET, LDAP, FILE, and GOPHER, as +well as HTTP-post, HTTP-put, cookies, FTP upload, +resumed transfers, passwords, portnumbers, SSL +certificates, Kerberos, and proxies. It is powered +by libcurl, the client-side URL transfer library. +There are bindings to libcurl for over 20 +languages and environments. + + 5784.57 + 3.16 + 169 + 6594.54 + 13.81 + 105 + 8.50 + 21 + 183 + 323 + Default + http://freshmeat.net/projects/curl/ + http://freshmeat.net/redir/curl/1612/url_homepage/ + http://freshmeat.net/redir/curl/1612/url_tgz/ + http://freshmeat.net/redir/curl/1612/url_bz2/ + http://freshmeat.net/redir/curl/1612/url_zip/ + http://freshmeat.net/redir/curl/1612/url_changelog/ + http://freshmeat.net/redir/curl/1612/url_rpm/ + http://freshmeat.net/redir/curl/1612/url_deb/ + http://freshmeat.net/redir/curl/1612/url_osx/ + http://freshmeat.net/redir/curl/1612/url_bsdport/ + + http://freshmeat.net/redir/curl/1612/url_cvs/ + http://freshmeat.net/redir/curl/1612/url_list/ + http://freshmeat.net/redir/curl/1612/url_mirror/ + + MIT/X Consortium License + + 7.12.2 + 176085 + 2004-10-18 02:22:23 + + + + + Daniel Stenberg + http://freshmeat.net/~bagder/ + Owner + + + + 12 + 226 + 3 + 2 + 188 + 216 + 200 + 220 + 164 + 90 + 89 + 809 + 150 + 224 + 900 + 839 + + + + 0 + 7464 + 7464 + OpenSSL (Default) + + + 0 + 0 + 7443 + OpenLDAP + + + 0 + 0 + 12351 + zlib + + + 0 + 0 + 32047 + Heimdal + + + 0 + 0 + 44532 + c-ares + + + + + + + + +# +# Client-side + + +libz + + +http + + +HTTP GET multiply compressed content + + +http://%HOSTIP:%HTTPPORT/230 --compressed + + + +# +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET /230 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* +Accept-Encoding: deflate, gzip + + + +