Dan Fandrich's gzip handling fix

This commit is contained in:
Daniel Stenberg 2004-10-27 21:46:11 +00:00
parent 96cf615e9d
commit 8bfcae65ef
2 changed files with 148 additions and 78 deletions

27
CHANGES
View File

@ -7,6 +7,33 @@
Changelog
Daniel (27 October 2004)
- Dan Fandrich:
An improvement to the gzip handling of libcurl. There were two problems with
the old version: it was possible for a malicious gzip file to cause libcurl
to leak memory, as a buffer was malloced to hold the header and never freed
if the header ended with no file contents. The second problem is that the
64 KiB decompression buffer was allocated on the stack, which caused
unexpectedly high stack usage and overflowed the stack on some systems
(someone complained about that in the mailing list about a year ago).
Both problems are fixed by this patch. The first one is fixed when a recent
(1.2) version of zlib is used, as it takes care of gzip header parsing
itself. A check for the version number is done at run-time and libcurl uses
that feature if it's present. I've created a define OLD_ZLIB_SUPPORT that
can be commented out to save some code space if libcurl is guaranteed to be
using a 1.2 version of zlib.
The second problem is solved by dynamically allocating the memory buffer
instead of storing it on the stack. The allocation/free is done for every
incoming packet, which is suboptimal, but should be dwarfed by the actual
decompression computation.
I've also factored out some common code between deflate and gzip to reduce
the code footprint somewhat. I've tested the gzip code on a few test files
and I tried deflate using the freshmeat.net server, and it all looks OK. I
didn't try running it with valgrind, however.
- Added a --retry option to curl that takes a numerical option for the number
of times the operation should be retried. It is retried if a transient error
is detected or if a timeout occurred. By default, it will first wait one

View File

@ -36,6 +36,10 @@
#include "memdebug.h"
/* Comment this out if zlib is always going to be at least ver. 1.2.0.4
(doing so will reduce code size slightly). */
#define OLD_ZLIB_SUPPORT 1
#define DSIZ 0x10000 /* buffer size for decompressed data */
#define GZIP_MAGIC_0 0x1f
@ -49,14 +53,23 @@
#define COMMENT 0x10 /* bit 4 set: file comment present */
#define RESERVED 0xE0 /* bits 5..7: reserved */
enum zlibState {
ZLIB_UNINIT, /* uninitialized */
ZLIB_INIT, /* initialized */
ZLIB_GZIP_HEADER, /* reading gzip header */
ZLIB_GZIP_INFLATING, /* inflating gzip stream */
ZLIB_INIT_GZIP /* initialized in transparent gzip mode */
};
static CURLcode
process_zlib_error(struct SessionHandle *data, z_stream *z)
process_zlib_error(struct SessionHandle *data,
z_stream *z)
{
if (z->msg)
failf (data, "Error while processing content unencoding.\n%s",
failf (data, "Error while processing content unencoding: %s",
z->msg);
else
failf (data, "Error while processing content unencoding.\n"
failf (data, "Error while processing content unencoding: "
"Unknown failure within decompression software.");
return CURLE_BAD_CONTENT_ENCODING;
@ -66,55 +79,48 @@ static CURLcode
exit_zlib(z_stream *z, bool *zlib_init, CURLcode result)
{
inflateEnd(z);
*zlib_init = 0;
*zlib_init = ZLIB_UNINIT;
return result;
}
CURLcode
Curl_unencode_deflate_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread)
static CURLcode
inflate_stream(struct SessionHandle *data,
struct Curl_transfer_keeper *k)
{
z_stream *z = &k->z; /* zlib state structure */
int status; /* zlib status */
CURLcode result = CURLE_OK; /* Curl_client_write status */
char decomp[DSIZ]; /* Put the decompressed data here. */
z_stream *z = &k->z; /* zlib state structure */
char *decomp; /* Put the decompressed data here. */
/* Initialize zlib? */
if (!k->zlib_init) {
z->zalloc = (alloc_func)Z_NULL;
z->zfree = (free_func)Z_NULL;
z->opaque = 0;
z->next_in = NULL;
z->avail_in = 0;
if (inflateInit(z) != Z_OK)
return process_zlib_error(data, z);
k->zlib_init = 1;
/* Dynamically allocate a buffer for decompression because it's uncommonly
large to hold on the stack */
decomp = (char*)malloc(DSIZ);
if (decomp == NULL) {
return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
}
/* Set the compressed input when this function is called */
z->next_in = (Bytef *)k->str;
z->avail_in = (uInt)nread;
/* because the buffer size is fixed, iteratively decompress
and transfer to the client via client_write. */
/* because the buffer size is fixed, iteratively decompress and transfer to
the client via client_write. */
for (;;) {
/* (re)set buffer for decompressed output for every iteration */
z->next_out = (Bytef *)&decomp[0];
z->next_out = (Bytef *)decomp;
z->avail_out = DSIZ;
status = inflate(z, Z_SYNC_FLUSH);
if (status == Z_OK || status == Z_STREAM_END) {
if (DSIZ - z->avail_out) {
if(DSIZ - z->avail_out) {
result = Curl_client_write(data, CLIENTWRITE_BODY, decomp,
DSIZ - z->avail_out);
/* if !CURLE_OK, clean up, return */
if (result)
if (result) {
free(decomp);
return exit_zlib(z, &k->zlib_init, result);
}
}
/* Done?; clean up, return */
/* Done? clean up, return */
if (status == Z_STREAM_END) {
free(decomp);
if (inflateEnd(z) == Z_OK)
return exit_zlib(z, &k->zlib_init, result);
else
@ -122,15 +128,47 @@ Curl_unencode_deflate_write(struct SessionHandle *data,
}
/* Done with these bytes, exit */
if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0)
if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0) {
free(decomp);
return result;
}
}
else { /* Error; exit loop, handle below */
free(decomp);
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
}
/* Will never get here */
}
CURLcode
Curl_unencode_deflate_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread)
{
z_stream *z = &k->z; /* zlib state structure */
/* Initialize zlib? */
if (k->zlib_init == ZLIB_UNINIT) {
z->zalloc = (alloc_func)Z_NULL;
z->zfree = (free_func)Z_NULL;
z->opaque = 0;
z->next_in = NULL;
z->avail_in = 0;
if (inflateInit(z) != Z_OK)
return process_zlib_error(data, z);
k->zlib_init = ZLIB_INIT;
}
/* Set the compressed input when this function is called */
z->next_in = (Bytef *)k->str;
z->avail_in = (uInt)nread;
/* Now uncompress the data */
return inflate_stream(data, k);
}
#ifdef OLD_ZLIB_SUPPORT
/* Skip over the gzip header */
static enum {
GZIP_OK,
@ -213,38 +251,67 @@ static enum {
*headerlen = totallen - len;
return GZIP_OK;
}
#endif
CURLcode
Curl_unencode_gzip_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread)
{
int status; /* zlib status */
CURLcode result = CURLE_OK; /* Curl_client_write status */
char decomp[DSIZ]; /* Put the decompressed data here. */
z_stream *z = &k->z; /* zlib state structure */
/* Initialize zlib? */
if (!k->zlib_init) {
if (k->zlib_init == ZLIB_UNINIT) {
z->zalloc = (alloc_func)Z_NULL;
z->zfree = (free_func)Z_NULL;
z->opaque = 0;
z->next_in = NULL;
z->avail_in = 0;
if (inflateInit2(z, -MAX_WBITS) != Z_OK)
return process_zlib_error(data, z);
k->zlib_init = 1; /* Initial call state */
if (strcmp(zlibVersion(), "1.2.0.4") >= 0) {
/* zlib ver. >= 1.2.0.4 supports transparent gzip decompressing */
if (inflateInit2(z, MAX_WBITS+32) != Z_OK) {
return process_zlib_error(data, z);
}
k->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */
} else {
/* we must parse the gzip header ourselves */
if (inflateInit2(z, -MAX_WBITS) != Z_OK) {
return process_zlib_error(data, z);
}
k->zlib_init = ZLIB_INIT; /* Initial call state */
}
}
if (k->zlib_init == ZLIB_INIT_GZIP) {
/* Let zlib handle the gzip decompression entirely */
z->next_in = (Bytef *)k->str;
z->avail_in = (uInt)nread;
/* Now uncompress the data */
return inflate_stream(data, k);
}
#ifndef OLD_ZLIB_SUPPORT
/* Support for old zlib versions is compiled away and we are running with
an old version, so return an error. */
return exit_zlib(z, &k->zlib_init, CURLE_FUNCTION_NOT_FOUND);
#else
/* This next mess is to get around the potential case where there isn't
* enough data passed in to skip over the gzip header. If that happens, we
* malloc a block and copy what we have then wait for the next call. If
* there still isn't enough (this is definitely a worst-case scenario), we
* make the block bigger, copy the next part in and keep waiting.
*
* This is only required with zlib versions < 1.2.0.4 as newer versions
* can handle the gzip header themselves.
*/
switch (k->zlib_init) {
/* Skip over gzip header? */
if (k->zlib_init == 1) {
case ZLIB_INIT:
{
/* Initial call state */
ssize_t hlen;
@ -252,7 +319,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
case GZIP_OK:
z->next_in = (Bytef *)k->str + hlen;
z->avail_in = (uInt)(nread - hlen);
k->zlib_init = 3; /* Inflating stream state */
k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */
break;
case GZIP_UNDERFLOW:
@ -269,7 +336,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
}
memcpy(z->next_in, k->str, z->avail_in);
k->zlib_init = 2; /* Need more gzip header data state */
k->zlib_init = ZLIB_GZIP_HEADER; /* Need more gzip header data state */
/* We don't have any data to inflate yet */
return CURLE_OK;
@ -279,7 +346,10 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
}
}
else if (k->zlib_init == 2) {
break;
case ZLIB_GZIP_HEADER:
{
/* Need more gzip header data state */
ssize_t hlen;
unsigned char *oldblock = z->next_in;
@ -300,7 +370,7 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
/* Don't point into the malloced block since we just freed it */
z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in;
z->avail_in = (uInt)(z->avail_in - hlen);
k->zlib_init = 3; /* Inflating stream state */
k->zlib_init = ZLIB_GZIP_INFLATING; /* Inflating stream state */
break;
case GZIP_UNDERFLOW:
@ -314,10 +384,14 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
}
}
else {
break;
case ZLIB_GZIP_INFLATING:
default:
/* Inflating stream state */
z->next_in = (Bytef *)k->str;
z->avail_in = (uInt)nread;
break;
}
if (z->avail_in == 0) {
@ -325,39 +399,8 @@ Curl_unencode_gzip_write(struct SessionHandle *data,
return CURLE_OK;
}
/* because the buffer size is fixed, iteratively decompress and transfer to
the client via client_write. */
for (;;) {
/* (re)set buffer for decompressed output for every iteration */
z->next_out = (Bytef *)&decomp[0];
z->avail_out = DSIZ;
status = inflate(z, Z_SYNC_FLUSH);
if (status == Z_OK || status == Z_STREAM_END) {
if(DSIZ - z->avail_out) {
result = Curl_client_write(data, CLIENTWRITE_BODY, decomp,
DSIZ - z->avail_out);
/* if !CURLE_OK, clean up, return */
if (result)
return exit_zlib(z, &k->zlib_init, result);
}
/* Done?; clean up, return */
/* We should really check the gzip CRC here */
if (status == Z_STREAM_END) {
if (inflateEnd(z) == Z_OK)
return exit_zlib(z, &k->zlib_init, result);
else
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
/* Done with these bytes, exit */
if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0)
return result;
}
else { /* Error; exit loop, handle below */
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
}
/* We've parsed the header, now uncompress the data */
return inflate_stream(data, k);
#endif
}
#endif /* HAVE_LIBZ */