1
0
mirror of https://github.com/moparisthebest/curl synced 2024-12-21 23:58:49 -05:00

Dan Fandrich's gzip patch applied

This commit is contained in:
Daniel Stenberg 2003-04-11 08:49:20 +00:00
parent 0b0a88b78d
commit 019c4088cf
6 changed files with 277 additions and 26 deletions

View File

@ -5,15 +5,15 @@
HTTP/1.1 [RFC 2616] specifies that a client may request that a server encode
its response. This is usually used to compress a response using one of a set
of commonly available compression techniques. These schemes are `deflate'
(the zlib algorithm), `gzip' and `compress' [sec 3.5, RFC 2616]. A client
requests that the sever perform an encoding by including an Accept-Encoding
header in the request document. The value of the header should be one of the
recognized tokens `deflate', ... (there's a way to register new
schemes/tokens, see sec 3.5 of the spec). A server MAY honor the client's
encoding request. When a response is encoded, the server includes a
Content-Encoding header in the response. The value of the Content-Encoding
header indicates which scheme was used to encode the data.
of commonly available compression techniques. These schemes are `deflate' (the
zlib algorithm), `gzip' and `compress' [sec 3.5, RFC 2616]. A client requests
that the sever perform an encoding by including an Accept-Encoding header in
the request document. The value of the header should be one of the recognized
tokens `deflate', ... (there's a way to register new schemes/tokens, see sec
3.5 of the spec). A server MAY honor the client's encoding request. When a
response is encoded, the server includes a Content-Encoding header in the
response. The value of the Content-Encoding header indicates which scheme was
used to encode the data.
A client may tell a server that it can understand several different encoding
schemes. In this case the server may choose any one of those and use it to
@ -24,11 +24,10 @@ information on the Accept-Encoding header.
* Current support for content encoding:
I added support for the 'deflate' content encoding to both libcurl and curl.
Both regular and chunked transfers should work although I've tested only the
former. The library zlib is required for this feature. Places where I
modified the source code are commented and typically include my initials and
the date (e.g., 08/29/02 jhrg).
Support for the 'deflate' and 'gzip' content encoding are supported by
libcurl. Both regular and chunked transfers should work fine. The library
zlib is required for this feature. 'deflate' support was added by James
Gallagher, and support for the 'gzip' encoding was added by Dan Fandrich.
* The libcurl interface:
@ -39,15 +38,21 @@ To cause libcurl to request a content encoding use:
where <string> is the intended value of the Accept-Encoding header.
Currently, libcurl only understands how to process responses that use the
`deflate' Content-Encoding, so the only value for CURLOPT_ENCODING that will
work (besides "identity," which does nothing) is "deflate." If a response is
encoded using either the `gzip' or `compress' methods, libcurl will return an
error indicating that the response could not be decoded. If <string> is null
or empty no Accept-Encoding header is generated.
"deflate" or "gzip" Content-Encoding, so the only values for CURLOPT_ENCODING
that will work (besides "identity," which does nothing) are "deflate" and
"gzip" If a response is encoded using the "compress" or methods, libcurl will
return an error indicating that the response could not be decoded. If
<string> is NULL or empty no Accept-Encoding header is generated.
The CURLOPT_ENCODING must be set to any non-NULL value for content to be
automatically decoded. If it is not set and the server still sends encoded
content (despite not having been asked), the data is returned in its raw form
and the Content-Encoding type is not checked.
* The curl interface:
Use the --compressed option with curl to cause it to ask servers to compress
responses using deflate.
responses using deflate.
James Gallagher <jgallagher@gso.uri.edu>
Dan Fandrich <dan@coneharvesters.com>

View File

@ -25,6 +25,9 @@
#ifdef HAVE_LIBZ
#include <stdlib.h>
#include <string.h>
#include "urldata.h"
#include <curl/curl.h>
#include <curl/types.h>
@ -32,6 +35,16 @@
#define DSIZ 4096 /* buffer size for decompressed data */
#define GZIP_MAGIC_0 0x1f
#define GZIP_MAGIC_1 0x8b
/* gzip flag byte */
#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
#define COMMENT 0x10 /* bit 4 set: file comment present */
#define RESERVED 0xE0 /* bits 5..7: reserved */
static CURLcode
process_zlib_error(struct SessionHandle *data, z_stream *z)
@ -74,7 +87,7 @@ Curl_unencode_deflate_write(struct SessionHandle *data,
k->zlib_init = 1;
}
/* Set the compressed input when this fucntion is called */
/* Set the compressed input when this function is called */
z->next_in = (Bytef *)k->str;
z->avail_in = nread;
@ -111,4 +124,225 @@ Curl_unencode_deflate_write(struct SessionHandle *data,
}
}
}
/* Skip over the gzip header */
static enum {
GZIP_OK,
GZIP_BAD,
GZIP_UNDERFLOW
}
check_gzip_header(unsigned char const *data, ssize_t len, ssize_t *headerlen)
{
int method, flags;
const ssize_t totallen = len;
/* The shortest header is 10 bytes */
if (len < 10)
return GZIP_UNDERFLOW;
if ((data[0] != GZIP_MAGIC_0) || (data[1] != GZIP_MAGIC_1))
return GZIP_BAD;
method = data[2];
flags = data[3];
if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
/* Can't handle this compression method or unknown flag */
return GZIP_BAD;
}
/* Skip over time, xflags, OS code and all previous bytes */
len -= 10;
data += 10;
if (flags & EXTRA_FIELD) {
ssize_t extra_len;
if (len < 2)
return GZIP_UNDERFLOW;
extra_len = (data[1] << 8) | data[0];
if (len < (extra_len+2))
return GZIP_UNDERFLOW;
len -= (extra_len + 2);
}
if (flags & ORIG_NAME) {
/* Skip over NUL-terminated file name */
while (len && *data) {
--len;
++data;
}
if (!len || *data)
return GZIP_UNDERFLOW;
/* Skip over the NUL */
--len;
++data;
}
if (flags & COMMENT) {
/* Skip over NUL-terminated comment */
while (len && *data) {
--len;
++data;
}
if (!len || *data)
return GZIP_UNDERFLOW;
/* Skip over the NUL */
--len;
++data;
}
if (flags & HEAD_CRC) {
if (len < 2)
return GZIP_UNDERFLOW;
len -= 2;
data += 2;
}
*headerlen = totallen - len;
return GZIP_OK;
}
CURLcode
Curl_unencode_gzip_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread)
{
int status; /* zlib status */
int result; /* Curl_client_write status */
char decomp[DSIZ]; /* Put the decompressed data here. */
z_stream *z = &k->z; /* zlib state structure */
/* Initialize zlib? */
if (!k->zlib_init) {
z->zalloc = (alloc_func)Z_NULL;
z->zfree = (free_func)Z_NULL;
z->opaque = 0; /* of dubious use 08/27/02 jhrg */
if (inflateInit2(z, -MAX_WBITS) != Z_OK)
return process_zlib_error(data, z);
k->zlib_init = 1; /* Initial call state */
}
/* This next mess is to get around the potential case where there isn't
enough data passed in to skip over the gzip header. If that happens,
we malloc a block and copy what we have then wait for the next call. If
there still isn't enough (this is definitely a worst-case scenario), we
make the block bigger, copy the next part in and keep waiting. */
/* Skip over gzip header? */
if (k->zlib_init == 1) {
/* Initial call state */
ssize_t hlen;
switch (check_gzip_header((unsigned char *)k->str, nread, &hlen)) {
case GZIP_OK:
z->next_in = (Bytef *)k->str + hlen;
z->avail_in = nread - hlen;
k->zlib_init = 3; /* Inflating stream state */
break;
case GZIP_UNDERFLOW:
/* We need more data so we can find the end of the gzip header */
z->avail_in = nread;
z->next_in = malloc(z->avail_in);
if (z->next_in == NULL) {
return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
}
memcpy(z->next_in, k->str, z->avail_in);
k->zlib_init = 2; /* Need more gzip header data state */
/* We don't have any data to inflate yet */
return CURLE_OK;
case GZIP_BAD:
default:
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
}
else if (k->zlib_init == 2) {
/* Need more gzip header data state */
ssize_t hlen;
unsigned char *oldblock = z->next_in;
z->avail_in += nread;
z->next_in = realloc(z->next_in, z->avail_in);
if (z->next_in == NULL) {
free(oldblock);
return exit_zlib(z, &k->zlib_init, CURLE_OUT_OF_MEMORY);
}
/* Append the new block of data to the previous one */
memcpy(z->next_in + z->avail_in - nread, k->str, nread);
switch (check_gzip_header(z->next_in, z->avail_in, &hlen)) {
case GZIP_OK:
/* This is the zlib stream data */
free(z->next_in);
/* Don't point into the malloced block since we just freed it */
z->next_in = (Bytef *)k->str + hlen + nread - z->avail_in;
z->avail_in = z->avail_in - hlen;
k->zlib_init = 3; /* Inflating stream state */
break;
case GZIP_UNDERFLOW:
/* We still don't have any data to inflate! */
return CURLE_OK;
case GZIP_BAD:
default:
free(z->next_in);
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
}
else {
/* Inflating stream state */
z->next_in = (Bytef *)k->str;
z->avail_in = nread;
}
if (z->avail_in == 0) {
/* We don't have any data to inflate; wait until next time */
return CURLE_OK;
}
/* because the buffer size is fixed, iteratively decompress
and transfer to the client via client_write. */
for (;;) {
/* (re)set buffer for decompressed output for every iteration */
z->next_out = (Bytef *)&decomp[0];
z->avail_out = DSIZ;
status = inflate(z, Z_SYNC_FLUSH);
if (status == Z_OK || status == Z_STREAM_END) {
result = Curl_client_write(data, CLIENTWRITE_BODY, decomp,
DSIZ - z->avail_out);
/* if !CURLE_OK, clean up, return */
if (result) {
return exit_zlib(z, &k->zlib_init, result);
}
/* Done?; clean up, return */
/* We should really check the gzip CRC here */
if (status == Z_STREAM_END) {
if (inflateEnd(z) == Z_OK)
return exit_zlib(z, &k->zlib_init, result);
else
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
/* Done with these bytes, exit */
if (status == Z_OK && z->avail_in == 0 && z->avail_out > 0)
return result;
}
else { /* Error; exit loop, handle below */
return exit_zlib(z, &k->zlib_init, process_zlib_error(data, z));
}
}
}
#endif /* HAVE_LIBZ */

View File

@ -24,3 +24,8 @@
CURLcode Curl_unencode_deflate_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread);
CURLcode
Curl_unencode_gzip_write(struct SessionHandle *data,
struct Curl_transfer_keeper *k,
ssize_t nread);

View File

@ -190,6 +190,9 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
break;
case GZIP:
result = Curl_unencode_gzip_write(conn->data, &conn->keep, piece);
break;
case COMPRESS:
default:
failf (conn->data,

View File

@ -648,7 +648,7 @@ CURLcode Curl_readwrite(struct connectdata *conn,
data->set.encoding) {
/*
* Process Content-Encoding. Look for the values: identity, gzip,
* defalte, compress, x-gzip and x-compress. x-gzip and
* deflate, compress, x-gzip and x-compress. x-gzip and
* x-compress are the same as gzip and compress. (Sec 3.5 RFC
* 2616). zlib cannot handle compress, and gzip is not currently
* implemented. However, errors are handled further down when the
@ -888,7 +888,7 @@ CURLcode Curl_readwrite(struct connectdata *conn,
if(k->badheader < HEADER_ALLBAD) {
/* This switch handles various content encodings. If there's an
error here, be sure to check over the almost identical code
in http_chunk.c. 08/29/02 jhrg */
in http_chunks.c. 08/29/02 jhrg */
#ifdef HAVE_LIBZ
switch (k->content_encoding) {
case IDENTITY:
@ -907,8 +907,12 @@ CURLcode Curl_readwrite(struct connectdata *conn,
result = Curl_unencode_deflate_write(data, k, nread);
break;
case GZIP: /* FIXME 08/27/02 jhrg */
case COMPRESS:
case GZIP:
/* Assume CLIENTWRITE_BODY; headers are not encoded. */
result = Curl_unencode_gzip_write(data, k, nread);
break;
case COMPRESS: /* FIXME 08/27/02 jhrg */
default:
failf (data, "Unrecognized content encoding type. "
"libcurl understands `identity' and `deflate' "

View File

@ -296,7 +296,7 @@ struct Curl_transfer_keeper {
#ifdef HAVE_LIBZ
bool zlib_init; /* True if zlib already initialized;
undefined if Content-Encdoing header. */
undefined if Content-Encoding header. */
z_stream z; /* State structure for zlib. */
#endif