content_encoding: rework zlib_inflate

- When zlib version is < 1.2.0.4, process gzip trailer before considering
extra data as an error.
- Inflate with Z_BLOCK instead of Z_SYNC_FLUSH to maximize correct data
and minimize corrupt data output.
- Do not try to restart deflate decompression in raw mode if output has
started or if the leading data is not available anymore.
- New test 232 checks inflating raw-deflated content.

Closes #2068
This commit is contained in:
Patrick Monnerat 2017-12-20 16:02:42 +01:00
parent e639d4ca4d
commit 4acc9d3d1a
3 changed files with 305 additions and 54 deletions

View File

@ -73,7 +73,9 @@
typedef enum {
ZLIB_UNINIT, /* uninitialized */
ZLIB_INIT, /* initialized */
ZLIB_INFLATING, /* Inflating started. */
ZLIB_GZIP_HEADER, /* reading gzip header */
ZLIB_GZIP_TRAILER, /* reading gzip trailer */
ZLIB_GZIP_INFLATING, /* inflating gzip stream */
ZLIB_INIT_GZIP /* initialized in transparent gzip mode */
} zlibInitState;
@ -81,6 +83,7 @@ typedef enum {
/* Writer parameters. */
typedef struct {
zlibInitState zlib_init; /* zlib init state */
uInt trailerlen; /* Remaining trailer byte count. */
z_stream z; /* State structure for zlib. */
} zlib_params;
@ -130,80 +133,120 @@ exit_zlib(struct connectdata *conn,
return result;
}
static CURLcode
inflate_stream(struct connectdata *conn, contenc_writer *writer)
static CURLcode process_trailer(struct connectdata *conn, zlib_params *zp)
{
z_stream *z = &zp->z;
CURLcode result = CURLE_OK;
uInt len = z->avail_in < zp->trailerlen? z->avail_in: zp->trailerlen;
/* Consume expected trailer bytes. Terminate stream if exhausted.
Issue an error if unexpected bytes follow. */
zp->trailerlen -= len;
z->avail_in -= len;
z->next_in += len;
if(z->avail_in)
result = CURLE_WRITE_ERROR;
if(result || !zp->trailerlen)
result = exit_zlib(conn, z, &zp->zlib_init, result);
else {
/* Only occurs for gzip with zlib < 1.2.0.4. */
zp->zlib_init = ZLIB_GZIP_TRAILER;
}
return result;
}
static CURLcode inflate_stream(struct connectdata *conn,
contenc_writer *writer, zlibInitState started)
{
zlib_params *zp = (zlib_params *) &writer->params;
int allow_restart = 1;
z_stream *z = &zp->z; /* zlib state structure */
uInt nread = z->avail_in;
Bytef *orig_in = z->next_in;
int status; /* zlib status */
bool done = FALSE;
CURLcode result = CURLE_OK; /* Curl_client_write status */
char *decomp; /* Put the decompressed data here. */
/* Check state. */
if(zp->zlib_init != ZLIB_INIT &&
zp->zlib_init != ZLIB_INFLATING &&
zp->zlib_init != ZLIB_INIT_GZIP &&
zp->zlib_init != ZLIB_GZIP_INFLATING)
return exit_zlib(conn, z, &zp->zlib_init, CURLE_WRITE_ERROR);
/* Dynamically allocate a buffer for decompression because it's uncommonly
large to hold on the stack */
decomp = malloc(DSIZ);
if(decomp == NULL) {
if(decomp == NULL)
return exit_zlib(conn, z, &zp->zlib_init, CURLE_OUT_OF_MEMORY);
}
/* because the buffer size is fixed, iteratively decompress and transfer to
the client via client_write. */
for(;;) {
if(z->avail_in == 0) {
free(decomp);
return result;
}
the client via downstream_write function. */
while(!done) {
done = TRUE;
/* (re)set buffer for decompressed output for every iteration */
z->next_out = (Bytef *) decomp;
z->avail_out = DSIZ;
status = inflate(z, Z_SYNC_FLUSH);
status = inflate(z, Z_BLOCK);
/* Flush output data if some. */
if(z->avail_out != DSIZ) {
if(status == Z_OK || status == Z_STREAM_END) {
allow_restart = 0;
zp->zlib_init = started; /* Data started. */
result = Curl_unencode_write(conn, writer->downstream, decomp,
DSIZ - z->avail_out);
/* if !CURLE_OK, clean up, return */
if(result) {
free(decomp);
return exit_zlib(conn, z, &zp->zlib_init, result);
exit_zlib(conn, z, &zp->zlib_init, result);
break;
}
}
}
/* Done? clean up, return */
if(status == Z_STREAM_END) {
free(decomp);
return exit_zlib(conn, z, &zp->zlib_init, result);
}
/* Done with these bytes, exit */
/* status is always Z_OK at this point! */
continue;
}
else if(allow_restart && status == Z_DATA_ERROR) {
/* Dispatch by inflate() status. */
switch(status) {
case Z_OK:
/* Always loop: there may be unflushed latched data in zlib state. */
done = FALSE;
break;
case Z_BUF_ERROR:
/* No more data to flush: just exit loop. */
break;
case Z_STREAM_END:
result = process_trailer(conn, zp);
break;
case Z_DATA_ERROR:
/* some servers seem to not generate zlib headers, so this is an attempt
to fix and continue anyway */
if(zp->zlib_init == ZLIB_INIT) {
/* Do not use inflateReset2(): only available since zlib 1.2.3.4. */
(void) inflateEnd(z); /* don't care about the return code */
if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
free(decomp);
zp->zlib_init = ZLIB_UNINIT; /* inflateEnd() already called. */
return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
}
if(inflateInit2(z, -MAX_WBITS) == Z_OK) {
z->next_in = orig_in;
z->avail_in = nread;
allow_restart = 0;
continue;
zp->zlib_init = ZLIB_INFLATING;
done = FALSE;
break;
}
zp->zlib_init = ZLIB_UNINIT; /* inflateEnd() already called. */
}
/* FALLTHROUGH */
default:
result = exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
break;
}
}
else { /* Error; exit loop, handle below */
free(decomp);
return exit_zlib(conn, z, &zp->zlib_init, process_zlib_error(conn, z));
}
}
/* UNREACHED */
/* We're about to leave this call so the `nread' data bytes won't be seen
again. If we are in a state that would wrongly allow restart in raw mode
at the next call, assume output has already started. */
if(nread && zp->zlib_init == ZLIB_INIT)
zp->zlib_init = started; /* Cannot restart anymore. */
return result;
}
@ -239,7 +282,7 @@ static CURLcode deflate_unencode_write(struct connectdata *conn,
z->avail_in = (uInt) nbytes;
/* Now uncompress the data */
return inflate_stream(conn, writer);
return inflate_stream(conn, writer, ZLIB_INFLATING);
}
static void deflate_close_writer(struct connectdata *conn,
@ -283,10 +326,11 @@ static CURLcode gzip_init_writer(struct connectdata *conn,
zp->zlib_init = ZLIB_INIT_GZIP; /* Transparent gzip decompress state */
}
else {
/* we must parse the gzip header ourselves */
/* we must parse the gzip header and trailer ourselves */
if(inflateInit2(z, -MAX_WBITS) != Z_OK) {
return process_zlib_error(conn, z);
}
zp->trailerlen = 8; /* A CRC-32 and a 32-bit input size (RFC 1952, 2.2) */
zp->zlib_init = ZLIB_INIT; /* Initial call state */
}
@ -389,7 +433,7 @@ static CURLcode gzip_unencode_write(struct connectdata *conn,
z->next_in = (Bytef *) buf;
z->avail_in = (uInt) nbytes;
/* Now uncompress the data */
return inflate_stream(conn, writer);
return inflate_stream(conn, writer, ZLIB_INIT_GZIP);
}
#ifndef OLD_ZLIB_SUPPORT
@ -482,6 +526,11 @@ static CURLcode gzip_unencode_write(struct connectdata *conn,
}
break;
case ZLIB_GZIP_TRAILER:
z->next_in = (Bytef *) buf;
z->avail_in = (uInt) nbytes;
return process_trailer(conn, zp);
case ZLIB_GZIP_INFLATING:
default:
/* Inflating stream state */
@ -496,7 +545,7 @@ static CURLcode gzip_unencode_write(struct connectdata *conn,
}
/* We've parsed the header, now uncompress the data */
return inflate_stream(conn, writer);
return inflate_stream(conn, writer, ZLIB_GZIP_INFLATING);
#endif
}

View File

@ -45,7 +45,7 @@ test190 test191 test192 test193 test194 test195 test196 test197 test198 \
test199 test200 test201 test202 test203 test204 test205 test206 test207 \
test208 test209 test210 test211 test212 test213 test214 test215 test216 \
test217 test218 test219 test220 test221 test222 test223 test224 test225 \
test226 test227 test228 test229 test230 test231 test233 test234 \
test226 test227 test228 test229 test230 test231 test232 test233 test234 \
test235 test236 test237 test238 test239 test240 test241 test242 test243 \
test244 test245 test246 test247 test248 test249 test250 test251 test252 \
test253 test254 test255 test256 test257 test258 test259 test260 test261 \

202
tests/data/test232 Normal file
View File

@ -0,0 +1,202 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
compressed
</keywords>
</info>
#
# Server-side
<reply>
<data base64="yes">
SFRUUC8xLjEgMjAwIE9LDQpEYXRlOiBNb24sIDI5IE5vdiAyMDA0IDIxOjU2OjUzIEdNVA0KU2Vy
dmVyOiBBcGFjaGUvMS4zLjMxIChEZWJpYW4gR05VL0xpbnV4KSBtb2RfZ3ppcC8xLjMuMjYuMWEg
UEhQLzQuMy45LTEgbW9kX3NzbC8yLjguMjAgT3BlblNTTC8wLjkuN2QgbW9kX3BlcmwvMS4yOQ0K
VmFyeTogQWNjZXB0LUVuY29kaW5nDQpDb250ZW50LVR5cGU6IHRleHQvaHRtbDsgY2hhcnNldD1J
U08tODg1OS0xDQpDb250ZW50LUVuY29kaW5nOiBkZWZsYXRlDQpDb250ZW50LUxlbmd0aDogMTI4
Nw0KDQrdWNtu4zYQfTeQf2D91AK2brYTOXC0KHLZBM02wdoFuk8GLdE2G0kUSMq5PPTbO5QoibEc
r3eDvgRIYnLmnMPhDEmTmXx6SmK0IVxQlp51XcvpIpKGLKLp6qx7M73r+/5o3He7n4LO5JeLu/PZ
t/tLlHH2DwllP6ZCAhBNv01nl19Qdy1ldmrbS07EOiFYWimR9gKHDySN7GXS1zzRd6yhFcmoC6Jb
WkEHocqm2k1vTqPAPXa9iW0YSkSEJZnjKCKAGI/9vuP3PRc5w1PHPfXGE9vwG4Q8Ux9R4DnOsO86
fddHjnfqwc9AUyrEqzhSnJC5WDMugzDncR2OYW/jl3kcF3CE0wjFdNGmFhAdHhGhljpnSVJSUoIk
Y7UAx/wZLRlHYUxJKvuCRgT99fUWSY5TsYSKWjCLRshQ3hkMogLhcgSlWqlwVd8ljYk46uRCddQY
4jmV+MlCNxKJPMtAX6Dr2ey+V/yd9tAVtI86V0X74uZ81kOzy9s/L+Hz9uJ3gF3d3F72iuE/391f
X36FNgzwSGKIqZTqZ0zInm7m0AoZe6BEFNooz2KGIxgCllqekKiZdQ9lWIhHxiPVhMjSPFkU9un0
9qgTEi7pkoZQVzD9QTj4mChDgWo8wQjFtCAbGXsknERHncVzlaQekmvyZsarslhHndkaqAjD74Km
ajMJSG2dapVgBpsOec5RJ8bpKscrIooYSLqhnKUJDCBAR5fQWBsbKnFM5fNchIyTYHTiD63RycTe
sm+BM8JDkAwGlntsYCvzFhrm8wB7bWwgC5Ne1yzLY8ybsY5HY4hhCMt529MiVAO6A8t3XxFeh2I4
ymCc0Su0EQ7HxbnhWyNnYuuO6ZmHLAddz6282vAKUw7iD2qMMYDIFyLkNJNwRIpgoE6H16YSBqVP
w/Vc7eXggixxHsuJbRpLGNR/Xh1gGZQ92HloVielrdaLPbFbrEZszRLythAsYMpLFXV42iZD69YC
jaZcvRwuB2CtpGiNyOLFO1wEwFpE0RqRF5odLgJgLaJojUi4hj1GYrY6XKqmaMFGopHlWXK4IIC1
lKI1IhFZHC4CYC2iaI0IE0+HiwBYiyiaUS8RqfPyB2pWEqq6abqxzHMOaRMk0Ou36hqF2YgfKMlG
VMXYCENE3RwOV1FoLVMQG52Ecs744UolXmtpslnXhAVVraBZemIKhxyk4MvNzP4bncPpASmjeYJu
S8fErhAar76n5JyTmNSZa5nn+v4WnFiuZ8EF6Q33G2x1rzo5dvxRi1hdsNocdS/afXHaBSznYu+a
zATOUQITXjM5l2v4qoactUwlEucSbjKiDqnsV93aoE9gnFISo6kkKXzDrya26WxRoEq76/7vAq8i
oopsIFt0zmIS3D2mhNe4wlRFapuhVr1qCasveE4TmmJpzk5yuCEUtYGC1p2W1/OO97kHe7n7nK7v
7+W6e8eFpbE/6r1u93i4zz3eS/bHe73OXrc7+k7c3wlsf2SD1tjl/W67/LAmMngywUMMrqO1Tm18
RvI5I2ddTkJ4HSibeknVi7LBmRvZUUPtcuwk6nsLuE+Gqhg7XTuZxuOsRd1+uL3FlVSqDQV2uLOj
X/Vt6redWiW23mkN4u28seLehuP/L2nOT2dsOHhnxtT76uMnyvUGI/cdmXqBp9jHz9LAc4Yn78jS
NaFJhOOPn6jhcDTw3pGosA9PffEzeTIs+qyv/ysUdOpm89+x/wA=
</data>
<datacheck>
HTTP/1.1 200 OK
Date: Mon, 29 Nov 2004 21:56:53 GMT
Server: Apache/1.3.31 (Debian GNU/Linux) mod_gzip/1.3.26.1a PHP/4.3.9-1 mod_ssl/2.8.20 OpenSSL/0.9.7d mod_perl/1.29
Vary: Accept-Encoding
Content-Type: text/html; charset=ISO-8859-1
Content-Encoding: deflate
Content-Length: 1287
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE project-listing SYSTEM "http://freshmeat.net/backend/fm-projects-0.4.dtd">
<project-listing>
<project>
<project_id>1612</project_id>
<date_added>1998-08-21 04:01:29</date_added>
<date_updated>2004-10-18 02:22:23</date_updated>
<projectname_short>curl</projectname_short>
<projectname_full>curl and libcurl</projectname_full>
<desc_short>Command line tool and library for client-side URL transfers.</desc_short>
<desc_full>curl and libcurl is a tool for transferring files
using URL syntax. It supports HTTP, HTTPS, FTP,
FTPS, DICT, TELNET, LDAP, FILE, and GOPHER, as
well as HTTP-post, HTTP-put, cookies, FTP upload,
resumed transfers, passwords, portnumbers, SSL
certificates, Kerberos, and proxies. It is powered
by libcurl, the client-side URL transfer library.
There are bindings to libcurl for over 20
languages and environments.
</desc_full>
<vitality_score>5784.57</vitality_score>
<vitality_percent>3.16</vitality_percent>
<vitality_rank>169</vitality_rank>
<popularity_score>6594.54</popularity_score>
<popularity_percent>13.81</popularity_percent>
<popularity_rank>105</popularity_rank>
<rating>8.50</rating>
<rating_count>21</rating_count>
<rating_rank>183</rating_rank>
<subscriptions>323</subscriptions>
<branch_name>Default</branch_name>
<url_project_page>http://freshmeat.net/projects/curl/</url_project_page>
<url_homepage>http://freshmeat.net/redir/curl/1612/url_homepage/</url_homepage>
<url_tgz>http://freshmeat.net/redir/curl/1612/url_tgz/</url_tgz>
<url_bz2>http://freshmeat.net/redir/curl/1612/url_bz2/</url_bz2>
<url_zip>http://freshmeat.net/redir/curl/1612/url_zip/</url_zip>
<url_changelog>http://freshmeat.net/redir/curl/1612/url_changelog/</url_changelog>
<url_rpm>http://freshmeat.net/redir/curl/1612/url_rpm/</url_rpm>
<url_deb>http://freshmeat.net/redir/curl/1612/url_deb/</url_deb>
<url_osx>http://freshmeat.net/redir/curl/1612/url_osx/</url_osx>
<url_bsdport>http://freshmeat.net/redir/curl/1612/url_bsdport/</url_bsdport>
<url_purchase></url_purchase>
<url_cvs>http://freshmeat.net/redir/curl/1612/url_cvs/</url_cvs>
<url_list>http://freshmeat.net/redir/curl/1612/url_list/</url_list>
<url_mirror>http://freshmeat.net/redir/curl/1612/url_mirror/</url_mirror>
<url_demo></url_demo>
<license>MIT/X Consortium License</license>
<latest_release>
<latest_release_version>7.12.2</latest_release_version>
<latest_release_id>176085</latest_release_id>
<latest_release_date>2004-10-18 02:22:23</latest_release_date>
</latest_release>
<screenshot_thumb></screenshot_thumb>
<authors>
<author>
<author_name>Daniel Stenberg</author_name>
<author_url>http://freshmeat.net/~bagder/</author_url>
<author_role>Owner</author_role>
</author>
</authors>
<descriminators>
<trove_id>12</trove_id>
<trove_id>226</trove_id>
<trove_id>3</trove_id>
<trove_id>2</trove_id>
<trove_id>188</trove_id>
<trove_id>216</trove_id>
<trove_id>200</trove_id>
<trove_id>220</trove_id>
<trove_id>164</trove_id>
<trove_id>90</trove_id>
<trove_id>89</trove_id>
<trove_id>809</trove_id>
<trove_id>150</trove_id>
<trove_id>224</trove_id>
<trove_id>900</trove_id>
<trove_id>839</trove_id>
</descriminators>
<dependencies>
<dependency type="recommended">
<dependency_release_id>0</dependency_release_id>
<dependency_branch_id>7464</dependency_branch_id>
<dependency_project_id>7464</dependency_project_id>
<dependency_project_title>OpenSSL (Default)</dependency_project_title>
</dependency>
<dependency type="optional">
<dependency_release_id>0</dependency_release_id>
<dependency_branch_id>0</dependency_branch_id>
<dependency_project_id>7443</dependency_project_id>
<dependency_project_title>OpenLDAP</dependency_project_title>
</dependency>
<dependency type="optional">
<dependency_release_id>0</dependency_release_id>
<dependency_branch_id>0</dependency_branch_id>
<dependency_project_id>12351</dependency_project_id>
<dependency_project_title>zlib</dependency_project_title>
</dependency>
<dependency type="optional">
<dependency_release_id>0</dependency_release_id>
<dependency_branch_id>0</dependency_branch_id>
<dependency_project_id>32047</dependency_project_id>
<dependency_project_title>Heimdal</dependency_project_title>
</dependency>
<dependency type="optional">
<dependency_release_id>0</dependency_release_id>
<dependency_branch_id>0</dependency_branch_id>
<dependency_project_id>44532</dependency_project_id>
<dependency_project_title>c-ares</dependency_project_title>
</dependency>
</dependencies>
</project>
</project-listing>
</datacheck>
</reply>
#
# Client-side
<client>
<features>
libz
</features>
<server>
http
</server>
<name>
HTTP GET deflate raw-compressed content
</name>
<command>
http://%HOSTIP:%HTTPPORT/232 --compressed
</command>
</client>
#
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<strippart>
s/^Accept-Encoding: .*/Accept-Encoding: xxx/
</strippart>
<protocol>
GET /232 HTTP/1.1
Host: %HOSTIP:%HTTPPORT
Accept: */*
Accept-Encoding: xxx
</protocol>
</verify>
</testcase>