From 0790b2791091ca46bed3813dc7d04cd3f30e5dac Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Tue, 5 Apr 2011 23:24:52 +0200 Subject: [PATCH] HTTP: add support for gzip and deflate Transfer-Encoding Transfer-Encoding differs from Content-Encoding in a few subtle ways, but primarily it concerns the transfer only and not the content so when discovered to be compressed we know we have to uncompress it. There will only arrive compressed transfers in a response after we have requested them with the appropriate TE: header. Test case 1122 and 1123 verify. --- lib/http.c | 72 +++++++++++++-- lib/url.c | 1 + lib/urldata.h | 23 ++--- tests/data/Makefile.am | 2 +- tests/data/test1122 | 69 ++++++++++++++ tests/data/test1123 | 199 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 349 insertions(+), 17 deletions(-) create mode 100644 tests/data/test1122 create mode 100644 tests/data/test1123 diff --git a/lib/http.c b/lib/http.c index 175ec7be7..6b14fab19 100644 --- a/lib/http.c +++ b/lib/http.c @@ -1728,6 +1728,15 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) return CURLE_OUT_OF_MEMORY; } +#if 0 + if(!Curl_checkheaders(data, "TE:")) { + Curl_safefree(conn->allocptr.te); + conn->allocptr.te = aprintf("TE: %s\r\n", "gzip"); + if(!conn->allocptr.te) + return CURLE_OUT_OF_MEMORY; + } +#endif + ptr = Curl_checkheaders(data, "Transfer-Encoding:"); if(ptr) { /* Some kind of TE is requested, check if 'chunked' is chosen */ @@ -2058,6 +2067,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) "%s" /* user agent */ "%s" /* host */ "%s" /* accept */ + "%s" /* TE: */ "%s" /* accept-encoding */ "%s" /* referer */ "%s" /* Proxy-Connection */ @@ -2075,6 +2085,7 @@ CURLcode Curl_http(struct connectdata *conn, bool *done) conn->allocptr.uagent:"", (conn->allocptr.host?conn->allocptr.host:""), /* Host: host */ http->p_accept?http->p_accept:"", + conn->allocptr.te?conn->allocptr.te:"", (data->set.str[STRING_ENCODING] && *data->set.str[STRING_ENCODING] && conn->allocptr.accept_encoding)? @@ -3127,8 +3138,9 @@ CURLcode Curl_http_readwrite_headers(struct SessionHandle *data, */ conn->bits.close = TRUE; /* close when done */ } - else if(Curl_compareheader(k->p, "Transfer-Encoding:", "chunked") && - !(conn->handler->protocol & CURLPROTO_RTSP)) { + else if(checkprefix("Transfer-Encoding:", k->p)) { + /* One or more encodings. We check for chunked and/or a compression + algorithm. */ /* * [RFC 2616, section 3.6.1] A 'chunked' transfer encoding * means that the server will send a series of "chunks". Each @@ -3137,10 +3149,60 @@ CURLcode Curl_http_readwrite_headers(struct SessionHandle *data, * with the previously mentioned size. There can be any amount * of chunks, and a chunk-data set to zero signals the * end-of-chunks. */ - k->chunk = TRUE; /* chunks coming our way */ - /* init our chunky engine */ - Curl_httpchunk_init(conn); + char *start; + + /* Find the first non-space letter */ + start = k->p + 18; + + do { + /* skip whitespaces and commas */ + while(*start && (ISSPACE(*start) || (*start == ','))) + start++; + + if(checkprefix("chunked", start)) { + k->chunk = TRUE; /* chunks coming our way */ + + /* init our chunky engine */ + Curl_httpchunk_init(conn); + + start += 7; + } + + if(k->content_encoding) + /* TODO: we only support the first mentioned compression for now */ + break; + + if(checkprefix("identity", start)) { + k->content_encoding = IDENTITY; + start += 8; + } + else if(checkprefix("deflate", start)) { + k->content_encoding = DEFLATE; + start += 7; + } + else if(checkprefix("gzip", start)) { + k->content_encoding = GZIP; + start += 4; + } + else if(checkprefix("x-gzip", start)) { + k->content_encoding = GZIP; + start += 6; + } + else if(checkprefix("compress", start)) { + k->content_encoding = COMPRESS; + start += 8; + } + else if(checkprefix("x-compress", start)) { + k->content_encoding = COMPRESS; + start += 10; + } + else + /* unknown! */ + break; + + } while(1); + } else if(checkprefix("Content-Encoding:", k->p) && data->set.str[STRING_ENCODING]) { diff --git a/lib/url.c b/lib/url.c index c7fcdfe96..a31c28033 100644 --- a/lib/url.c +++ b/lib/url.c @@ -2549,6 +2549,7 @@ static void conn_free(struct connectdata *conn) Curl_safefree(conn->allocptr.uagent); Curl_safefree(conn->allocptr.userpwd); Curl_safefree(conn->allocptr.accept_encoding); + Curl_safefree(conn->allocptr.te); Curl_safefree(conn->allocptr.rangeline); Curl_safefree(conn->allocptr.ref); Curl_safefree(conn->allocptr.host); diff --git a/lib/urldata.h b/lib/urldata.h index d1718a9a4..f2fb279d4 100644 --- a/lib/urldata.h +++ b/lib/urldata.h @@ -837,18 +837,19 @@ struct connectdata { well be the same we read from. CURL_SOCKET_BAD disables */ - /** Dynamicly allocated strings, may need to be freed before this **/ - /** struct is killed. **/ + /** Dynamicly allocated strings, MUST be freed before this **/ + /** struct is killed. **/ struct dynamically_allocated_data { - char *proxyuserpwd; /* free later if not NULL! */ - char *uagent; /* free later if not NULL! */ - char *accept_encoding; /* free later if not NULL! */ - char *userpwd; /* free later if not NULL! */ - char *rangeline; /* free later if not NULL! */ - char *ref; /* free later if not NULL! */ - char *host; /* free later if not NULL */ - char *cookiehost; /* free later if not NULL */ - char *rtsp_transport; /* free later if not NULL */ + char *proxyuserpwd; + char *uagent; + char *accept_encoding; + char *userpwd; + char *rangeline; + char *ref; + char *host; + char *cookiehost; + char *rtsp_transport; + char *te; /* TE: request header */ } allocptr; int sec_complete; /* if kerberos is enabled for this connection */ diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 066ba5d90..7d3938a4c 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -71,7 +71,7 @@ EXTRA_DIST = test1 test108 test117 test127 test20 test27 test34 test46 \ test1203 test1117 test1118 test1119 test1120 test1300 test1301 test1302 \ test1303 test320 test321 test322 test323 test324 test1121 test581 test580 \ test1304 test1305 test1306 test1307 test582 test583 test808 test809 \ - test810 test811 test812 test813 test584 + test810 test811 test812 test813 test584 test1122 test1123 filecheck: @mkdir test-place; \ diff --git a/tests/data/test1122 b/tests/data/test1122 new file mode 100644 index 000000000..320ed9d3e --- /dev/null +++ b/tests/data/test1122 @@ -0,0 +1,69 @@ + + + +HTTP +HTTP GET +compressed +Transfer-Encoding + + +# +# Server-side + + +SFRUUC8xLjEgMjAwIE9LDQpEYXRlOiBNb24sIDI5IE5vdiAyMDA0IDIxOjU2OjUzIEdNVA0KU2Vy +dmVyOiBBcGFjaGUvMS4zLjMxIChEZWJpYW4gR05VL0xpbnV4KSBtb2RfZ3ppcC8xLjMuMjYuMWEg +UEhQLzQuMy45LTEgbW9kX3NzbC8yLjguMjAgT3BlblNTTC8wLjkuN2QgbW9kX3BlcmwvMS4yOQ0K +VmFyeTogQWNjZXB0LUVuY29kaW5nDQpDb250ZW50LVR5cGU6IHRleHQvaHRtbDsgY2hhcnNldD1J +U08tODg1OS0xDQpUcmFuc2Zlci1FbmNvZGluZzogZ3ppcA0KQ29udGVudC1MZW5ndGg6IDQ0DQoN +Ch+LCAh5nqtBAANsYWxhbGEAy8nMS1Uw5FLIAdFGXAoQhjEXAAoCcWAYAAAA + + + +HTTP/1.1 200 OK +Date: Mon, 29 Nov 2004 21:56:53 GMT +Server: Apache/1.3.31 (Debian GNU/Linux) mod_gzip/1.3.26.1a PHP/4.3.9-1 mod_ssl/2.8.20 OpenSSL/0.9.7d mod_perl/1.29 +Vary: Accept-Encoding +Content-Type: text/html; charset=ISO-8859-1 +Transfer-Encoding: gzip +Content-Length: 44 + +line 1 + line 2 + line 3 + + + + +# +# Client-side + + +libz + + +http + + +HTTP GET gzip transfer-encoded content + + +http://%HOSTIP:%HTTPPORT/1122 -H "TE: gzip" + + + +# +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET /1122 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* +TE: gzip + + + + diff --git a/tests/data/test1123 b/tests/data/test1123 new file mode 100644 index 000000000..c51fa9608 --- /dev/null +++ b/tests/data/test1123 @@ -0,0 +1,199 @@ + + + +HTTP +HTTP GET +compressed + + +# +# Server-side + + +SFRUUC8xLjEgMjAwIE9LDQpEYXRlOiBNb24sIDI5IE5vdiAyMDA0IDIxOjU2OjUzIEdNVA0KU2Vy +dmVyOiBBcGFjaGUvMS4zLjMxIChEZWJpYW4gR05VL0xpbnV4KSBtb2RfZ3ppcC8xLjMuMjYuMWEg +UEhQLzQuMy45LTEgbW9kX3NzbC8yLjguMjAgT3BlblNTTC8wLjkuN2QgbW9kX3BlcmwvMS4yOQ0K +VmFyeTogQWNjZXB0LUVuY29kaW5nDQpDb250ZW50LVR5cGU6IHRleHQvaHRtbDsgY2hhcnNldD1J +U08tODg1OS0xDQpUcmFuc2Zlci1FbmNvZGluZzogZGVmbGF0ZQ0KQ29udGVudC1MZW5ndGg6IDEz +MDUNCg0KeJzcWNtu4zYQfTeQf2D91AK2brYTOXC0KHLZBM02wdoFuk8GLdE2G0kUSMq5PPTbO5Qo +ibEcr3eDvgRIYnLmnMPhDEmTmXx6SmK0IVxQlp51XcvpIpKGLKLp6qx7M73r+/5o3He7n4LO5JeL +u/PZt/tLlHH2DwllP6ZCAhBNv01nl19Qdy1ldmrbS07EOiFYWimR9gKHDySN7GXS1zzRd6yhFcmo +C6JbWkEHocqm2k1vTqPAPXa9iW0YSkSEJZnjKCKAGI/9vuP3PRc5w1PHPfXGE9vwG4Q8Ux9R4DnO +sO86fddHjnfqwc9AUyrEqzhSnJC5WDMugzDncR2OYW/jl3kcF3CE0wjFdNGmFhAdHhGhljpnSVJS +UoIkY7UAx/wZLRlHYUxJKvuCRgT99fUWSY5TsYSKWjCLRshQ3hkMogLhcgSlWqlwVd8ljYk46uRC +ddQY4jmV+MlCNxKJPMtAX6Dr2ey+V/yd9tAVtI86V0X74uZ81kOzy9s/L+Hz9uJ3gF3d3F72iuE/ +391fX36FNgzwSGKIqZTqZ0zInm7m0AoZe6BEFNooz2KGIxgCllqekKiZdQ9lWIhHxiPVhMjSPFkU +9un09qgTEi7pkoZQVzD9QTj4mChDgWo8wQjFtCAbGXsknERHncVzlaQekmvyZsarslhHndkaqAjD +74KmajMJSG2dapVgBpsOec5RJ8bpKscrIooYSLqhnKUJDCBAR5fQWBsbKnFM5fNchIyTYHTiD63R +ycTesm+BM8JDkAwGlntsYCvzFhrm8wB7bWwgC5Ne1yzLY8ybsY5HY4hhCMt529MiVAO6A8t3XxFe +h2I4ymCc0Su0EQ7HxbnhWyNnYuuO6ZmHLAddz6282vAKUw7iD2qMMYDIFyLkNJNwRIpgoE6H16YS +BqVPw/Vc7eXggixxHsuJbRpLGNR/Xh1gGZQ92HloVielrdaLPbFbrEZszRLythAsYMpLFXV42iZD +69YCjaZcvRwuB2CtpGiNyOLFO1wEwFpE0RqRF5odLgJgLaJojUi4hj1GYrY6XKqmaMFGopHlWXK4 +IIC1lKI1IhFZHC4CYC2iaI0IE0+HiwBYiyiaUS8RqfPyB2pWEqq6abqxzHMOaRMk0Ou36hqF2Ygf +KMlGVMXYCENE3RwOV1FoLVMQG52Ecs744UolXmtpslnXhAVVraBZemIKhxyk4MvNzP4bncPpASmj +eYJuS8fErhAar76n5JyTmNSZa5nn+v4WnFiuZ8EF6Q33G2x1rzo5dvxRi1hdsNocdS/afXHaBSzn +Yu+azATOUQITXjM5l2v4qoactUwlEucSbjKiDqnsV93aoE9gnFISo6kkKXzDrya26WxRoEq76/7v +Aq8ioopsIFt0zmIS3D2mhNe4wlRFapuhVr1qCasveE4TmmJpzk5yuCEUtYGC1p2W1/OO97kHe7n7 +nK7v7+W6e8eFpbE/6r1u93i4zz3eS/bHe73OXrc7+k7c3wlsf2SD1tjl/W67/LAmMngywUMMrqO1 +Tm18RvI5I2ddTkJ4HSibeknVi7LBmRvZUUPtcuwk6nsLuE+Gqhg7XTuZxuOsRd1+uL3FlVSqDQV2 +uLOjX/Vt6redWiW23mkN4u28seLehuP/L2nOT2dsOHhnxtT76uMnyvUGI/cdmXqBp9jHz9LAc4Yn +78jSNaFJhOOPn6jhcDTw3pGosA9PffEzeTIs+qyv/ysUdP4DAAD//4IzEaNjAAAAAP//AwDdOI7R + + + +HTTP/1.1 200 OK +Date: Mon, 29 Nov 2004 21:56:53 GMT +Server: Apache/1.3.31 (Debian GNU/Linux) mod_gzip/1.3.26.1a PHP/4.3.9-1 mod_ssl/2.8.20 OpenSSL/0.9.7d mod_perl/1.29 +Vary: Accept-Encoding +Content-Type: text/html; charset=ISO-8859-1 +Transfer-Encoding: deflate +Content-Length: 1305 + + + + + + 1612 + 1998-08-21 04:01:29 + 2004-10-18 02:22:23 + curl + curl and libcurl + Command line tool and library for client-side URL transfers. + curl and libcurl is a tool for transferring files +using URL syntax. It supports HTTP, HTTPS, FTP, +FTPS, DICT, TELNET, LDAP, FILE, and GOPHER, as +well as HTTP-post, HTTP-put, cookies, FTP upload, +resumed transfers, passwords, portnumbers, SSL +certificates, Kerberos, and proxies. It is powered +by libcurl, the client-side URL transfer library. +There are bindings to libcurl for over 20 +languages and environments. + + 5784.57 + 3.16 + 169 + 6594.54 + 13.81 + 105 + 8.50 + 21 + 183 + 323 + Default + http://freshmeat.net/projects/curl/ + http://freshmeat.net/redir/curl/1612/url_homepage/ + http://freshmeat.net/redir/curl/1612/url_tgz/ + http://freshmeat.net/redir/curl/1612/url_bz2/ + http://freshmeat.net/redir/curl/1612/url_zip/ + http://freshmeat.net/redir/curl/1612/url_changelog/ + http://freshmeat.net/redir/curl/1612/url_rpm/ + http://freshmeat.net/redir/curl/1612/url_deb/ + http://freshmeat.net/redir/curl/1612/url_osx/ + http://freshmeat.net/redir/curl/1612/url_bsdport/ + + http://freshmeat.net/redir/curl/1612/url_cvs/ + http://freshmeat.net/redir/curl/1612/url_list/ + http://freshmeat.net/redir/curl/1612/url_mirror/ + + MIT/X Consortium License + + 7.12.2 + 176085 + 2004-10-18 02:22:23 + + + + + Daniel Stenberg + http://freshmeat.net/~bagder/ + Owner + + + + 12 + 226 + 3 + 2 + 188 + 216 + 200 + 220 + 164 + 90 + 89 + 809 + 150 + 224 + 900 + 839 + + + + 0 + 7464 + 7464 + OpenSSL (Default) + + + 0 + 0 + 7443 + OpenLDAP + + + 0 + 0 + 12351 + zlib + + + 0 + 0 + 32047 + Heimdal + + + 0 + 0 + 44532 + c-ares + + + + + + + + +# +# Client-side + + +libz + + +http + + +HTTP GET deflate transfer-encoded content + + +http://%HOSTIP:%HTTPPORT/1123 -H "TE: deflate, gzip" + + + +# +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET /1123 HTTP/1.1 +Host: %HOSTIP:%HTTPPORT +Accept: */* +TE: deflate, gzip + + + +