2002-09-03 07:52:59 -04:00
|
|
|
/***************************************************************************
|
2005-03-03 19:14:45 -05:00
|
|
|
* _ _ ____ _
|
|
|
|
* Project ___| | | | _ \| |
|
|
|
|
* / __| | | | |_) | |
|
|
|
|
* | (__| |_| | _ <| |___
|
2001-03-07 18:28:22 -05:00
|
|
|
* \___|\___/|_| \_\_____|
|
|
|
|
*
|
2021-01-13 05:53:41 -05:00
|
|
|
* Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
|
2001-03-07 18:28:22 -05:00
|
|
|
*
|
2002-09-03 07:52:59 -04:00
|
|
|
* This software is licensed as described in the file COPYING, which
|
|
|
|
* you should have received as part of this distribution. The terms
|
2020-11-04 08:02:01 -05:00
|
|
|
* are also available at https://curl.se/docs/copyright.html.
|
2005-03-03 19:14:45 -05:00
|
|
|
*
|
2001-03-07 18:28:22 -05:00
|
|
|
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
|
|
|
|
* copies of the Software, and permit persons to whom the Software is
|
2002-09-03 07:52:59 -04:00
|
|
|
* furnished to do so, under the terms of the COPYING file.
|
2001-03-07 18:28:22 -05:00
|
|
|
*
|
|
|
|
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
|
|
|
|
* KIND, either express or implied.
|
|
|
|
*
|
2002-09-03 07:52:59 -04:00
|
|
|
***************************************************************************/
|
2011-07-26 11:23:27 -04:00
|
|
|
|
2013-01-06 13:06:49 -05:00
|
|
|
#include "curl_setup.h"
|
2001-03-07 18:28:22 -05:00
|
|
|
|
2002-06-11 07:13:01 -04:00
|
|
|
#ifndef CURL_DISABLE_HTTP
|
2001-03-07 18:28:22 -05:00
|
|
|
|
2013-01-03 20:50:28 -05:00
|
|
|
#include "urldata.h" /* it includes http_chunks.h */
|
|
|
|
#include "sendf.h" /* for the client write stuff */
|
2020-06-05 08:04:22 -04:00
|
|
|
#include "dynbuf.h"
|
2013-01-03 20:50:28 -05:00
|
|
|
#include "content_encoding.h"
|
|
|
|
#include "http.h"
|
|
|
|
#include "non-ascii.h" /* for Curl_convert_to_network prototype */
|
2014-01-16 17:07:54 -05:00
|
|
|
#include "strtoofft.h"
|
2014-01-19 08:04:59 -05:00
|
|
|
#include "warnless.h"
|
2002-09-02 18:31:18 -04:00
|
|
|
|
2015-03-24 18:12:03 -04:00
|
|
|
/* The last #include files should be: */
|
|
|
|
#include "curl_memory.h"
|
2013-01-03 20:50:28 -05:00
|
|
|
#include "memdebug.h"
|
2001-03-07 18:28:22 -05:00
|
|
|
|
2005-03-03 19:14:45 -05:00
|
|
|
/*
 * Chunk format (simplified):
 *
 * <HEX SIZE>[ chunk extension ] CRLF
 * <DATA> CRLF
 *
 * Highlights from RFC2616 section 3.6 say:

   The chunked encoding modifies the body of a message in order to
   transfer it as a series of chunks, each with its own size indicator,
   followed by an OPTIONAL trailer containing entity-header fields. This
   allows dynamically produced content to be transferred along with the
   information necessary for the recipient to verify that it has
   received the full message.

       Chunked-Body   = *chunk
                        last-chunk
                        trailer
                        CRLF

       chunk          = chunk-size [ chunk-extension ] CRLF
                        chunk-data CRLF
       chunk-size     = 1*HEX
       last-chunk     = 1*("0") [ chunk-extension ] CRLF

       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
       chunk-ext-name = token
       chunk-ext-val  = token | quoted-string
       chunk-data     = chunk-size(OCTET)
       trailer        = *(entity-header CRLF)

   The chunk-size field is a string of hex digits indicating the size of
   the chunk. The chunked encoding is ended by any chunk whose size is
   zero, followed by the trailer, which is terminated by an empty line.

 */
|
|
|
|
|
2018-04-13 08:28:55 -04:00
|
|
|
#ifdef CURL_DOES_CONVERSIONS
/*
 * isxdigit_ascii() tells whether 'digit' holds the ASCII code of a
 * hexadecimal digit (0-9, A-F or a-f).
 *
 * The ranges are written as ASCII code points rather than going through
 * ISXDIGIT, to accommodate non-ASCII hosts.
 */
static bool isxdigit_ascii(char digit)
{
  return (digit >= 0x30 && digit <= 0x39)    /* 0-9 */
    || (digit >= 0x41 && digit <= 0x46)      /* A-F */
    || (digit >= 0x61 && digit <= 0x66);     /* a-f */
}
#else
/* Without CURL_DOES_CONVERSIONS the check is delegated to Curl_isxdigit() */
#define isxdigit_ascii(x) Curl_isxdigit(x)
#endif
|
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
void Curl_httpchunk_init(struct connectdata *conn)
|
|
|
|
{
|
2007-10-02 06:21:36 -04:00
|
|
|
struct Curl_chunker *chunk = &conn->chunk;
|
2017-09-09 17:09:06 -04:00
|
|
|
chunk->hexindex = 0; /* start at 0 */
|
|
|
|
chunk->dataleft = 0; /* no data left yet! */
|
2001-03-07 18:28:22 -05:00
|
|
|
chunk->state = CHUNK_HEX; /* we get hex first! */
|
2020-06-05 08:04:22 -04:00
|
|
|
Curl_dyn_init(&conn->trailer, DYN_H1_TRAILER);
|
2001-03-07 18:28:22 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2001-03-07 18:51:41 -05:00
|
|
|
* chunk_read() returns a OK for normal operations, or a positive return code
|
|
|
|
* for errors. STOP means this sequence of chunks is complete. The 'wrote'
|
|
|
|
* argument is set to tell the caller how many bytes we actually passed to the
|
|
|
|
* client (for byte-counting and whatever).
|
2001-03-07 18:28:22 -05:00
|
|
|
*
|
|
|
|
* The states and the state-machine is further explained in the header file.
|
2007-01-14 09:57:51 -05:00
|
|
|
*
|
|
|
|
* This function always uses ASCII hex values to accommodate non-ASCII hosts.
|
|
|
|
* For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
|
2001-03-07 18:28:22 -05:00
|
|
|
*/
|
|
|
|
CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
|
|
|
|
char *datap,
|
2004-03-04 10:25:06 -05:00
|
|
|
ssize_t datalen,
|
2019-10-01 09:40:05 -04:00
|
|
|
ssize_t *wrotep,
|
|
|
|
CURLcode *extrap)
|
2001-03-07 18:28:22 -05:00
|
|
|
{
|
2017-09-09 17:09:06 -04:00
|
|
|
CURLcode result = CURLE_OK;
|
2016-06-21 09:47:12 -04:00
|
|
|
struct Curl_easy *data = conn->data;
|
2007-10-02 06:21:36 -04:00
|
|
|
struct Curl_chunker *ch = &conn->chunk;
|
2007-11-24 18:16:55 -05:00
|
|
|
struct SingleRequest *k = &data->req;
|
2004-03-04 10:25:06 -05:00
|
|
|
size_t piece;
|
2014-01-16 17:07:54 -05:00
|
|
|
curl_off_t length = (curl_off_t)datalen;
|
2004-03-04 10:25:06 -05:00
|
|
|
size_t *wrote = (size_t *)wrotep;
|
|
|
|
|
|
|
|
*wrote = 0; /* nothing's written yet */
|
2001-03-07 18:28:22 -05:00
|
|
|
|
2007-02-12 16:13:47 -05:00
|
|
|
/* the original data is written to the client, but we go on with the
|
|
|
|
chunk read process, to properly calculate the content length*/
|
2008-01-02 17:30:34 -05:00
|
|
|
if(data->set.http_te_skip && !k->ignorebody) {
|
2021-01-08 11:58:15 -05:00
|
|
|
result = Curl_client_write(data, CLIENTWRITE_BODY, datap, datalen);
|
2019-10-01 09:40:05 -04:00
|
|
|
if(result) {
|
|
|
|
*extrap = result;
|
|
|
|
return CHUNKE_PASSTHRU_ERROR;
|
|
|
|
}
|
2008-01-02 17:30:34 -05:00
|
|
|
}
|
2007-02-12 16:13:47 -05:00
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
while(length) {
|
|
|
|
switch(ch->state) {
|
|
|
|
case CHUNK_HEX:
|
2021-01-13 05:53:41 -05:00
|
|
|
if(isxdigit_ascii(*datap)) {
|
2021-01-19 08:23:11 -05:00
|
|
|
if(ch->hexindex < CHUNK_MAXNUM_LEN) {
|
2001-03-07 18:28:22 -05:00
|
|
|
ch->hexbuffer[ch->hexindex] = *datap;
|
|
|
|
datap++;
|
|
|
|
length--;
|
|
|
|
ch->hexindex++;
|
|
|
|
}
|
|
|
|
else {
|
2001-03-12 10:20:35 -05:00
|
|
|
return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
|
2001-03-07 18:28:22 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2014-01-16 17:07:54 -05:00
|
|
|
char *endptr;
|
2014-01-16 18:34:36 -05:00
|
|
|
if(0 == ch->hexindex)
|
2001-03-12 10:20:35 -05:00
|
|
|
/* This is illegal data, we received junk where we expected
|
|
|
|
a hexadecimal digit. */
|
|
|
|
return CHUNKE_ILLEGAL_HEX;
|
2014-01-16 18:34:36 -05:00
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
/* length and datap are unmodified */
|
2017-09-09 17:09:06 -04:00
|
|
|
ch->hexbuffer[ch->hexindex] = 0;
|
2011-04-19 18:48:20 -04:00
|
|
|
|
2007-01-14 09:57:51 -05:00
|
|
|
/* convert to host encoding before calling strtoul */
|
2011-04-19 18:48:20 -04:00
|
|
|
result = Curl_convert_from_network(conn->data, ch->hexbuffer,
|
2007-01-14 09:57:51 -05:00
|
|
|
ch->hexindex);
|
2011-04-19 18:48:20 -04:00
|
|
|
if(result) {
|
2007-01-14 09:57:51 -05:00
|
|
|
/* Curl_convert_from_network calls failf if unsuccessful */
|
|
|
|
/* Treat it as a bad hex character */
|
2015-03-17 09:06:48 -04:00
|
|
|
return CHUNKE_ILLEGAL_HEX;
|
2007-01-14 09:57:51 -05:00
|
|
|
}
|
2011-04-19 18:48:20 -04:00
|
|
|
|
2017-08-14 17:33:23 -04:00
|
|
|
if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize))
|
2014-01-16 17:07:54 -05:00
|
|
|
return CHUNKE_ILLEGAL_HEX;
|
2014-01-16 18:34:36 -05:00
|
|
|
ch->state = CHUNK_LF; /* now wait for the CRLF */
|
2001-03-07 18:28:22 -05:00
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
2014-01-16 18:34:36 -05:00
|
|
|
case CHUNK_LF:
|
|
|
|
/* waiting for the LF after a chunk size */
|
2007-01-14 09:57:51 -05:00
|
|
|
if(*datap == 0x0a) {
|
2001-03-07 18:28:22 -05:00
|
|
|
/* we're now expecting data to come, unless size was zero! */
|
|
|
|
if(0 == ch->datasize) {
|
2010-08-25 07:42:14 -04:00
|
|
|
ch->state = CHUNK_TRAILER; /* now check for trailers */
|
2001-03-07 18:28:22 -05:00
|
|
|
}
|
2014-01-16 18:34:36 -05:00
|
|
|
else
|
2001-03-07 18:28:22 -05:00
|
|
|
ch->state = CHUNK_DATA;
|
|
|
|
}
|
2014-01-16 18:34:36 -05:00
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
datap++;
|
|
|
|
length--;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CHUNK_DATA:
|
2014-01-16 18:34:36 -05:00
|
|
|
/* We expect 'datasize' of data. We have 'length' right now, it can be
|
|
|
|
more or less than 'datasize'. Get the smallest piece.
|
2001-03-07 18:28:22 -05:00
|
|
|
*/
|
2014-01-19 08:04:59 -05:00
|
|
|
piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
|
2001-03-07 18:28:22 -05:00
|
|
|
|
|
|
|
/* Write the data portion available */
|
2018-02-11 21:38:18 -05:00
|
|
|
if(!conn->data->set.http_te_skip && !k->ignorebody) {
|
|
|
|
if(!conn->data->set.http_ce_skip && k->writer_stack)
|
|
|
|
result = Curl_unencode_write(conn, k->writer_stack, datap, piece);
|
|
|
|
else
|
2021-01-08 11:58:15 -05:00
|
|
|
result = Curl_client_write(data, CLIENTWRITE_BODY, datap, piece);
|
2002-09-02 18:31:18 -04:00
|
|
|
|
2019-10-01 09:40:05 -04:00
|
|
|
if(result) {
|
|
|
|
*extrap = result;
|
|
|
|
return CHUNKE_PASSTHRU_ERROR;
|
|
|
|
}
|
2018-02-11 21:38:18 -05:00
|
|
|
}
|
2004-03-04 10:25:06 -05:00
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
*wrote += piece;
|
|
|
|
ch->datasize -= piece; /* decrease amount left to expect */
|
|
|
|
datap += piece; /* move read pointer forward */
|
|
|
|
length -= piece; /* decrease space left in this round */
|
|
|
|
|
|
|
|
if(0 == ch->datasize)
|
2001-03-13 17:16:42 -05:00
|
|
|
/* end of data this round, we now expect a trailing CRLF */
|
|
|
|
ch->state = CHUNK_POSTLF;
|
|
|
|
break;
|
2001-03-07 18:28:22 -05:00
|
|
|
|
2001-03-13 17:16:42 -05:00
|
|
|
case CHUNK_POSTLF:
|
2007-01-14 09:57:51 -05:00
|
|
|
if(*datap == 0x0a) {
|
2014-01-16 18:34:36 -05:00
|
|
|
/* The last one before we go back to hex state and start all over. */
|
|
|
|
Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
|
2001-03-13 17:16:42 -05:00
|
|
|
}
|
2014-01-16 18:34:36 -05:00
|
|
|
else if(*datap != 0x0d)
|
2001-03-13 17:16:42 -05:00
|
|
|
return CHUNKE_BAD_CHUNK;
|
2014-01-16 18:34:36 -05:00
|
|
|
datap++;
|
|
|
|
length--;
|
2001-03-07 18:28:22 -05:00
|
|
|
break;
|
2001-03-13 17:16:42 -05:00
|
|
|
|
2005-07-12 14:15:34 -04:00
|
|
|
case CHUNK_TRAILER:
|
2014-01-16 18:34:36 -05:00
|
|
|
if((*datap == 0x0d) || (*datap == 0x0a)) {
|
2020-06-05 08:04:22 -04:00
|
|
|
char *tr = Curl_dyn_ptr(&conn->trailer);
|
2010-08-25 07:42:14 -04:00
|
|
|
/* this is the end of a trailer, but if the trailer was zero bytes
|
|
|
|
there was no trailer and we move on */
|
2007-02-21 16:59:40 -05:00
|
|
|
|
2020-06-05 08:04:22 -04:00
|
|
|
if(tr) {
|
|
|
|
size_t trlen;
|
|
|
|
result = Curl_dyn_add(&conn->trailer, (char *)"\x0d\x0a");
|
|
|
|
if(result)
|
|
|
|
return CHUNKE_OUT_OF_MEMORY;
|
2007-02-21 16:59:40 -05:00
|
|
|
|
2020-06-05 08:04:22 -04:00
|
|
|
tr = Curl_dyn_ptr(&conn->trailer);
|
|
|
|
trlen = Curl_dyn_len(&conn->trailer);
|
2007-01-16 17:26:50 -05:00
|
|
|
/* Convert to host encoding before calling Curl_client_write */
|
2020-06-05 08:04:22 -04:00
|
|
|
result = Curl_convert_from_network(conn->data, tr, trlen);
|
2011-04-19 18:48:20 -04:00
|
|
|
if(result)
|
2007-01-16 17:26:50 -05:00
|
|
|
/* Curl_convert_from_network calls failf if unsuccessful */
|
|
|
|
/* Treat it as a bad chunk */
|
2010-08-25 07:42:14 -04:00
|
|
|
return CHUNKE_BAD_CHUNK;
|
|
|
|
|
2008-01-02 17:30:34 -05:00
|
|
|
if(!data->set.http_te_skip) {
|
2021-01-08 11:58:15 -05:00
|
|
|
result = Curl_client_write(data, CLIENTWRITE_HEADER, tr, trlen);
|
2019-10-01 09:40:05 -04:00
|
|
|
if(result) {
|
|
|
|
*extrap = result;
|
|
|
|
return CHUNKE_PASSTHRU_ERROR;
|
|
|
|
}
|
2008-01-02 17:30:34 -05:00
|
|
|
}
|
2020-06-05 08:04:22 -04:00
|
|
|
Curl_dyn_reset(&conn->trailer);
|
2010-08-25 07:42:14 -04:00
|
|
|
ch->state = CHUNK_TRAILER_CR;
|
2014-01-16 18:34:36 -05:00
|
|
|
if(*datap == 0x0a)
|
|
|
|
/* already on the LF */
|
|
|
|
break;
|
2005-07-12 14:15:34 -04:00
|
|
|
}
|
2010-08-25 07:42:14 -04:00
|
|
|
else {
|
|
|
|
/* no trailer, we're on the final CRLF pair */
|
|
|
|
ch->state = CHUNK_TRAILER_POSTCR;
|
|
|
|
break; /* don't advance the pointer */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2020-06-05 08:04:22 -04:00
|
|
|
result = Curl_dyn_addn(&conn->trailer, datap, 1);
|
|
|
|
if(result)
|
|
|
|
return CHUNKE_OUT_OF_MEMORY;
|
2010-08-25 07:42:14 -04:00
|
|
|
}
|
|
|
|
datap++;
|
|
|
|
length--;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case CHUNK_TRAILER_CR:
|
|
|
|
if(*datap == 0x0a) {
|
|
|
|
ch->state = CHUNK_TRAILER_POSTCR;
|
2005-07-12 14:15:34 -04:00
|
|
|
datap++;
|
|
|
|
length--;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
return CHUNKE_BAD_CHUNK;
|
|
|
|
break;
|
|
|
|
|
2010-08-25 07:42:14 -04:00
|
|
|
case CHUNK_TRAILER_POSTCR:
|
|
|
|
/* We enter this state when a CR should arrive so we expect to
|
|
|
|
have to first pass a CR before we wait for LF */
|
2014-01-16 18:34:36 -05:00
|
|
|
if((*datap != 0x0d) && (*datap != 0x0a)) {
|
2010-08-25 07:42:14 -04:00
|
|
|
/* not a CR then it must be another header in the trailer */
|
|
|
|
ch->state = CHUNK_TRAILER;
|
|
|
|
break;
|
|
|
|
}
|
2007-02-21 16:59:40 -05:00
|
|
|
if(*datap == 0x0d) {
|
2014-01-16 18:34:36 -05:00
|
|
|
/* skip if CR */
|
2007-02-21 16:59:40 -05:00
|
|
|
datap++;
|
|
|
|
length--;
|
|
|
|
}
|
2014-01-16 18:34:36 -05:00
|
|
|
/* now wait for the final LF */
|
|
|
|
ch->state = CHUNK_STOP;
|
2007-02-21 16:59:40 -05:00
|
|
|
break;
|
|
|
|
|
2001-03-07 18:28:22 -05:00
|
|
|
case CHUNK_STOP:
|
2007-11-07 04:21:35 -05:00
|
|
|
if(*datap == 0x0a) {
|
2007-02-21 16:59:40 -05:00
|
|
|
length--;
|
|
|
|
|
|
|
|
/* Record the length of any data left in the end of the buffer
|
|
|
|
even if there's no more chunks to read */
|
2014-01-19 08:04:59 -05:00
|
|
|
ch->dataleft = curlx_sotouz(length);
|
2007-02-21 16:59:40 -05:00
|
|
|
|
|
|
|
return CHUNKE_STOP; /* return stop */
|
|
|
|
}
|
2010-08-25 07:42:14 -04:00
|
|
|
else
|
2007-02-21 16:59:40 -05:00
|
|
|
return CHUNKE_BAD_CHUNK;
|
2001-03-07 18:28:22 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return CHUNKE_OK;
|
|
|
|
}
|
2014-03-14 10:44:18 -04:00
|
|
|
|
|
|
|
/*
 * Curl_chunked_strerror() returns a constant, human readable string for the
 * given chunk decoder return code. Codes without a dedicated message map to
 * "OK". CHUNKE_PASSTHRU_ERROR is not expected to be passed in here: it
 * triggers a debug assert and yields an empty string.
 */
const char *Curl_chunked_strerror(CHUNKcode code)
{
  switch(code) {
  default:
    return "OK";
  case CHUNKE_TOO_LONG_HEX:
    return "Too long hexadecimal number";
  case CHUNKE_ILLEGAL_HEX:
    return "Illegal or missing hexadecimal sequence";
  case CHUNKE_BAD_CHUNK:
    return "Malformed encoding found";
  case CHUNKE_PASSTHRU_ERROR:
    DEBUGASSERT(0); /* never used */
    return "";
  case CHUNKE_BAD_ENCODING:
    return "Bad content-encoding found";
  case CHUNKE_OUT_OF_MEMORY:
    return "Out of memory";
  }
}
|
|
|
|
|
2002-06-11 07:13:01 -04:00
|
|
|
#endif /* CURL_DISABLE_HTTP */
|