1
0
mirror of https://github.com/moparisthebest/curl synced 2024-12-21 23:58:49 -05:00

chunked parsing: relax the CR strictness

Allow chunked-encoded data to be parsed even when its lines end with a bare
LF instead of CRLF. Browsers accept such data as well.
This commit is contained in:
Daniel Stenberg 2014-01-17 00:34:36 +01:00
parent 3f5546b2be
commit eb02a99c61
2 changed files with 35 additions and 92 deletions

View File

@ -88,8 +88,8 @@ static bool Curl_isxdigit(char digit)
void Curl_httpchunk_init(struct connectdata *conn) void Curl_httpchunk_init(struct connectdata *conn)
{ {
struct Curl_chunker *chunk = &conn->chunk; struct Curl_chunker *chunk = &conn->chunk;
chunk->hexindex=0; /* start at 0 */ chunk->hexindex=0; /* start at 0 */
chunk->dataleft=0; /* no data left yet! */ chunk->dataleft=0; /* no data left yet! */
chunk->state = CHUNK_HEX; /* we get hex first! */ chunk->state = CHUNK_HEX; /* we get hex first! */
} }
@ -143,11 +143,11 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
} }
else { else {
char *endptr; char *endptr;
if(0 == ch->hexindex) { if(0 == ch->hexindex)
/* This is illegal data, we received junk where we expected /* This is illegal data, we received junk where we expected
a hexadecimal digit. */ a hexadecimal digit. */
return CHUNKE_ILLEGAL_HEX; return CHUNKE_ILLEGAL_HEX;
}
/* length and datap are unmodified */ /* length and datap are unmodified */
ch->hexbuffer[ch->hexindex]=0; ch->hexbuffer[ch->hexindex]=0;
@ -164,44 +164,29 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(errno == ERANGE) if(errno == ERANGE)
/* over or underflow is an error */ /* over or underflow is an error */
return CHUNKE_ILLEGAL_HEX; return CHUNKE_ILLEGAL_HEX;
ch->state = CHUNK_POSTHEX; ch->state = CHUNK_LF; /* now wait for the CRLF */
} }
break; break;
case CHUNK_POSTHEX: case CHUNK_LF:
/* In this state, we're waiting for CRLF to arrive. We support /* waiting for the LF after a chunk size */
this to allow so called chunk-extensions to show up here
before the CRLF comes. */
if(*datap == 0x0d)
ch->state = CHUNK_CR;
length--;
datap++;
break;
case CHUNK_CR:
/* waiting for the LF */
if(*datap == 0x0a) { if(*datap == 0x0a) {
/* we're now expecting data to come, unless size was zero! */ /* we're now expecting data to come, unless size was zero! */
if(0 == ch->datasize) { if(0 == ch->datasize) {
ch->state = CHUNK_TRAILER; /* now check for trailers */ ch->state = CHUNK_TRAILER; /* now check for trailers */
conn->trlPos=0; conn->trlPos=0;
} }
else { else
ch->state = CHUNK_DATA; ch->state = CHUNK_DATA;
}
} }
else
/* previously we got a fake CR, go back to CR waiting! */
ch->state = CHUNK_CR;
datap++; datap++;
length--; length--;
break; break;
case CHUNK_DATA: case CHUNK_DATA:
/* we get pure and fine data /* We expect 'datasize' of data. We have 'length' right now, it can be
more or less than 'datasize'. Get the smallest piece.
We expect another 'datasize' of data. We have 'length' right now,
it can be more or less than 'datasize'. Get the smallest piece.
*/ */
piece = (ch->datasize >= length)?length:ch->datasize; piece = (ch->datasize >= length)?length:ch->datasize;
@ -256,37 +241,22 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(0 == ch->datasize) if(0 == ch->datasize)
/* end of data this round, we now expect a trailing CRLF */ /* end of data this round, we now expect a trailing CRLF */
ch->state = CHUNK_POSTCR;
break;
case CHUNK_POSTCR:
if(*datap == 0x0d) {
ch->state = CHUNK_POSTLF; ch->state = CHUNK_POSTLF;
datap++;
length--;
}
else
return CHUNKE_BAD_CHUNK;
break; break;
case CHUNK_POSTLF: case CHUNK_POSTLF:
if(*datap == 0x0a) { if(*datap == 0x0a) {
/* /* The last one before we go back to hex state and start all over. */
* The last one before we go back to hex state and start all Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
* over.
*/
Curl_httpchunk_init(conn);
datap++;
length--;
} }
else else if(*datap != 0x0d)
return CHUNKE_BAD_CHUNK; return CHUNKE_BAD_CHUNK;
datap++;
length--;
break; break;
case CHUNK_TRAILER: case CHUNK_TRAILER:
if(*datap == 0x0d) { if((*datap == 0x0d) || (*datap == 0x0a)) {
/* this is the end of a trailer, but if the trailer was zero bytes /* this is the end of a trailer, but if the trailer was zero bytes
there was no trailer and we move on */ there was no trailer and we move on */
@ -312,6 +282,9 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
} }
conn->trlPos=0; conn->trlPos=0;
ch->state = CHUNK_TRAILER_CR; ch->state = CHUNK_TRAILER_CR;
if(*datap == 0x0a)
/* already on the LF */
break;
} }
else { else {
/* no trailer, we're on the final CRLF pair */ /* no trailer, we're on the final CRLF pair */
@ -357,27 +330,18 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
case CHUNK_TRAILER_POSTCR: case CHUNK_TRAILER_POSTCR:
/* We enter this state when a CR should arrive so we expect to /* We enter this state when a CR should arrive so we expect to
have to first pass a CR before we wait for LF */ have to first pass a CR before we wait for LF */
if(*datap != 0x0d) { if((*datap != 0x0d) && (*datap != 0x0a)) {
/* not a CR then it must be another header in the trailer */ /* not a CR then it must be another header in the trailer */
ch->state = CHUNK_TRAILER; ch->state = CHUNK_TRAILER;
break; break;
} }
datap++;
length--;
/* now wait for the final LF */
ch->state = CHUNK_STOP;
break;
case CHUNK_STOPCR:
/* Read the final CRLF that ends all chunk bodies */
if(*datap == 0x0d) { if(*datap == 0x0d) {
ch->state = CHUNK_STOP; /* skip if CR */
datap++; datap++;
length--; length--;
} }
else /* now wait for the final LF */
return CHUNKE_BAD_CHUNK; ch->state = CHUNK_STOP;
break; break;
case CHUNK_STOP: case CHUNK_STOP:
@ -392,9 +356,6 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
} }
else else
return CHUNKE_BAD_CHUNK; return CHUNKE_BAD_CHUNK;
default:
return CHUNKE_STATE_ERROR;
} }
} }
return CHUNKE_OK; return CHUNKE_OK;

View File

@ -29,40 +29,25 @@
#define MAXNUM_SIZE 16 #define MAXNUM_SIZE 16
typedef enum { typedef enum {
CHUNK_FIRST, /* never use */ /* await and buffer all hexadecimal digits until we get one that isn't a
hexadecimal digit. When done, we go CHUNK_LF */
/* In this we await and buffer all hexadecimal digits until we get one
that isn't a hexadecimal digit. When done, we go POSTHEX */
CHUNK_HEX, CHUNK_HEX,
/* We have received the hexadecimal digit and we eat all characters until /* wait for LF, ignore all else */
we get a CRLF pair. When we see a CR we go to the CR state. */ CHUNK_LF,
CHUNK_POSTHEX,
/* A single CR has been found and we should get a LF right away in this
state or we go back to POSTHEX. When LF is received, we go to DATA.
If the size given was zero, we set state to STOP and return. */
CHUNK_CR,
/* We eat the amount of data specified. When done, we move on to the /* We eat the amount of data specified. When done, we move on to the
POST_CR state. */ POST_CR state. */
CHUNK_DATA, CHUNK_DATA,
/* POSTCR should get a CR and nothing else, then move to POSTLF */ /* POSTLF should get a CR and then a LF and nothing else, then move back to
CHUNK_POSTCR, HEX as the CRLF combination marks the end of a chunk. A missing CR is no
big deal. */
/* POSTLF should get a LF and nothing else, then move back to HEX as the
CRLF combination marks the end of a chunk */
CHUNK_POSTLF, CHUNK_POSTLF,
/* Each Chunk body should end with a CRLF. Read a CR and nothing else, /* Used to mark that we're out of the game. NOTE: that there's a 'dataleft'
then move to CHUNK_STOP */ field in the struct that will tell how many bytes that were not passed to
CHUNK_STOPCR, the client in the end of the last buffer! */
/* This is mainly used to really mark that we're out of the game.
NOTE: that there's a 'dataleft' field in the struct that will tell how
many bytes that were not passed to the client in the end of the last
buffer! */
CHUNK_STOP, CHUNK_STOP,
/* At this point optional trailer headers can be found, unless the next line /* At this point optional trailer headers can be found, unless the next line
@ -77,10 +62,7 @@ typedef enum {
signalled If this is an empty trailer CHUNKE_STOP will be signalled. signalled If this is an empty trailer CHUNKE_STOP will be signalled.
Otherwise the trailer will be broadcasted via Curl_client_write() and the Otherwise the trailer will be broadcasted via Curl_client_write() and the
next state will be CHUNK_TRAILER */ next state will be CHUNK_TRAILER */
CHUNK_TRAILER_POSTCR, CHUNK_TRAILER_POSTCR
CHUNK_LAST /* never use */
} ChunkyState; } ChunkyState;
typedef enum { typedef enum {