From 6d67d793f51af4e0a5a840751c15308ab76ba8b6 Mon Sep 17 00:00:00 2001 From: Gijs van Tulder Date: Sat, 28 Jan 2012 14:09:29 +0100 Subject: [PATCH] Add support for chunks to the WARC outputter. --- src/ChangeLog | 6 ++++++ src/retr.c | 17 +++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index e10d4c02..141b7e18 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2012-01-27 Gijs van Tulder + + * retr.c (fd_read_body): If the response is chunked, the chunk + headers are now written to the WARC file, making the WARC file + an exact copy of the HTTP response. + 2012-01-27 Gijs van Tulder * retr.c (fd_read_body): Fix a memory leak with chunked responses. diff --git a/src/retr.c b/src/retr.c index f57b2c6d..8c8cdf5b 100644 --- a/src/retr.c +++ b/src/retr.c @@ -213,6 +213,9 @@ write_data (FILE *out, FILE *out2, const char *buf, int bufsize, the data is stored to ELAPSED. If OUT2 is non-NULL, the contents is also written to OUT2. + OUT2 will get an exact copy of the response: if this is a chunked + response, everything -- including the chunk headers -- is written + to OUT2. (OUT will only get the unchunked response.) The function exits and returns the amount of data read. In case of error while reading data, -1 is returned. In case of error while @@ -305,6 +308,8 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, ret = -1; break; } + else if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); remaining_chunk_size = strtol (line, &endl, 16); xfree (line); @@ -316,7 +321,11 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, if (line == NULL) ret = -1; else - xfree (line); + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } break; } } @@ -384,7 +393,11 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, break; } else - xfree (line); + { + if (out2 != NULL) + fwrite (line, 1, strlen (line), out2); + xfree (line); + } } } }