1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Minor improvements to fd_read_hunk.

This commit is contained in:
hniksic 2005-07-01 16:37:50 -07:00
parent f65e63492b
commit f238f09492
4 changed files with 68 additions and 35 deletions

View File

@ -1,3 +1,11 @@
2005-07-02 Hrvoje Niksic <hniksic@xemacs.org>
* http.c (response_head_terminator): Minor optimization.
* retr.c (fd_read_hunk): Call terminator with pointer to the start
of the data and the pointer to the current data. Changed all
callers.
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org> 2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (url_parse): Make sure u->params is not initialized for * url.c (url_parse): Make sure u->params is not initialized for

View File

@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
return 0; return 0;
} }
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
If so, return the pointer to the position after the line, otherwise
return NULL. This is used as callback to fd_read_hunk. The data
between START and PEEKED has been read and cannot be "unread"; the
data after PEEKED has only been peeked. */
static const char * static const char *
response_head_terminator (const char *hunk, int oldlen, int peeklen) response_head_terminator (const char *start, const char *peeked, int peeklen)
{ {
const char *start, *end; const char *p, *end;
/* If at first peek, verify whether HUNK starts with "HTTP". If /* If at first peek, verify whether HUNK starts with "HTTP". If
not, this is a HTTP/0.9 request and we must bail out without not, this is a HTTP/0.9 request and we must bail out without
reading anything. */ reading anything. */
if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4))) if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
return hunk; return start;
if (oldlen < 4) /* Look for "\n[\r]\n", and return the following position if found.
start = hunk; Start two chars before the current to cover the possibility that
else part of the terminator (e.g. "\n\r") arrived in the previous
start = hunk + oldlen - 4; batch. */
end = hunk + oldlen + peeklen; p = peeked - start < 2 ? start : peeked - 2;
end = peeked + peeklen;
for (; start < end - 1; start++) /* Check for \n\r\n or \n\n anywhere in [p, end-2). */
if (*start == '\n') for (; p < end - 2; p++)
if (*p == '\n')
{ {
if (start < end - 2 if (p[1] == '\r' && p[2] == '\n')
&& start[1] == '\r' return p + 3;
&& start[2] == '\n') else if (p[1] == '\n')
return start + 3; return p + 2;
if (start[1] == '\n')
return start + 2;
} }
/* p==end-2 (provided [p, END) held at least two bytes): check for \n\n directly preceding END. */
if (p[0] == '\n' && p[1] == '\n')
return p + 2;
return NULL; return NULL;
} }
/* The maximum size of a single HTTP response we care to read. This /* The maximum size of a single HTTP response we care to read. Rather
is not meant to impose an arbitrary limit, but to protect the user than being a limit of the reader implementation, this limit
from Wget slurping up available memory upon encountering malicious prevents Wget from slurping all available memory upon encountering
or buggy server output. Define it to 0 to remove the limit. */ malicious or buggy server output, thus protecting the user. Define
it to 0 to remove the limit. */
#define HTTP_RESPONSE_MAX_SIZE 65536 #define HTTP_RESPONSE_MAX_SIZE 65536

View File

@ -336,22 +336,35 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
return ret; return ret;
} }
/* Read a hunk of data from FD, up until a terminator. The terminator /* Read a hunk of data from FD, up until a terminator. The hunk is
is whatever the TERMINATOR function determines it to be; for limited by whatever the TERMINATOR callback chooses as its
example, it can be a line of data, or the head of an HTTP response. terminator. For example, if terminator stops at newline, the hunk
The function returns the data read allocated with malloc. will consist of a line of data; if terminator stops at two
newlines, it can be used to read the head of an HTTP response.
Upon determining the boundary, the function returns the data (up to
the terminator) in malloc-allocated storage.
In case of error, NULL is returned. In case of EOF and no data In case of read error, NULL is returned. In case of EOF and no
read, NULL is returned and errno set to 0. In case of EOF with data read, NULL is returned and errno set to 0. In case of having
data having been read, the data is returned, but it will read some data, but encountering EOF before seeing the terminator,
(obviously) not contain the terminator. the data that has been read is returned, but it will (obviously)
not contain the terminator.
The TERMINATOR function is called with three arguments: the
beginning of the data read so far, the beginning of the current
block of peeked-at data, and the length of the current block.
Depending on its needs, the function is free to choose whether to
analyze all data or just the newly arrived data. If TERMINATOR
returns NULL, it means that the terminator has not been seen.
Otherwise it should return a pointer to the character immediately
following the terminator.
The idea is to be able to read a line of input, or otherwise a hunk The idea is to be able to read a line of input, or otherwise a hunk
of text, such as the head of an HTTP response, without crossing the of text, such as the head of an HTTP response, without crossing the
boundary, so that the next call to fd_read etc. reads the data boundary, so that the next call to fd_read etc. reads the data
after the hunk. To achieve that, this function does the following: after the hunk. To achieve that, this function does the following:
1. Peek at available data. 1. Peek at incoming data.
2. Determine whether the peeked data, along with the previously 2. Determine whether the peeked data, along with the previously
read data, includes the terminator. read data, includes the terminator.
@ -396,12 +409,13 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
xfree (hunk); xfree (hunk);
return NULL; return NULL;
} }
end = terminator (hunk, tail, pklen); end = terminator (hunk, hunk + tail, pklen);
if (end) if (end)
{ {
/* The data contains the terminator: we'll drain the data up /* The data contains the terminator: we'll drain the data up
to the end of the terminator. */ to the end of the terminator. */
remain = end - (hunk + tail); remain = end - (hunk + tail);
assert (remain >= 0);
if (remain == 0) if (remain == 0)
{ {
/* No more data needs to be read. */ /* No more data needs to be read. */
@ -471,11 +485,11 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
} }
static const char * static const char *
line_terminator (const char *hunk, int oldlen, int peeklen) line_terminator (const char *start, const char *peeked, int peeklen)
{ {
const char *p = memchr (hunk + oldlen, '\n', peeklen); const char *p = memchr (peeked, '\n', peeklen);
if (p) if (p)
/* p+1 because we want the line to include '\n' */ /* p+1 because the line must include '\n' */
return p + 1; return p + 1;
return NULL; return NULL;
} }

View File

@ -45,7 +45,7 @@ enum {
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int); int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
typedef const char *(*hunk_terminator_t) (const char *, int, int); typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
char *fd_read_hunk (int, hunk_terminator_t, long, long); char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int); char *fd_read_line (int);