[svn] Minor improvements to fd_read_hunk.

This commit is contained in:
hniksic 2005-07-01 16:37:50 -07:00
parent f65e63492b
commit f238f09492
4 changed files with 68 additions and 35 deletions

View File

@ -1,3 +1,11 @@
2005-07-02 Hrvoje Niksic <hniksic@xemacs.org>
* http.c (response_head_terminator): Minor optimization.
* retr.c (fd_read_hunk): Call terminator with pointer to the start
of the data and the pointer to the current data. Changed all
callers.
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (url_parse): Make sure u->params is not initialized for

View File

@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
return 0;
}
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
If so, return the pointer to the position after the line, otherwise
return NULL. This is used as callback to fd_read_hunk. The data
between START and PEEKED has been read and cannot be "unread"; the
data after PEEKED has only been peeked. */
static const char *
response_head_terminator (const char *hunk, int oldlen, int peeklen)
response_head_terminator (const char *start, const char *peeked, int peeklen)
{
const char *start, *end;
const char *p, *end;
/* If at first peek, verify whether the data starts with "HTTP". If
not, this is an HTTP/0.9 response and we must bail out without
reading anything. */
if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
return hunk;
if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
return start;
if (oldlen < 4)
start = hunk;
else
start = hunk + oldlen - 4;
end = hunk + oldlen + peeklen;
/* Look for "\n[\r]\n", and return the following position if found.
Start two chars before the current to cover the possibility that
part of the terminator (e.g. "\n\r") arrived in the previous
batch. */
p = peeked - start < 2 ? start : peeked - 2;
end = peeked + peeklen;
for (; start < end - 1; start++)
if (*start == '\n')
/* Check for \n\r\n or \n\n anywhere in [p, end-2). */
for (; p < end - 2; p++)
if (*p == '\n')
{
if (start < end - 2
&& start[1] == '\r'
&& start[2] == '\n')
return start + 3;
if (start[1] == '\n')
return start + 2;
if (p[1] == '\r' && p[2] == '\n')
return p + 3;
else if (p[1] == '\n')
return p + 2;
}
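/* Normally p==end-2 here: check for \n\n directly preceding END.
NOTE(review): if fewer than two bytes are available in total
(end - start < 2), the loop above is skipped with p > end-2 and
p[1] is at or past END -- confirm that read is harmless. */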
if (p[0] == '\n' && p[1] == '\n')
return p + 2;
return NULL;
}
/* The maximum size of a single HTTP response we care to read. This
is not meant to impose an arbitrary limit, but to protect the user
from Wget slurping up available memory upon encountering malicious
or buggy server output. Define it to 0 to remove the limit. */
/* The maximum size of a single HTTP response we care to read. Rather
than being a limit of the reader implementation, this limit
prevents Wget from slurping all available memory upon encountering
malicious or buggy server output, thus protecting the user. Define
it to 0 to remove the limit. */
#define HTTP_RESPONSE_MAX_SIZE 65536

View File

@ -336,22 +336,35 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
return ret;
}
/* Read a hunk of data from FD, up until a terminator. The terminator
is whatever the TERMINATOR function determines it to be; for
example, it can be a line of data, or the head of an HTTP response.
The function returns the data read allocated with malloc.
/* Read a hunk of data from FD, up until a terminator. The hunk is
limited by whatever the TERMINATOR callback chooses as its
terminator. For example, if terminator stops at newline, the hunk
will consist of a line of data; if terminator stops at two
newlines, it can be used to read the head of an HTTP response.
Upon determining the boundary, the function returns the data (up to
the terminator) in malloc-allocated storage.
In case of error, NULL is returned. In case of EOF and no data
read, NULL is returned and errno set to 0. In case of EOF with
data having been read, the data is returned, but it will
(obviously) not contain the terminator.
In case of read error, NULL is returned. In case of EOF and no
data read, NULL is returned and errno set to 0. In case of having
read some data, but encountering EOF before seeing the terminator,
the data that has been read is returned, but it will (obviously)
not contain the terminator.
The TERMINATOR function is called with three arguments: the
beginning of the data read so far, the beginning of the current
block of peeked-at data, and the length of the current block.
Depending on its needs, the function is free to choose whether to
analyze all data or just the newly arrived data. If TERMINATOR
returns NULL, it means that the terminator has not been seen.
Otherwise it should return a pointer to the character immediately
following the terminator.
The idea is to be able to read a line of input, or otherwise a hunk
of text, such as the head of an HTTP response, without crossing the
boundary, so that the next call to fd_read etc. reads the data
after the hunk. To achieve that, this function does the following:
1. Peek at available data.
1. Peek at incoming data.
2. Determine whether the peeked data, along with the previously
read data, includes the terminator.
@ -396,12 +409,13 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
xfree (hunk);
return NULL;
}
end = terminator (hunk, tail, pklen);
end = terminator (hunk, hunk + tail, pklen);
if (end)
{
/* The data contains the terminator: we'll drain the data up
to the end of the terminator. */
remain = end - (hunk + tail);
assert (remain >= 0);
if (remain == 0)
{
/* No more data needs to be read. */
@ -471,11 +485,11 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
}
static const char *
line_terminator (const char *hunk, int oldlen, int peeklen)
line_terminator (const char *start, const char *peeked, int peeklen)
{
const char *p = memchr (hunk + oldlen, '\n', peeklen);
const char *p = memchr (peeked, '\n', peeklen);
if (p)
/* p+1 because we want the line to include '\n' */
/* p+1 because the line must include '\n' */
return p + 1;
return NULL;
}

View File

@ -45,7 +45,7 @@ enum {
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
typedef const char *(*hunk_terminator_t) (const char *, int, int);
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
char *fd_read_hunk (int, hunk_terminator_t, long, long);
char *fd_read_line (int);