mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Minor improvements to fd_read_hunk.
This commit is contained in:
parent
f65e63492b
commit
f238f09492
@ -1,3 +1,11 @@
|
||||
2005-07-02 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* http.c (response_head_terminator): Minor optimization.
|
||||
|
||||
* retr.c (fd_read_hunk): Call terminator with pointer to the start
|
||||
of the data and the pointer to the current data. Changed all
|
||||
callers.
|
||||
|
||||
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* url.c (url_parse): Make sure u->params is not initialized for
|
||||
|
53
src/http.c
53
src/http.c
@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
|
||||
If so, return the pointer to the position after the line, otherwise
|
||||
return NULL. This is used as callback to fd_read_hunk. The data
|
||||
between START and PEEKED has been read and cannot be "unread"; the
|
||||
data after PEEKED has only been peeked. */
|
||||
|
||||
static const char *
|
||||
response_head_terminator (const char *hunk, int oldlen, int peeklen)
|
||||
response_head_terminator (const char *start, const char *peeked, int peeklen)
|
||||
{
|
||||
const char *start, *end;
|
||||
const char *p, *end;
|
||||
|
||||
/* If at first peek, verify whether HUNK starts with "HTTP". If
|
||||
not, this is an HTTP/0.9 response and we must bail out without
|
||||
reading anything. */
|
||||
if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
|
||||
return hunk;
|
||||
if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
|
||||
return start;
|
||||
|
||||
if (oldlen < 4)
|
||||
start = hunk;
|
||||
else
|
||||
start = hunk + oldlen - 4;
|
||||
end = hunk + oldlen + peeklen;
|
||||
/* Look for "\n[\r]\n", and return the following position if found.
|
||||
Start two chars before the current to cover the possibility that
|
||||
part of the terminator (e.g. "\n\r") arrived in the previous
|
||||
batch. */
|
||||
p = peeked - start < 2 ? start : peeked - 2;
|
||||
end = peeked + peeklen;
|
||||
|
||||
for (; start < end - 1; start++)
|
||||
if (*start == '\n')
|
||||
/* Check for \n\r\n or \n\n anywhere in [p, end-2). */
|
||||
for (; p < end - 2; p++)
|
||||
if (*p == '\n')
|
||||
{
|
||||
if (start < end - 2
|
||||
&& start[1] == '\r'
|
||||
&& start[2] == '\n')
|
||||
return start + 3;
|
||||
if (start[1] == '\n')
|
||||
return start + 2;
|
||||
if (p[1] == '\r' && p[2] == '\n')
|
||||
return p + 3;
|
||||
else if (p[1] == '\n')
|
||||
return p + 2;
|
||||
}
|
||||
/* p==end-2: check for \n\n directly preceding END. */
|
||||
if (p[0] == '\n' && p[1] == '\n')
|
||||
return p + 2;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* The maximum size of a single HTTP response we care to read. This
|
||||
is not meant to impose an arbitrary limit, but to protect the user
|
||||
from Wget slurping up available memory upon encountering malicious
|
||||
or buggy server output. Define it to 0 to remove the limit. */
|
||||
/* The maximum size of a single HTTP response we care to read. Rather
|
||||
than being a limit of the reader implementation, this limit
|
||||
prevents Wget from slurping all available memory upon encountering
|
||||
malicious or buggy server output, thus protecting the user. Define
|
||||
it to 0 to remove the limit. */
|
||||
|
||||
#define HTTP_RESPONSE_MAX_SIZE 65536
|
||||
|
||||
|
40
src/retr.c
40
src/retr.c
@ -336,22 +336,35 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Read a hunk of data from FD, up until a terminator. The terminator
|
||||
is whatever the TERMINATOR function determines it to be; for
|
||||
example, it can be a line of data, or the head of an HTTP response.
|
||||
The function returns the data read allocated with malloc.
|
||||
/* Read a hunk of data from FD, up until a terminator. The hunk is
|
||||
limited by whatever the TERMINATOR callback chooses as its
|
||||
terminator. For example, if terminator stops at newline, the hunk
|
||||
will consist of a line of data; if terminator stops at two
|
||||
newlines, it can be used to read the head of an HTTP response.
|
||||
Upon determining the boundary, the function returns the data (up to
|
||||
the terminator) in malloc-allocated storage.
|
||||
|
||||
In case of error, NULL is returned. In case of EOF and no data
|
||||
read, NULL is returned and errno set to 0. In case of EOF with
|
||||
data having been read, the data is returned, but it will
|
||||
(obviously) not contain the terminator.
|
||||
In case of read error, NULL is returned. In case of EOF and no
|
||||
data read, NULL is returned and errno set to 0. In case of having
|
||||
read some data, but encountering EOF before seeing the terminator,
|
||||
the data that has been read is returned, but it will (obviously)
|
||||
not contain the terminator.
|
||||
|
||||
The TERMINATOR function is called with three arguments: the
|
||||
beginning of the data read so far, the beginning of the current
|
||||
block of peeked-at data, and the length of the current block.
|
||||
Depending on its needs, the function is free to choose whether to
|
||||
analyze all data or just the newly arrived data. If TERMINATOR
|
||||
returns NULL, it means that the terminator has not been seen.
|
||||
Otherwise it should return a pointer to the character immediately
|
||||
following the terminator.
|
||||
|
||||
The idea is to be able to read a line of input, or otherwise a hunk
|
||||
of text, such as the head of an HTTP response, without crossing the
|
||||
boundary, so that the next call to fd_read etc. reads the data
|
||||
after the hunk. To achieve that, this function does the following:
|
||||
|
||||
1. Peek at available data.
|
||||
1. Peek at incoming data.
|
||||
|
||||
2. Determine whether the peeked data, along with the previously
|
||||
read data, includes the terminator.
|
||||
@ -396,12 +409,13 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
|
||||
xfree (hunk);
|
||||
return NULL;
|
||||
}
|
||||
end = terminator (hunk, tail, pklen);
|
||||
end = terminator (hunk, hunk + tail, pklen);
|
||||
if (end)
|
||||
{
|
||||
/* The data contains the terminator: we'll drain the data up
|
||||
to the end of the terminator. */
|
||||
remain = end - (hunk + tail);
|
||||
assert (remain >= 0);
|
||||
if (remain == 0)
|
||||
{
|
||||
/* No more data needs to be read. */
|
||||
@ -471,11 +485,11 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
|
||||
}
|
||||
|
||||
static const char *
|
||||
line_terminator (const char *hunk, int oldlen, int peeklen)
|
||||
line_terminator (const char *start, const char *peeked, int peeklen)
|
||||
{
|
||||
const char *p = memchr (hunk + oldlen, '\n', peeklen);
|
||||
const char *p = memchr (peeked, '\n', peeklen);
|
||||
if (p)
|
||||
/* p+1 because we want the line to include '\n' */
|
||||
/* p+1 because the line must include '\n' */
|
||||
return p + 1;
|
||||
return NULL;
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ enum {
|
||||
|
||||
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
|
||||
|
||||
typedef const char *(*hunk_terminator_t) (const char *, int, int);
|
||||
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
|
||||
|
||||
char *fd_read_hunk (int, hunk_terminator_t, long, long);
|
||||
char *fd_read_line (int);
|
||||
|
Loading…
Reference in New Issue
Block a user