mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Minor improvements to fd_read_hunk.
This commit is contained in:
parent
f65e63492b
commit
f238f09492
@ -1,3 +1,11 @@
|
|||||||
|
2005-07-02 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* http.c (response_head_terminator): Minor optimization.
|
||||||
|
|
||||||
|
* retr.c (fd_read_hunk): Call terminator with pointer to the start
|
||||||
|
of the data and the pointer to the current data. Changed all
|
||||||
|
callers.
|
||||||
|
|
||||||
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* url.c (url_parse): Make sure u->params is not initialized for
|
* url.c (url_parse): Make sure u->params is not initialized for
|
||||||
|
53
src/http.c
53
src/http.c
@ -416,40 +416,51 @@ post_file (int sock, const char *file_name, wgint promised_size)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Determine whether [START, PEEKED + PEEKLEN) contains an empty line.
|
||||||
|
If so, return the pointer to the position after the line, otherwise
|
||||||
|
return NULL. This is used as callback to fd_read_hunk. The data
|
||||||
|
between START and PEEKED has been read and cannot be "unread"; the
|
||||||
|
data after PEEKED has only been peeked. */
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
response_head_terminator (const char *hunk, int oldlen, int peeklen)
|
response_head_terminator (const char *start, const char *peeked, int peeklen)
|
||||||
{
|
{
|
||||||
const char *start, *end;
|
const char *p, *end;
|
||||||
|
|
||||||
/* If at first peek, verify whether HUNK starts with "HTTP". If
|
/* If at first peek, verify whether the data at START begins with "HTTP". If
|
||||||
not, this is a HTTP/0.9 request and we must bail out without
|
not, this is a HTTP/0.9 request and we must bail out without
|
||||||
reading anything. */
|
reading anything. */
|
||||||
if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
|
if (start == peeked && 0 != memcmp (start, "HTTP", MIN (peeklen, 4)))
|
||||||
return hunk;
|
return start;
|
||||||
|
|
||||||
if (oldlen < 4)
|
/* Look for "\n[\r]\n", and return the following position if found.
|
||||||
start = hunk;
|
Start two chars before the current to cover the possibility that
|
||||||
else
|
part of the terminator (e.g. "\n\r") arrived in the previous
|
||||||
start = hunk + oldlen - 4;
|
batch. */
|
||||||
end = hunk + oldlen + peeklen;
|
p = peeked - start < 2 ? start : peeked - 2;
|
||||||
|
end = peeked + peeklen;
|
||||||
|
|
||||||
for (; start < end - 1; start++)
|
/* Check for \n\r\n or \n\n anywhere in [p, end-2). */
|
||||||
if (*start == '\n')
|
for (; p < end - 2; p++)
|
||||||
|
if (*p == '\n')
|
||||||
{
|
{
|
||||||
if (start < end - 2
|
if (p[1] == '\r' && p[2] == '\n')
|
||||||
&& start[1] == '\r'
|
return p + 3;
|
||||||
&& start[2] == '\n')
|
else if (p[1] == '\n')
|
||||||
return start + 3;
|
return p + 2;
|
||||||
if (start[1] == '\n')
|
|
||||||
return start + 2;
|
|
||||||
}
|
}
|
||||||
|
/* p==end-2: check for \n\n directly preceding END. */
|
||||||
|
if (p[0] == '\n' && p[1] == '\n')
|
||||||
|
return p + 2;
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The maximum size of a single HTTP response we care to read. This
|
/* The maximum size of a single HTTP response we care to read. Rather
|
||||||
is not meant to impose an arbitrary limit, but to protect the user
|
than being a limit of the reader implementation, this limit
|
||||||
from Wget slurping up available memory upon encountering malicious
|
prevents Wget from slurping all available memory upon encountering
|
||||||
or buggy server output. Define it to 0 to remove the limit. */
|
malicious or buggy server output, thus protecting the user. Define
|
||||||
|
it to 0 to remove the limit. */
|
||||||
|
|
||||||
#define HTTP_RESPONSE_MAX_SIZE 65536
|
#define HTTP_RESPONSE_MAX_SIZE 65536
|
||||||
|
|
||||||
|
40
src/retr.c
40
src/retr.c
@ -336,22 +336,35 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos,
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read a hunk of data from FD, up until a terminator. The terminator
|
/* Read a hunk of data from FD, up until a terminator. The hunk is
|
||||||
is whatever the TERMINATOR function determines it to be; for
|
limited by whatever the TERMINATOR callback chooses as its
|
||||||
example, it can be a line of data, or the head of an HTTP response.
|
terminator. For example, if terminator stops at newline, the hunk
|
||||||
The function returns the data read allocated with malloc.
|
will consist of a line of data; if terminator stops at two
|
||||||
|
newlines, it can be used to read the head of an HTTP response.
|
||||||
|
Upon determining the boundary, the function returns the data (up to
|
||||||
|
the terminator) in malloc-allocated storage.
|
||||||
|
|
||||||
In case of error, NULL is returned. In case of EOF and no data
|
In case of read error, NULL is returned. In case of EOF and no
|
||||||
read, NULL is returned and errno set to 0. In case of EOF with
|
data read, NULL is returned and errno set to 0. In case of having
|
||||||
data having been read, the data is returned, but it will
|
read some data, but encountering EOF before seeing the terminator,
|
||||||
(obviously) not contain the terminator.
|
the data that has been read is returned, but it will (obviously)
|
||||||
|
not contain the terminator.
|
||||||
|
|
||||||
|
The TERMINATOR function is called with three arguments: the
|
||||||
|
beginning of the data read so far, the beginning of the current
|
||||||
|
block of peeked-at data, and the length of the current block.
|
||||||
|
Depending on its needs, the function is free to choose whether to
|
||||||
|
analyze all data or just the newly arrived data. If TERMINATOR
|
||||||
|
returns NULL, it means that the terminator has not been seen.
|
||||||
|
Otherwise it should return a pointer to the character immediately
|
||||||
|
following the terminator.
|
||||||
|
|
||||||
The idea is to be able to read a line of input, or otherwise a hunk
|
The idea is to be able to read a line of input, or otherwise a hunk
|
||||||
of text, such as the head of an HTTP response, without crossing the
|
of text, such as the head of an HTTP response, without crossing the
|
||||||
boundary, so that the next call to fd_read etc. reads the data
|
boundary, so that the next call to fd_read etc. reads the data
|
||||||
after the hunk. To achieve that, this function does the following:
|
after the hunk. To achieve that, this function does the following:
|
||||||
|
|
||||||
1. Peek at available data.
|
1. Peek at incoming data.
|
||||||
|
|
||||||
2. Determine whether the peeked data, along with the previously
|
2. Determine whether the peeked data, along with the previously
|
||||||
read data, includes the terminator.
|
read data, includes the terminator.
|
||||||
@ -396,12 +409,13 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
|
|||||||
xfree (hunk);
|
xfree (hunk);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
end = terminator (hunk, tail, pklen);
|
end = terminator (hunk, hunk + tail, pklen);
|
||||||
if (end)
|
if (end)
|
||||||
{
|
{
|
||||||
/* The data contains the terminator: we'll drain the data up
|
/* The data contains the terminator: we'll drain the data up
|
||||||
to the end of the terminator. */
|
to the end of the terminator. */
|
||||||
remain = end - (hunk + tail);
|
remain = end - (hunk + tail);
|
||||||
|
assert (remain >= 0);
|
||||||
if (remain == 0)
|
if (remain == 0)
|
||||||
{
|
{
|
||||||
/* No more data needs to be read. */
|
/* No more data needs to be read. */
|
||||||
@ -471,11 +485,11 @@ fd_read_hunk (int fd, hunk_terminator_t terminator, long sizehint, long maxsize)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static const char *
|
static const char *
|
||||||
line_terminator (const char *hunk, int oldlen, int peeklen)
|
line_terminator (const char *start, const char *peeked, int peeklen)
|
||||||
{
|
{
|
||||||
const char *p = memchr (hunk + oldlen, '\n', peeklen);
|
const char *p = memchr (peeked, '\n', peeklen);
|
||||||
if (p)
|
if (p)
|
||||||
/* p+1 because we want the line to include '\n' */
|
/* p+1 because the line must include '\n' */
|
||||||
return p + 1;
|
return p + 1;
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ enum {
|
|||||||
|
|
||||||
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
|
int fd_read_body (int, FILE *, wgint, wgint, wgint *, wgint *, double *, int);
|
||||||
|
|
||||||
typedef const char *(*hunk_terminator_t) (const char *, int, int);
|
typedef const char *(*hunk_terminator_t) (const char *, const char *, int);
|
||||||
|
|
||||||
char *fd_read_hunk (int, hunk_terminator_t, long, long);
|
char *fd_read_hunk (int, hunk_terminator_t, long, long);
|
||||||
char *fd_read_line (int);
|
char *fd_read_line (int);
|
||||||
|
Loading…
Reference in New Issue
Block a user