1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Use fd_read_hunk for reading HTTP response head. Support HTTP/0.9

responses.
This commit is contained in:
hniksic 2003-11-27 15:29:36 -08:00
parent b83acf88c1
commit dfe1f43dc0
4 changed files with 474 additions and 492 deletions

View File

@ -1,3 +1,18 @@
2003-11-28 Hrvoje Niksic <hniksic@xemacs.org>
* http.c: Deleted the old functions header_process,
header_extract_number, header_exists, header_strdup,
http_process_range, http_process_none, http_process_type, and
http_process_connection.
* http.c (response_new): New function.
(response_header_bounds): Ditto.
(response_header_copy): Ditto.
(response_header_strdup): Ditto.
(response_status): Ditto.
(gethttp): Use the new response_* functions to parse the response.
Support HTTP/0.9 responses.
2003-11-27 Hrvoje Niksic <hniksic@xemacs.org> 2003-11-27 Hrvoje Niksic <hniksic@xemacs.org>
* progress.c (create_image): Don't calculate ETA if nothing has * progress.c (create_image): Don't calculate ETA if nothing has

View File

@ -76,6 +76,10 @@ extern int errno;
extern char *version_string; extern char *version_string;
extern LARGE_INT total_downloaded_bytes; extern LARGE_INT total_downloaded_bytes;
#ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif
static int cookies_loaded_p; static int cookies_loaded_p;
struct cookie_jar *wget_cookie_jar; struct cookie_jar *wget_cookie_jar;
@ -118,73 +122,308 @@ struct cookie_jar *wget_cookie_jar;
#define HTTP_STATUS_BAD_GATEWAY 502 #define HTTP_STATUS_BAD_GATEWAY 502
#define HTTP_STATUS_UNAVAILABLE 503 #define HTTP_STATUS_UNAVAILABLE 503
static const char *
head_terminator (const char *hunk, int oldlen, int peeklen)
{
const char *start, *end;
/* If at first peek, verify whether HUNK starts with "HTTP". If
not, this is a HTTP/0.9 request and we must bail out without
reading anything. */
if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4)))
return hunk;
if (oldlen < 4)
start = hunk;
else
start = hunk + oldlen - 4;
end = hunk + oldlen + peeklen;
for (; start < end - 1; start++)
if (*start == '\n')
{
if (start < end - 2
&& start[1] == '\r'
&& start[2] == '\n')
return start + 3;
if (start[1] == '\n')
return start + 2;
}
return NULL;
}
/* Read the HTTP request head from FD and return it. The error
conditions are the same as with fd_read_hunk.
To support HTTP/0.9 responses, this function tries to make sure
that the data begins with "HTTP". If this is not the case, no data
is read and an empty request is returned, so that the remaining
data can be treated as body. */
static char *
fd_read_http_head (int fd)
{
return fd_read_hunk (fd, head_terminator, 512);
}
struct response {
/* The response data. */
const char *data;
/* The array of pointers that indicate where each header starts.
For example, given three headers "foo", "bar", and "baz":
foo: value\r\nbar: value\r\nbaz: value\r\n\r\n
0 1 2 3
I.e. headers[0] points to the beginning of foo, headers[1] points
to the end of foo and the beginning of bar, etc. */
const char **headers;
};
static struct response *
response_new (const char *head)
{
const char *hdr;
int count, size;
struct response *resp = xnew0 (struct response);
resp->data = head;
if (*head == '\0')
{
/* Empty head means that we're dealing with a headerless
(HTTP/0.9) response. In that case, don't set HEADERS at
all. */
return resp;
}
/* Split HEAD into header lines, so that response_header_* functions
don't need to do this over and over again. */
size = count = 0;
hdr = head;
while (1)
{
DO_REALLOC (resp->headers, size, count + 1, const char *);
resp->headers[count++] = hdr;
/* Break upon encountering an empty line. */
if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n')
break;
/* Find the end of HDR, including continuations. */
do
{
const char *end = strchr (hdr, '\n');
if (end)
hdr = end + 1;
else
hdr += strlen (hdr);
}
while (*hdr == ' ' || *hdr == '\t');
}
DO_REALLOC (resp->headers, size, count + 1, const char *);
resp->headers[count++] = NULL;
return resp;
}
static int
response_header_bounds (const struct response *resp, const char *name,
const char **begptr, const char **endptr)
{
int i;
const char **headers = resp->headers;
int name_len;
if (!headers || !headers[1])
return 0;
name_len = strlen (name);
for (i = 1; headers[i + 1]; i++)
{
const char *b = headers[i];
const char *e = headers[i + 1];
if (e - b > name_len
&& b[name_len] == ':'
&& 0 == strncasecmp (b, name, name_len))
{
b += name_len + 1;
while (b < e && ISSPACE (*b))
++b;
while (b < e && ISSPACE (e[-1]))
--e;
*begptr = b;
*endptr = e;
return 1;
}
}
return 0;
}
static int
response_header_copy (const struct response *resp, const char *name,
char *buf, int bufsize)
{
const char *b, *e;
if (!response_header_bounds (resp, name, &b, &e))
return 0;
if (bufsize)
{
int len = MIN (e - b, bufsize);
strncpy (buf, b, len);
buf[len] = '\0';
}
return 1;
}
static char *
response_header_strdup (const struct response *resp, const char *name)
{
const char *b, *e;
if (!response_header_bounds (resp, name, &b, &e))
return NULL;
return strdupdelim (b, e);
}
/* Parse the HTTP status line, which is of format: /* Parse the HTTP status line, which is of format:
HTTP-Version SP Status-Code SP Reason-Phrase HTTP-Version SP Status-Code SP Reason-Phrase
The function returns the status-code, or -1 if the status line is The function returns the status-code, or -1 if the status line
malformed. The pointer to reason-phrase is returned in RP. */ appears malformed. The pointer to "reason-phrase" message is
returned in *MESSAGE. */
static int static int
parse_http_status_line (const char *line, const char **reason_phrase_ptr) response_status (const struct response *resp, char **message)
{ {
/* (the variables must not be named `major' and `minor', because int status;
that breaks compilation with SunOS4 cc.) */ const char *p, *end;
int mjr, mnr, statcode;
const char *p;
*reason_phrase_ptr = NULL; if (!resp->headers)
/* The standard format of HTTP-Version is: `HTTP/X.Y', where X is
major version, and Y is minor version. */
if (strncmp (line, "HTTP/", 5) != 0)
return -1;
line += 5;
/* Calculate major HTTP version. */
p = line;
for (mjr = 0; ISDIGIT (*line); line++)
mjr = 10 * mjr + (*line - '0');
if (*line != '.' || p == line)
return -1;
++line;
/* Calculate minor HTTP version. */
p = line;
for (mnr = 0; ISDIGIT (*line); line++)
mnr = 10 * mnr + (*line - '0');
if (*line != ' ' || p == line)
return -1;
/* Wget will accept only 1.0 and higher HTTP-versions. The value of
minor version can be safely ignored. */
if (mjr < 1)
return -1;
++line;
/* Calculate status code. */
if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2])))
return -1;
statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0');
/* Set up the reason phrase pointer. */
line += 3;
/* RFC2068 requires SPC here, but we allow the string to finish
here, in case no reason-phrase is present. */
if (*line != ' ')
{ {
if (!*line) /* For a HTTP/0.9 response, always assume 200 response. */
*reason_phrase_ptr = line; if (message)
else *message = xstrdup ("OK");
return -1; return 200;
} }
else
*reason_phrase_ptr = line + 1;
return statcode; p = resp->headers[0];
end = resp->headers[1];
if (!end)
return -1;
/* "HTTP" */
if (end - p < 4 || 0 != strncmp (p, "HTTP", 4))
return -1;
p += 4;
/* "/x.x" (optional because some Gnutella servers have been reported
as not sending the "/x.x" part. */
if (p < end && *p == '/')
{
++p;
while (p < end && ISDIGIT (*p))
++p;
if (p < end && *p == '.')
++p;
while (p < end && ISDIGIT (*p))
++p;
}
while (p < end && ISSPACE (*p))
++p;
if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2]))
return -1;
status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0');
p += 3;
if (message)
{
while (p < end && ISSPACE (*p))
++p;
while (p < end && ISSPACE (end[-1]))
--end;
*message = strdupdelim (p, end);
}
return status;
}
static void
response_free (struct response *resp)
{
xfree_null (resp->headers);
xfree (resp);
}
static void
print_server_response_1 (const char *b, const char *e)
{
char *ln;
if (b < e && e[-1] == '\n')
--e;
if (b < e && e[-1] == '\r')
--e;
BOUNDED_TO_ALLOCA (b, e, ln);
logprintf (LOG_VERBOSE, " %s\n", ln);
}
static void
print_server_response (const struct response *resp)
{
int i;
if (!resp->headers)
return;
for (i = 0; resp->headers[i + 1]; i++)
print_server_response_1 (resp->headers[i], resp->headers[i + 1]);
}
/* Parse the `Content-Range' header and extract the information it
contains. Returns 1 if successful, -1 otherwise. */
static int
parse_content_range (const char *hdr, long *first_byte_ptr,
long *last_byte_ptr, long *entity_length_ptr)
{
long num;
/* Ancient versions of Netscape proxy server, presumably predating
rfc2068, sent out `Content-Range' without the "bytes"
specifier. */
if (!strncasecmp (hdr, "bytes", 5))
{
hdr += 5;
/* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
HTTP spec. */
if (*hdr == ':')
++hdr;
while (ISSPACE (*hdr))
++hdr;
if (!*hdr)
return 0;
}
if (!ISDIGIT (*hdr))
return 0;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
return 0;
*first_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
return 0;
*last_byte_ptr = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
*entity_length_ptr = num;
return 1;
} }
#define WMIN(x, y) ((x) > (y) ? (y) : (x))
/* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly
PROMISED_SIZE bytes are sent over the wire -- if the file is PROMISED_SIZE bytes are sent over the wire -- if the file is
longer, read only that much; if the file is shorter, report an error. */ longer, read only that much; if the file is shorter, report an error. */
@ -208,7 +447,7 @@ post_file (int sock, const char *file_name, long promised_size)
int length = fread (chunk, 1, sizeof (chunk), fp); int length = fread (chunk, 1, sizeof (chunk), fp);
if (length == 0) if (length == 0)
break; break;
towrite = WMIN (promised_size - written, length); towrite = MIN (promised_size - written, length);
write_error = fd_write (sock, chunk, towrite, -1); write_error = fd_write (sock, chunk, towrite, -1);
if (write_error < 0) if (write_error < 0)
{ {
@ -231,204 +470,6 @@ post_file (int sock, const char *file_name, long promised_size)
DEBUGP (("done]\n")); DEBUGP (("done]\n"));
return 0; return 0;
} }
static const char *
next_header (const char *h)
{
const char *end = NULL;
const char *p = h;
do
{
p = strchr (p, '\n');
if (!p)
return end;
end = ++p;
}
while (*p == ' ' || *p == '\t');
return end;
}
/* Skip LWS (linear white space), if present. Returns number of
characters to skip. */
static int
skip_lws (const char *string)
{
const char *p = string;
while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n')
++p;
return p - string;
}
/* Check whether HEADER begins with NAME and, if yes, skip the `:' and
the whitespace, and call PROCFUN with the arguments of HEADER's
contents (after the `:' and space) and ARG. Otherwise, return 0. */
int
header_process (const char *header, const char *name,
int (*procfun) (const char *, void *),
void *arg)
{
/* Check whether HEADER matches NAME. */
while (*name && (TOLOWER (*name) == TOLOWER (*header)))
++name, ++header;
if (*name || *header++ != ':')
return 0;
header += skip_lws (header);
return ((*procfun) (header, arg));
}
/* Helper functions for use with header_process(). */
/* Extract a long integer from HEADER and store it to CLOSURE. If an
error is encountered, return 0, else 1. */
int
header_extract_number (const char *header, void *closure)
{
const char *p = header;
long result;
for (result = 0; ISDIGIT (*p); p++)
result = 10 * result + (*p - '0');
/* Failure if no number present. */
if (p == header)
return 0;
/* Skip trailing whitespace. */
p += skip_lws (p);
/* Indicate failure if trailing garbage is present. */
if (*p)
return 0;
*(long *)closure = result;
return 1;
}
/* Strdup HEADER, and place the pointer to CLOSURE. */
int
header_strdup (const char *header, void *closure)
{
*(char **)closure = xstrdup (header);
return 1;
}
/* Write the value 1 into the integer pointed to by CLOSURE. */
int
header_exists (const char *header, void *closure)
{
*(int *)closure = 1;
return 1;
}
/* Functions to be used as arguments to header_process(): */
struct http_process_range_closure {
long first_byte_pos;
long last_byte_pos;
long entity_length;
};
/* Parse the `Content-Range' header and extract the information it
contains. Returns 1 if successful, -1 otherwise. */
static int
http_process_range (const char *hdr, void *arg)
{
struct http_process_range_closure *closure
= (struct http_process_range_closure *)arg;
long num;
/* Certain versions of Nutscape proxy server send out
`Content-Length' without "bytes" specifier, which is a breach of
RFC2068 (as well as the HTTP/1.1 draft which was current at the
time). But hell, I must support it... */
if (!strncasecmp (hdr, "bytes", 5))
{
hdr += 5;
/* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the
HTTP spec. */
if (*hdr == ':')
++hdr;
hdr += skip_lws (hdr);
if (!*hdr)
return 0;
}
if (!ISDIGIT (*hdr))
return 0;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '-' || !ISDIGIT (*(hdr + 1)))
return 0;
closure->first_byte_pos = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
if (*hdr != '/' || !ISDIGIT (*(hdr + 1)))
return 0;
closure->last_byte_pos = num;
++hdr;
for (num = 0; ISDIGIT (*hdr); hdr++)
num = 10 * num + (*hdr - '0');
closure->entity_length = num;
return 1;
}
/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise.
Used for `Accept-Ranges'. */
static int
http_process_none (const char *hdr, void *arg)
{
int *where = (int *)arg;
if (strstr (hdr, "none"))
*where = 1;
else
*where = 0;
return 1;
}
/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */
static int
http_process_type (const char *hdr, void *arg)
{
char **result = (char **)arg;
/* Locate P on `;' or the terminating zero, whichever comes first. */
const char *p = strchr (hdr, ';');
if (!p)
p = hdr + strlen (hdr);
while (p > hdr && ISSPACE (*(p - 1)))
--p;
*result = strdupdelim (hdr, p);
return 1;
}
/* Check whether the `Connection' header is set to "keep-alive". */
static int
http_process_connection (const char *hdr, void *arg)
{
int *flag = (int *)arg;
if (!strcasecmp (hdr, "Keep-Alive"))
*flag = 1;
return 1;
}
/* Commit the cookie to the cookie jar. */
int
http_process_set_cookie (const char *hdr, void *arg)
{
struct url *u = (struct url *)arg;
/* The jar should have been created by now. */
assert (wget_cookie_jar != NULL);
cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr);
return 1;
}
/* Persistent connections. Currently, we cache the most recently used /* Persistent connections. Currently, we cache the most recently used
connection as persistent, provided that the HTTP server agrees to connection as persistent, provided that the HTTP server agrees to
@ -690,7 +731,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
char *proxyauth; char *proxyauth;
char *port_maybe; char *port_maybe;
char *request_keep_alive; char *request_keep_alive;
int sock, hcount, statcode; int sock, statcode;
int write_error; int write_error;
long contlen, contrange; long contlen, contrange;
struct url *conn; struct url *conn;
@ -700,15 +741,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
char *cookies = NULL; char *cookies = NULL;
char *head; char *head;
const char *hdr_beg, *hdr_end; struct response *resp;
char hdrval[256];
char *message;
char *set_cookie;
/* Whether this connection will be kept alive after the HTTP request /* Whether this connection will be kept alive after the HTTP request
is done. */ is done. */
int keep_alive; int keep_alive;
/* Flags that detect the two ways of specifying HTTP keep-alive /* Flag that detects having received a keep-alive response. */
response. */ int keep_alive_confirmed;
int http_keep_alive_1, http_keep_alive_2;
/* Whether keep-alive should be inhibited. */ /* Whether keep-alive should be inhibited. */
int inhibit_keep_alive; int inhibit_keep_alive;
@ -758,7 +801,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
know the local filename so we can save to it. */ know the local filename so we can save to it. */
assert (*hs->local_file != NULL); assert (*hs->local_file != NULL);
authenticate_h = 0; authenticate_h = NULL;
auth_tried_already = 0; auth_tried_already = 0;
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL; inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
@ -769,7 +812,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
for the Digest authorization scheme.) */ for the Digest authorization scheme.) */
keep_alive = 0; keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0; keep_alive_confirmed = 0;
post_content_type = NULL; post_content_type = NULL;
post_content_length = NULL; post_content_length = NULL;
@ -1080,9 +1123,7 @@ Accept: %s\r\n\
statcode = -1; statcode = -1;
*dt &= ~RETROKF; *dt &= ~RETROKF;
DEBUGP (("\n---response begin---\n")); head = fd_read_http_head (sock);
head = fd_read_head (sock);
if (!head) if (!head)
{ {
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
@ -1101,154 +1142,78 @@ Accept: %s\r\n\
} }
} }
/* Loop through the headers and process them. */ DEBUGP (("\n---response begin---\n"));
DEBUGP (("%s", head));
hcount = 0;
for (hdr_beg = head;
(hdr_end = next_header (hdr_beg));
hdr_beg = hdr_end)
{
char *hdr = strdupdelim (hdr_beg, hdr_end);
{
char *tmp = hdr + strlen (hdr);
if (tmp > hdr && tmp[-1] == '\n')
*--tmp = '\0';
if (tmp > hdr && tmp[-1] == '\r')
*--tmp = '\0';
}
++hcount;
/* Check for status line. */
if (hcount == 1)
{
const char *error;
/* Parse the first line of server response. */
statcode = parse_http_status_line (hdr, &error);
hs->statcode = statcode;
/* Store the descriptive response. */
if (statcode == -1) /* malformed response */
{
/* A common reason for "malformed response" error is the
case when no data was actually received. Handle this
special case. */
if (!*hdr)
hs->error = xstrdup (_("No data received"));
else
hs->error = xstrdup (_("Malformed status line"));
xfree (hdr);
break;
}
else if (!*error)
hs->error = xstrdup (_("(no description)"));
else
hs->error = xstrdup (error);
if ((statcode != -1)
#ifdef ENABLE_DEBUG
&& !opt.debug
#endif
)
{
if (opt.server_response)
logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
else
logprintf (LOG_VERBOSE, "%2d %s", statcode, error);
}
goto done_header;
}
/* Exit on empty header. */
if (!*hdr)
{
xfree (hdr);
break;
}
/* Print the header if requested. */
if (opt.server_response && hcount != 1)
logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr);
/* Try getting content-length. */
if (contlen == -1 && !opt.ignore_length)
if (header_process (hdr, "Content-Length", header_extract_number,
&contlen))
goto done_header;
/* Try getting content-type. */
if (!type)
if (header_process (hdr, "Content-Type", http_process_type, &type))
goto done_header;
/* Try getting location. */
if (!hs->newloc)
if (header_process (hdr, "Location", header_strdup, &hs->newloc))
goto done_header;
/* Try getting last-modified. */
if (!hs->remote_time)
if (header_process (hdr, "Last-Modified", header_strdup,
&hs->remote_time))
goto done_header;
/* Try getting cookies. */
if (opt.cookies)
if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u))
goto done_header;
/* Try getting www-authentication. */
if (!authenticate_h)
if (header_process (hdr, "WWW-Authenticate", header_strdup,
&authenticate_h))
goto done_header;
/* Check for accept-ranges header. If it contains the word
`none', disable the ranges. */
if (*dt & ACCEPTRANGES)
{
int nonep;
if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep))
{
if (nonep)
*dt &= ~ACCEPTRANGES;
goto done_header;
}
}
/* Try getting content-range. */
if (contrange == -1)
{
struct http_process_range_closure closure;
if (header_process (hdr, "Content-Range", http_process_range, &closure))
{
contrange = closure.first_byte_pos;
goto done_header;
}
}
/* Check for keep-alive related responses. */
if (!inhibit_keep_alive)
{
/* Check for the `Keep-Alive' header. */
if (!http_keep_alive_1)
{
if (header_process (hdr, "Keep-Alive", header_exists,
&http_keep_alive_1))
goto done_header;
}
/* Check for `Connection: Keep-Alive'. */
if (!http_keep_alive_2)
{
if (header_process (hdr, "Connection", http_process_connection,
&http_keep_alive_2))
goto done_header;
}
}
done_header:
xfree (hdr);
}
DEBUGP (("---response end---\n")); DEBUGP (("---response end---\n"));
logputs (LOG_VERBOSE, "\n"); resp = response_new (head);
if (contlen != -1 /* Check for status line. */
&& (http_keep_alive_1 || http_keep_alive_2)) message = NULL;
statcode = response_status (resp, &message);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : "");
else
{ {
assert (inhibit_keep_alive == 0); logprintf (LOG_VERBOSE, "\n");
print_server_response (resp);
}
hs->statcode = statcode;
if (statcode == -1)
hs->error = xstrdup (_("Malformed status line"));
else if (!*message)
hs->error = xstrdup (_("(no description)"));
else
hs->error = xstrdup (message);
if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
contlen = strtol (hdrval, NULL, 10);
type = response_header_strdup (resp, "Content-Type");
if (type)
{
char *tmp = strchr (type, ';');
if (tmp)
{
while (tmp > type && ISSPACE (tmp[-1]))
--tmp;
*tmp = '\0';
}
}
hs->newloc = response_header_strdup (resp, "Location");
hs->remote_time = response_header_strdup (resp, "Last-Modified");
set_cookie = response_header_strdup (resp, "Set-Cookie");
if (set_cookie)
{
/* The jar should have been created by now. */
assert (wget_cookie_jar != NULL);
cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path,
set_cookie);
xfree (set_cookie);
}
authenticate_h = response_header_strdup (resp, "WWW-Authenticate");
if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval)))
{
long first_byte_pos, last_byte_pos, entity_length;
if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos,
&entity_length))
contrange = first_byte_pos;
}
/* Check for keep-alive related responses. */
if (!inhibit_keep_alive && contlen != -1)
{
if (response_header_copy (resp, "Keep-Alive", NULL, 0))
keep_alive = 1;
else if (response_header_copy (resp, "Connection", hdrval,
sizeof (hdrval)))
{
if (0 == strcasecmp (hdrval, "Keep-Alive"))
keep_alive = 1; keep_alive = 1;
} }
}
response_free (resp);
if (keep_alive) if (keep_alive)
/* The server has promised that it will not close the connection /* The server has promised that it will not close the connection
when we're done. This means that we can register it. */ when we're done. This means that we can register it. */
@ -2290,6 +2255,11 @@ basic_authentication_encode (const char *user, const char *passwd,
return res; return res;
} }
#define SKIP_WS(x) do { \
while (ISSPACE (*(x))) \
++(x); \
} while (0)
#ifdef USE_DIGEST #ifdef USE_DIGEST
/* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning
of a field in such a header. If the field is the one specified by of a field in such a header. If the field is the one specified by
@ -2309,12 +2279,12 @@ extract_header_attr (const char *au, const char *attr_name, char **ret)
cp += strlen (attr_name); cp += strlen (attr_name);
if (!*cp) if (!*cp)
return -1; return -1;
cp += skip_lws (cp); SKIP_WS (cp);
if (*cp != '=') if (*cp != '=')
return -1; return -1;
if (!*++cp) if (!*++cp)
return -1; return -1;
cp += skip_lws (cp); SKIP_WS (cp);
if (*cp != '\"') if (*cp != '\"')
return -1; return -1;
if (!*++cp) if (!*++cp)
@ -2373,7 +2343,7 @@ digest_authentication_encode (const char *au, const char *user,
{ {
int i; int i;
au += skip_lws (au); SKIP_WS (au);
for (i = 0; i < countof (options); i++) for (i = 0; i < countof (options); i++)
{ {
int skip = extract_header_attr (au, options[i].name, int skip = extract_header_attr (au, options[i].name,
@ -2397,7 +2367,7 @@ digest_authentication_encode (const char *au, const char *user,
au++; au++;
if (*au && *++au) if (*au && *++au)
{ {
au += skip_lws (au); SKIP_WS (au);
if (*au == '\"') if (*au == '\"')
{ {
au++; au++;

View File

@ -129,7 +129,9 @@ limit_bandwidth (long bytes, struct wget_timer *timer)
limit_data.chunk_start = wtimer_read (timer); limit_data.chunk_start = wtimer_read (timer);
} }
#ifndef MIN
# define MIN(i, j) ((i) <= (j) ? (i) : (j)) # define MIN(i, j) ((i) <= (j) ? (i) : (j))
#endif
/* Reads the contents of file descriptor FD, until it is closed, or a /* Reads the contents of file descriptor FD, until it is closed, or a
read error occurs. The data is read in 8K chunks, and stored to read error occurs. The data is read in 8K chunks, and stored to
@ -269,16 +271,47 @@ fd_read_body (int fd, FILE *out, long *len, long restval, long expected,
typedef const char *(*finder_t) PARAMS ((const char *, int, int)); typedef const char *(*finder_t) PARAMS ((const char *, int, int));
/* Driver for fd_read_line and fd_read_head: keeps reading data until /* Read a hunk of data from FD, up until a terminator. The terminator
a terminator (as decided by FINDER) occurs in the data. The trick is whatever the TERMINATOR function determines it to be; for
is that the data is first peeked at, and only then actually read. example, it can be a line of data, or the head of an HTTP response.
That way the data after the terminator is never read. */ The function returns the data read allocated with malloc.
static char * In case of error, NULL is returned. In case of EOF and no data
fd_read_until (int fd, finder_t finder, int bufsize) read, NULL is returned and errno set to 0. In case of EOF with
data having been read, the data is returned, but it will
(obviously) not contain the terminator.
The idea is to be able to read a line of input, or otherwise a hunk
of text, such as the head of an HTTP request, without crossing the
boundary, so that the next call to fd_read etc. reads the data
after the hunk. To achieve that, this function does the following:
1. Peek at available data.
2. Determine whether the peeked data, along with the previously
read data, includes the terminator.
2a. If yes, read the data until the end of the terminator, and
exit.
2b. If no, read the peeked data and goto 1.
The function is careful to assume as little as possible about the
implementation of peeking. For example, every peek is followed by
a read. If the read returns a different amount of data, the
process is retried until all data arrives safely.
BUFSIZE is the size of the initial buffer expected to read all the
data in the typical case.
This function should be used as a building block for other
functions -- see fd_read_line as a simple example. */
char *
fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize)
{ {
int size = bufsize, tail = 0; char *hunk = xmalloc (bufsize);
char *buf = xmalloc (size); int tail = 0; /* tail position in HUNK */
while (1) while (1)
{ {
@ -287,23 +320,28 @@ fd_read_until (int fd, finder_t finder, int bufsize)
/* First, peek at the available data. */ /* First, peek at the available data. */
pklen = fd_peek (fd, buf + tail, size - tail, -1); pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1);
if (pklen < 0) if (pklen < 0)
{ {
xfree (buf); xfree (hunk);
return NULL; return NULL;
} }
end = finder (buf, tail, pklen); end = hunk_terminator (hunk, tail, pklen);
if (end) if (end)
{ {
/* The data contains the terminator: we'll read the data up /* The data contains the terminator: we'll drain the data up
to the end of the terminator. */ to the end of the terminator. */
remain = end - (buf + tail); remain = end - (hunk + tail);
/* Note +1 for trailing \0. */ if (remain == 0)
if (size < tail + remain + 1)
{ {
size = tail + remain + 1; /* No more data needs to be read. */
buf = xrealloc (buf, size); hunk[tail] = '\0';
return hunk;
}
if (bufsize - 1 < tail + remain)
{
bufsize = tail + remain + 1;
hunk = xrealloc (hunk, bufsize);
} }
} }
else else
@ -315,54 +353,47 @@ fd_read_until (int fd, finder_t finder, int bufsize)
how much data we'll get. (Some TCP stacks are notorious for how much data we'll get. (Some TCP stacks are notorious for
read returning less data than the previous MSG_PEEK.) */ read returning less data than the previous MSG_PEEK.) */
rdlen = fd_read (fd, buf + tail, remain, 0); rdlen = fd_read (fd, hunk + tail, remain, 0);
if (rdlen < 0) if (rdlen < 0)
{ {
xfree_null (buf); xfree_null (hunk);
return NULL; return NULL;
} }
tail += rdlen;
hunk[tail] = '\0';
if (rdlen == 0) if (rdlen == 0)
{ {
if (tail == 0) if (tail == 0)
{ {
/* EOF without anything having been read */ /* EOF without anything having been read */
xfree (buf); xfree (hunk);
errno = 0; errno = 0;
return NULL; return NULL;
} }
/* Return what we received so far. */ else
if (size < tail + 1) /* EOF seen: return the data we've read. */
{ return hunk;
size = tail + 1; /* expand the buffer to receive the
terminating \0 */
buf = xrealloc (buf, size);
} }
buf[tail] = '\0';
return buf;
}
tail += rdlen;
if (end && rdlen == remain) if (end && rdlen == remain)
{ /* The terminator was seen and the remaining data drained --
/* The end was seen and the data read -- we got what we came we got what we came for. */
for. */ return hunk;
buf[tail] = '\0';
return buf;
}
/* Keep looping until all the data arrives. */ /* Keep looping until all the data arrives. */
if (tail == size) if (tail == bufsize - 1)
{ {
size <<= 1; bufsize <<= 1;
buf = xrealloc (buf, size); hunk = xrealloc (hunk, bufsize);
} }
} }
} }
static const char * static const char *
line_terminator (const char *buf, int tail, int peeklen) line_terminator (const char *hunk, int oldlen, int peeklen)
{ {
const char *p = memchr (buf + tail, '\n', peeklen); const char *p = memchr (hunk + oldlen, '\n', peeklen);
if (p) if (p)
/* p+1 because we want the line to include '\n' */ /* p+1 because we want the line to include '\n' */
return p + 1; return p + 1;
@ -379,43 +410,7 @@ line_terminator (const char *buf, int tail, int peeklen)
char * char *
fd_read_line (int fd) fd_read_line (int fd)
{ {
return fd_read_until (fd, line_terminator, 128); return fd_read_hunk (fd, line_terminator, 128);
}
static const char *
head_terminator (const char *buf, int tail, int peeklen)
{
const char *start, *end;
if (tail < 4)
start = buf;
else
start = buf + tail - 4;
end = buf + tail + peeklen;
for (; start < end - 1; start++)
if (*start == '\n')
{
if (start < end - 2
&& start[1] == '\r'
&& start[2] == '\n')
return start + 3;
if (start[1] == '\n')
return start + 2;
}
return NULL;
}
/* Read the request head from FD and return it. The chunk of data is
allocated using malloc.
If an error occurs, or if no data can be read, NULL is returned.
In the former case errno indicates the error condition, and in the
latter case, errno is NULL. */
char *
fd_read_head (int fd)
{
return fd_read_until (fd, head_terminator, 512);
} }
/* Return a printed representation of the download rate, as /* Return a printed representation of the download rate, as

View File

@ -30,8 +30,10 @@ so, delete this exception statement from your version. */
#ifndef RETR_H #ifndef RETR_H
#define RETR_H #define RETR_H
typedef const char *(*hunk_terminator_t) PARAMS ((const char *, int, int));
char *fd_read_hunk PARAMS ((int, hunk_terminator_t, int));
char *fd_read_line PARAMS ((int)); char *fd_read_line PARAMS ((int));
char *fd_read_head PARAMS ((int));
int fd_read_body PARAMS ((int, FILE *, long *, long, long, int, double *)); int fd_read_body PARAMS ((int, FILE *, long *, long, long, int, double *));