diff --git a/src/ChangeLog b/src/ChangeLog index c0ebba2d..70358d58 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,18 @@ +2003-11-28 Hrvoje Niksic + + * http.c: Deleted the old functions header_process, + header_extract_number, header_exists, header_strdup, + http_process_range, http_process_none, http_process_type, and + http_process_connection. + + * http.c (response_new): New function. + (response_header_bounds): Ditto. + (response_header_copy): Ditto. + (response_header_strdup): Ditto. + (response_status): Ditto. + (gethttp): Use the new response_* functions to parse the response. + Support HTTP/0.9 responses. + 2003-11-27 Hrvoje Niksic * progress.c (create_image): Don't calculate ETA if nothing has diff --git a/src/http.c b/src/http.c index 09383af2..8bdfbbc1 100644 --- a/src/http.c +++ b/src/http.c @@ -76,6 +76,10 @@ extern int errno; extern char *version_string; extern LARGE_INT total_downloaded_bytes; +#ifndef MIN +# define MIN(x, y) ((x) > (y) ? (y) : (x)) +#endif + static int cookies_loaded_p; struct cookie_jar *wget_cookie_jar; @@ -118,73 +122,308 @@ struct cookie_jar *wget_cookie_jar; #define HTTP_STATUS_BAD_GATEWAY 502 #define HTTP_STATUS_UNAVAILABLE 503 - +static const char * +head_terminator (const char *hunk, int oldlen, int peeklen) +{ + const char *start, *end; + + /* If at first peek, verify whether HUNK starts with "HTTP". If + not, this is a HTTP/0.9 request and we must bail out without + reading anything. */ + if (oldlen == 0 && 0 != memcmp (hunk, "HTTP", MIN (peeklen, 4))) + return hunk; + + if (oldlen < 4) + start = hunk; + else + start = hunk + oldlen - 4; + end = hunk + oldlen + peeklen; + + for (; start < end - 1; start++) + if (*start == '\n') + { + if (start < end - 2 + && start[1] == '\r' + && start[2] == '\n') + return start + 3; + if (start[1] == '\n') + return start + 2; + } + return NULL; +} + +/* Read the HTTP request head from FD and return it. The error + conditions are the same as with fd_read_hunk. + + To support HTTP/0.9 responses, this function tries to make sure + that the data begins with "HTTP". If this is not the case, no data + is read and an empty request is returned, so that the remaining + data can be treated as body. */ + +static char * +fd_read_http_head (int fd) +{ + return fd_read_hunk (fd, head_terminator, 512); +} + +struct response { + /* The response data. */ + const char *data; + + /* The array of pointers that indicate where each header starts. + For example, given three headers "foo", "bar", and "baz": + foo: value\r\nbar: value\r\nbaz: value\r\n\r\n + 0 1 2 3 + I.e. headers[0] points to the beginning of foo, headers[1] points + to the end of foo and the beginning of bar, etc. */ + const char **headers; +}; + +static struct response * +response_new (const char *head) +{ + const char *hdr; + int count, size; + + struct response *resp = xnew0 (struct response); + resp->data = head; + + if (*head == '\0') + { + /* Empty head means that we're dealing with a headerless + (HTTP/0.9) response. In that case, don't set HEADERS at + all. */ + return resp; + } + + /* Split HEAD into header lines, so that response_header_* functions + don't need to do this over and over again. */ + + size = count = 0; + hdr = head; + while (1) + { + DO_REALLOC (resp->headers, size, count + 1, const char *); + resp->headers[count++] = hdr; + + /* Break upon encountering an empty line. */ + if (!hdr[0] || (hdr[0] == '\r' && hdr[1] == '\n') || hdr[0] == '\n') + break; + + /* Find the end of HDR, including continuations. */ + do + { + const char *end = strchr (hdr, '\n'); + if (end) + hdr = end + 1; + else + hdr += strlen (hdr); + } + while (*hdr == ' ' || *hdr == '\t'); + } + DO_REALLOC (resp->headers, size, count + 1, const char *); + resp->headers[count++] = NULL; + + return resp; +} + +static int +response_header_bounds (const struct response *resp, const char *name, + const char **begptr, const char **endptr) +{ + int i; + const char **headers = resp->headers; + int name_len; + + if (!headers || !headers[1]) + return 0; + + name_len = strlen (name); + + for (i = 1; headers[i + 1]; i++) + { + const char *b = headers[i]; + const char *e = headers[i + 1]; + if (e - b > name_len + && b[name_len] == ':' + && 0 == strncasecmp (b, name, name_len)) + { + b += name_len + 1; + while (b < e && ISSPACE (*b)) + ++b; + while (b < e && ISSPACE (e[-1])) + --e; + *begptr = b; + *endptr = e; + return 1; + } + } + return 0; +} + +static int +response_header_copy (const struct response *resp, const char *name, + char *buf, int bufsize) +{ + const char *b, *e; + if (!response_header_bounds (resp, name, &b, &e)) + return 0; + if (bufsize) + { + int len = MIN (e - b, bufsize); + strncpy (buf, b, len); + buf[len] = '\0'; + } + return 1; +} + +static char * +response_header_strdup (const struct response *resp, const char *name) +{ + const char *b, *e; + if (!response_header_bounds (resp, name, &b, &e)) + return NULL; + return strdupdelim (b, e); +} + /* Parse the HTTP status line, which is of format: HTTP-Version SP Status-Code SP Reason-Phrase - The function returns the status-code, or -1 if the status line is - malformed. The pointer to reason-phrase is returned in RP. */ + The function returns the status-code, or -1 if the status line + appears malformed. The pointer to "reason-phrase" message is + returned in *MESSAGE. */ + static int -parse_http_status_line (const char *line, const char **reason_phrase_ptr) +response_status (const struct response *resp, char **message) { - /* (the variables must not be named `major' and `minor', because - that breaks compilation with SunOS4 cc.) */ - int mjr, mnr, statcode; - const char *p; + int status; + const char *p, *end; - *reason_phrase_ptr = NULL; - - /* The standard format of HTTP-Version is: `HTTP/X.Y', where X is - major version, and Y is minor version. */ - if (strncmp (line, "HTTP/", 5) != 0) - return -1; - line += 5; - - /* Calculate major HTTP version. */ - p = line; - for (mjr = 0; ISDIGIT (*line); line++) - mjr = 10 * mjr + (*line - '0'); - if (*line != '.' || p == line) - return -1; - ++line; - - /* Calculate minor HTTP version. */ - p = line; - for (mnr = 0; ISDIGIT (*line); line++) - mnr = 10 * mnr + (*line - '0'); - if (*line != ' ' || p == line) - return -1; - /* Wget will accept only 1.0 and higher HTTP-versions. The value of - minor version can be safely ignored. */ - if (mjr < 1) - return -1; - ++line; - - /* Calculate status code. */ - if (!(ISDIGIT (*line) && ISDIGIT (line[1]) && ISDIGIT (line[2]))) - return -1; - statcode = 100 * (*line - '0') + 10 * (line[1] - '0') + (line[2] - '0'); - - /* Set up the reason phrase pointer. */ - line += 3; - /* RFC2068 requires SPC here, but we allow the string to finish - here, in case no reason-phrase is present. */ - if (*line != ' ') + if (!resp->headers) { - if (!*line) - *reason_phrase_ptr = line; - else - return -1; + /* For a HTTP/0.9 response, always assume 200 response. */ + if (message) + *message = xstrdup ("OK"); + return 200; } - else - *reason_phrase_ptr = line + 1; - return statcode; + p = resp->headers[0]; + end = resp->headers[1]; + + if (!end) + return -1; + + /* "HTTP" */ + if (end - p < 4 || 0 != strncmp (p, "HTTP", 4)) + return -1; + p += 4; + + /* "/x.x" (optional because some Gnutella servers have been reported + as not sending the "/x.x" part. */ + if (p < end && *p == '/') + { + ++p; + while (p < end && ISDIGIT (*p)) + ++p; + if (p < end && *p == '.') + ++p; + while (p < end && ISDIGIT (*p)) + ++p; + } + + while (p < end && ISSPACE (*p)) + ++p; + if (end - p < 3 || !ISDIGIT (p[0]) || !ISDIGIT (p[1]) || !ISDIGIT (p[2])) + return -1; + + status = 100 * (p[0] - '0') + 10 * (p[1] - '0') + (p[2] - '0'); + p += 3; + + if (message) + { + while (p < end && ISSPACE (*p)) + ++p; + while (p < end && ISSPACE (end[-1])) + --end; + *message = strdupdelim (p, end); + } + + return status; +} + +static void +response_free (struct response *resp) +{ + xfree_null (resp->headers); + xfree (resp); +} + +static void +print_server_response_1 (const char *b, const char *e) +{ + char *ln; + if (b < e && e[-1] == '\n') + --e; + if (b < e && e[-1] == '\r') + --e; + BOUNDED_TO_ALLOCA (b, e, ln); + logprintf (LOG_VERBOSE, " %s\n", ln); +} + +static void +print_server_response (const struct response *resp) +{ + int i; + if (!resp->headers) + return; + for (i = 0; resp->headers[i + 1]; i++) + print_server_response_1 (resp->headers[i], resp->headers[i + 1]); +} + +/* Parse the `Content-Range' header and extract the information it + contains. Returns 1 if successful, -1 otherwise. */ +static int +parse_content_range (const char *hdr, long *first_byte_ptr, + long *last_byte_ptr, long *entity_length_ptr) +{ + long num; + + /* Ancient versions of Netscape proxy server, presumably predating + rfc2068, sent out `Content-Range' without the "bytes" + specifier. */ + if (!strncasecmp (hdr, "bytes", 5)) + { + hdr += 5; + /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the + HTTP spec. */ + if (*hdr == ':') + ++hdr; + while (ISSPACE (*hdr)) + ++hdr; + if (!*hdr) + return 0; + } + if (!ISDIGIT (*hdr)) + return 0; + for (num = 0; ISDIGIT (*hdr); hdr++) + num = 10 * num + (*hdr - '0'); + if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) + return 0; + *first_byte_ptr = num; + ++hdr; + for (num = 0; ISDIGIT (*hdr); hdr++) + num = 10 * num + (*hdr - '0'); + if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) + return 0; + *last_byte_ptr = num; + ++hdr; + for (num = 0; ISDIGIT (*hdr); hdr++) + num = 10 * num + (*hdr - '0'); + *entity_length_ptr = num; + return 1; } -#define WMIN(x, y) ((x) > (y) ? (y) : (x)) - /* Send the contents of FILE_NAME to SOCK/SSL. Make sure that exactly PROMISED_SIZE bytes are sent over the wire -- if the file is longer, read only that much; if the file is shorter, report an error. */ @@ -208,7 +447,7 @@ post_file (int sock, const char *file_name, long promised_size) int length = fread (chunk, 1, sizeof (chunk), fp); if (length == 0) break; - towrite = WMIN (promised_size - written, length); + towrite = MIN (promised_size - written, length); write_error = fd_write (sock, chunk, towrite, -1); if (write_error < 0) { @@ -231,204 +470,6 @@ post_file (int sock, const char *file_name, long promised_size) DEBUGP (("done]\n")); return 0; } - -static const char * -next_header (const char *h) -{ - const char *end = NULL; - const char *p = h; - do - { - p = strchr (p, '\n'); - if (!p) - return end; - end = ++p; - } - while (*p == ' ' || *p == '\t'); - - return end; -} - -/* Skip LWS (linear white space), if present. Returns number of - characters to skip. */ -static int -skip_lws (const char *string) -{ - const char *p = string; - - while (*p == ' ' || *p == '\t' || *p == '\r' || *p == '\n') - ++p; - return p - string; -} - -/* Check whether HEADER begins with NAME and, if yes, skip the `:' and - the whitespace, and call PROCFUN with the arguments of HEADER's - contents (after the `:' and space) and ARG. Otherwise, return 0. */ -int -header_process (const char *header, const char *name, - int (*procfun) (const char *, void *), - void *arg) -{ - /* Check whether HEADER matches NAME. */ - while (*name && (TOLOWER (*name) == TOLOWER (*header))) - ++name, ++header; - if (*name || *header++ != ':') - return 0; - - header += skip_lws (header); - - return ((*procfun) (header, arg)); -} - -/* Helper functions for use with header_process(). */ - -/* Extract a long integer from HEADER and store it to CLOSURE. If an - error is encountered, return 0, else 1. */ -int -header_extract_number (const char *header, void *closure) -{ - const char *p = header; - long result; - - for (result = 0; ISDIGIT (*p); p++) - result = 10 * result + (*p - '0'); - - /* Failure if no number present. */ - if (p == header) - return 0; - - /* Skip trailing whitespace. */ - p += skip_lws (p); - - /* Indicate failure if trailing garbage is present. */ - if (*p) - return 0; - - *(long *)closure = result; - return 1; -} - -/* Strdup HEADER, and place the pointer to CLOSURE. */ -int -header_strdup (const char *header, void *closure) -{ - *(char **)closure = xstrdup (header); - return 1; -} - -/* Write the value 1 into the integer pointed to by CLOSURE. */ -int -header_exists (const char *header, void *closure) -{ - *(int *)closure = 1; - return 1; -} - -/* Functions to be used as arguments to header_process(): */ - -struct http_process_range_closure { - long first_byte_pos; - long last_byte_pos; - long entity_length; -}; - -/* Parse the `Content-Range' header and extract the information it - contains. Returns 1 if successful, -1 otherwise. */ -static int -http_process_range (const char *hdr, void *arg) -{ - struct http_process_range_closure *closure - = (struct http_process_range_closure *)arg; - long num; - - /* Certain versions of Nutscape proxy server send out - `Content-Length' without "bytes" specifier, which is a breach of - RFC2068 (as well as the HTTP/1.1 draft which was current at the - time). But hell, I must support it... */ - if (!strncasecmp (hdr, "bytes", 5)) - { - hdr += 5; - /* "JavaWebServer/1.1.1" sends "bytes: x-y/z", contrary to the - HTTP spec. */ - if (*hdr == ':') - ++hdr; - hdr += skip_lws (hdr); - if (!*hdr) - return 0; - } - if (!ISDIGIT (*hdr)) - return 0; - for (num = 0; ISDIGIT (*hdr); hdr++) - num = 10 * num + (*hdr - '0'); - if (*hdr != '-' || !ISDIGIT (*(hdr + 1))) - return 0; - closure->first_byte_pos = num; - ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) - num = 10 * num + (*hdr - '0'); - if (*hdr != '/' || !ISDIGIT (*(hdr + 1))) - return 0; - closure->last_byte_pos = num; - ++hdr; - for (num = 0; ISDIGIT (*hdr); hdr++) - num = 10 * num + (*hdr - '0'); - closure->entity_length = num; - return 1; -} - -/* Place 1 to ARG if the HDR contains the word "none", 0 otherwise. - Used for `Accept-Ranges'. */ -static int -http_process_none (const char *hdr, void *arg) -{ - int *where = (int *)arg; - - if (strstr (hdr, "none")) - *where = 1; - else - *where = 0; - return 1; -} - -/* Place the malloc-ed copy of HDR hdr, to the first `;' to ARG. */ -static int -http_process_type (const char *hdr, void *arg) -{ - char **result = (char **)arg; - /* Locate P on `;' or the terminating zero, whichever comes first. */ - const char *p = strchr (hdr, ';'); - if (!p) - p = hdr + strlen (hdr); - while (p > hdr && ISSPACE (*(p - 1))) - --p; - *result = strdupdelim (hdr, p); - return 1; -} - -/* Check whether the `Connection' header is set to "keep-alive". */ -static int -http_process_connection (const char *hdr, void *arg) -{ - int *flag = (int *)arg; - if (!strcasecmp (hdr, "Keep-Alive")) - *flag = 1; - return 1; -} - -/* Commit the cookie to the cookie jar. */ - -int -http_process_set_cookie (const char *hdr, void *arg) -{ - struct url *u = (struct url *)arg; - - /* The jar should have been created by now. */ - assert (wget_cookie_jar != NULL); - - cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, hdr); - return 1; -} - /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -690,7 +731,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) char *proxyauth; char *port_maybe; char *request_keep_alive; - int sock, hcount, statcode; + int sock, statcode; int write_error; long contlen, contrange; struct url *conn; @@ -700,15 +741,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) char *cookies = NULL; char *head; - const char *hdr_beg, *hdr_end; + struct response *resp; + char hdrval[256]; + char *message; + char *set_cookie; /* Whether this connection will be kept alive after the HTTP request is done. */ int keep_alive; - /* Flags that detect the two ways of specifying HTTP keep-alive - response. */ - int http_keep_alive_1, http_keep_alive_2; + /* Flag that detects having received a keep-alive response. */ + int keep_alive_confirmed; /* Whether keep-alive should be inhibited. */ int inhibit_keep_alive; @@ -758,7 +801,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) know the local filename so we can save to it. */ assert (*hs->local_file != NULL); - authenticate_h = 0; + authenticate_h = NULL; auth_tried_already = 0; inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL; @@ -769,7 +812,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) for the Digest authorization scheme.) */ keep_alive = 0; - http_keep_alive_1 = http_keep_alive_2 = 0; + keep_alive_confirmed = 0; post_content_type = NULL; post_content_length = NULL; @@ -1080,9 +1123,7 @@ Accept: %s\r\n\ statcode = -1; *dt &= ~RETROKF; - DEBUGP (("\n---response begin---\n")); - - head = fd_read_head (sock); + head = fd_read_http_head (sock); if (!head) { logputs (LOG_VERBOSE, "\n"); @@ -1101,154 +1142,78 @@ Accept: %s\r\n\ } } - /* Loop through the headers and process them. */ - - hcount = 0; - for (hdr_beg = head; - (hdr_end = next_header (hdr_beg)); - hdr_beg = hdr_end) - { - char *hdr = strdupdelim (hdr_beg, hdr_end); - { - char *tmp = hdr + strlen (hdr); - if (tmp > hdr && tmp[-1] == '\n') - *--tmp = '\0'; - if (tmp > hdr && tmp[-1] == '\r') - *--tmp = '\0'; - } - ++hcount; - - /* Check for status line. */ - if (hcount == 1) - { - const char *error; - /* Parse the first line of server response. */ - statcode = parse_http_status_line (hdr, &error); - hs->statcode = statcode; - /* Store the descriptive response. */ - if (statcode == -1) /* malformed response */ - { - /* A common reason for "malformed response" error is the - case when no data was actually received. Handle this - special case. */ - if (!*hdr) - hs->error = xstrdup (_("No data received")); - else - hs->error = xstrdup (_("Malformed status line")); - xfree (hdr); - break; - } - else if (!*error) - hs->error = xstrdup (_("(no description)")); - else - hs->error = xstrdup (error); - - if ((statcode != -1) -#ifdef ENABLE_DEBUG - && !opt.debug -#endif - ) - { - if (opt.server_response) - logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr); - else - logprintf (LOG_VERBOSE, "%2d %s", statcode, error); - } - - goto done_header; - } - - /* Exit on empty header. */ - if (!*hdr) - { - xfree (hdr); - break; - } - - /* Print the header if requested. */ - if (opt.server_response && hcount != 1) - logprintf (LOG_VERBOSE, "\n%2d %s", hcount, hdr); - - /* Try getting content-length. */ - if (contlen == -1 && !opt.ignore_length) - if (header_process (hdr, "Content-Length", header_extract_number, - &contlen)) - goto done_header; - /* Try getting content-type. */ - if (!type) - if (header_process (hdr, "Content-Type", http_process_type, &type)) - goto done_header; - /* Try getting location. */ - if (!hs->newloc) - if (header_process (hdr, "Location", header_strdup, &hs->newloc)) - goto done_header; - /* Try getting last-modified. */ - if (!hs->remote_time) - if (header_process (hdr, "Last-Modified", header_strdup, - &hs->remote_time)) - goto done_header; - /* Try getting cookies. */ - if (opt.cookies) - if (header_process (hdr, "Set-Cookie", http_process_set_cookie, u)) - goto done_header; - /* Try getting www-authentication. */ - if (!authenticate_h) - if (header_process (hdr, "WWW-Authenticate", header_strdup, - &authenticate_h)) - goto done_header; - /* Check for accept-ranges header. If it contains the word - `none', disable the ranges. */ - if (*dt & ACCEPTRANGES) - { - int nonep; - if (header_process (hdr, "Accept-Ranges", http_process_none, &nonep)) - { - if (nonep) - *dt &= ~ACCEPTRANGES; - goto done_header; - } - } - /* Try getting content-range. */ - if (contrange == -1) - { - struct http_process_range_closure closure; - if (header_process (hdr, "Content-Range", http_process_range, &closure)) - { - contrange = closure.first_byte_pos; - goto done_header; - } - } - /* Check for keep-alive related responses. */ - if (!inhibit_keep_alive) - { - /* Check for the `Keep-Alive' header. */ - if (!http_keep_alive_1) - { - if (header_process (hdr, "Keep-Alive", header_exists, - &http_keep_alive_1)) - goto done_header; - } - /* Check for `Connection: Keep-Alive'. */ - if (!http_keep_alive_2) - { - if (header_process (hdr, "Connection", http_process_connection, - &http_keep_alive_2)) - goto done_header; - } - } - done_header: - xfree (hdr); - } + DEBUGP (("\n---response begin---\n")); + DEBUGP (("%s", head)); DEBUGP (("---response end---\n")); - logputs (LOG_VERBOSE, "\n"); + resp = response_new (head); - if (contlen != -1 - && (http_keep_alive_1 || http_keep_alive_2)) + /* Check for status line. */ + message = NULL; + statcode = response_status (resp, &message); + if (!opt.server_response) + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : ""); + else { - assert (inhibit_keep_alive == 0); - keep_alive = 1; + logprintf (LOG_VERBOSE, "\n"); + print_server_response (resp); } + + hs->statcode = statcode; + if (statcode == -1) + hs->error = xstrdup (_("Malformed status line")); + else if (!*message) + hs->error = xstrdup (_("(no description)")); + else + hs->error = xstrdup (message); + + if (response_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval))) + contlen = strtol (hdrval, NULL, 10); + type = response_header_strdup (resp, "Content-Type"); + if (type) + { + char *tmp = strchr (type, ';'); + if (tmp) + { + while (tmp > type && ISSPACE (tmp[-1])) + --tmp; + *tmp = '\0'; + } + } + hs->newloc = response_header_strdup (resp, "Location"); + hs->remote_time = response_header_strdup (resp, "Last-Modified"); + set_cookie = response_header_strdup (resp, "Set-Cookie"); + if (set_cookie) + { + /* The jar should have been created by now. */ + assert (wget_cookie_jar != NULL); + cookie_handle_set_cookie (wget_cookie_jar, u->host, u->port, u->path, + set_cookie); + xfree (set_cookie); + } + authenticate_h = response_header_strdup (resp, "WWW-Authenticate"); + if (response_header_copy (resp, "Content-Range", hdrval, sizeof (hdrval))) + { + long first_byte_pos, last_byte_pos, entity_length; + if (parse_content_range (hdrval, &first_byte_pos, &last_byte_pos, + &entity_length)) + contrange = first_byte_pos; + } + + /* Check for keep-alive related responses. */ + if (!inhibit_keep_alive && contlen != -1) + { + if (response_header_copy (resp, "Keep-Alive", NULL, 0)) + keep_alive = 1; + else if (response_header_copy (resp, "Connection", hdrval, + sizeof (hdrval))) + { + if (0 == strcasecmp (hdrval, "Keep-Alive")) + keep_alive = 1; + } + } + response_free (resp); + if (keep_alive) /* The server has promised that it will not close the connection when we're done. This means that we can register it. */ @@ -2290,6 +2255,11 @@ basic_authentication_encode (const char *user, const char *passwd, return res; } +#define SKIP_WS(x) do { \ + while (ISSPACE (*(x))) \ + ++(x); \ +} while (0) + #ifdef USE_DIGEST /* Parse HTTP `WWW-Authenticate:' header. AU points to the beginning of a field in such a header. If the field is the one specified by @@ -2309,12 +2279,12 @@ extract_header_attr (const char *au, const char *attr_name, char **ret) cp += strlen (attr_name); if (!*cp) return -1; - cp += skip_lws (cp); + SKIP_WS (cp); if (*cp != '=') return -1; if (!*++cp) return -1; - cp += skip_lws (cp); + SKIP_WS (cp); if (*cp != '\"') return -1; if (!*++cp) @@ -2373,7 +2343,7 @@ digest_authentication_encode (const char *au, const char *user, { int i; - au += skip_lws (au); + SKIP_WS (au); for (i = 0; i < countof (options); i++) { int skip = extract_header_attr (au, options[i].name, @@ -2397,7 +2367,7 @@ digest_authentication_encode (const char *au, const char *user, au++; if (*au && *++au) { - au += skip_lws (au); + SKIP_WS (au); if (*au == '\"') { au++; diff --git a/src/retr.c b/src/retr.c index 5d9795ad..912af63a 100644 --- a/src/retr.c +++ b/src/retr.c @@ -129,7 +129,9 @@ limit_bandwidth (long bytes, struct wget_timer *timer) limit_data.chunk_start = wtimer_read (timer); } -#define MIN(i, j) ((i) <= (j) ? (i) : (j)) +#ifndef MIN +# define MIN(i, j) ((i) <= (j) ? (i) : (j)) +#endif /* Reads the contents of file descriptor FD, until it is closed, or a read error occurs. The data is read in 8K chunks, and stored to @@ -269,16 +271,47 @@ fd_read_body (int fd, FILE *out, long *len, long restval, long expected, typedef const char *(*finder_t) PARAMS ((const char *, int, int)); -/* Driver for fd_read_line and fd_read_head: keeps reading data until - a terminator (as decided by FINDER) occurs in the data. The trick - is that the data is first peeked at, and only then actually read. - That way the data after the terminator is never read. */ +/* Read a hunk of data from FD, up until a terminator. The terminator + is whatever the TERMINATOR function determines it to be; for + example, it can be a line of data, or the head of an HTTP response. + The function returns the data read allocated with malloc. -static char * -fd_read_until (int fd, finder_t finder, int bufsize) + In case of error, NULL is returned. In case of EOF and no data + read, NULL is returned and errno set to 0. In case of EOF with + data having been read, the data is returned, but it will + (obviously) not contain the terminator. + + The idea is to be able to read a line of input, or otherwise a hunk + of text, such as the head of an HTTP request, without crossing the + boundary, so that the next call to fd_read etc. reads the data + after the hunk. To achieve that, this function does the following: + + 1. Peek at available data. + + 2. Determine whether the peeked data, along with the previously + read data, includes the terminator. + + 2a. If yes, read the data until the end of the terminator, and + exit. + + 2b. If no, read the peeked data and goto 1. + + The function is careful to assume as little as possible about the + implementation of peeking. For example, every peek is followed by + a read. If the read returns a different amount of data, the + process is retried until all data arrives safely. + + BUFSIZE is the size of the initial buffer expected to read all the + data in the typical case. + + This function should be used as a building block for other + functions -- see fd_read_line as a simple example. */ + +char * +fd_read_hunk (int fd, hunk_terminator_t hunk_terminator, int bufsize) { - int size = bufsize, tail = 0; - char *buf = xmalloc (size); + char *hunk = xmalloc (bufsize); + int tail = 0; /* tail position in HUNK */ while (1) { @@ -287,23 +320,28 @@ fd_read_until (int fd, finder_t finder, int bufsize) /* First, peek at the available data. */ - pklen = fd_peek (fd, buf + tail, size - tail, -1); + pklen = fd_peek (fd, hunk + tail, bufsize - 1 - tail, -1); if (pklen < 0) { - xfree (buf); + xfree (hunk); return NULL; } - end = finder (buf, tail, pklen); + end = hunk_terminator (hunk, tail, pklen); if (end) { - /* The data contains the terminator: we'll read the data up + /* The data contains the terminator: we'll drain the data up to the end of the terminator. */ - remain = end - (buf + tail); - /* Note +1 for trailing \0. */ - if (size < tail + remain + 1) + remain = end - (hunk + tail); + if (remain == 0) { - size = tail + remain + 1; - buf = xrealloc (buf, size); + /* No more data needs to be read. */ + hunk[tail] = '\0'; + return hunk; + } + if (bufsize - 1 < tail + remain) + { + bufsize = tail + remain + 1; + hunk = xrealloc (hunk, bufsize); } } else @@ -315,54 +353,47 @@ fd_read_until (int fd, finder_t finder, int bufsize) how much data we'll get. (Some TCP stacks are notorious for read returning less data than the previous MSG_PEEK.) */ - rdlen = fd_read (fd, buf + tail, remain, 0); + rdlen = fd_read (fd, hunk + tail, remain, 0); if (rdlen < 0) { - xfree_null (buf); + xfree_null (hunk); return NULL; } + tail += rdlen; + hunk[tail] = '\0'; + if (rdlen == 0) { if (tail == 0) { /* EOF without anything having been read */ - xfree (buf); + xfree (hunk); errno = 0; return NULL; } - /* Return what we received so far. */ - if (size < tail + 1) - { - size = tail + 1; /* expand the buffer to receive the - terminating \0 */ - buf = xrealloc (buf, size); - } - buf[tail] = '\0'; - return buf; + else + /* EOF seen: return the data we've read. */ + return hunk; } - tail += rdlen; if (end && rdlen == remain) - { - /* The end was seen and the data read -- we got what we came - for. */ - buf[tail] = '\0'; - return buf; - } + /* The terminator was seen and the remaining data drained -- + we got what we came for. */ + return hunk; /* Keep looping until all the data arrives. */ - if (tail == size) + if (tail == bufsize - 1) { - size <<= 1; - buf = xrealloc (buf, size); + bufsize <<= 1; + hunk = xrealloc (hunk, bufsize); } } } static const char * -line_terminator (const char *buf, int tail, int peeklen) +line_terminator (const char *hunk, int oldlen, int peeklen) { - const char *p = memchr (buf + tail, '\n', peeklen); + const char *p = memchr (hunk + oldlen, '\n', peeklen); if (p) /* p+1 because we want the line to include '\n' */ return p + 1; @@ -379,43 +410,7 @@ line_terminator (const char *buf, int tail, int peeklen) char * fd_read_line (int fd) { - return fd_read_until (fd, line_terminator, 128); -} - -static const char * -head_terminator (const char *buf, int tail, int peeklen) -{ - const char *start, *end; - if (tail < 4) - start = buf; - else - start = buf + tail - 4; - end = buf + tail + peeklen; - - for (; start < end - 1; start++) - if (*start == '\n') - { - if (start < end - 2 - && start[1] == '\r' - && start[2] == '\n') - return start + 3; - if (start[1] == '\n') - return start + 2; - } - return NULL; -} - -/* Read the request head from FD and return it. The chunk of data is - allocated using malloc. - - If an error occurs, or if no data can be read, NULL is returned. - In the former case errno indicates the error condition, and in the - latter case, errno is NULL. */ - -char * -fd_read_head (int fd) -{ - return fd_read_until (fd, head_terminator, 512); + return fd_read_hunk (fd, line_terminator, 128); } /* Return a printed representation of the download rate, as @@ -904,7 +899,7 @@ getproxy (struct url *u) rewritten_url = rewrite_shorthand_url (proxy); if (rewritten_url) { - strncpy (rewritten_storage, rewritten_url, sizeof(rewritten_storage)); + strncpy (rewritten_storage, rewritten_url, sizeof (rewritten_storage)); rewritten_storage[sizeof (rewritten_storage) - 1] = '\0'; proxy = rewritten_storage; } diff --git a/src/retr.h b/src/retr.h index 05434473..1b513f1f 100644 --- a/src/retr.h +++ b/src/retr.h @@ -30,8 +30,10 @@ so, delete this exception statement from your version. */ #ifndef RETR_H #define RETR_H +typedef const char *(*hunk_terminator_t) PARAMS ((const char *, int, int)); + +char *fd_read_hunk PARAMS ((int, hunk_terminator_t, int)); char *fd_read_line PARAMS ((int)); -char *fd_read_head PARAMS ((int)); int fd_read_body PARAMS ((int, FILE *, long *, long, long, int, double *));