1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Follow RFC 2616 and httpbis specifications when handling redirects

This commit is contained in:
Darshit Shah 2013-06-17 00:16:50 +05:30 committed by Giuseppe Scrivano
parent 5ce9389384
commit 90896e3314
6 changed files with 90 additions and 37 deletions

View File

@ -1,3 +1,9 @@
2013-06-17 Darshit Shah <darnir@gmail.com>
* wget.texi (POST): Explain the new redirection rules.
* wget.texi (Other HTTP Methods): Same.
* wget.texi (body-data): Fix typo in description.
2013-05-10 Darshit Shah <darnir@gmail.com> (tiny change) 2013-05-10 Darshit Shah <darnir@gmail.com> (tiny change)
* wget.texi (No of tries): Fix typo to make it clear that --tries * wget.texi (No of tries): Fix typo to make it clear that --tries

View File

@ -1475,14 +1475,15 @@ use chunked unless it knows it's talking to an HTTP/1.1 server. And it
can't know that until it receives a response, which in turn requires the can't know that until it receives a response, which in turn requires the
request to have been completed -- a chicken-and-egg problem. request to have been completed -- a chicken-and-egg problem.
Note: if Wget is redirected after the POST request is completed, it Note: As of version 1.15 if Wget is redirected after the POST request is
will not send the POST data to the redirected URL. This is because completed, its behaviour will depend on the response code returned by the
URLs that process POST often respond with a redirection to a regular server. In case of a 301 Moved Permanently, 302 Moved Temporarily or
page, which does not desire or accept POST. It is not completely 307 Temporary Redirect, Wget will, in accordance with RFC2616, continue
clear that this behavior is optimal; if it doesn't work out, it might to send a POST request.
be changed in the future. In case a server wants the client to change the Request method upon
redirection, it should send a 303 See Other response code.
This example shows how to log to a server using POST and then proceed to This example shows how to log in to a server using POST and then proceed to
download the desired pages, presumably only accessible to authorized download the desired pages, presumably only accessible to authorized
users: users:
@ -1515,8 +1516,8 @@ Method to the server.
@item --body-data=@var{Data-String} @item --body-data=@var{Data-String}
@itemx --body-file=@var{Data-File} @itemx --body-file=@var{Data-File}
Must be set when additional data needs to be sent to the server along with the Must be set when additional data needs to be sent to the server along with the
Method specified using @samp{--method}. @samp{--post-data} sends @var{string} as Method specified using @samp{--method}. @samp{--body-data} sends @var{string} as
data, whereas @samp{--post-file} sends the contents of @var{file}. Other than that, data, whereas @samp{--body-file} sends the contents of @var{file}. Other than that,
they work in exactly the same way. they work in exactly the same way.
Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole. Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole.
@ -1528,10 +1529,13 @@ BODY Data in advance, and hence the argument to @samp{--body-file} should be a
regular file. See @samp{--post-file} for a more detailed explanation. regular file. See @samp{--post-file} for a more detailed explanation.
Only one of @samp{--body-data} and @samp{--body-file} should be specified. Only one of @samp{--body-data} and @samp{--body-file} should be specified.
Wget handles these requests in the same way that it handles @samp{--post-data} If Wget is redirected after the request is completed, Wget will
and @samp{--post-file}. If you invoke Wget with @samp{--method=POST} and the server suspend the current method and send a GET request till the redirection
responds with a redirect request, then Wget will revert to a GET request during the is completed. This is true for all redirection response codes except
redirection as is explained in @samp{--post-data}. 307 Temporary Redirect which is used to explicitly specify that the
request method should @emph{not} change. Another exception is when
the method is set to @code{POST}, in which case the redirection rules
specified under @samp{--post-data} are followed.
@cindex Content-Disposition @cindex Content-Disposition
@item --content-disposition @item --content-disposition

View File

@ -1,3 +1,15 @@
2013-06-13 Darshit Shah <darnir@gmail.com>
* http.c (gethttp): Follow RFC 2616 and httpbis specifications when
handling redirections. Do not suspend the method on 301/302 redirects.
(gethttp): If method is not GET, we do not intend to download
anything.
* main.c (main): Set spider mode when opt.method is HEAD. This will
prevent Wget from downloading any file.
* retr.c (SUSPEND_METHOD): Rename macro SUSPEND_POST_DATA to
SUSPEND_METHOD to more accurately reflect its use. Similarly rename
related variables.
2013-05-14 Bykov Aleksey <gnfalex@rambler.ru> 2013-05-14 Bykov Aleksey <gnfalex@rambler.ru>
* warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest * warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest

View File

@ -2641,12 +2641,35 @@ read_header:
/* From RFC2616: The status codes 303 and 307 have /* From RFC2616: The status codes 303 and 307 have
been added for servers that wish to make unambiguously been added for servers that wish to make unambiguously
clear which kind of reaction is expected of the client. clear which kind of reaction is expected of the client.
A 307 should be redirected using the same method, A 307 should be redirected using the same method,
in other words, a POST should be preserved and not in other words, a POST should be preserved and not
converted to a GET in that case. */ converted to a GET in that case.
if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
return NEWLOCATION_KEEP_POST; With strict adherence to RFC2616, POST requests are not
converted to a GET request on 301 Moved Permanently
or 302 Moved Temporarily.
A switch may be provided later based on the HTTPbis draft
that allows clients to convert POST requests to GET
requests on 301 and 302 response codes. */
switch (statcode)
{
case HTTP_STATUS_TEMPORARY_REDIRECT:
return NEWLOCATION_KEEP_POST;
break;
case HTTP_STATUS_MOVED_PERMANENTLY:
if (opt.method && strcasecmp (opt.method, "post") != 0)
return NEWLOCATION_KEEP_POST;
break;
case HTTP_STATUS_MOVED_TEMPORARILY:
if (opt.method && strcasecmp (opt.method, "post") != 0)
return NEWLOCATION_KEEP_POST;
break;
default:
return NEWLOCATION;
break;
}
return NEWLOCATION; return NEWLOCATION;
} }
} }
@ -2755,7 +2778,8 @@ read_header:
} }
/* Return if we have no intention of further downloading. */ /* Return if we have no intention of further downloading. */
if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only) if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only
|| (opt.method && strcasecmp (opt.method, "get") != 0))
{ {
/* In case the caller cares to look... */ /* In case the caller cares to look... */
hs->len = 0; hs->len = 0;

View File

@ -1397,6 +1397,13 @@ for details.\n\n"));
} }
} }
/* Set various options as required for opt.method. */
/* When user specifies HEAD as the method, we do not wish to download any
files. Hence, set wget to run in spider mode. */
if (opt.method && strcasecmp (opt.method, "HEAD") == 0)
setoptval ("spider", "1", "spider");
/* Convert post_data to body-data and post_file_name to body-file options. /* Convert post_data to body-data and post_file_name to body-file options.
This is required so as to remove redundant code later on in gethttp(). This is required so as to remove redundant code later on in gethttp().
The --post-data and --post-file options may also be removed in The --post-data and --post-file options may also be removed in

View File

@ -677,23 +677,23 @@ calc_rate (wgint bytes, double secs, int *units)
} }
#define SUSPEND_POST_DATA do { \ #define SUSPEND_METHOD do { \
post_data_suspended = true; \ method_suspended = true; \
saved_post_data = opt.body_data; \ saved_body_data = opt.body_data; \
saved_post_file_name = opt.body_file; \ saved_body_file_name = opt.body_file; \
saved_method = opt.method; \ saved_method = opt.method; \
opt.body_data = NULL; \ opt.body_data = NULL; \
opt.body_file = NULL; \ opt.body_file = NULL; \
opt.method = NULL; \ opt.method = NULL; \
} while (0) } while (0)
#define RESTORE_POST_DATA do { \ #define RESTORE_METHOD do { \
if (post_data_suspended) \ if (method_suspended) \
{ \ { \
opt.body_data = saved_post_data; \ opt.body_data = saved_body_data; \
opt.body_file = saved_post_file_name; \ opt.body_file = saved_body_file_name; \
opt.method = saved_method; \ opt.method = saved_method; \
post_data_suspended = false; \ method_suspended = false; \
} \ } \
} while (0) } while (0)
@ -721,10 +721,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
char *local_file; char *local_file;
int redirection_count = 0; int redirection_count = 0;
bool post_data_suspended = false; bool method_suspended = false;
char *saved_post_data = NULL; char *saved_body_data = NULL;
char *saved_method = NULL; char *saved_method = NULL;
char *saved_post_file_name = NULL; char *saved_body_file_name = NULL;
/* If dt is NULL, use local storage. */ /* If dt is NULL, use local storage. */
if (!dt) if (!dt)
@ -765,7 +765,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
proxy, error); proxy, error);
xfree (url); xfree (url);
xfree (error); xfree (error);
RESTORE_POST_DATA; RESTORE_METHOD;
result = PROXERR; result = PROXERR;
goto bail; goto bail;
} }
@ -774,7 +774,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy); logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
url_free (proxy_url); url_free (proxy_url);
xfree (url); xfree (url);
RESTORE_POST_DATA; RESTORE_METHOD;
result = PROXERR; result = PROXERR;
goto bail; goto bail;
} }
@ -858,7 +858,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
xfree (url); xfree (url);
xfree (mynewloc); xfree (mynewloc);
xfree (error); xfree (error);
RESTORE_POST_DATA; RESTORE_METHOD;
goto bail; goto bail;
} }
@ -880,7 +880,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
} }
xfree (url); xfree (url);
xfree (mynewloc); xfree (mynewloc);
RESTORE_POST_DATA; RESTORE_METHOD;
result = WRONGCODE; result = WRONGCODE;
goto bail; goto bail;
} }
@ -903,8 +903,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
specifically to preserve the method of the request. specifically to preserve the method of the request.
*/ */
if (result != NEWLOCATION_KEEP_POST && !post_data_suspended) if (result != NEWLOCATION_KEEP_POST && !method_suspended)
SUSPEND_POST_DATA; SUSPEND_METHOD;
goto redirected; goto redirected;
} }
@ -967,7 +967,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
xfree (url); xfree (url);
} }
RESTORE_POST_DATA; RESTORE_METHOD;
bail: bail:
if (register_status) if (register_status)