From 90896e3314dc1741fbfa9ac53631af43ca7b530e Mon Sep 17 00:00:00 2001 From: Darshit Shah Date: Mon, 17 Jun 2013 00:16:50 +0530 Subject: [PATCH] Follow RFC 2616 and httpbis specifications when handling redirects --- doc/ChangeLog | 6 ++++++ doc/wget.texi | 30 +++++++++++++++++------------- src/ChangeLog | 12 ++++++++++++ src/http.c | 34 +++++++++++++++++++++++++++++----- src/main.c | 7 +++++++ src/retr.c | 38 +++++++++++++++++++------------------- 6 files changed, 90 insertions(+), 37 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 1b0173be..5e29c547 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,9 @@ +2013-06-17 Darshit Shah + + * wget.texi (POST): Explain the new redirection rules. + * wget.texi (Other HTTP Methods): Same. + * wget.texi (body-data): Fix typo in description. + 2013-05-10 Darshit Shah (tiny change) * wget.texi (No of tries): Fix typo to make it clear that --tries diff --git a/doc/wget.texi b/doc/wget.texi index c2230a9c..710f0ac4 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -1475,14 +1475,15 @@ use chunked unless it knows it's talking to an HTTP/1.1 server. And it can't know that until it receives a response, which in turn requires the request to have been completed -- a chicken-and-egg problem. -Note: if Wget is redirected after the POST request is completed, it -will not send the POST data to the redirected URL. This is because -URLs that process POST often respond with a redirection to a regular -page, which does not desire or accept POST. It is not completely -clear that this behavior is optimal; if it doesn't work out, it might -be changed in the future. +Note: As of version 1.15 if Wget is redirected after the POST request is +completed, its behaviour will depend on the response code returned by the +server. In case of a 301 Moved Permanently, 302 Moved Temporarily or +307 Temporary Redirect, Wget will, in accordance with RFC2616, continue +to send a POST request. 
+In case a server wants the client to change the Request method upon +redirection, it should send a 303 See Other response code. -This example shows how to log to a server using POST and then proceed to +This example shows how to log in to a server using POST and then proceed to download the desired pages, presumably only accessible to authorized users: @@ -1515,8 +1516,8 @@ Method to the server. @item --body-data=@var{Data-String} @itemx --body-file=@var{Data-File} Must be set when additional data needs to be sent to the server along with the -Method specified using @samp{--method}. @samp{--post-data} sends @var{string} as -data, whereas @samp{--post-file} sends the contents of @var{file}. Other than that, +Method specified using @samp{--method}. @samp{--body-data} sends @var{string} as +data, whereas @samp{--body-file} sends the contents of @var{file}. Other than that, they work in exactly the same way. Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole. @@ -1528,10 +1529,13 @@ BODY Data in advance, and hence the argument to @samp{--body-file} should be a regular file. See @samp{--post-file} for a more detailed explanation. Only one of @samp{--body-data} and @samp{--body-file} should be specified. -Wget handles these requests in the same way that it handles @samp{--post-data} -and @samp{--post-file}. If you invoke Wget with @samp{--method=POST} and the server -responds with a redirect request, then Wget will revert to a GET request during the -redirection as is explained in @samp{--post-data}. +If Wget is redirected after the request is completed, Wget will +suspend the current method and send a GET request till the redirection +is completed. This is true for all redirection response codes except +307 Temporary Redirect which is used to explicitly specify that the +request method should @emph{not} change. 
Another exception is when +the method is set to @code{POST}, in which case the redirection rules +specified under @samp{--post-data} are followed. @cindex Content-Disposition @item --content-disposition diff --git a/src/ChangeLog b/src/ChangeLog index 32f3b824..f6096989 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,15 @@ +2013-06-13 Darshit Shah + + * http.c (gethttp): Follow RFC 2616 and httpbis specifications when + handling redirections. Do not suspend the method on 301/302 redirects. + (gethttp): If method is not GET, we do not intend to download + anything. + * main.c (main): Set spider mode when opt.method is HEAD. This will + prevent Wget from downloading any file. + * retr.c (SUSPEND_METHOD): Rename macro SUSPEND_POST_DATA to + SUSPEND_METHOD to more accurately reflect its use. Similarly rename + related variables. + 2013-05-14 Bykov Aleksey * warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest diff --git a/src/http.c b/src/http.c index 644b8f87..d63c0e23 100644 --- a/src/http.c +++ b/src/http.c @@ -2641,12 +2641,35 @@ read_header: /* From RFC2616: The status codes 303 and 307 have been added for servers that wish to make unambiguously clear which kind of reaction is expected of the client. - + A 307 should be redirected using the same method, in other words, a POST should be preserved and not - converted to a GET in that case. */ - if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT) - return NEWLOCATION_KEEP_POST; + converted to a GET in that case. + + With strict adherence to RFC2616, POST requests are not + converted to a GET request on 301 Permanent Redirect + or 302 Temporary Redirect. + + A switch may be provided later based on the HTTPbis draft + that allows clients to convert POST requests to GET + requests on 301 and 302 response codes. 
*/ + switch (statcode) + { + case HTTP_STATUS_TEMPORARY_REDIRECT: + return NEWLOCATION_KEEP_POST; + break; + case HTTP_STATUS_MOVED_PERMANENTLY: + if (opt.method && strcasecmp (opt.method, "post") != 0) + return NEWLOCATION_KEEP_POST; + break; + case HTTP_STATUS_MOVED_TEMPORARILY: + if (opt.method && strcasecmp (opt.method, "post") != 0) + return NEWLOCATION_KEEP_POST; + break; + default: + return NEWLOCATION; + break; + } return NEWLOCATION; } } @@ -2755,7 +2778,8 @@ read_header: } /* Return if we have no intention of further downloading. */ - if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only) + if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only + || (opt.method && strcasecmp (opt.method, "get") != 0)) { /* In case the caller cares to look... */ hs->len = 0; diff --git a/src/main.c b/src/main.c index 2b42d2d7..c895c4ee 100644 --- a/src/main.c +++ b/src/main.c @@ -1397,6 +1397,13 @@ for details.\n\n")); } } + /* Set various options as required for opt.method. */ + + /* When user specifies HEAD as the method, we do not wish to download any + files. Hence, set wget to run in spider mode. */ + if (opt.method && strcasecmp (opt.method, "HEAD") == 0) + setoptval ("spider", "1", "spider"); + /* Convert post_data to body-data and post_file_name to body-file options. This is required so as to remove redundant code later on in gethttp(). 
The --post-data and --post-file options may also be removed in diff --git a/src/retr.c b/src/retr.c index 9002b0ec..3d51ef93 100644 --- a/src/retr.c +++ b/src/retr.c @@ -677,23 +677,23 @@ calc_rate (wgint bytes, double secs, int *units) } -#define SUSPEND_POST_DATA do { \ - post_data_suspended = true; \ - saved_post_data = opt.body_data; \ - saved_post_file_name = opt.body_file; \ +#define SUSPEND_METHOD do { \ + method_suspended = true; \ + saved_body_data = opt.body_data; \ + saved_body_file_name = opt.body_file; \ saved_method = opt.method; \ opt.body_data = NULL; \ opt.body_file = NULL; \ opt.method = NULL; \ } while (0) -#define RESTORE_POST_DATA do { \ - if (post_data_suspended) \ +#define RESTORE_METHOD do { \ + if (method_suspended) \ { \ - opt.body_data = saved_post_data; \ - opt.body_file = saved_post_file_name; \ + opt.body_data = saved_body_data; \ + opt.body_file = saved_body_file_name; \ opt.method = saved_method; \ - post_data_suspended = false; \ + method_suspended = false; \ } \ } while (0) @@ -721,10 +721,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, char *local_file; int redirection_count = 0; - bool post_data_suspended = false; - char *saved_post_data = NULL; + bool method_suspended = false; + char *saved_body_data = NULL; char *saved_method = NULL; - char *saved_post_file_name = NULL; + char *saved_body_file_name = NULL; /* If dt is NULL, use local storage. 
*/ if (!dt) @@ -765,7 +765,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, proxy, error); xfree (url); xfree (error); - RESTORE_POST_DATA; + RESTORE_METHOD; result = PROXERR; goto bail; } @@ -774,7 +774,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy); url_free (proxy_url); xfree (url); - RESTORE_POST_DATA; + RESTORE_METHOD; result = PROXERR; goto bail; } @@ -858,7 +858,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); xfree (mynewloc); xfree (error); - RESTORE_POST_DATA; + RESTORE_METHOD; goto bail; } @@ -880,7 +880,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, } xfree (url); xfree (mynewloc); - RESTORE_POST_DATA; + RESTORE_METHOD; result = WRONGCODE; goto bail; } @@ -903,8 +903,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect specifically to preserve the method of the request. */ - if (result != NEWLOCATION_KEEP_POST && !post_data_suspended) - SUSPEND_POST_DATA; + if (result != NEWLOCATION_KEEP_POST && !method_suspended) + SUSPEND_METHOD; goto redirected; } @@ -967,7 +967,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, xfree (url); } - RESTORE_POST_DATA; + RESTORE_METHOD; bail: if (register_status)