mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Follow RFC 2616 and httpbis specifications when handling redirects
This commit is contained in:
parent
5ce9389384
commit
90896e3314
@ -1,3 +1,9 @@
|
|||||||
|
2013-06-17 Darshit Shah <darnir@gmail.com>
|
||||||
|
|
||||||
|
* wget.texi (POST): Explain the new redirection rules.
|
||||||
|
* wget.texi (Other HTTP Methods): Same.
|
||||||
|
* wget.texi (body-data): Fix typo in description.
|
||||||
|
|
||||||
2013-05-10 Darshit Shah <darnir@gmail.com> (tiny change)
|
2013-05-10 Darshit Shah <darnir@gmail.com> (tiny change)
|
||||||
|
|
||||||
* wget.texi (No of tries): Fix typo to make it clear that --tries
|
* wget.texi (No of tries): Fix typo to make it clear that --tries
|
||||||
|
@ -1475,14 +1475,15 @@ use chunked unless it knows it's talking to an HTTP/1.1 server. And it
|
|||||||
can't know that until it receives a response, which in turn requires the
|
can't know that until it receives a response, which in turn requires the
|
||||||
request to have been completed -- a chicken-and-egg problem.
|
request to have been completed -- a chicken-and-egg problem.
|
||||||
|
|
||||||
Note: if Wget is redirected after the POST request is completed, it
|
Note: As of version 1.15, if Wget is redirected after the POST request is
|
||||||
will not send the POST data to the redirected URL. This is because
|
completed, its behaviour will depend on the response code returned by the
|
||||||
URLs that process POST often respond with a redirection to a regular
|
server. In case of a 301 Moved Permanently, 302 Moved Temporarily or
|
||||||
page, which does not desire or accept POST. It is not completely
|
307 Temporary Redirect, Wget will, in accordance with RFC2616, continue
|
||||||
clear that this behavior is optimal; if it doesn't work out, it might
|
to send a POST request.
|
||||||
be changed in the future.
|
In case a server wants the client to change the Request method upon
|
||||||
|
redirection, it should send a 303 See Other response code.
|
||||||
|
|
||||||
This example shows how to log to a server using POST and then proceed to
|
This example shows how to log in to a server using POST and then proceed to
|
||||||
download the desired pages, presumably only accessible to authorized
|
download the desired pages, presumably only accessible to authorized
|
||||||
users:
|
users:
|
||||||
|
|
||||||
@ -1515,8 +1516,8 @@ Method to the server.
|
|||||||
@item --body-data=@var{Data-String}
|
@item --body-data=@var{Data-String}
|
||||||
@itemx --body-file=@var{Data-File}
|
@itemx --body-file=@var{Data-File}
|
||||||
Must be set when additional data needs to be sent to the server along with the
|
Must be set when additional data needs to be sent to the server along with the
|
||||||
Method specified using @samp{--method}. @samp{--post-data} sends @var{string} as
|
Method specified using @samp{--method}. @samp{--body-data} sends @var{string} as
|
||||||
data, whereas @samp{--post-file} sends the contents of @var{file}. Other than that,
|
data, whereas @samp{--body-file} sends the contents of @var{file}. Other than that,
|
||||||
they work in exactly the same way.
|
they work in exactly the same way.
|
||||||
|
|
||||||
Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole.
|
Currently, @samp{--body-file} is @emph{not} for transmitting files as a whole.
|
||||||
@ -1528,10 +1529,13 @@ BODY Data in advance, and hence the argument to @samp{--body-file} should be a
|
|||||||
regular file. See @samp{--post-file} for a more detailed explanation.
|
regular file. See @samp{--post-file} for a more detailed explanation.
|
||||||
Only one of @samp{--body-data} and @samp{--body-file} should be specified.
|
Only one of @samp{--body-data} and @samp{--body-file} should be specified.
|
||||||
|
|
||||||
Wget handles these requests in the same way that it handles @samp{--post-data}
|
If Wget is redirected after the request is completed, Wget will
|
||||||
and @samp{--post-file}. If you invoke Wget with @samp{--method=POST} and the server
|
suspend the current method and send a GET request till the redirection
|
||||||
responds with a redirect request, then Wget will revert to a GET request during the
|
is completed. This is true for all redirection response codes except
|
||||||
redirection as is explained in @samp{--post-data}.
|
307 Temporary Redirect which is used to explicitly specify that the
|
||||||
|
request method should @emph{not} change. Another exception is when
|
||||||
|
the method is set to @code{POST}, in which case the redirection rules
|
||||||
|
specified under @samp{--post-data} are followed.
|
||||||
|
|
||||||
@cindex Content-Disposition
|
@cindex Content-Disposition
|
||||||
@item --content-disposition
|
@item --content-disposition
|
||||||
|
@ -1,3 +1,15 @@
|
|||||||
|
2013-06-13 Darshit Shah <darnir@gmail.com>
|
||||||
|
|
||||||
|
* http.c (gethttp): Follow RFC 2616 and httpbis specifications when
|
||||||
|
handling redirections. Do not suspend the method on 301/302 redirects.
|
||||||
|
(gethttp): If method is not GET, we do not intend to download
|
||||||
|
anything.
|
||||||
|
* main.c (main): Set spider mode when opt.method is HEAD. This will
|
||||||
|
prevent Wget from downloading any file.
|
||||||
|
* retr.c (SUSPEND_METHOD): Rename macro SUSPEND_POST_DATA to
|
||||||
|
SUSPEND_METHOD to more accurately reflect its use. Similarly rename
|
||||||
|
related variables.
|
||||||
|
|
||||||
2013-05-14 Bykov Aleksey <gnfalex@rambler.ru>
|
2013-05-14 Bykov Aleksey <gnfalex@rambler.ru>
|
||||||
|
|
||||||
* warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest
|
* warc.c (warc_tempfile): For fix "Could not open temporary WARC manifest
|
||||||
|
34
src/http.c
34
src/http.c
@ -2641,12 +2641,35 @@ read_header:
|
|||||||
/* From RFC2616: The status codes 303 and 307 have
|
/* From RFC2616: The status codes 303 and 307 have
|
||||||
been added for servers that wish to make unambiguously
|
been added for servers that wish to make unambiguously
|
||||||
clear which kind of reaction is expected of the client.
|
clear which kind of reaction is expected of the client.
|
||||||
|
|
||||||
A 307 should be redirected using the same method,
|
A 307 should be redirected using the same method,
|
||||||
in other words, a POST should be preserved and not
|
in other words, a POST should be preserved and not
|
||||||
converted to a GET in that case. */
|
converted to a GET in that case.
|
||||||
if (statcode == HTTP_STATUS_TEMPORARY_REDIRECT)
|
|
||||||
return NEWLOCATION_KEEP_POST;
|
With strict adherence to RFC2616, POST requests are not
|
||||||
|
converted to a GET request on 301 Moved Permanently
|
||||||
|
or 302 Moved Temporarily.
|
||||||
|
|
||||||
|
A switch may be provided later based on the HTTPbis draft
|
||||||
|
that allows clients to convert POST requests to GET
|
||||||
|
requests on 301 and 302 response codes. */
|
||||||
|
switch (statcode)
|
||||||
|
{
|
||||||
|
case HTTP_STATUS_TEMPORARY_REDIRECT:
|
||||||
|
return NEWLOCATION_KEEP_POST;
|
||||||
|
break;
|
||||||
|
case HTTP_STATUS_MOVED_PERMANENTLY:
|
||||||
|
if (opt.method && strcasecmp (opt.method, "post") != 0)
|
||||||
|
return NEWLOCATION_KEEP_POST;
|
||||||
|
break;
|
||||||
|
case HTTP_STATUS_MOVED_TEMPORARILY:
|
||||||
|
if (opt.method && strcasecmp (opt.method, "post") != 0)
|
||||||
|
return NEWLOCATION_KEEP_POST;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
return NEWLOCATION;
|
||||||
|
break;
|
||||||
|
}
|
||||||
return NEWLOCATION;
|
return NEWLOCATION;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -2755,7 +2778,8 @@ read_header:
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Return if we have no intention of further downloading. */
|
/* Return if we have no intention of further downloading. */
|
||||||
if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only)
|
if ((!(*dt & RETROKF) && !opt.content_on_error) || head_only
|
||||||
|
|| (opt.method && strcasecmp (opt.method, "get") != 0))
|
||||||
{
|
{
|
||||||
/* In case the caller cares to look... */
|
/* In case the caller cares to look... */
|
||||||
hs->len = 0;
|
hs->len = 0;
|
||||||
|
@ -1397,6 +1397,13 @@ for details.\n\n"));
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Set various options as required for opt.method. */
|
||||||
|
|
||||||
|
/* When user specifies HEAD as the method, we do not wish to download any
|
||||||
|
files. Hence, set wget to run in spider mode. */
|
||||||
|
if (opt.method && strcasecmp (opt.method, "HEAD") == 0)
|
||||||
|
setoptval ("spider", "1", "spider");
|
||||||
|
|
||||||
/* Convert post_data to body-data and post_file_name to body-file options.
|
/* Convert post_data to body-data and post_file_name to body-file options.
|
||||||
This is required so as to remove redundant code later on in gethttp().
|
This is required so as to remove redundant code later on in gethttp().
|
||||||
The --post-data and --post-file options may also be removed in
|
The --post-data and --post-file options may also be removed in
|
||||||
|
38
src/retr.c
38
src/retr.c
@ -677,23 +677,23 @@ calc_rate (wgint bytes, double secs, int *units)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#define SUSPEND_POST_DATA do { \
|
#define SUSPEND_METHOD do { \
|
||||||
post_data_suspended = true; \
|
method_suspended = true; \
|
||||||
saved_post_data = opt.body_data; \
|
saved_body_data = opt.body_data; \
|
||||||
saved_post_file_name = opt.body_file; \
|
saved_body_file_name = opt.body_file; \
|
||||||
saved_method = opt.method; \
|
saved_method = opt.method; \
|
||||||
opt.body_data = NULL; \
|
opt.body_data = NULL; \
|
||||||
opt.body_file = NULL; \
|
opt.body_file = NULL; \
|
||||||
opt.method = NULL; \
|
opt.method = NULL; \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
#define RESTORE_POST_DATA do { \
|
#define RESTORE_METHOD do { \
|
||||||
if (post_data_suspended) \
|
if (method_suspended) \
|
||||||
{ \
|
{ \
|
||||||
opt.body_data = saved_post_data; \
|
opt.body_data = saved_body_data; \
|
||||||
opt.body_file = saved_post_file_name; \
|
opt.body_file = saved_body_file_name; \
|
||||||
opt.method = saved_method; \
|
opt.method = saved_method; \
|
||||||
post_data_suspended = false; \
|
method_suspended = false; \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
@ -721,10 +721,10 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
char *local_file;
|
char *local_file;
|
||||||
int redirection_count = 0;
|
int redirection_count = 0;
|
||||||
|
|
||||||
bool post_data_suspended = false;
|
bool method_suspended = false;
|
||||||
char *saved_post_data = NULL;
|
char *saved_body_data = NULL;
|
||||||
char *saved_method = NULL;
|
char *saved_method = NULL;
|
||||||
char *saved_post_file_name = NULL;
|
char *saved_body_file_name = NULL;
|
||||||
|
|
||||||
/* If dt is NULL, use local storage. */
|
/* If dt is NULL, use local storage. */
|
||||||
if (!dt)
|
if (!dt)
|
||||||
@ -765,7 +765,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
proxy, error);
|
proxy, error);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
xfree (error);
|
xfree (error);
|
||||||
RESTORE_POST_DATA;
|
RESTORE_METHOD;
|
||||||
result = PROXERR;
|
result = PROXERR;
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
@ -774,7 +774,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
|
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
|
||||||
url_free (proxy_url);
|
url_free (proxy_url);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
RESTORE_POST_DATA;
|
RESTORE_METHOD;
|
||||||
result = PROXERR;
|
result = PROXERR;
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
@ -858,7 +858,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
xfree (url);
|
xfree (url);
|
||||||
xfree (mynewloc);
|
xfree (mynewloc);
|
||||||
xfree (error);
|
xfree (error);
|
||||||
RESTORE_POST_DATA;
|
RESTORE_METHOD;
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -880,7 +880,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
}
|
}
|
||||||
xfree (url);
|
xfree (url);
|
||||||
xfree (mynewloc);
|
xfree (mynewloc);
|
||||||
RESTORE_POST_DATA;
|
RESTORE_METHOD;
|
||||||
result = WRONGCODE;
|
result = WRONGCODE;
|
||||||
goto bail;
|
goto bail;
|
||||||
}
|
}
|
||||||
@ -903,8 +903,8 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
|
RFC2616 HTTP/1.1 introduces code 307 Temporary Redirect
|
||||||
specifically to preserve the method of the request.
|
specifically to preserve the method of the request.
|
||||||
*/
|
*/
|
||||||
if (result != NEWLOCATION_KEEP_POST && !post_data_suspended)
|
if (result != NEWLOCATION_KEEP_POST && !method_suspended)
|
||||||
SUSPEND_POST_DATA;
|
SUSPEND_METHOD;
|
||||||
|
|
||||||
goto redirected;
|
goto redirected;
|
||||||
}
|
}
|
||||||
@ -967,7 +967,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
|
|||||||
xfree (url);
|
xfree (url);
|
||||||
}
|
}
|
||||||
|
|
||||||
RESTORE_POST_DATA;
|
RESTORE_METHOD;
|
||||||
|
|
||||||
bail:
|
bail:
|
||||||
if (register_status)
|
if (register_status)
|
||||||
|
Loading…
Reference in New Issue
Block a user