
[svn] Don't allow more than 20 successive redirections.

hniksic 2001-11-30 13:33:31 -08:00
parent a4db28e20f
commit 406fb8bbef
4 changed files with 40 additions and 10 deletions

NEWS

@@ -37,6 +37,10 @@ conversion now makes the BASE tags point to an empty string.
    same document (<a href="#anchorname">), which used to confuse Wget,
    are now converted correctly.
 
+*** When in page-requisites (-p) mode, no-parent (-np) is ignored when
+    retrieving for inline images, stylesheets, and other documents needed
+    to display the page.
+
 ** If a host has more than one IP address, Wget uses the other
    addresses when accessing the first one fails.
@@ -50,6 +54,8 @@ non-standard port.
    quoting.  Wget no longer dequotes reserved characters, e.g. `%3F' is
    no longer translated to `?', nor `%2B' to `+'.  Unsafe characters
    which are not reserved are still escaped, of course.
 
+** No more than 20 successive redirections are allowed.
+
 * Wget 1.7.1 is a bugfix release with no user-visible changes.
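
The quoting entry above lends itself to a short illustration. Below is a minimal C sketch of the described policy -- preserve existing %XX escapes instead of dequoting them, and escape only unsafe characters. The names char_unsafe and escape_unsafe, and the particular "unsafe" set, are hypothetical simplifications, not Wget's actual url.c tables:

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Illustrative "unsafe" test: control bytes, space, non-ASCII, and a
   few troublemakers.  Wget's real classification tables are richer.  */
static int
char_unsafe (unsigned char c)
{
  return c <= 0x20 || c >= 0x7f || strchr ("\"<>{}|\\^`", c) != NULL;
}

/* Escape unsafe characters in S, but leave existing %XX sequences
   alone, so that e.g. "%3F" is never dequoted back to "?".  Returns a
   freshly allocated string (error checking omitted for brevity).  */
static char *
escape_unsafe (const char *s)
{
  char *res = malloc (3 * strlen (s) + 1);  /* worst case: all "%XX" */
  char *p = res;

  for (; *s; s++)
    if (*s == '%' && isxdigit ((unsigned char) s[1])
        && isxdigit ((unsigned char) s[2]))
      {
        *p++ = *s++;            /* copy the existing escape verbatim */
        *p++ = *s++;
        *p++ = *s;
      }
    else if (char_unsafe ((unsigned char) *s))
      p += sprintf (p, "%%%02X", (unsigned char) *s);
    else
      *p++ = *s;

  *p = '\0';
  return res;
}

Under this scheme escape_unsafe ("http://example.com/a b%3F") yields "http://example.com/a%20b%3F": the space is escaped, while the pre-encoded `%3F' survives untouched.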

TODO

@@ -28,8 +28,6 @@ changes.
 * Try to devise a scheme so that, when password is unknown, Wget asks
   the user for one.
 
-* Limit the number of successive redirection to max. 20 or so.
-
 * If -c used with -N, check to make sure a file hasn't changed on the server
   before "continuing" to download it (preventing a bogus hybrid file).

src/ChangeLog

@@ -1,3 +1,7 @@
+2001-11-30  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+	* retr.c (retrieve_url): Don't allow more than 20 redirections.
+
 2001-11-30  Hrvoje Niksic  <hniksic@arsdigita.com>
 
 	* recur.c (retrieve_tree): Skip the non-inline entries when

src/retr.c

@@ -298,9 +298,16 @@ register_all_redirections (struct hash_table *redirections, const char *final)
                          && no_proxy_match((u)->host,                  \
                                            (const char **)opt.no_proxy))
 
-/* Retrieve the given URL.  Decides which loop to call -- HTTP(S), FTP,
-   or simply copy it with file:// (#### the latter not yet
-   implemented!).  */
+/* Maximum number of allowed redirections.  20 was chosen as a
+   "reasonable" value, which is low enough to not cause havoc, yet
+   high enough to guarantee that normal retrievals will not be hurt by
+   the check.  */
+
+#define MAX_REDIRECTIONS 20
+
+/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
+   FTP proxy, etc.  */
 uerr_t
 retrieve_url (const char *origurl, char **file, char **newloc,
               const char *refurl, int *dt)
@@ -314,6 +321,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
   int up_error_code;            /* url parse error code */
   char *local_file;
   struct hash_table *redirections = NULL;
+  int redirection_count = 0;
 
   /* If dt is NULL, just ignore it.  */
   if (!dt)
@@ -402,18 +410,17 @@ retrieve_url (const char *origurl, char **file, char **newloc,
       opt.recursive = 0;
       result = ftp_loop (u, dt);
       opt.recursive = oldrec;
-#if 0
+
       /* There is a possibility of having HTTP being redirected to
          FTP.  In these cases we must decide whether the text is HTML
          according to the suffix.  The HTML suffixes are `.html' and
          `.htm', case-insensitive.  */
-      if (redirections && u->local && (u->scheme == SCHEME_FTP))
+      if (redirections && local_file && u->scheme == SCHEME_FTP)
         {
-          char *suf = suffix (u->local);
+          char *suf = suffix (local_file);
           if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
             *dt |= TEXTHTML;
         }
-#endif
     }
 
   location_changed = (result == NEWLOCATION);
   if (location_changed)
@@ -462,7 +469,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
           string_set_add (redirections, u->url);
         }
 
-      /* The new location is OK.  Check for redirection cycle by
+      /* The new location is OK.  Check for max. number of
+         redirections.  */
+      if (++redirection_count > MAX_REDIRECTIONS)
+        {
+          logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
+                     MAX_REDIRECTIONS);
+          url_free (newloc_parsed);
+          url_free (u);
+          if (redirections)
+            string_set_free (redirections);
+          xfree (url);
+          xfree (mynewloc);
+          return WRONGCODE;
+        }
+
+      /* Check for redirection cycle by
          peeking through the history of redirections.  */
       if (string_set_contains (redirections, newloc_parsed->url))
         {