
[svn] Don't allow more than 20 successive redirections.

commit 406fb8bbef
parent a4db28e20f
Author: hniksic
Date:   2001-11-30 13:33:31 -08:00

4 changed files with 40 additions and 10 deletions

NEWS

@@ -37,6 +37,10 @@ conversion now makes the BASE tags point to an empty string.
same document (<a href="#anchorname">), which used to confuse Wget,
are now converted correctly.
*** When in page-requisites (-p) mode, no-parent (-np) is ignored when
retrieving for inline images, stylesheets, and other documents needed
to display the page.
** If a host has more than one IP address, Wget uses the other
addresses when accessing the first one fails.
@@ -50,6 +54,8 @@ non-standard port.
quoting. Wget no longer dequotes reserved characters, e.g. `%3F' is
no longer translated to `?', nor `%2B' to `+'. Unsafe characters
which are not reserved are still escaped, of course.
** No more than 20 successive redirections are allowed.
* Wget 1.7.1 is a bugfix release with no user-visible changes.
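The multiple-IP-address entry above describes falling back to a host's other addresses when the first one refuses a connection. As a rough illustration only -- not Wget's implementation, which does its own host lookup and caching -- the same pattern can be written in POSIX C with getaddrinfo(); the helper name connect_any is invented for this sketch:

#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netdb.h>
#include <unistd.h>

/* Try each address registered for HOST:PORT in turn; return a
   connected socket, or -1 if every address failed.  */
static int
connect_any (const char *host, const char *port)
{
  struct addrinfo hints, *res, *ai;
  int fd = -1;

  memset (&hints, 0, sizeof hints);
  hints.ai_family = AF_UNSPEC;
  hints.ai_socktype = SOCK_STREAM;
  if (getaddrinfo (host, port, &hints, &res) != 0)
    return -1;

  for (ai = res; ai != NULL; ai = ai->ai_next)
    {
      fd = socket (ai->ai_family, ai->ai_socktype, ai->ai_protocol);
      if (fd < 0)
        continue;
      if (connect (fd, ai->ai_addr, ai->ai_addrlen) == 0)
        break;                  /* stop at the first address that works */
      close (fd);
      fd = -1;
    }

  freeaddrinfo (res);
  return fd;
}

int
main (void)
{
  int fd = connect_any ("localhost", "80");
  if (fd >= 0)
    close (fd);
  return fd >= 0 ? 0 : 1;
}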

TODO

@@ -28,8 +28,6 @@ changes.
* Try to devise a scheme so that, when password is unknown, Wget asks
the user for one.
* Limit the number of successive redirection to max. 20 or so.
* If -c used with -N, check to make sure a file hasn't changed on the server
before "continuing" to download it (preventing a bogus hybrid file).

src/ChangeLog

@@ -1,3 +1,7 @@
2001-11-30  Hrvoje Niksic  <hniksic@arsdigita.com>

	* retr.c (retrieve_url): Don't allow more than 20 redirections.

2001-11-30  Hrvoje Niksic  <hniksic@arsdigita.com>

	* recur.c (retrieve_tree): Skip the non-inline entries when

src/retr.c

@@ -298,9 +298,16 @@ register_all_redirections (struct hash_table *redirections, const char *final)
&& no_proxy_match((u)->host, \
(const char **)opt.no_proxy))
/* Retrieve the given URL. Decides which loop to call -- HTTP(S), FTP,
or simply copy it with file:// (#### the latter not yet
implemented!). */
/* Maximum number of allowed redirections. 20 was chosen as a
"reasonable" value, which is low enough to not cause havoc, yet
high enough to guarantee that normal retrievals will not be hurt by
the check. */
#define MAX_REDIRECTIONS 20
/* Retrieve the given URL. Decides which loop to call -- HTTP, FTP,
   proxy, etc. */
uerr_t
retrieve_url (const char *origurl, char **file, char **newloc,
const char *refurl, int *dt)
@@ -314,6 +321,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
int up_error_code; /* url parse error code */
char *local_file;
struct hash_table *redirections = NULL;
int redirection_count = 0;
/* If dt is NULL, just ignore it. */
if (!dt)
@@ -402,18 +410,17 @@ retrieve_url (const char *origurl, char **file, char **newloc,
opt.recursive = 0;
result = ftp_loop (u, dt);
opt.recursive = oldrec;
#if 0
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
according to the suffix. The HTML suffixes are `.html' and
`.htm', case-insensitive. */
if (redirections && u->local && (u->scheme == SCHEME_FTP))
if (redirections && local_file && u->scheme == SCHEME_FTP)
{
char *suf = suffix (u->local);
char *suf = suffix (local_file);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
*dt |= TEXTHTML;
}
#endif
}
location_changed = (result == NEWLOCATION);
if (location_changed)
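For context on the #if 0 block in the hunk above: an FTP server supplies no Content-Type header, so when an HTTP retrieval is redirected to FTP the code can only guess whether the result is HTML from the file suffix. Below is a self-contained sketch of that suffix test; html_suffix_p is an invented name for illustration (the real code uses Wget's suffix() helper and sets the TEXTHTML flag):

#include <stdio.h>
#include <string.h>
#include <strings.h>            /* strcasecmp lives here on POSIX */

/* Return 1 if FILENAME ends in ".html" or ".htm", case-insensitively,
   mirroring the test in the disabled block above.  */
static int
html_suffix_p (const char *filename)
{
  const char *dot = strrchr (filename, '.');
  if (!dot)
    return 0;
  return !strcasecmp (dot + 1, "html") || !strcasecmp (dot + 1, "htm");
}

int
main (void)
{
  printf ("%d %d\n", html_suffix_p ("index.HTML"),
          html_suffix_p ("archive.tar"));   /* prints "1 0" */
  return 0;
}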
@@ -462,7 +469,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
string_set_add (redirections, u->url);
}
/* The new location is OK. Check for redirection cycle by
/* The new location is OK. Check for max. number of
redirections. */
if (++redirection_count > MAX_REDIRECTIONS)
{
logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
MAX_REDIRECTIONS);
url_free (newloc_parsed);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
xfree (mynewloc);
return WRONGCODE;
}
/* Check for redirection cycle by
peeking through the history of redirections. */
if (string_set_contains (redirections, newloc_parsed->url))
{
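Taken together, the retr.c changes make retrieve_url() count every Location hop it follows and bail out with WRONGCODE once the count passes MAX_REDIRECTIONS, while the existing redirections string set still catches exact cycles. A minimal, self-contained sketch of the counting half, where fetch_once() is a stub standing in for the real HTTP/FTP retrieval loops:

#include <stdio.h>
#include <string.h>

#define MAX_REDIRECTIONS 20

/* Stub for one retrieval attempt: returns the redirect target, or
   NULL once the final document has been fetched.  Here it simulates
   an endless a <-> b redirect loop for demonstration.  */
static const char *
fetch_once (const char *url)
{
  return strcmp (url, "http://a/") ? "http://a/" : "http://b/";
}

static int
retrieve_with_limit (const char *url)
{
  int redirection_count = 0;
  const char *next;

  while ((next = fetch_once (url)) != NULL)
    {
      if (++redirection_count > MAX_REDIRECTIONS)
        {
          fprintf (stderr, "%d redirections exceeded.\n", MAX_REDIRECTIONS);
          return -1;            /* plays the role of WRONGCODE */
        }
      url = next;
    }
  return 0;
}

int
main (void)
{
  /* The simulated loop never terminates on its own, so the limit
     must trip and retrieve_with_limit() must return -1.  */
  return retrieve_with_limit ("http://a/") == -1 ? 0 : 1;
}

The counter alone would also terminate a cycle eventually; the string-set check in the final hunk simply reports an exact cycle as soon as a URL repeats, rather than after 20 hops.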