diff --git a/NEWS b/NEWS
index 57141aa9..a1853afb 100644
--- a/NEWS
+++ b/NEWS
@@ -37,6 +37,10 @@ conversion now makes the BASE tags point to an empty string.
 same document (), which used to confuse Wget, are now converted
 correctly.
 
+*** When in page-requisites (-p) mode, no-parent (-np) is ignored when
+retrieving inline images, stylesheets, and other documents needed to
+display the page.
+
 ** If a host has more than one IP address, Wget uses the other
 addresses when accessing the first one fails.
 
@@ -50,6 +54,8 @@ non-standard port.
 quoting.  Wget no longer dequotes reserved characters, e.g. `%3F' is
 no longer translated to `?', nor `%2B' to `+'.  Unsafe characters
 which are not reserved are still escaped, of course.
+
+** No more than 20 successive redirections are allowed.
 
 * Wget 1.7.1 is a bugfix release with no user-visible changes.
 
diff --git a/TODO b/TODO
index 84c796a3..8dea09d6 100644
--- a/TODO
+++ b/TODO
@@ -28,8 +28,6 @@ changes.
 * Try to devise a scheme so that, when password is unknown, Wget asks
   the user for one.
 
-* Limit the number of successive redirection to max. 20 or so.
-
 * If -c used with -N, check to make sure a file hasn't changed on the
   server before "continuing" to download it (preventing a bogus hybrid
   file).
diff --git a/src/ChangeLog b/src/ChangeLog
index f480009a..c2592d13 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,7 @@
+2001-11-30  Hrvoje Niksic
+
+	* retr.c (retrieve_url): Don't allow more than 20 redirections.
+
 2001-11-30  Hrvoje Niksic
 
 	* recur.c (retrieve_tree): Skip the non-inline entries when
diff --git a/src/retr.c b/src/retr.c
index 6c12462a..1f54cf0b 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -298,9 +298,16 @@ register_all_redirections (struct hash_table *redirections, const char *final)
 	  && no_proxy_match((u)->host,			\
 			    (const char **)opt.no_proxy))
 
-/* Retrieve the given URL.  Decides which loop to call -- HTTP(S), FTP,
-   or simply copy it with file:// (#### the latter not yet
-   implemented!).  */
+/* Maximum number of allowed redirections.  20 was chosen as a
+   "reasonable" value, which is low enough to not cause havoc, yet
+   high enough to guarantee that normal retrievals will not be hurt by
+   the check.  */
+
+#define MAX_REDIRECTIONS 20
+
+/* Retrieve the given URL.  Decides which loop to call -- HTTP, FTP,
+   proxy, etc.  */
+
 uerr_t
 retrieve_url (const char *origurl, char **file, char **newloc,
 	      const char *refurl, int *dt)
@@ -314,6 +321,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
   int up_error_code;		/* url parse error code */
   char *local_file;
   struct hash_table *redirections = NULL;
+  int redirection_count = 0;
 
   /* If dt is NULL, just ignore it.  */
   if (!dt)
@@ -402,18 +410,17 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 	  opt.recursive = 0;
 	  result = ftp_loop (u, dt);
 	  opt.recursive = oldrec;
-#if 0
+
 	  /* There is a possibility of having HTTP being redirected to
 	     FTP.  In these cases we must decide whether the text is HTML
 	     according to the suffix.  The HTML suffixes are `.html' and
 	     `.htm', case-insensitive.  */
-	  if (redirections && u->local && (u->scheme == SCHEME_FTP))
+	  if (redirections && local_file && u->scheme == SCHEME_FTP)
 	    {
-	      char *suf = suffix (u->local);
+	      char *suf = suffix (local_file);
 	      if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
 		*dt |= TEXTHTML;
 	    }
-#endif
 	}
       location_changed = (result == NEWLOCATION);
       if (location_changed)
@@ -462,7 +469,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 	  string_set_add (redirections, u->url);
 	}
 
-      /* The new location is OK.  Check for redirection cycle by
+      /* The new location is OK.  Check for max. number of
+	 redirections.  */
+      if (++redirection_count > MAX_REDIRECTIONS)
+	{
+	  logprintf (LOG_NOTQUIET, _("%d redirections exceeded.\n"),
+		     MAX_REDIRECTIONS);
+	  url_free (newloc_parsed);
+	  url_free (u);
+	  if (redirections)
+	    string_set_free (redirections);
+	  xfree (url);
+	  xfree (mynewloc);
+	  return WRONGCODE;
+	}
+
+      /* Check for redirection cycle by
 	 peeking through the history of redirections.  */
       if (string_set_contains (redirections, newloc_parsed->url))
 	{
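
The heart of the patch is a small guard in retrieve_url()'s redirect-following
loop: count every hop and abort once the count passes MAX_REDIRECTIONS,
complementing the existing string_set_contains() cycle check, which only
catches chains that revisit an exact URL.  The standalone C sketch below
shows the same pattern under simplified assumptions; fetch_once(),
follow_url(), and the canned redirect map are hypothetical demo scaffolding,
not Wget code.

#include <stdio.h>
#include <string.h>

#define MAX_REDIRECTIONS 20	/* same cap the patch chooses */

/* One entry in a canned redirect map -- demo data only. */
struct redirect {
  const char *from;
  const char *to;		/* NULL means "final document" */
};

/* A deliberate two-URL redirect cycle, to trip the cap. */
static const struct redirect map[] = {
  { "http://a.example/", "http://b.example/" },
  { "http://b.example/", "http://a.example/" },
  { NULL, NULL }
};

/* Stand-in for one HTTP round trip: returns the Location target,
   or NULL when URL is a final document. */
static const char *
fetch_once (const char *url)
{
  int i;
  for (i = 0; map[i].from; i++)
    if (!strcmp (map[i].from, url))
      return map[i].to;
  return NULL;
}

/* Follow redirections from URL, giving up after MAX_REDIRECTIONS
   hops -- the same guard retrieve_url() gains in the patch. */
static int
follow_url (const char *url)
{
  int redirection_count = 0;
  const char *next;

  while ((next = fetch_once (url)) != NULL)
    {
      if (++redirection_count > MAX_REDIRECTIONS)
	{
	  /* Analogous to logging and returning WRONGCODE. */
	  fprintf (stderr, "%d redirections exceeded.\n",
		   MAX_REDIRECTIONS);
	  return 1;
	}
      printf ("redirected (%d): %s -> %s\n", redirection_count, url, next);
      url = next;
    }
  printf ("final document: %s\n", url);
  return 0;
}

int
main (void)
{
  return follow_url ("http://a.example/");
}

In the real patch, the string-set cycle check would catch the two-URL loop
above before the counter ever trips; the cap earns its keep against servers
that emit an endless chain of distinct URLs (say, an ever-growing query
string), which no exact-match cycle detector can stop.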