1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Detect redirection cycles.

Published in <sxsd7ggtjac.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2000-10-31 20:21:50 -08:00
parent 515d82fb95
commit 6d13e17142
2 changed files with 70 additions and 20 deletions

View File

@ -1,3 +1,7 @@
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
* retr.c (retrieve_url): Detect redirection cycles.
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com> 2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c (get_urls_html): Decode HTML entities using * url.c (get_urls_html): Decode HTML entities using

View File

@ -319,11 +319,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
{ {
uerr_t result; uerr_t result;
char *url; char *url;
int location_changed, already_redirected, dummy; int location_changed, dummy;
int local_use_proxy; int local_use_proxy;
char *mynewloc, *proxy; char *mynewloc, *proxy;
struct urlinfo *u; struct urlinfo *u;
slist *redirections;
/* If dt is NULL, just ignore it. */ /* If dt is NULL, just ignore it. */
if (!dt) if (!dt)
@ -333,19 +333,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*newloc = NULL; *newloc = NULL;
if (file) if (file)
*file = NULL; *file = NULL;
already_redirected = 0;
again: redirections = NULL;
u = newurl (); u = newurl ();
/* Parse the URL. */ /* Parse the URL. */
result = parseurl (url, u, already_redirected); result = parseurl (url, u, 0);
if (result != URLOK) if (result != URLOK)
{ {
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result)); logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
free_slist (redirections);
return result; return result;
} }
redirected:
/* Set the referer. */ /* Set the referer. */
if (refurl) if (refurl)
u->referer = xstrdup (refurl); u->referer = xstrdup (refurl);
@ -375,6 +378,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
{ {
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n")); logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
return PROXERR; return PROXERR;
} }
/* Parse the proxy URL. */ /* Parse the proxy URL. */
@ -386,6 +390,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
else else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy); logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
return PROXERR; return PROXERR;
} }
u->proto = URLHTTP; u->proto = URLHTTP;
@ -402,7 +407,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
retrieval, so we save recursion to oldrec, and restore it retrieval, so we save recursion to oldrec, and restore it
later. */ later. */
int oldrec = opt.recursive; int oldrec = opt.recursive;
if (already_redirected) if (redirections)
opt.recursive = 0; opt.recursive = 0;
result = ftp_loop (u, dt); result = ftp_loop (u, dt);
opt.recursive = oldrec; opt.recursive = oldrec;
@ -413,7 +418,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
#### All of this is, of course, crap. These types should be #### All of this is, of course, crap. These types should be
determined through mailcap. */ determined through mailcap. */
if (already_redirected && u->local && (u->proto == URLFTP )) if (redirections && u->local && (u->proto == URLFTP ))
{ {
char *suf = suffix (u->local); char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm"))) if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
@ -424,30 +429,70 @@ retrieve_url (const char *origurl, char **file, char **newloc,
location_changed = (result == NEWLOCATION); location_changed = (result == NEWLOCATION);
if (location_changed) if (location_changed)
{ {
if (mynewloc) char *construced_newloc;
uerr_t newloc_result;
struct urlinfo *newloc_struct;
assert (mynewloc != NULL);
/* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs out
there break the rules and use relative URLs, and popular
browsers are lenient about this, so wget should be too. */
construced_newloc = url_concat (url, mynewloc);
free (mynewloc);
mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
newloc_struct = newurl ();
newloc_result = parseurl (mynewloc, newloc_struct, 1);
if (newloc_result != URLOK)
{ {
/* The HTTP specs only allow absolute URLs to appear in logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
redirects, but a ton of boneheaded webservers and CGIs freeurl (newloc_struct, 1);
out there break the rules and use relative URLs, and freeurl (u, 1);
popular browsers are lenient about this, so wget should free_slist (redirections);
be too. */ return result;
char *construced_newloc = url_concat (url, mynewloc);
free (mynewloc);
mynewloc = construced_newloc;
} }
/* Now mynewloc will become newloc_struct->url, because if the
Location contained relative paths like .././something, we
don't want that propagating as url. */
free (mynewloc);
mynewloc = xstrdup (newloc_struct->url);
/* Check for redirection back to itself. */ /* Check for redirection back to itself. */
if (url_equal (url, mynewloc)) if (!strcmp (u->url, newloc_struct->url))
{ {
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"), logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
mynewloc); mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
free_slist (redirections);
return WRONGCODE; return WRONGCODE;
} }
/* The new location is OK. Let's check for redirection cycle by
peeking through the history of redirections. */
if (in_slist (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
free_slist (redirections);
return WRONGCODE;
}
redirections = add_slist (redirections, newloc_struct->url, NOSORT);
free (url); free (url);
url = mynewloc; url = mynewloc;
freeurl (u, 1); freeurl (u, 1);
already_redirected = 1; u = newloc_struct;
goto again; goto redirected;
} }
if (file) if (file)
{ {
if (u->local) if (u->local)
@ -456,6 +501,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*file = NULL; *file = NULL;
} }
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
if (newloc) if (newloc)
*newloc = url; *newloc = url;