1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Detect redirection cycles.

Published in <sxsd7ggtjac.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2000-10-31 20:21:50 -08:00
parent 515d82fb95
commit 6d13e17142
2 changed files with 70 additions and 20 deletions

View File

@ -1,3 +1,7 @@
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
* retr.c (retrieve_url): Detect redirection cycles.
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com> 2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c (get_urls_html): Decode HTML entities using * url.c (get_urls_html): Decode HTML entities using

View File

@ -319,11 +319,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
{ {
uerr_t result; uerr_t result;
char *url; char *url;
int location_changed, already_redirected, dummy; int location_changed, dummy;
int local_use_proxy; int local_use_proxy;
char *mynewloc, *proxy; char *mynewloc, *proxy;
struct urlinfo *u; struct urlinfo *u;
slist *redirections;
/* If dt is NULL, just ignore it. */ /* If dt is NULL, just ignore it. */
if (!dt) if (!dt)
@ -333,19 +333,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*newloc = NULL; *newloc = NULL;
if (file) if (file)
*file = NULL; *file = NULL;
already_redirected = 0;
again: redirections = NULL;
u = newurl (); u = newurl ();
/* Parse the URL. */ /* Parse the URL. */
result = parseurl (url, u, already_redirected); result = parseurl (url, u, 0);
if (result != URLOK) if (result != URLOK)
{ {
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result)); logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
free_slist (redirections);
return result; return result;
} }
redirected:
/* Set the referer. */ /* Set the referer. */
if (refurl) if (refurl)
u->referer = xstrdup (refurl); u->referer = xstrdup (refurl);
@ -375,6 +378,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
{ {
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n")); logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
return PROXERR; return PROXERR;
} }
/* Parse the proxy URL. */ /* Parse the proxy URL. */
@ -386,6 +390,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
else else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy); logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
return PROXERR; return PROXERR;
} }
u->proto = URLHTTP; u->proto = URLHTTP;
@ -402,7 +407,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
retrieval, so we save recursion to oldrec, and restore it retrieval, so we save recursion to oldrec, and restore it
later. */ later. */
int oldrec = opt.recursive; int oldrec = opt.recursive;
if (already_redirected) if (redirections)
opt.recursive = 0; opt.recursive = 0;
result = ftp_loop (u, dt); result = ftp_loop (u, dt);
opt.recursive = oldrec; opt.recursive = oldrec;
@ -413,7 +418,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
#### All of this is, of course, crap. These types should be #### All of this is, of course, crap. These types should be
determined through mailcap. */ determined through mailcap. */
if (already_redirected && u->local && (u->proto == URLFTP )) if (redirections && u->local && (u->proto == URLFTP ))
{ {
char *suf = suffix (u->local); char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm"))) if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
@ -424,30 +429,70 @@ retrieve_url (const char *origurl, char **file, char **newloc,
location_changed = (result == NEWLOCATION); location_changed = (result == NEWLOCATION);
if (location_changed) if (location_changed)
{ {
if (mynewloc) char *construced_newloc;
{ uerr_t newloc_result;
struct urlinfo *newloc_struct;
assert (mynewloc != NULL);
/* The HTTP specs only allow absolute URLs to appear in /* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs redirects, but a ton of boneheaded webservers and CGIs out
out there break the rules and use relative URLs, and there break the rules and use relative URLs, and popular
popular browsers are lenient about this, so wget should browsers are lenient about this, so wget should be too. */
be too. */ construced_newloc = url_concat (url, mynewloc);
char *construced_newloc = url_concat (url, mynewloc);
free (mynewloc); free (mynewloc);
mynewloc = construced_newloc; mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
newloc_struct = newurl ();
newloc_result = parseurl (mynewloc, newloc_struct, 1);
if (newloc_result != URLOK)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
freeurl (newloc_struct, 1);
freeurl (u, 1);
free_slist (redirections);
return result;
} }
/* Now mynewloc will become newloc_struct->url, because if the
Location contained relative paths like .././something, we
don't want that propagating as url. */
free (mynewloc);
mynewloc = xstrdup (newloc_struct->url);
/* Check for redirection to back to itself. */ /* Check for redirection to back to itself. */
if (url_equal (url, mynewloc)) if (!strcmp (u->url, newloc_struct->url))
{ {
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"), logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
mynewloc); mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
free_slist (redirections);
return WRONGCODE; return WRONGCODE;
} }
/* The new location is OK. Let's check for redirection cycle by
peeking through the history of redirections. */
if (in_slist (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
free_slist (redirections);
return WRONGCODE;
}
redirections = add_slist (redirections, newloc_struct->url, NOSORT);
free (url); free (url);
url = mynewloc; url = mynewloc;
freeurl (u, 1); freeurl (u, 1);
already_redirected = 1; u = newloc_struct;
goto again; goto redirected;
} }
if (file) if (file)
{ {
if (u->local) if (u->local)
@ -456,6 +501,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*file = NULL; *file = NULL;
} }
freeurl (u, 1); freeurl (u, 1);
free_slist (redirections);
if (newloc) if (newloc)
*newloc = url; *newloc = url;