mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Detect redirection cycles.
Published in <sxsd7ggtjac.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
515d82fb95
commit
6d13e17142
@ -1,3 +1,7 @@
|
|||||||
|
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* retr.c (retrieve_url): Detect redirection cycles.
|
||||||
|
|
||||||
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* url.c (get_urls_html): Decode HTML entities using
|
* url.c (get_urls_html): Decode HTML entities using
|
||||||
|
82
src/retr.c
82
src/retr.c
@ -319,11 +319,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
{
|
{
|
||||||
uerr_t result;
|
uerr_t result;
|
||||||
char *url;
|
char *url;
|
||||||
int location_changed, already_redirected, dummy;
|
int location_changed, dummy;
|
||||||
int local_use_proxy;
|
int local_use_proxy;
|
||||||
char *mynewloc, *proxy;
|
char *mynewloc, *proxy;
|
||||||
struct urlinfo *u;
|
struct urlinfo *u;
|
||||||
|
slist *redirections;
|
||||||
|
|
||||||
/* If dt is NULL, just ignore it. */
|
/* If dt is NULL, just ignore it. */
|
||||||
if (!dt)
|
if (!dt)
|
||||||
@ -333,19 +333,22 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
*newloc = NULL;
|
*newloc = NULL;
|
||||||
if (file)
|
if (file)
|
||||||
*file = NULL;
|
*file = NULL;
|
||||||
already_redirected = 0;
|
|
||||||
|
|
||||||
again:
|
redirections = NULL;
|
||||||
|
|
||||||
u = newurl ();
|
u = newurl ();
|
||||||
/* Parse the URL. */
|
/* Parse the URL. */
|
||||||
result = parseurl (url, u, already_redirected);
|
result = parseurl (url, u, 0);
|
||||||
if (result != URLOK)
|
if (result != URLOK)
|
||||||
{
|
{
|
||||||
freeurl (u, 1);
|
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
|
||||||
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
redirected:
|
||||||
|
|
||||||
/* Set the referer. */
|
/* Set the referer. */
|
||||||
if (refurl)
|
if (refurl)
|
||||||
u->referer = xstrdup (refurl);
|
u->referer = xstrdup (refurl);
|
||||||
@ -375,6 +378,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
{
|
{
|
||||||
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
return PROXERR;
|
return PROXERR;
|
||||||
}
|
}
|
||||||
/* Parse the proxy URL. */
|
/* Parse the proxy URL. */
|
||||||
@ -386,6 +390,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
else
|
else
|
||||||
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
return PROXERR;
|
return PROXERR;
|
||||||
}
|
}
|
||||||
u->proto = URLHTTP;
|
u->proto = URLHTTP;
|
||||||
@ -402,7 +407,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
retrieval, so we save recursion to oldrec, and restore it
|
retrieval, so we save recursion to oldrec, and restore it
|
||||||
later. */
|
later. */
|
||||||
int oldrec = opt.recursive;
|
int oldrec = opt.recursive;
|
||||||
if (already_redirected)
|
if (redirections)
|
||||||
opt.recursive = 0;
|
opt.recursive = 0;
|
||||||
result = ftp_loop (u, dt);
|
result = ftp_loop (u, dt);
|
||||||
opt.recursive = oldrec;
|
opt.recursive = oldrec;
|
||||||
@ -413,7 +418,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
|
|
||||||
#### All of this is, of course, crap. These types should be
|
#### All of this is, of course, crap. These types should be
|
||||||
determined through mailcap. */
|
determined through mailcap. */
|
||||||
if (already_redirected && u->local && (u->proto == URLFTP ))
|
if (redirections && u->local && (u->proto == URLFTP ))
|
||||||
{
|
{
|
||||||
char *suf = suffix (u->local);
|
char *suf = suffix (u->local);
|
||||||
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
||||||
@ -424,30 +429,70 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
location_changed = (result == NEWLOCATION);
|
location_changed = (result == NEWLOCATION);
|
||||||
if (location_changed)
|
if (location_changed)
|
||||||
{
|
{
|
||||||
if (mynewloc)
|
char *construced_newloc;
|
||||||
{
|
uerr_t newloc_result;
|
||||||
|
struct urlinfo *newloc_struct;
|
||||||
|
|
||||||
|
assert (mynewloc != NULL);
|
||||||
|
|
||||||
/* The HTTP specs only allow absolute URLs to appear in
|
/* The HTTP specs only allow absolute URLs to appear in
|
||||||
redirects, but a ton of boneheaded webservers and CGIs
|
redirects, but a ton of boneheaded webservers and CGIs out
|
||||||
out there break the rules and use relative URLs, and
|
there break the rules and use relative URLs, and popular
|
||||||
popular browsers are lenient about this, so wget should
|
browsers are lenient about this, so wget should be too. */
|
||||||
be too. */
|
construced_newloc = url_concat (url, mynewloc);
|
||||||
char *construced_newloc = url_concat (url, mynewloc);
|
|
||||||
free (mynewloc);
|
free (mynewloc);
|
||||||
mynewloc = construced_newloc;
|
mynewloc = construced_newloc;
|
||||||
|
|
||||||
|
/* Now, see if this new location makes sense. */
|
||||||
|
newloc_struct = newurl ();
|
||||||
|
newloc_result = parseurl (mynewloc, newloc_struct, 1);
|
||||||
|
if (newloc_result != URLOK)
|
||||||
|
{
|
||||||
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
|
||||||
|
freeurl (newloc_struct, 1);
|
||||||
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Now mynewloc will become newloc_struct->url, because if the
|
||||||
|
Location contained relative paths like .././something, we
|
||||||
|
don't want that propagating as url. */
|
||||||
|
free (mynewloc);
|
||||||
|
mynewloc = xstrdup (newloc_struct->url);
|
||||||
|
|
||||||
/* Check for redirection back to itself. */
|
/* Check for redirection back to itself. */
|
||||||
if (url_equal (url, mynewloc))
|
if (!strcmp (u->url, newloc_struct->url))
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
|
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
|
||||||
mynewloc);
|
mynewloc);
|
||||||
|
freeurl (newloc_struct, 1);
|
||||||
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The new location is OK. Let's check for redirection cycle by
|
||||||
|
peeking through the history of redirections. */
|
||||||
|
if (in_slist (redirections, newloc_struct->url))
|
||||||
|
{
|
||||||
|
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
|
||||||
|
mynewloc);
|
||||||
|
freeurl (newloc_struct, 1);
|
||||||
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
|
return WRONGCODE;
|
||||||
|
}
|
||||||
|
|
||||||
|
redirections = add_slist (redirections, newloc_struct->url, NOSORT);
|
||||||
|
|
||||||
free (url);
|
free (url);
|
||||||
url = mynewloc;
|
url = mynewloc;
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
already_redirected = 1;
|
u = newloc_struct;
|
||||||
goto again;
|
goto redirected;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file)
|
if (file)
|
||||||
{
|
{
|
||||||
if (u->local)
|
if (u->local)
|
||||||
@ -456,6 +501,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
*file = NULL;
|
*file = NULL;
|
||||||
}
|
}
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
|
free_slist (redirections);
|
||||||
|
|
||||||
if (newloc)
|
if (newloc)
|
||||||
*newloc = url;
|
*newloc = url;
|
||||||
|
Loading…
Reference in New Issue
Block a user