From 24c465b5ade7a46f50bcf85ed490b1a4328963ad Mon Sep 17 00:00:00 2001 From: dan Date: Fri, 27 Oct 2000 20:18:20 -0700 Subject: [PATCH] [svn] retr.c (retrieve_url): Manually applied T. Bharath's patch to get wget to grok illegal relative URL redirects. Reformatted and re-commented it. --- ChangeLog | 4 ++++ TODO | 3 --- src/ChangeLog | 6 ++++++ src/retr.c | 22 ++++++++++++++++++++++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 164fadac..f226deab 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2000-10-27 Dan Harkless + + * TODO: wget now groks illegal relative URL HTTP redirects. + 2000-10-24 Dan Harkless * NEWS: Forgot to update regarding new --bind-address option. diff --git a/TODO b/TODO index 95d8c038..a1dcd0a7 100644 --- a/TODO +++ b/TODO @@ -28,9 +28,6 @@ may tend towards the top). Not all of these represent user-visible changes. * --retr-symlinks should cause wget to traverse links to directories too. -* Lots of noncompliant webservers issue HTTP redirects to relative URLs, and - browsers follow them, so wget should too. - * Make wget return non-zero status in more situations, like incorrect HTTP auth. * Timestamps are sometimes not copied over on files retrieved by FTP. diff --git a/src/ChangeLog b/src/ChangeLog index b2770ef7..95761f72 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2000-10-27 Dan Harkless + + * retr.c (retrieve_url): Manually applied T. Bharath + 's patch to get wget to grok + illegal relative URL redirects. Reformatted and re-commented it. 
+ 2000-10-23 Dan Harkless * connect.c (make_connection and bindport): Manually applied Rob diff --git a/src/retr.c b/src/retr.c index 7e884f1b..8fa76175 100644 --- a/src/retr.c +++ b/src/retr.c @@ -406,6 +406,28 @@ retrieve_url (const char *origurl, char **file, char **newloc, } if (mynewloc) { + /* The HTTP specs only allow absolute URLs to appear in redirects, but + a ton of boneheaded webservers and CGIs out there break the rules + and use relative URLs, and popular browsers are lenient about this, + so wget should be too. */ + if (strstr(mynewloc, "://") == NULL) + /* Doesn't look like an absolute URL (this check will incorrectly + think that rare relative URLs containing "://" later in the + string are absolute). */ + { + char *temp = malloc(strlen(url) + strlen(mynewloc) + 1); + + if (mynewloc[0] == '/') + /* "Hostless absolute" URL. Convert to absolute. */ + sprintf(temp,"%s%s", url, mynewloc); + else + /* Relative URL. Convert to absolute. */ + sprintf(temp,"%s/%s", url, mynewloc); + + free(mynewloc); + mynewloc = temp; + } + free (url); url = mynewloc; }