From a00b834bb39fd2982ebb247d52e576208d4934ab Mon Sep 17 00:00:00 2001 From: Micah Cowan Date: Wed, 1 Jul 2009 23:17:33 -0700 Subject: [PATCH] Referer/IRI fix. --- src/ChangeLog | 11 +++++++++++ src/recur.c | 5 +++++ src/retr.c | 13 ++++++------- tests/ChangeLog | 9 +++++++++ tests/HTTPServer.pm | 19 +++++++++++++++++++ tests/Test-iri.px | 14 ++++++-------- 6 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 2d54cb34..fca73ac2 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,14 @@ +2009-07-01 Micah Cowan + + * retr.c (retrieve_url): Use the existing "redirect" label, + instead of superfluous "second_try". Removed no-longer-accurate + debug statement. Use the "newloc" parameter to store the fallback + URL, when IRI version was rejected. + + * recur.c (retrieve_tree): Always use the parsed URL for tracking + the Referer, since that's the one we actually requested (if + there's a difference in terms of percent-encodings and such). + 2009-07-01 Steven Schubiger * Makefile.am: Add a rule to generate build_info.c and list diff --git a/src/recur.c b/src/recur.c index e4ffa424..b0042dff 100644 --- a/src/recur.c +++ b/src/recur.c @@ -320,6 +320,11 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi) xfree (url); url = redirected; } + else + { + xfree (url); + url = xstrdup (url_parsed->url); + } url_free(url_parsed); } diff --git a/src/retr.c b/src/retr.c index 8752dce7..a1f045be 100644 --- a/src/retr.c +++ b/src/retr.c @@ -605,6 +605,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, uerr_t result; char *url; bool location_changed; + bool iri_fallbacked = 0; int dummy; char *mynewloc, *proxy; struct url *u = orig_parsed, *proxy_url; @@ -628,15 +629,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (file) *file = NULL; - second_try: - DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url), - iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None", - iri->utf8_encode)); - if (!refurl) refurl = opt.referer; redirected: + /* (also for IRI fallbacking) */ result = NOCONERROR; mynewloc = NULL; @@ -805,7 +802,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, if (u) { DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url))); - goto second_try; + url = xstrdup (u->url); + iri_fallbacked = 1; + goto redirected; } else DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url))); @@ -840,7 +839,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, url_free (u); } - if (redirection_count) + if (redirection_count || iri_fallbacked) { if (newloc) *newloc = url; diff --git a/tests/ChangeLog b/tests/ChangeLog index 6fd859c0..8e209251 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,12 @@ +2009-07-01 Micah Cowan + + * HTTPServer.pm (send_response): Invocation of + verify_request_headers, to support testing of Wget-sent header + values. + (verify_request_headers): Added. + + * Test-iri.px: Added verification checks for Referer values. + 2009-06-29 Micah Cowan * WgetTest.pm.in (_cleanup): Allow cleanup of test directories to diff --git a/tests/HTTPServer.pm b/tests/HTTPServer.pm index 5252b5b8..58b1a363 100644 --- a/tests/HTTPServer.pm +++ b/tests/HTTPServer.pm @@ -68,6 +68,9 @@ sub send_response { if (exists $url_rec->{'auth_method'}) { ($send_content, $code, $msg, $headers) = $self->handle_auth($req, $url_rec); + } elsif (!$self->verify_request_headers ($req, $url_rec)) { + ($send_content, $code, $msg, $headers) = + ('', 400, 'Mismatch on expected headers', {}); } else { ($code, $msg) = @{$url_rec}{'code', 'msg'}; $headers = $url_rec->{headers}; @@ -210,6 +213,22 @@ sub verify_auth_basic { } } +sub verify_request_headers { + my ($self, $req, $url_rec) = @_; + + return 1 unless exists $url_rec->{'request_headers'}; + for my $hdrname (keys %{$url_rec->{'request_headers'}}) { + my $rhdr = $req->header ($hdrname); + my $ehdr = $url_rec->{'request_headers'}{$hdrname}; + unless (defined $rhdr && $rhdr =~ $ehdr) { + print STDERR "\n*** Mismatch on $hdrname: $rhdr =~ $ehdr\n"; + return undef; + } + } + + return 1; +} + sub _substitute_port { my $self = shift; my $ret = shift; diff --git a/tests/Test-iri.px b/tests/Test-iri.px index 738c304a..01e1c50a 100755 --- a/tests/Test-iri.px +++ b/tests/Test-iri.px @@ -148,14 +148,9 @@ my %urls = ( '/p2_%C3%A9%C3%A9n.html' => { # UTF-8 encoded code => "200", msg => "Ok", - headers => { - "Content-type" => "text/html; charset=ISO-8859-1", + request_headers => { + "Referer" => qr|http://localhost:[0-9]+/p1_fran%E7ais.html|, }, - content => $pageeen, - }, - '/p2_%E9%E9n.html' => { - code => "200", - msg => "Ok", headers => { "Content-type" => "text/html; charset=ISO-8859-1", }, @@ -180,6 +175,9 @@ my %urls = ( '/p4_m%C3%A9%C3%A9r.html' => { code => "200", msg => "Ok", + request_headers => { + "Referer" => qr|http://localhost:[0-9]+/p2_%C3%A9%C3%A9n.html|, + }, headers => { "Content-type" => "text/plain; charset=UTF-8", }, @@ -187,7 +185,7 @@ my %urls = ( }, ); -my $cmdline = $WgetTest::WGETPATH . " --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/"; +my $cmdline = $WgetTest::WGETPATH . " -d --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/"; my $expected_error_code = 0;