1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Referer/IRI fix.

This commit is contained in:
Micah Cowan 2009-07-01 23:17:33 -07:00
parent bb5af451a0
commit a00b834bb3
6 changed files with 56 additions and 15 deletions

View File

@ -1,3 +1,14 @@
2009-07-01 Micah Cowan <micah@cowan.name>
* retr.c (retrieve_url): Use the existing "redirect" label,
instead of superfluous "second_try". Removed no-longer-accurate
debug statement. Use the "newloc" parameter to store the fallback
URL, when IRI version was rejected.
* recur.c (retrieve_tree): Always use the parsed URL for tracking
the Referer, since that's the one we actually requested (if
there's a difference in terms of percent-encodings and such).
2009-07-01 Steven Schubiger <stsc@member.fsf.org>
* Makefile.am: Add a rule to generate build_info.c and list

View File

@ -320,6 +320,11 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi)
xfree (url); xfree (url);
url = redirected; url = redirected;
} }
else
{
xfree (url);
url = xstrdup (url_parsed->url);
}
url_free(url_parsed); url_free(url_parsed);
} }

View File

@ -605,6 +605,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
uerr_t result; uerr_t result;
char *url; char *url;
bool location_changed; bool location_changed;
bool iri_fallbacked = 0;
int dummy; int dummy;
char *mynewloc, *proxy; char *mynewloc, *proxy;
struct url *u = orig_parsed, *proxy_url; struct url *u = orig_parsed, *proxy_url;
@ -628,15 +629,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
if (file) if (file)
*file = NULL; *file = NULL;
second_try:
DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
iri->utf8_encode));
if (!refurl) if (!refurl)
refurl = opt.referer; refurl = opt.referer;
redirected: redirected:
/* (also for IRI fallbacking) */
result = NOCONERROR; result = NOCONERROR;
mynewloc = NULL; mynewloc = NULL;
@ -805,7 +802,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
if (u) if (u)
{ {
DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url))); DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
goto second_try; url = xstrdup (u->url);
iri_fallbacked = 1;
goto redirected;
} }
else else
DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url))); DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
@ -840,7 +839,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
url_free (u); url_free (u);
} }
if (redirection_count) if (redirection_count || iri_fallbacked)
{ {
if (newloc) if (newloc)
*newloc = url; *newloc = url;

View File

@ -1,3 +1,12 @@
2009-07-01 Micah Cowan <micah@cowan.name>
* HTTPServer.pm (send_response): Invocation of
verify_request_headers, to support testing of Wget-sent header
values.
(verify_request_headers): Added.
* Test-iri.px: Added verification checks for Referer values.
2009-06-29 Micah Cowan <micah@cowan.name>
* WgetTest.pm.in (_cleanup): Allow cleanup of test directories to

View File

@ -68,6 +68,9 @@ sub send_response {
if (exists $url_rec->{'auth_method'}) { if (exists $url_rec->{'auth_method'}) {
($send_content, $code, $msg, $headers) = ($send_content, $code, $msg, $headers) =
$self->handle_auth($req, $url_rec); $self->handle_auth($req, $url_rec);
} elsif (!$self->verify_request_headers ($req, $url_rec)) {
($send_content, $code, $msg, $headers) =
('', 400, 'Mismatch on expected headers', {});
} else { } else {
($code, $msg) = @{$url_rec}{'code', 'msg'}; ($code, $msg) = @{$url_rec}{'code', 'msg'};
$headers = $url_rec->{headers}; $headers = $url_rec->{headers};
@ -210,6 +213,22 @@ sub verify_auth_basic {
} }
} }
# Check the headers of an incoming request against the expectations
# recorded for the requested URL.
#
# Arguments:
#   $self    - the invocant (not consulted here).
#   $req     - the request object; must provide a header($name) method.
#   $url_rec - hash ref describing the URL.  Its optional
#              'request_headers' entry maps header names to patterns
#              that the corresponding request header must match.
#
# Returns 1 when no expectations are recorded or when every expected
# header is present and matches its pattern.  On the first missing or
# non-matching header a diagnostic is printed to STDERR and a false
# value is returned (undef in scalar context, the empty list in list
# context).
sub verify_request_headers {
    my ($self, $req, $url_rec) = @_;

    return 1 unless exists $url_rec->{'request_headers'};

    my $expected = $url_rec->{'request_headers'};
    # Sorted so that the header reported first is the same on every run.
    for my $hdrname (sort keys %{$expected}) {
        my $rhdr = $req->header ($hdrname);
        my $ehdr = $expected->{$hdrname};
        next if defined $rhdr && $rhdr =~ $ehdr;

        # A header that was never sent comes back as undef; spell that
        # out instead of interpolating undef (which would warn and leave
        # an empty slot in the message).
        my $shown = defined $rhdr ? $rhdr : '(header not sent)';
        print STDERR "\n*** Mismatch on $hdrname: $shown =~ $ehdr\n";
        # Bare return: false in scalar and in list context alike.
        return;
    }

    return 1;
}
sub _substitute_port { sub _substitute_port {
my $self = shift; my $self = shift;
my $ret = shift; my $ret = shift;

View File

@ -148,14 +148,9 @@ my %urls = (
'/p2_%C3%A9%C3%A9n.html' => { # UTF-8 encoded '/p2_%C3%A9%C3%A9n.html' => { # UTF-8 encoded
code => "200", code => "200",
msg => "Ok", msg => "Ok",
headers => { request_headers => {
"Content-type" => "text/html; charset=ISO-8859-1", "Referer" => qr|http://localhost:[0-9]+/p1_fran%E7ais.html|,
}, },
content => $pageeen,
},
'/p2_%E9%E9n.html' => {
code => "200",
msg => "Ok",
headers => { headers => {
"Content-type" => "text/html; charset=ISO-8859-1", "Content-type" => "text/html; charset=ISO-8859-1",
}, },
@ -180,6 +175,9 @@ my %urls = (
'/p4_m%C3%A9%C3%A9r.html' => { '/p4_m%C3%A9%C3%A9r.html' => {
code => "200", code => "200",
msg => "Ok", msg => "Ok",
request_headers => {
"Referer" => qr|http://localhost:[0-9]+/p2_%C3%A9%C3%A9n.html|,
},
headers => { headers => {
"Content-type" => "text/plain; charset=UTF-8", "Content-type" => "text/plain; charset=UTF-8",
}, },
@ -187,7 +185,7 @@ my %urls = (
}, },
); );
my $cmdline = $WgetTest::WGETPATH . " --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/"; my $cmdline = $WgetTest::WGETPATH . " -d --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/";
my $expected_error_code = 0; my $expected_error_code = 0;