1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Referer/IRI fix.

This commit is contained in:
Micah Cowan 2009-07-01 23:17:33 -07:00
parent bb5af451a0
commit a00b834bb3
6 changed files with 56 additions and 15 deletions

View File

@ -1,3 +1,14 @@
2009-07-01 Micah Cowan <micah@cowan.name>
* retr.c (retrieve_url): Use the existing "redirect" label,
instead of superfluous "second_try". Removed no-longer-accurate
debug statement. Use the "newloc" parameter to store the fallback
URL, when IRI version was rejected.
* recur.c (retrieve_tree): Always use the parsed URL for tracking
the Referer, since that's the one we actually requested (if
there's a difference in terms of percent-encodings and such).
2009-07-01 Steven Schubiger <stsc@member.fsf.org>
* Makefile.am: Add a rule to generate build_info.c and list

View File

@ -320,6 +320,11 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi)
xfree (url);
url = redirected;
}
else
{
xfree (url);
url = xstrdup (url_parsed->url);
}
url_free(url_parsed);
}

View File

@ -605,6 +605,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
uerr_t result;
char *url;
bool location_changed;
bool iri_fallbacked = 0;
int dummy;
char *mynewloc, *proxy;
struct url *u = orig_parsed, *proxy_url;
@ -628,15 +629,11 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
if (file)
*file = NULL;
second_try:
DEBUGP (("[IRI Retrieving %s with %s (UTF-8=%d)\n", quote_n (0, url),
iri->uri_encoding ? quote_n (1, iri->uri_encoding) : "None",
iri->utf8_encode));
if (!refurl)
refurl = opt.referer;
redirected:
/* (also for IRI fallbacking) */
result = NOCONERROR;
mynewloc = NULL;
@ -805,7 +802,9 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
if (u)
{
DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
goto second_try;
url = xstrdup (u->url);
iri_fallbacked = 1;
goto redirected;
}
else
DEBUGP (("[Couldn't fallback to non-utf8 for %s\n", quote (url)));
@ -840,7 +839,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file,
url_free (u);
}
if (redirection_count)
if (redirection_count || iri_fallbacked)
{
if (newloc)
*newloc = url;

View File

@ -1,3 +1,12 @@
2009-07-01 Micah Cowan <micah@cowan.name>
* HTTPServer.pm (send_response): Invoke
verify_request_headers, to support testing of Wget-sent header
values.
(verify_request_headers): Added.
* Test-iri.px: Added verification checks for Referer values.
2009-06-29 Micah Cowan <micah@cowan.name>
* WgetTest.pm.in (_cleanup): Allow cleanup of test directories to

View File

@ -68,6 +68,9 @@ sub send_response {
if (exists $url_rec->{'auth_method'}) {
($send_content, $code, $msg, $headers) =
$self->handle_auth($req, $url_rec);
} elsif (!$self->verify_request_headers ($req, $url_rec)) {
($send_content, $code, $msg, $headers) =
('', 400, 'Mismatch on expected headers', {});
} else {
($code, $msg) = @{$url_rec}{'code', 'msg'};
$headers = $url_rec->{headers};
@ -210,6 +213,22 @@ sub verify_auth_basic {
}
}
# Check the headers of an incoming request against the expectations
# recorded for the requested URL.
#
# Arguments:
#   $self    - the invocant (not used by this method).
#   $req     - request object; only its header($name) method is called.
#   $url_rec - hash ref for the URL; its optional 'request_headers' entry
#              maps header names to patterns the sent value must match.
#
# Returns 1 when there is no 'request_headers' entry, or when every listed
# header was sent and matches its pattern.  Otherwise the first failing
# header is reported on STDERR and a false value is returned (undef in
# scalar context, the empty list in list context).
sub verify_request_headers {
    my ($self, $req, $url_rec) = @_;

    return 1 unless exists $url_rec->{'request_headers'};

    my $expected = $url_rec->{'request_headers'};

    # Sorted so the header reported first is the same on every run.
    for my $hdrname (sort keys %{$expected}) {
        my $rhdr = $req->header ($hdrname);
        my $ehdr = $expected->{$hdrname};
        next if defined $rhdr && $rhdr =~ $ehdr;

        # A header that was not sent at all is a mismatch too; say so
        # explicitly instead of interpolating undef into the diagnostic.
        my $shown = defined $rhdr ? $rhdr : '<not sent>';
        print STDERR "\n*** Mismatch on $hdrname: $shown =~ $ehdr\n";
        return;
    }

    return 1;
}
sub _substitute_port {
my $self = shift;
my $ret = shift;

View File

@ -148,14 +148,9 @@ my %urls = (
'/p2_%C3%A9%C3%A9n.html' => { # UTF-8 encoded
code => "200",
msg => "Ok",
headers => {
"Content-type" => "text/html; charset=ISO-8859-1",
request_headers => {
"Referer" => qr|http://localhost:[0-9]+/p1_fran%E7ais.html|,
},
content => $pageeen,
},
'/p2_%E9%E9n.html' => {
code => "200",
msg => "Ok",
headers => {
"Content-type" => "text/html; charset=ISO-8859-1",
},
@ -180,6 +175,9 @@ my %urls = (
'/p4_m%C3%A9%C3%A9r.html' => {
code => "200",
msg => "Ok",
request_headers => {
"Referer" => qr|http://localhost:[0-9]+/p2_%C3%A9%C3%A9n.html|,
},
headers => {
"Content-type" => "text/plain; charset=UTF-8",
},
@ -187,7 +185,7 @@ my %urls = (
},
);
my $cmdline = $WgetTest::WGETPATH . " --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/";
my $cmdline = $WgetTest::WGETPATH . " -d --iri --restrict-file-names=nocontrol -nH -r http://localhost:{{port}}/";
my $expected_error_code = 0;