added option --https-only

This commit is contained in:
Tim Ruehsen 2013-08-22 12:28:11 +02:00 committed by Giuseppe Scrivano
parent a7df7ecc2f
commit 42c78fdd71
11 changed files with 122 additions and 9 deletions

View File

@ -1,3 +1,7 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* wget.texi: Add description for --https-only.
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
* wget.texi (Download Options): Fix misspelling.

View File

@ -1606,6 +1606,9 @@ buggy SSL server implementations that make it hard for OpenSSL to
choose the correct protocol version. Fortunately, such servers are
quite rare.
@item --https-only
When in recursive mode, only HTTPS links are followed.
@cindex SSL certificate, check
@item --no-check-certificate
Don't check the server certificate against the available certificate

View File

@ -1,3 +1,9 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* main.c: Add new option --https-only.
* options.h: Likewise.
* recur.c (download_child_p): Add check for HTTPS.
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (ssl_init): Prevent CA files from being loaded twice

View File

@ -194,6 +194,9 @@ static const struct {
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
{ "httppassword", &opt.http_passwd, cmd_string },
{ "httpproxy", &opt.http_proxy, cmd_string },
#ifdef HAVE_SSL
{ "httpsonly", &opt.https_only, cmd_boolean },
#endif
{ "httpsproxy", &opt.https_proxy, cmd_string },
{ "httpuser", &opt.http_user, cmd_string },
{ "ignorecase", &opt.ignore_case, cmd_boolean },

View File

@ -217,6 +217,7 @@ static struct cmdline_option option_data[] =
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
{ "http-password", 0, OPT_VALUE, "httppassword", -1 },
{ "http-user", 0, OPT_VALUE, "httpuser", -1 },
{ IF_SSL ("https-only"), 0, OPT_BOOLEAN, "httpsonly", -1 },
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
@ -635,6 +636,8 @@ HTTPS (SSL/TLS) options:\n"),
N_("\
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
SSLv3, and TLSv1.\n"),
N_("\
--https-only only follow secure HTTPS links\n"),
N_("\
--no-check-certificate don't validate the server's certificate.\n"),
N_("\

View File

@ -215,9 +215,9 @@ struct options
char *ca_directory; /* CA directory (hash files) */
char *ca_cert; /* CA certificate file to use */
char *random_file; /* file with random data to seed the PRNG */
char *egd_file; /* file name of the egd daemon socket */
bool https_only; /* whether to follow HTTPS only */
#endif /* HAVE_SSL */
bool cookies; /* whether cookies are used. */

View File

@ -505,15 +505,16 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
}
/* Several things to check for:
1. if scheme is not http, and we don't load it
2. check for relative links (if relative_only is set)
3. check for domain
4. check for no-parent
5. check for excludes && includes
6. check for suffix
7. check for same host (if spanhost is unset), with possible
1. if scheme is not https and https_only requested
2. if scheme is not http, and we don't load it
3. check for relative links (if relative_only is set)
4. check for domain
5. check for no-parent
6. check for excludes && includes
7. check for suffix
8. check for same host (if spanhost is unset), with possible
gethostbyname baggage
8. check for robots.txt
9. check for robots.txt
Addendum: If the URL is FTP, and it is to be loaded, only the
domain and suffix settings are "stronger".
@ -525,6 +526,12 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
More time- and memory- consuming tests should be put later on
the list. */
#ifdef HAVE_SSL
  /* opt.https_only is declared inside the HAVE_SSL section of struct
     options, and the --https-only option is only registered in SSL
     builds, so this check must be compiled out when SSL support is
     absent.  */
  if (opt.https_only && u->scheme != SCHEME_HTTPS)
    {
      DEBUGP (("Not following non-HTTPS links.\n"));
      goto out;
    }
#endif /* HAVE_SSL */
/* Determine whether URL under consideration has a HTTP-like scheme. */
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);

View File

@ -1,3 +1,9 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
* run-px (tests): Likewise.
* Test--httpsonly-r.px: New file.
2013-03-12 Darshit Shah <darnir@gmail.com>
* Makefile.am (EXTRA_DIST): Add Test--post-file.px.

View File

@ -132,6 +132,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
Test--spider-r--no-content-disposition.px \
Test--spider-r--no-content-disposition-trivial.px \
Test--spider-r.px \
Test--httpsonly-r.px \
run-px certs
check_PROGRAMS = unit-tests

79
tests/Test--httpsonly-r.px Executable file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env perl

# Test for `wget --https-only -r`.
#
# The start page is requested with an http:// URL and contains a single
# link, also http://, to a second page.  With --https-only that link must
# not be followed during recursion, so index.html is the only file expected
# to be downloaded.

use strict;
use warnings;

use HTTPTest;


###############################################################################

# Start page: its only link uses the plain http scheme.
# NOTE(review): {{port}} is presumably substituted by the test harness with
# the port of the local test server -- confirm against HTTPTest.
my $mainpage = <<EOF;
<html>
<head>
<title>Main Page</title>
</head>
<body>
<p>
Some text and a link to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
</p>
</body>
</html>
EOF

# Link target: served by the test server, but not listed among the
# expected downloads below.
my $secondpage = <<EOF;
<html>
<head>
<title>Second Page</title>
</head>
<body>
<p>
Anything.
</p>
</body>
</html>
EOF

# code, msg, headers, content
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $mainpage,
    },
    '/secondpage.html' => {
        code => "200",
        msg => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $secondpage,
    },
);

my $cmdline = $WgetTest::WGETPATH . " --https-only -r -nH http://localhost:{{port}}/";

my $expected_error_code = 0;

# Only the start page is expected; secondpage.html is reachable solely
# through an http:// link.
my %expected_downloaded_files = (
    'index.html' => {
        content => $mainpage,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test--httpsonly-r",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);

# The script's exit status is whatever the test run returns.
exit $the_test->run();

# vim: et ts=4 sw=4

View File

@ -81,6 +81,7 @@ my @tests = (
'Test--spider-r--no-content-disposition.px',
'Test--spider-r--no-content-disposition-trivial.px',
'Test--spider-r.px',
'Test--httpsonly-r.px',
);
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {