1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

added option --https-only

This commit is contained in:
Tim Ruehsen 2013-08-22 12:28:11 +02:00 committed by Giuseppe Scrivano
parent a7df7ecc2f
commit 42c78fdd71
11 changed files with 122 additions and 9 deletions

View File

@ -1,3 +1,7 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* wget.texi: Add description for --https-only.
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org> 2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
* wget.texi (Download Options): Fix misspelling. * wget.texi (Download Options): Fix misspelling.

View File

@ -1606,6 +1606,9 @@ buggy SSL server implementations that make it hard for OpenSSL to
choose the correct protocol version. Fortunately, such servers are choose the correct protocol version. Fortunately, such servers are
quite rare. quite rare.
@item --https-only
When in recursive mode, only HTTPS links are followed.
@cindex SSL certificate, check @cindex SSL certificate, check
@item --no-check-certificate @item --no-check-certificate
Don't check the server certificate against the available certificate Don't check the server certificate against the available certificate

View File

@ -1,3 +1,9 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* main.c: Add new option --https-only.
* options.h: Likewise.
* recur.c (download_child_p): Add check for HTTPS.
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de> 2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
* gnutls.c (ssl_init): Prevent CA files from being loaded twice * gnutls.c (ssl_init): Prevent CA files from being loaded twice

View File

@ -194,6 +194,9 @@ static const struct {
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */ { "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
{ "httppassword", &opt.http_passwd, cmd_string }, { "httppassword", &opt.http_passwd, cmd_string },
{ "httpproxy", &opt.http_proxy, cmd_string }, { "httpproxy", &opt.http_proxy, cmd_string },
#ifdef HAVE_SSL
{ "httpsonly", &opt.https_only, cmd_boolean },
#endif
{ "httpsproxy", &opt.https_proxy, cmd_string }, { "httpsproxy", &opt.https_proxy, cmd_string },
{ "httpuser", &opt.http_user, cmd_string }, { "httpuser", &opt.http_user, cmd_string },
{ "ignorecase", &opt.ignore_case, cmd_boolean }, { "ignorecase", &opt.ignore_case, cmd_boolean },

View File

@ -217,6 +217,7 @@ static struct cmdline_option option_data[] =
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */ { "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
{ "http-password", 0, OPT_VALUE, "httppassword", -1 }, { "http-password", 0, OPT_VALUE, "httppassword", -1 },
{ "http-user", 0, OPT_VALUE, "httpuser", -1 }, { "http-user", 0, OPT_VALUE, "httpuser", -1 },
{ IF_SSL ("https-only"), 0, OPT_BOOLEAN, "httpsonly", -1 },
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 }, { "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 }, { "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 }, { "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
@ -635,6 +636,8 @@ HTTPS (SSL/TLS) options:\n"),
N_("\ N_("\
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\ --secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
SSLv3, and TLSv1.\n"), SSLv3, and TLSv1.\n"),
N_("\
--https-only only follow secure HTTPS links\n"),
N_("\ N_("\
--no-check-certificate don't validate the server's certificate.\n"), --no-check-certificate don't validate the server's certificate.\n"),
N_("\ N_("\

View File

@ -215,9 +215,9 @@ struct options
char *ca_directory; /* CA directory (hash files) */ char *ca_directory; /* CA directory (hash files) */
char *ca_cert; /* CA certificate file to use */ char *ca_cert; /* CA certificate file to use */
char *random_file; /* file with random data to seed the PRNG */ char *random_file; /* file with random data to seed the PRNG */
char *egd_file; /* file name of the egd daemon socket */ char *egd_file; /* file name of the egd daemon socket */
bool https_only; /* whether to follow HTTPS only */
#endif /* HAVE_SSL */ #endif /* HAVE_SSL */
bool cookies; /* whether cookies are used. */ bool cookies; /* whether cookies are used. */

View File

@ -505,15 +505,16 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
} }
/* Several things to check for: /* Several things to check for:
1. if scheme is not http, and we don't load it 1. if scheme is not https and https_only requested
2. check for relative links (if relative_only is set) 2. if scheme is not http, and we don't load it
3. check for domain 3. check for relative links (if relative_only is set)
4. check for no-parent 4. check for domain
5. check for excludes && includes 5. check for no-parent
6. check for suffix 6. check for excludes && includes
7. check for same host (if spanhost is unset), with possible 7. check for suffix
8. check for same host (if spanhost is unset), with possible
gethostbyname baggage gethostbyname baggage
8. check for robots.txt 9. check for robots.txt
Addendum: If the URL is FTP, and it is to be loaded, only the Addendum: If the URL is FTP, and it is to be loaded, only the
domain and suffix settings are "stronger". domain and suffix settings are "stronger".
@ -525,6 +526,12 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
More time- and memory- consuming tests should be put later on More time- and memory- consuming tests should be put later on
the list. */ the list. */
if (opt.https_only && u->scheme != SCHEME_HTTPS)
{
DEBUGP (("Not following non-HTTPS links.\n"));
goto out;
}
/* Determine whether URL under consideration has a HTTP-like scheme. */ /* Determine whether URL under consideration has a HTTP-like scheme. */
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP); u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);

View File

@ -1,3 +1,9 @@
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
* run-px (tests): Likewise.
* Test--httpsonly-r.px: New file.
2013-03-12 Darshit Shah <darnir@gmail.com> 2013-03-12 Darshit Shah <darnir@gmail.com>
* Makefile.am (EXTRA_DIST): Add Test--post-file.px. * Makefile.am (EXTRA_DIST): Add Test--post-file.px.

View File

@ -132,6 +132,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
Test--spider-r--no-content-disposition.px \ Test--spider-r--no-content-disposition.px \
Test--spider-r--no-content-disposition-trivial.px \ Test--spider-r--no-content-disposition-trivial.px \
Test--spider-r.px \ Test--spider-r.px \
Test--httpsonly-r.px \
run-px certs run-px certs
check_PROGRAMS = unit-tests check_PROGRAMS = unit-tests

79
tests/Test--httpsonly-r.px Executable file
View File

@ -0,0 +1,79 @@
#!/usr/bin/env perl

# Test--httpsonly-r.px: recursive retrieval combined with --https-only.
#
# The served index page links to a second page through a plain http:// URL.
# wget is invoked with "--https-only -r -nH" against the server root, and the
# test expects index.html to be the only file downloaded, i.e. the http://
# link to secondpage.html must not be followed.

use strict;
use warnings;

use HTTPTest;


###############################################################################

# Start page.  Its only link uses the http scheme, which --https-only is
# expected to reject during recursion.
# NOTE(review): "{{port}}" is presumably substituted by the test harness
# before the page is served -- confirm against HTTPTest/WgetTest.
my $mainpage = <<EOF;
<html>
<head>
<title>Main Page</title>
</head>
<body>
<p>
Some text and a link to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
</p>
</body>
</html>
EOF

# Link target.  It is served, but deliberately absent from the expected
# downloads below.
my $secondpage = <<EOF;
<html>
<head>
<title>Second Page</title>
</head>
<body>
<p>
Anything.
</p>
</body>
</html>
EOF

# Resources offered by the test server: code, msg, headers, content.
my %urls = (
    '/index.html' => {
        code    => "200",
        msg     => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $mainpage,
    },
    '/secondpage.html' => {
        code    => "200",
        msg     => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $secondpage,
    },
);

my $cmdline = $WgetTest::WGETPATH . " --https-only -r -nH http://localhost:{{port}}/";

my $expected_error_code = 0;

# Only the start page may end up on disk; secondpage.html is reachable solely
# through a non-HTTPS link.
my %expected_downloaded_files = (
    'index.html' => {
        content => $mainpage,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test--httpsonly-r",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);

# The exit status of the script is whatever the test run reports.
exit $the_test->run();

# vim: et ts=4 sw=4

View File

@ -81,6 +81,7 @@ my @tests = (
'Test--spider-r--no-content-disposition.px', 'Test--spider-r--no-content-disposition.px',
'Test--spider-r--no-content-disposition-trivial.px', 'Test--spider-r--no-content-disposition-trivial.px',
'Test--spider-r.px', 'Test--spider-r.px',
'Test--httpsonly-r.px',
); );
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) { foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {