mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
added option --https-only
This commit is contained in:
parent
a7df7ecc2f
commit
42c78fdd71
@ -1,3 +1,7 @@
|
||||
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* wget.texi (HTTPS (SSL/TLS) Options): Add description for --https-only.
|
||||
|
||||
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* wget.texi (Download Options): Fix misspelling.
|
||||
|
@ -1606,6 +1606,9 @@ buggy SSL server implementations that make it hard for OpenSSL to
|
||||
choose the correct protocol version. Fortunately, such servers are
|
||||
quite rare.
|
||||
|
||||
@item --https-only
|
||||
When in recursive mode, only HTTPS links are followed.
|
||||
|
||||
@cindex SSL certificate, check
|
||||
@item --no-check-certificate
|
||||
Don't check the server certificate against the available certificate
|
||||
|
@ -1,3 +1,9 @@
|
||||
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* main.c: Add new option --https-only.
|
||||
* options.h: Likewise.
|
||||
* recur.c (download_child_p): Add check for HTTPS.
|
||||
|
||||
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* gnutls.c (ssl_init): Prevent CA files from being loaded twice
|
||||
|
@ -194,6 +194,9 @@ static const struct {
|
||||
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
|
||||
{ "httppassword", &opt.http_passwd, cmd_string },
|
||||
{ "httpproxy", &opt.http_proxy, cmd_string },
|
||||
#ifdef HAVE_SSL
|
||||
{ "httpsonly", &opt.https_only, cmd_boolean },
|
||||
#endif
|
||||
{ "httpsproxy", &opt.https_proxy, cmd_string },
|
||||
{ "httpuser", &opt.http_user, cmd_string },
|
||||
{ "ignorecase", &opt.ignore_case, cmd_boolean },
|
||||
|
@ -217,6 +217,7 @@ static struct cmdline_option option_data[] =
|
||||
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
|
||||
{ "http-password", 0, OPT_VALUE, "httppassword", -1 },
|
||||
{ "http-user", 0, OPT_VALUE, "httpuser", -1 },
|
||||
{ IF_SSL ("https-only"), 0, OPT_BOOLEAN, "httpsonly", -1 },
|
||||
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
|
||||
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
|
||||
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
|
||||
@ -635,6 +636,8 @@ HTTPS (SSL/TLS) options:\n"),
|
||||
N_("\
|
||||
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
|
||||
SSLv3, and TLSv1.\n"),
|
||||
N_("\
|
||||
--https-only only follow secure HTTPS links\n"),
|
||||
N_("\
|
||||
--no-check-certificate don't validate the server's certificate.\n"),
|
||||
N_("\
|
||||
|
@ -215,9 +215,9 @@ struct options
|
||||
char *ca_directory; /* CA directory (hash files) */
|
||||
char *ca_cert; /* CA certificate file to use */
|
||||
|
||||
|
||||
char *random_file; /* file with random data to seed the PRNG */
|
||||
char *egd_file; /* file name of the egd daemon socket */
|
||||
bool https_only; /* whether to follow HTTPS only */
|
||||
#endif /* HAVE_SSL */
|
||||
|
||||
bool cookies; /* whether cookies are used. */
|
||||
|
23
src/recur.c
23
src/recur.c
@ -505,15 +505,16 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||
}
|
||||
|
||||
/* Several things to check for:
|
||||
1. if scheme is not http, and we don't load it
|
||||
2. check for relative links (if relative_only is set)
|
||||
3. check for domain
|
||||
4. check for no-parent
|
||||
5. check for excludes && includes
|
||||
6. check for suffix
|
||||
7. check for same host (if spanhost is unset), with possible
|
||||
1. if scheme is not https and https_only requested
|
||||
2. if scheme is not http, and we don't load it
|
||||
3. check for relative links (if relative_only is set)
|
||||
4. check for domain
|
||||
5. check for no-parent
|
||||
6. check for excludes && includes
|
||||
7. check for suffix
|
||||
8. check for same host (if spanhost is unset), with possible
|
||||
gethostbyname baggage
|
||||
8. check for robots.txt
|
||||
9. check for robots.txt
|
||||
|
||||
Addendum: If the URL is FTP, and it is to be loaded, only the
|
||||
domain and suffix settings are "stronger".
|
||||
@ -525,6 +526,12 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||
More time- and memory- consuming tests should be put later on
|
||||
the list. */
|
||||
|
||||
#ifdef HAVE_SSL
  /* 1. With --https-only in effect, refuse every link whose scheme is
     not HTTPS.  This is the cheapest test, so it runs first.

     The check is compiled only for SSL-enabled builds:
     opt.https_only is declared inside the HAVE_SSL section of struct
     options and the option is registered only under HAVE_SSL, so an
     unconditional reference would break builds without SSL.  */
  if (opt.https_only && u->scheme != SCHEME_HTTPS)
    {
      DEBUGP (("Not following non-HTTPS links.\n"));
      goto out;
    }
#endif /* HAVE_SSL */
|
||||
|
||||
/* Determine whether URL under consideration has a HTTP-like scheme. */
|
||||
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
|
||||
|
||||
|
@ -1,3 +1,9 @@
|
||||
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
|
||||
* run-px (tests): Likewise.
|
||||
* Test--httpsonly-r.px: New file.
|
||||
|
||||
2013-03-12 Darshit Shah <darnir@gmail.com>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add Test--post-file.px.
|
||||
|
@ -132,6 +132,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
|
||||
Test--spider-r--no-content-disposition.px \
|
||||
Test--spider-r--no-content-disposition-trivial.px \
|
||||
Test--spider-r.px \
|
||||
Test--httpsonly-r.px \
|
||||
run-px certs
|
||||
|
||||
check_PROGRAMS = unit-tests
|
||||
|
79
tests/Test--httpsonly-r.px
Executable file
79
tests/Test--httpsonly-r.px
Executable file
@ -0,0 +1,79 @@
|
||||
#!/usr/bin/env perl

# Test--httpsonly-r.px
#
# Checks that a recursive download run with --https-only does not
# follow plain-HTTP links.  The start page is served over HTTP and
# links to a second page, also over HTTP.  The start URL is given on
# the command line, so it is fetched; the linked page must not be.
#
# NOTE(review): main.c registers "https-only" only when wget is built
# with SSL support, so this test presumably fails against a non-SSL
# build -- confirm whether it should be skipped in that case.

use strict;
use warnings;

use HTTPTest;


###############################################################################

# Fixtures are literal HTML, so the heredocs are non-interpolating.
# The {{port}} placeholder is left as-is for the test harness.
my $mainpage = <<'EOF';
<html>
<head>
<title>Main Page</title>
</head>
<body>
<p>
Some text and a link to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
</p>
</body>
</html>
EOF

my $secondpage = <<'EOF';
<html>
<head>
<title>Second Page</title>
</head>
<body>
<p>
Anything.
</p>
</body>
</html>
EOF

# Pages offered by the test HTTP server.
# code, msg, headers, content
my %urls = (
    '/index.html' => {
        code => "200",
        msg => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $mainpage,
    },
    '/secondpage.html' => {
        code => "200",
        msg => "Dontcare",
        headers => {
            "Content-type" => "text/html",
        },
        content => $secondpage,
    }
);

my $cmdline = $WgetTest::WGETPATH . " --https-only -r -nH http://localhost:{{port}}/";

my $expected_error_code = 0;

# Only the start page may be downloaded; secondpage.html is reachable
# solely through a non-HTTPS link and must therefore be absent.
my %expected_downloaded_files = (
    'index.html' => {
        content => $mainpage,
    },
);

###############################################################################

my $the_test = HTTPTest->new (name => "Test--httpsonly-r",
                              input => \%urls,
                              cmdline => $cmdline,
                              errcode => $expected_error_code,
                              output => \%expected_downloaded_files);

# The exit status of the script is the result of the test run.
exit $the_test->run();

# vim: et ts=4 sw=4
|
||||
|
@ -81,6 +81,7 @@ my @tests = (
|
||||
'Test--spider-r--no-content-disposition.px',
|
||||
'Test--spider-r--no-content-disposition-trivial.px',
|
||||
'Test--spider-r.px',
|
||||
'Test--httpsonly-r.px',
|
||||
);
|
||||
|
||||
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
|
||||
|
Loading…
Reference in New Issue
Block a user