mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
added option --https-only
This commit is contained in:
parent
a7df7ecc2f
commit
42c78fdd71
@ -1,3 +1,7 @@
|
|||||||
|
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* wget.texi: Add description for --https-only.
|
||||||
|
|
||||||
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
|
2013-08-13 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* wget.texi (Download Options): Fix misspelling.
|
* wget.texi (Download Options): Fix misspelling.
|
||||||
|
@ -1606,6 +1606,9 @@ buggy SSL server implementations that make it hard for OpenSSL to
|
|||||||
choose the correct protocol version. Fortunately, such servers are
|
choose the correct protocol version. Fortunately, such servers are
|
||||||
quite rare.
|
quite rare.
|
||||||
|
|
||||||
|
@item --https-only
|
||||||
|
When in recursive mode, only HTTPS links are followed; links that use
any other scheme are skipped.
|
||||||
|
|
||||||
@cindex SSL certificate, check
|
@cindex SSL certificate, check
|
||||||
@item --no-check-certificate
|
@item --no-check-certificate
|
||||||
Don't check the server certificate against the available certificate
|
Don't check the server certificate against the available certificate
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* main.c: Add new option --https-only.
|
||||||
|
* options.h: Likewise.
|
||||||
|
* recur.c (download_child_p): Add check for HTTPS.
|
||||||
|
|
||||||
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2013-08-09 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
* gnutls.c (ssl_init): Prevent CA files from being loaded twice
|
* gnutls.c (ssl_init): Prevent CA files from being loaded twice
|
||||||
|
@ -194,6 +194,9 @@ static const struct {
|
|||||||
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
|
{ "httppasswd", &opt.http_passwd, cmd_string }, /* deprecated */
|
||||||
{ "httppassword", &opt.http_passwd, cmd_string },
|
{ "httppassword", &opt.http_passwd, cmd_string },
|
||||||
{ "httpproxy", &opt.http_proxy, cmd_string },
|
{ "httpproxy", &opt.http_proxy, cmd_string },
|
||||||
|
#ifdef HAVE_SSL
|
||||||
|
{ "httpsonly", &opt.https_only, cmd_boolean },
|
||||||
|
#endif
|
||||||
{ "httpsproxy", &opt.https_proxy, cmd_string },
|
{ "httpsproxy", &opt.https_proxy, cmd_string },
|
||||||
{ "httpuser", &opt.http_user, cmd_string },
|
{ "httpuser", &opt.http_user, cmd_string },
|
||||||
{ "ignorecase", &opt.ignore_case, cmd_boolean },
|
{ "ignorecase", &opt.ignore_case, cmd_boolean },
|
||||||
|
@ -217,6 +217,7 @@ static struct cmdline_option option_data[] =
|
|||||||
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
|
{ "http-passwd", 0, OPT_VALUE, "httppassword", -1 }, /* deprecated */
|
||||||
{ "http-password", 0, OPT_VALUE, "httppassword", -1 },
|
{ "http-password", 0, OPT_VALUE, "httppassword", -1 },
|
||||||
{ "http-user", 0, OPT_VALUE, "httpuser", -1 },
|
{ "http-user", 0, OPT_VALUE, "httpuser", -1 },
|
||||||
|
{ IF_SSL ("https-only"), 0, OPT_BOOLEAN, "httpsonly", -1 },
|
||||||
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
|
{ "ignore-case", 0, OPT_BOOLEAN, "ignorecase", -1 },
|
||||||
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
|
{ "ignore-length", 0, OPT_BOOLEAN, "ignorelength", -1 },
|
||||||
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
|
{ "ignore-tags", 0, OPT_VALUE, "ignoretags", -1 },
|
||||||
@ -635,6 +636,8 @@ HTTPS (SSL/TLS) options:\n"),
|
|||||||
N_("\
|
N_("\
|
||||||
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
|
--secure-protocol=PR choose secure protocol, one of auto, SSLv2,\n\
|
||||||
SSLv3, and TLSv1.\n"),
|
SSLv3, and TLSv1.\n"),
|
||||||
|
N_("\
|
||||||
|
--https-only only follow secure HTTPS links\n"),
|
||||||
N_("\
|
N_("\
|
||||||
--no-check-certificate don't validate the server's certificate.\n"),
|
--no-check-certificate don't validate the server's certificate.\n"),
|
||||||
N_("\
|
N_("\
|
||||||
|
@ -215,9 +215,9 @@ struct options
|
|||||||
char *ca_directory; /* CA directory (hash files) */
|
char *ca_directory; /* CA directory (hash files) */
|
||||||
char *ca_cert; /* CA certificate file to use */
|
char *ca_cert; /* CA certificate file to use */
|
||||||
|
|
||||||
|
|
||||||
char *random_file; /* file with random data to seed the PRNG */
|
char *random_file; /* file with random data to seed the PRNG */
|
||||||
char *egd_file; /* file name of the egd daemon socket */
|
char *egd_file; /* file name of the egd daemon socket */
|
||||||
|
bool https_only; /* whether to follow HTTPS only */
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
|
|
||||||
bool cookies; /* whether cookies are used. */
|
bool cookies; /* whether cookies are used. */
|
||||||
|
23
src/recur.c
23
src/recur.c
@ -505,15 +505,16 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Several things to check for:
|
/* Several things to check for:
|
||||||
1. if scheme is not http, and we don't load it
|
1. if scheme is not https and https_only requested
|
||||||
2. check for relative links (if relative_only is set)
|
2. if scheme is not http, and we don't load it
|
||||||
3. check for domain
|
3. check for relative links (if relative_only is set)
|
||||||
4. check for no-parent
|
4. check for domain
|
||||||
5. check for excludes && includes
|
5. check for no-parent
|
||||||
6. check for suffix
|
6. check for excludes && includes
|
||||||
7. check for same host (if spanhost is unset), with possible
|
7. check for suffix
|
||||||
|
8. check for same host (if spanhost is unset), with possible
|
||||||
gethostbyname baggage
|
gethostbyname baggage
|
||||||
8. check for robots.txt
|
9. check for robots.txt
|
||||||
|
|
||||||
Addendum: If the URL is FTP, and it is to be loaded, only the
|
Addendum: If the URL is FTP, and it is to be loaded, only the
|
||||||
domain and suffix settings are "stronger".
|
domain and suffix settings are "stronger".
|
||||||
@ -525,6 +526,12 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
More time- and memory- consuming tests should be put later on
|
More time- and memory- consuming tests should be put later on
|
||||||
the list. */
|
the list. */
|
||||||
|
|
||||||
|
if (opt.https_only && u->scheme != SCHEME_HTTPS)
|
||||||
|
{
|
||||||
|
DEBUGP (("Not following non-HTTPS links.\n"));
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
/* Determine whether URL under consideration has a HTTP-like scheme. */
|
/* Determine whether URL under consideration has a HTTP-like scheme. */
|
||||||
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
|
u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
|
||||||
|
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
|
||||||
|
* run-px (tests): Likewise.
|
||||||
|
* Test--httpsonly-r.px: New file.
|
||||||
|
|
||||||
2013-03-12 Darshit Shah <darnir@gmail.com>
|
2013-03-12 Darshit Shah <darnir@gmail.com>
|
||||||
|
|
||||||
* Makefile.am (EXTRA_DIST): Add Test--post-file.px.
|
* Makefile.am (EXTRA_DIST): Add Test--post-file.px.
|
||||||
|
@ -132,6 +132,7 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \
|
|||||||
Test--spider-r--no-content-disposition.px \
|
Test--spider-r--no-content-disposition.px \
|
||||||
Test--spider-r--no-content-disposition-trivial.px \
|
Test--spider-r--no-content-disposition-trivial.px \
|
||||||
Test--spider-r.px \
|
Test--spider-r.px \
|
||||||
|
Test--httpsonly-r.px \
|
||||||
run-px certs
|
run-px certs
|
||||||
|
|
||||||
check_PROGRAMS = unit-tests
|
check_PROGRAMS = unit-tests
|
||||||
|
79
tests/Test--httpsonly-r.px
Executable file
79
tests/Test--httpsonly-r.px
Executable file
@ -0,0 +1,79 @@
|
|||||||
|
#!/usr/bin/env perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use HTTPTest;
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
my $mainpage = <<EOF;
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Main Page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
Some text and a link to a <a href="http://localhost:{{port}}/secondpage.html">second page</a>.
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
EOF
|
||||||
|
|
||||||
|
my $secondpage = <<EOF;
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Second Page</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
Anything.
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# code, msg, headers, content
|
||||||
|
my %urls = (
|
||||||
|
'/index.html' => {
|
||||||
|
code => "200",
|
||||||
|
msg => "Dontcare",
|
||||||
|
headers => {
|
||||||
|
"Content-type" => "text/html",
|
||||||
|
},
|
||||||
|
content => $mainpage,
|
||||||
|
},
|
||||||
|
'/secondpage.html' => {
|
||||||
|
code => "200",
|
||||||
|
msg => "Dontcare",
|
||||||
|
headers => {
|
||||||
|
"Content-type" => "text/html",
|
||||||
|
},
|
||||||
|
content => $secondpage,
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
my $cmdline = $WgetTest::WGETPATH . " --https-only -r -nH http://localhost:{{port}}/";
|
||||||
|
|
||||||
|
my $expected_error_code = 0;
|
||||||
|
|
||||||
|
my %expected_downloaded_files = (
|
||||||
|
'index.html' => {
|
||||||
|
content => $mainpage,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
my $the_test = HTTPTest->new (name => "Test--httpsonly-r",
|
||||||
|
input => \%urls,
|
||||||
|
cmdline => $cmdline,
|
||||||
|
errcode => $expected_error_code,
|
||||||
|
output => \%expected_downloaded_files);
|
||||||
|
print $expected_error_code."\n";
|
||||||
|
|
||||||
|
exit $the_test->run();
|
||||||
|
|
||||||
|
# vim: et ts=4 sw=4
|
||||||
|
|
@ -81,6 +81,7 @@ my @tests = (
|
|||||||
'Test--spider-r--no-content-disposition.px',
|
'Test--spider-r--no-content-disposition.px',
|
||||||
'Test--spider-r--no-content-disposition-trivial.px',
|
'Test--spider-r--no-content-disposition-trivial.px',
|
||||||
'Test--spider-r.px',
|
'Test--spider-r.px',
|
||||||
|
'Test--httpsonly-r.px',
|
||||||
);
|
);
|
||||||
|
|
||||||
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
|
foreach my $var (qw(SYSTEM_WGETRC WGETRC)) {
|
||||||
|
Loading…
Reference in New Issue
Block a user