mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
--restrict-file-names=ascii
This commit is contained in:
parent
a9a2b34b05
commit
fb0946c7fc
@ -1,3 +1,14 @@
|
|||||||
|
2009-07-27 Micah Cowan <micah@cowan.name>
|
||||||
|
|
||||||
|
* options.h (struct options): Added restrict_files_nonascii
|
||||||
|
boolean field.
|
||||||
|
|
||||||
|
* url.c (FILE_CHAR_TEST): Add check for chars outside the ASCII
|
||||||
|
range.
|
||||||
|
|
||||||
|
* init.c (defaults): Add restrict_files_nonascii to initialization.
|
||||||
|
(cmd_spec_restrict_file_names): Allow parsing of "ascii" keyword.
|
||||||
|
|
||||||
2009-07-27 Marcel Telka <marcel@telka.sk>
|
2009-07-27 Marcel Telka <marcel@telka.sk>
|
||||||
|
|
||||||
* iri.c (do_conversion): Typo: invalide -> invalid
|
* iri.c (do_conversion): Typo: invalide -> invalid
|
||||||
|
10
src/init.c
10
src/init.c
@ -329,6 +329,7 @@ defaults (void)
|
|||||||
opt.restrict_files_os = restrict_unix;
|
opt.restrict_files_os = restrict_unix;
|
||||||
#endif
|
#endif
|
||||||
opt.restrict_files_ctrl = true;
|
opt.restrict_files_ctrl = true;
|
||||||
|
opt.restrict_files_nonascii = false;
|
||||||
opt.restrict_files_case = restrict_no_case_restriction;
|
opt.restrict_files_case = restrict_no_case_restriction;
|
||||||
|
|
||||||
opt.max_redirect = 20;
|
opt.max_redirect = 20;
|
||||||
@ -1275,6 +1276,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
|||||||
int restrict_os = opt.restrict_files_os;
|
int restrict_os = opt.restrict_files_os;
|
||||||
int restrict_ctrl = opt.restrict_files_ctrl;
|
int restrict_ctrl = opt.restrict_files_ctrl;
|
||||||
int restrict_case = opt.restrict_files_case;
|
int restrict_case = opt.restrict_files_case;
|
||||||
|
int restrict_nonascii = opt.restrict_files_nonascii;
|
||||||
|
|
||||||
const char *end;
|
const char *end;
|
||||||
|
|
||||||
@ -1296,10 +1298,13 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
|||||||
restrict_case = restrict_uppercase;
|
restrict_case = restrict_uppercase;
|
||||||
else if (VAL_IS ("nocontrol"))
|
else if (VAL_IS ("nocontrol"))
|
||||||
restrict_ctrl = false;
|
restrict_ctrl = false;
|
||||||
|
else if (VAL_IS ("ascii"))
|
||||||
|
restrict_nonascii = true;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fprintf (stderr,
|
fprintf (stderr, _("\
|
||||||
_("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
|
%s: %s: Invalid restriction %s,\n\
|
||||||
|
use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
|
||||||
exec_name, com, quote (val));
|
exec_name, com, quote (val));
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1314,6 +1319,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
|||||||
opt.restrict_files_os = restrict_os;
|
opt.restrict_files_os = restrict_os;
|
||||||
opt.restrict_files_ctrl = restrict_ctrl;
|
opt.restrict_files_ctrl = restrict_ctrl;
|
||||||
opt.restrict_files_case = restrict_case;
|
opt.restrict_files_case = restrict_case;
|
||||||
|
opt.restrict_files_nonascii = restrict_nonascii;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -209,6 +209,8 @@ struct options
|
|||||||
bool restrict_files_ctrl; /* non-zero if control chars in URLs
|
bool restrict_files_ctrl; /* non-zero if control chars in URLs
|
||||||
are restricted from appearing in
|
are restricted from appearing in
|
||||||
generated file names. */
|
generated file names. */
|
||||||
|
bool restrict_files_nonascii; /* non-zero if bytes with values greater
|
||||||
|
than 127 are restricted. */
|
||||||
enum {
|
enum {
|
||||||
restrict_no_case_restriction,
|
restrict_no_case_restriction,
|
||||||
restrict_lowercase,
|
restrict_lowercase,
|
||||||
|
@ -1291,7 +1291,9 @@ enum {
|
|||||||
filechr_control = 4 /* a control character, e.g. 0-31 */
|
filechr_control = 4 /* a control character, e.g. 0-31 */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))
|
#define FILE_CHAR_TEST(c, mask) \
|
||||||
|
((opt.restrict_files_nonascii && !c_isascii ((unsigned char)(c))) || \
|
||||||
|
(filechr_table[(unsigned char)(c)] & (mask)))
|
||||||
|
|
||||||
/* Shorthands for the table: */
|
/* Shorthands for the table: */
|
||||||
#define U filechr_not_unix
|
#define U filechr_not_unix
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2009-07-27 Micah Cowan <micah@cowan.name>
|
||||||
|
|
||||||
|
* Test-restrict-ascii.px: New.
|
||||||
|
|
||||||
|
* run-px: Added Test-restrict-ascii.px.
|
||||||
|
|
||||||
2009-07-26 Micah Cowan <micah@cowan.name>
|
2009-07-26 Micah Cowan <micah@cowan.name>
|
||||||
|
|
||||||
* Test-ftp-iri.px, Test-ftp-iri-fallback.px,
|
* Test-ftp-iri.px, Test-ftp-iri-fallback.px,
|
||||||
|
69
tests/Test-restrict-ascii.px
Executable file
69
tests/Test-restrict-ascii.px
Executable file
@ -0,0 +1,69 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
use HTTPTest;
|
||||||
|
|
||||||
|
# This program tests that --restrict-file-names=ascii can be used to
|
||||||
|
# ensure that all high-valued bytes are escaped. The sample filename was
|
||||||
|
# chosen because in former versions of Wget, one could either choose not
|
||||||
|
# to escape any portion of the UTF-8 filename via
|
||||||
|
# --restrict-file-names=nocontrol (which would only be helpful if one
|
||||||
|
# was _on_ a UTF-8 system), or else Wget would escape _portions_ of
|
||||||
|
# characters, leaving irrelevant "latin1"-looking characters combined
|
||||||
|
# with percent-encoded "control" characters, instead of encoding all the
|
||||||
|
# bytes of an entire non-ASCII UTF-8 character.
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
# "gnosis" in UTF-8 greek.
|
||||||
|
my $gnosis = '%CE%B3%CE%BD%CF%89%CF%83%CE%B9%CF%82';
|
||||||
|
|
||||||
|
my $mainpage = <<EOF;
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<title>Some Page Title</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<p>
|
||||||
|
Some text...
|
||||||
|
</p>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
EOF
|
||||||
|
|
||||||
|
# code, msg, headers, content
|
||||||
|
my %urls = (
|
||||||
|
"/$gnosis.html" => {
|
||||||
|
code => "200",
|
||||||
|
msg => "Dontcare",
|
||||||
|
headers => {
|
||||||
|
"Content-type" => "text/html",
|
||||||
|
},
|
||||||
|
content => $mainpage,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
my $cmdline = $WgetTest::WGETPATH . " --restrict-file-names=ascii "
|
||||||
|
. "http://localhost:{{port}}/${gnosis}.html";
|
||||||
|
|
||||||
|
my $expected_error_code = 0;
|
||||||
|
|
||||||
|
my %expected_downloaded_files = (
|
||||||
|
"${gnosis}.html" => {
|
||||||
|
content => $mainpage,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
my $the_test = HTTPTest->new (name => "Test-restrict-ascii",
|
||||||
|
input => \%urls,
|
||||||
|
cmdline => $cmdline,
|
||||||
|
errcode => $expected_error_code,
|
||||||
|
output => \%expected_downloaded_files);
|
||||||
|
exit $the_test->run();
|
||||||
|
|
||||||
|
# vim: et ts=4 sw=4
|
||||||
|
|
@ -60,6 +60,7 @@ my @tests = (
|
|||||||
'Test-O-nonexisting.px',
|
'Test-O-nonexisting.px',
|
||||||
'Test-O.px',
|
'Test-O.px',
|
||||||
'Test-O-nc.px',
|
'Test-O-nc.px',
|
||||||
|
'Test-restrict-ascii.px',
|
||||||
'Test-Restrict-Lowercase.px',
|
'Test-Restrict-Lowercase.px',
|
||||||
'Test-Restrict-Uppercase.px',
|
'Test-Restrict-Uppercase.px',
|
||||||
'Test--spider-fail.px',
|
'Test--spider-fail.px',
|
||||||
|
Loading…
Reference in New Issue
Block a user