mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
--restrict-file-names=ascii
This commit is contained in:
parent
a9a2b34b05
commit
fb0946c7fc
@ -1,3 +1,14 @@
|
||||
2009-07-27 Micah Cowan <micah@cowan.name>
|
||||
|
||||
* options.h (struct options): Added restrict_files_nonascii
|
||||
boolean field.
|
||||
|
||||
* url.c (FILE_CHAR_TEST): Add check for chars outside the ASCII
|
||||
range.
|
||||
|
||||
* init.c (defaults): Add restrict_files_nonascii to initialization.
|
||||
(cmd_spec_restrict_file_names): Allow parsing of "ascii" keyword.
|
||||
|
||||
2009-07-27 Marcel Telka <marcel@telka.sk>
|
||||
|
||||
* iri.c (do_conversion): Typo: invalide -> invalid
|
||||
|
12
src/init.c
12
src/init.c
@ -329,6 +329,7 @@ defaults (void)
|
||||
opt.restrict_files_os = restrict_unix;
|
||||
#endif
|
||||
opt.restrict_files_ctrl = true;
|
||||
opt.restrict_files_nonascii = false;
|
||||
opt.restrict_files_case = restrict_no_case_restriction;
|
||||
|
||||
opt.max_redirect = 20;
|
||||
@ -1275,6 +1276,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
||||
int restrict_os = opt.restrict_files_os;
|
||||
int restrict_ctrl = opt.restrict_files_ctrl;
|
||||
int restrict_case = opt.restrict_files_case;
|
||||
int restrict_nonascii = opt.restrict_files_nonascii;
|
||||
|
||||
const char *end;
|
||||
|
||||
@ -1285,7 +1287,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
||||
end = strchr (val, ',');
|
||||
if (!end)
|
||||
end = val + strlen (val);
|
||||
|
||||
|
||||
if (VAL_IS ("unix"))
|
||||
restrict_os = restrict_unix;
|
||||
else if (VAL_IS ("windows"))
|
||||
@ -1296,10 +1298,13 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
||||
restrict_case = restrict_uppercase;
|
||||
else if (VAL_IS ("nocontrol"))
|
||||
restrict_ctrl = false;
|
||||
else if (VAL_IS ("ascii"))
|
||||
restrict_nonascii = true;
|
||||
else
|
||||
{
|
||||
fprintf (stderr,
|
||||
_("%s: %s: Invalid restriction %s, use [unix|windows],[lowercase|uppercase],[nocontrol].\n"),
|
||||
fprintf (stderr, _("\
|
||||
%s: %s: Invalid restriction %s,\n\
|
||||
use [unix|windows],[lowercase|uppercase],[nocontrol],[ascii].\n"),
|
||||
exec_name, com, quote (val));
|
||||
return false;
|
||||
}
|
||||
@ -1314,6 +1319,7 @@ cmd_spec_restrict_file_names (const char *com, const char *val, void *place_igno
|
||||
opt.restrict_files_os = restrict_os;
|
||||
opt.restrict_files_ctrl = restrict_ctrl;
|
||||
opt.restrict_files_case = restrict_case;
|
||||
opt.restrict_files_nonascii = restrict_nonascii;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -209,6 +209,8 @@ struct options
|
||||
bool restrict_files_ctrl; /* non-zero if control chars in URLs
|
||||
are restricted from appearing in
|
||||
generated file names. */
|
||||
bool restrict_files_nonascii; /* non-zero if bytes with values greater
|
||||
than 127 are restricted. */
|
||||
enum {
|
||||
restrict_no_case_restriction,
|
||||
restrict_lowercase,
|
||||
|
@ -1291,7 +1291,9 @@ enum {
|
||||
filechr_control = 4 /* a control character, e.g. 0-31 */
|
||||
};
|
||||
|
||||
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))
|
||||
#define FILE_CHAR_TEST(c, mask) \
|
||||
((opt.restrict_files_nonascii && !c_isascii ((unsigned char)(c))) || \
|
||||
(filechr_table[(unsigned char)(c)] & (mask)))
|
||||
|
||||
/* Shorthands for the table: */
|
||||
#define U filechr_not_unix
|
||||
|
@ -1,3 +1,9 @@
|
||||
2009-07-27 Micah Cowan <micah@cowan.name>
|
||||
|
||||
* Test-restrict-ascii.px: New.
|
||||
|
||||
* run-px: Added Test-restrict-ascii.px.
|
||||
|
||||
2009-07-26 Micah Cowan <micah@cowan.name>
|
||||
|
||||
* Test-ftp-iri.px, Test-ftp-iri-fallback.px,
|
||||
|
69
tests/Test-restrict-ascii.px
Executable file
69
tests/Test-restrict-ascii.px
Executable file
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/perl
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
|
||||
use HTTPTest;
|
||||
|
||||
# This program tests that --restrict-file-names=ascii can be used to
|
||||
# ensure that all high-valued bytes are escaped. The sample filename was
|
||||
# chosen because in former versions of Wget, one could either choose not
|
||||
# to escape any portion of the UTF-8 filename via
|
||||
# --restrict-file-names=nocontrol (which would only be helpful if one
|
||||
# was _on_ a UTF-8 system), or else Wget would escape _portions_ of
|
||||
# characters, leaving irrelevant "latin1"-looking characters combined
|
||||
# with percent-encoded "control" characters, instead of encoding all the
|
||||
# bytes of an entire non-ASCII UTF-8 character.
|
||||
|
||||
###############################################################################
|
||||
|
||||
# "gnosis" in UTF-8 greek.
|
||||
my $gnosis = '%CE%B3%CE%BD%CF%89%CF%83%CE%B9%CF%82';
|
||||
|
||||
my $mainpage = <<EOF;
|
||||
<html>
|
||||
<head>
|
||||
<title>Some Page Title</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
Some text...
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
||||
EOF
|
||||
|
||||
# code, msg, headers, content
|
||||
my %urls = (
|
||||
"/$gnosis.html" => {
|
||||
code => "200",
|
||||
msg => "Dontcare",
|
||||
headers => {
|
||||
"Content-type" => "text/html",
|
||||
},
|
||||
content => $mainpage,
|
||||
},
|
||||
);
|
||||
|
||||
my $cmdline = $WgetTest::WGETPATH . " --restrict-file-names=ascii "
|
||||
. "http://localhost:{{port}}/${gnosis}.html";
|
||||
|
||||
my $expected_error_code = 0;
|
||||
|
||||
my %expected_downloaded_files = (
|
||||
"${gnosis}.html" => {
|
||||
content => $mainpage,
|
||||
},
|
||||
);
|
||||
|
||||
###############################################################################
|
||||
|
||||
my $the_test = HTTPTest->new (name => "Test-restrict-ascii",
|
||||
input => \%urls,
|
||||
cmdline => $cmdline,
|
||||
errcode => $expected_error_code,
|
||||
output => \%expected_downloaded_files);
|
||||
exit $the_test->run();
|
||||
|
||||
# vim: et ts=4 sw=4
|
||||
|
@ -60,6 +60,7 @@ my @tests = (
|
||||
'Test-O-nonexisting.px',
|
||||
'Test-O.px',
|
||||
'Test-O-nc.px',
|
||||
'Test-restrict-ascii.px',
|
||||
'Test-Restrict-Lowercase.px',
|
||||
'Test-Restrict-Uppercase.px',
|
||||
'Test--spider-fail.px',
|
||||
|
Loading…
Reference in New Issue
Block a user