mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] New mechanism for quoting file names.
Published in <m3smmzt4px.fsf@hniksic.iskon.hr>.
This commit is contained in:
parent
ebea9e7e0b
commit
0a3697ad65
11
NEWS
11
NEWS
@ -7,8 +7,6 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
||||
|
||||
* Changes in Wget 1.9.
|
||||
|
||||
** The build process now requires Autoconf 2.5x.
|
||||
|
||||
** It is now possible to specify that POST method be used for HTTP
|
||||
requests. For example, `wget --post-data="id=foo&data=bar" URL' will
|
||||
send a POST request with the specified contents.
|
||||
@ -32,6 +30,15 @@ considered a fatal error.
|
||||
|
||||
** The new option `--dns-cache=off' may be used to prevent Wget from
|
||||
caching DNS lookups.
|
||||
|
||||
** The build process now requires Autoconf 2.5x.
|
||||
|
||||
** Wget no longer quotes characters in local file names that would be
|
||||
considered "unsafe" as part of URL. Quoting can still occur for
|
||||
control characters or for '/', but no longer for frequent characters
|
||||
such as space. You can use the new option --restrict-file-names to
|
||||
enforce even stricter rules, which is useful when downloading to
|
||||
Windows partitions.
|
||||
|
||||
* Wget 1.8.1 is a bugfix release with no user-visible changes.
|
||||
|
||||
|
@ -1,3 +1,8 @@
|
||||
2003-09-14 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* wget.texi (Download Options): Document the new option
|
||||
--restrict-file-names and the corresponding wgetrc command.
|
||||
|
||||
2003-09-10 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* wget.texi (Download Options): Documented new option --dns-cache.
|
||||
|
@ -800,6 +800,39 @@ lookups where they're probably not needed.
|
||||
|
||||
If you don't understand the above description, you probably won't need
|
||||
this option.
|
||||
|
||||
@cindex file names, restrict
|
||||
@cindex Windows file names
|
||||
@itemx --restrict-file-names=none|unix|windows
|
||||
Restrict characters that may occur in local file names created by Wget
|
||||
from remote URLs. Characters that are considered @dfn{unsafe} under a
|
||||
set of restrictions are escaped, i.e. replaced with @samp{%XX}, where
|
||||
@samp{XX} is the hexadecimal code of the character.
|
||||
|
||||
The default for this option depends on the operating system: on Unix and
|
||||
Unix-like OS'es, it defaults to ``unix''. Under Windows and Cygwin, it
|
||||
defaults to ``windows''. Changing the default is useful when you are
|
||||
using a non-native partition, e.g. when downloading files to a Windows
|
||||
partition mounted from Linux, or when using NFS-mounted or SMB-mounted
|
||||
Windows drives.
|
||||
|
||||
When set to ``none'', the only characters that are quoted are those that
|
||||
are impossible to get into a file name---the NUL character and @samp{/}.
|
||||
The control characters, newline, etc. are all placed into file names.
|
||||
|
||||
When set to ``unix'', additional unsafe characters are those in the
|
||||
0--31 range and in the 128--159 range. This is because those characters
|
||||
are typically not printable.
|
||||
|
||||
When set to ``windows'', all of the above are quoted, along with
|
||||
@samp{\}, @samp{|}, @samp{:}, @samp{?}, @samp{"}, @samp{*}, @samp{<},
|
||||
and @samp{>}. Additionally, Wget in Windows mode uses @samp{+} instead
|
||||
of @samp{:} to separate host and port in local file names, and uses
|
||||
@samp{@@} instead of @samp{?} to separate the query portion of the file
|
||||
name from the rest. Therefore, a URL that would be saved as
|
||||
@samp{www.xemacs.org:4300/search.pl?input=blah} in Unix mode would be
|
||||
saved as @samp{www.xemacs.org+4300/search.pl@@input=blah} in Windows
|
||||
mode.
|
||||
@end table
|
||||
|
||||
@node Directory Options, HTTP Options, Download Options, Invoking
|
||||
@ -2241,6 +2274,10 @@ Links}).
|
||||
If set to on, remove @sc{ftp} listings downloaded by Wget. Setting it
|
||||
to off is the same as @samp{-nr}.
|
||||
|
||||
@item restrict_file_names = none/unix/windows
|
||||
Restrict the file names generated by Wget from URLs. See
|
||||
@samp{--restrict-file-names} for a more detailed description.
|
||||
|
||||
@item retr_symlinks = on/off
|
||||
When set to on, retrieve symbolic links as if they were plain files; the
|
||||
same as @samp{--retr-symlinks}.
|
||||
|
@ -1,3 +1,31 @@
|
||||
2003-09-14 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* url.c (append_uri_pathel): Use opt.restrict_file_names when
|
||||
calling file_unsafe_char.
|
||||
|
||||
* init.c: New command restrict_file_names.
|
||||
|
||||
* main.c (main): New option --restrict-file-names[=windows,unix].
|
||||
|
||||
* url.c (url_file_name): Renamed from url_filename.
|
||||
(url_file_name): Add directory and hostdir prefix here, not in
|
||||
mkstruct.
|
||||
(append_dir_structure): New function, does part of the work that
|
||||
used to be in mkstruct. Iterates over path elements in u->path,
|
||||
calling append_uri_pathel on each one to append it to the file
|
||||
name.
|
||||
(append_uri_pathel): URL-unescape a path element and reencode it
|
||||
with a different set of rules, more appropriate for handling of
|
||||
files.
|
||||
(file_unsafe_char): New function, uses a lookup table to decide
|
||||
whether a character should be escaped for use in file name.
|
||||
(append_string): New utility function.
|
||||
(append_char): Ditto.
|
||||
(file_unsafe_char): New argument restrict_for_windows, decide
|
||||
whether Windows file names should be escaped in run-time.
|
||||
|
||||
* connect.c: Include <stdlib.h> to get prototype for abort().
|
||||
|
||||
2003-09-14 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* utils.c (wtimer_sys_set): Extracted the code that sets the
|
||||
|
@ -30,6 +30,7 @@ so, delete this exception statement from your version. */
|
||||
#include <config.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#ifdef HAVE_UNISTD_H
|
||||
# include <unistd.h>
|
||||
|
@ -842,8 +842,8 @@ ftp_index (const char *file, struct url *u, struct fileinfo *f)
|
||||
{
|
||||
char *tmpu, *tmpp; /* temporary, clean user and passwd */
|
||||
|
||||
tmpu = encode_string (u->user);
|
||||
tmpp = u->passwd ? encode_string (u->passwd) : NULL;
|
||||
tmpu = url_escape (u->user);
|
||||
tmpp = u->passwd ? url_escape (u->passwd) : NULL;
|
||||
upwd = (char *)xmalloc (strlen (tmpu)
|
||||
+ (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
|
||||
sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
|
||||
@ -863,7 +863,8 @@ ftp_index (const char *file, struct url *u, struct fileinfo *f)
|
||||
fprintf (fp, " ");
|
||||
if (f->tstamp != -1)
|
||||
{
|
||||
/* #### Should we translate the months? */
|
||||
/* #### Should we translate the months? Or, even better, use
|
||||
ISO 8601 dates? */
|
||||
static char *months[] = {
|
||||
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
|
||||
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
|
||||
|
@ -1025,7 +1025,7 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
|
||||
struct stat st;
|
||||
|
||||
if (!con->target)
|
||||
con->target = url_filename (u);
|
||||
con->target = url_file_name (u);
|
||||
|
||||
if (opt.noclobber && file_exists_p (con->target))
|
||||
{
|
||||
@ -1245,7 +1245,7 @@ ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
|
||||
/* Find the listing file name. We do it by taking the file name of
|
||||
the URL and replacing the last component with the listing file
|
||||
name. */
|
||||
uf = url_filename (u);
|
||||
uf = url_file_name (u);
|
||||
lf = file_merge (uf, LIST_FILENAME);
|
||||
xfree (uf);
|
||||
DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
|
||||
@ -1335,7 +1335,7 @@ ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
|
||||
ofile = xstrdup (u->file);
|
||||
url_set_file (u, f->name);
|
||||
|
||||
con->target = url_filename (u);
|
||||
con->target = url_file_name (u);
|
||||
err = RETROK;
|
||||
|
||||
dlthis = 1;
|
||||
@ -1723,7 +1723,7 @@ ftp_loop (struct url *u, int *dt, struct url *proxy)
|
||||
char *filename = (opt.output_document
|
||||
? xstrdup (opt.output_document)
|
||||
: (con.target ? xstrdup (con.target)
|
||||
: url_filename (u)));
|
||||
: url_file_name (u)));
|
||||
res = ftp_index (filename, u, f);
|
||||
if (res == FTPOK && opt.verbose)
|
||||
{
|
||||
|
@ -1614,12 +1614,12 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
||||
hstat.local_file = local_file;
|
||||
else if (local_file)
|
||||
{
|
||||
*local_file = url_filename (u);
|
||||
*local_file = url_file_name (u);
|
||||
hstat.local_file = local_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
dummy = url_filename (u);
|
||||
dummy = url_file_name (u);
|
||||
hstat.local_file = &dummy;
|
||||
}
|
||||
|
||||
|
29
src/init.c
29
src/init.c
@ -100,6 +100,7 @@ CMD_DECLARE (cmd_spec_htmlify);
|
||||
CMD_DECLARE (cmd_spec_mirror);
|
||||
CMD_DECLARE (cmd_spec_progress);
|
||||
CMD_DECLARE (cmd_spec_recursive);
|
||||
CMD_DECLARE (cmd_spec_restrict_file_names);
|
||||
CMD_DECLARE (cmd_spec_useragent);
|
||||
|
||||
/* List of recognized commands, each consisting of name, closure and function.
|
||||
@ -188,6 +189,7 @@ static struct {
|
||||
{ "reject", &opt.rejects, cmd_vector },
|
||||
{ "relativeonly", &opt.relative_only, cmd_boolean },
|
||||
{ "removelisting", &opt.remove_listing, cmd_boolean },
|
||||
{ "restrictfilenames", &opt.restrict_file_names, cmd_spec_restrict_file_names },
|
||||
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
|
||||
{ "retryconnrefused", &opt.retry_connrefused, cmd_boolean },
|
||||
{ "robots", &opt.use_robots, cmd_boolean },
|
||||
@ -281,6 +283,13 @@ defaults (void)
|
||||
opt.dots_in_line = 50;
|
||||
|
||||
opt.dns_cache = 1;
|
||||
|
||||
/* The default file name restriction depends on the OS type. */
|
||||
#if !defined(WINDOWS) && !defined(__CYGWIN__)
|
||||
opt.restrict_file_names = restrict_shell;
|
||||
#else
|
||||
opt.restrict_file_names = restrict_windows;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Return the user's home directory (strdup-ed), or NULL if none is
|
||||
@ -1008,6 +1017,26 @@ cmd_spec_recursive (const char *com, const char *val, void *closure)
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
cmd_spec_restrict_file_names (const char *com, const char *val, void *closure)
|
||||
{
|
||||
/* The currently accepted values are `none', `unix', and
|
||||
`windows'. */
|
||||
if (0 == strcasecmp (val, "none"))
|
||||
opt.restrict_file_names = restrict_none;
|
||||
else if (0 == strcasecmp (val, "unix"))
|
||||
opt.restrict_file_names = restrict_shell;
|
||||
else if (0 == strcasecmp (val, "windows"))
|
||||
opt.restrict_file_names = restrict_windows;
|
||||
else
|
||||
{
|
||||
fprintf (stderr, _("%s: %s: Invalid specification `%s'.\n"),
|
||||
exec_name, com, val);
|
||||
return 0;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int
|
||||
cmd_spec_useragent (const char *com, const char *val, void *closure)
|
||||
{
|
||||
|
@ -179,10 +179,11 @@ Download:\n\
|
||||
--bind-address=ADDRESS bind to ADDRESS (hostname or IP) on local host.\n\
|
||||
--limit-rate=RATE limit download rate to RATE.\n\
|
||||
--dns-cache=off disable caching DNS lookups.\n\
|
||||
--restrict-file-names=MODE restrict chars in file names to MODE.\n\
|
||||
\n"), stdout);
|
||||
fputs (_("\
|
||||
Directories:\n\
|
||||
-nd --no-directories don\'t create directories.\n\
|
||||
-nd, --no-directories don\'t create directories.\n\
|
||||
-x, --force-directories force creation of directories.\n\
|
||||
-nH, --no-host-directories don\'t create host directories.\n\
|
||||
-P, --directory-prefix=PREFIX save files to PREFIX/...\n\
|
||||
@ -344,6 +345,7 @@ main (int argc, char *const *argv)
|
||||
{ "proxy-user", required_argument, NULL, 143 },
|
||||
{ "quota", required_argument, NULL, 'Q' },
|
||||
{ "reject", required_argument, NULL, 'R' },
|
||||
{ "restrict-file-names", required_argument, NULL, 176 },
|
||||
{ "save-cookies", required_argument, NULL, 162 },
|
||||
{ "timeout", required_argument, NULL, 'T' },
|
||||
{ "tries", required_argument, NULL, 't' },
|
||||
@ -610,6 +612,9 @@ GNU General Public License for more details.\n"));
|
||||
case 175:
|
||||
setval ("dnscache", optarg);
|
||||
break;
|
||||
case 176:
|
||||
setval ("restrictfilenames", optarg);
|
||||
break;
|
||||
case 'A':
|
||||
setval ("accept", optarg);
|
||||
break;
|
||||
|
@ -184,6 +184,12 @@ struct options
|
||||
|
||||
char *post_data; /* POST query string */
|
||||
char *post_file_name; /* File to post */
|
||||
|
||||
enum {
|
||||
restrict_none,
|
||||
restrict_shell,
|
||||
restrict_windows
|
||||
} restrict_file_names; /* whether we restrict file name chars. */
|
||||
};
|
||||
|
||||
extern struct options opt;
|
||||
|
557
src/url.c
557
src/url.c
@ -1,5 +1,6 @@
|
||||
/* URL handling.
|
||||
Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
|
||||
Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GNU Wget.
|
||||
|
||||
@ -95,24 +96,22 @@ static int path_simplify PARAMS ((char *));
|
||||
code assumes ASCII character set and 8-bit chars. */
|
||||
|
||||
enum {
|
||||
/* rfc1738 reserved chars, preserved from encoding. */
|
||||
urlchr_reserved = 1,
|
||||
|
||||
/* rfc1738 unsafe chars, plus some more. */
|
||||
urlchr_unsafe = 2
|
||||
};
|
||||
|
||||
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
|
||||
#define URL_RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
|
||||
#define URL_UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)
|
||||
|
||||
/* Shorthands for the table: */
|
||||
#define R urlchr_reserved
|
||||
#define U urlchr_unsafe
|
||||
#define RU R|U
|
||||
|
||||
#define urlchr_test(c, mask) (urlchr_table[(unsigned char)(c)] & (mask))
|
||||
|
||||
/* rfc1738 reserved chars, preserved from encoding. */
|
||||
|
||||
#define RESERVED_CHAR(c) urlchr_test(c, urlchr_reserved)
|
||||
|
||||
/* rfc1738 unsafe chars, plus some more. */
|
||||
|
||||
#define UNSAFE_CHAR(c) urlchr_test(c, urlchr_unsafe)
|
||||
|
||||
const static unsigned char urlchr_table[256] =
|
||||
{
|
||||
U, U, U, U, U, U, U, U, /* NUL SOH STX ETX EOT ENQ ACK BEL */
|
||||
@ -142,6 +141,9 @@ const static unsigned char urlchr_table[256] =
|
||||
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
||||
U, U, U, U, U, U, U, U, U, U, U, U, U, U, U, U,
|
||||
};
|
||||
#undef R
|
||||
#undef U
|
||||
#undef RU
|
||||
|
||||
/* Decodes the forms %xy in a URL to the character the hexadecimal
|
||||
code of which is xy. xy are hexadecimal digits from
|
||||
@ -150,7 +152,7 @@ const static unsigned char urlchr_table[256] =
|
||||
literally. */
|
||||
|
||||
static void
|
||||
decode_string (char *s)
|
||||
url_unescape (char *s)
|
||||
{
|
||||
char *t = s; /* t - tortoise */
|
||||
char *h = s; /* h - hare */
|
||||
@ -175,10 +177,10 @@ decode_string (char *s)
|
||||
*t = '\0';
|
||||
}
|
||||
|
||||
/* Like encode_string, but return S if there are no unsafe chars. */
|
||||
/* Like url_escape, but return S if there are no unsafe chars. */
|
||||
|
||||
static char *
|
||||
encode_string_maybe (const char *s)
|
||||
url_escape_allow_passthrough (const char *s)
|
||||
{
|
||||
const char *p1;
|
||||
char *p2, *newstr;
|
||||
@ -186,7 +188,7 @@ encode_string_maybe (const char *s)
|
||||
int addition = 0;
|
||||
|
||||
for (p1 = s; *p1; p1++)
|
||||
if (UNSAFE_CHAR (*p1))
|
||||
if (URL_UNSAFE_CHAR (*p1))
|
||||
addition += 2; /* Two more characters (hex digits) */
|
||||
|
||||
if (!addition)
|
||||
@ -199,7 +201,7 @@ encode_string_maybe (const char *s)
|
||||
p2 = newstr;
|
||||
while (*p1)
|
||||
{
|
||||
if (UNSAFE_CHAR (*p1))
|
||||
if (URL_UNSAFE_CHAR (*p1))
|
||||
{
|
||||
unsigned char c = *p1++;
|
||||
*p2++ = '%';
|
||||
@ -215,13 +217,13 @@ encode_string_maybe (const char *s)
|
||||
return newstr;
|
||||
}
|
||||
|
||||
/* Encode the unsafe characters (as determined by UNSAFE_CHAR) in a
|
||||
/* Encode the unsafe characters (as determined by URL_UNSAFE_CHAR) in a
|
||||
given string, returning a malloc-ed %XX encoded string. */
|
||||
|
||||
char *
|
||||
encode_string (const char *s)
|
||||
url_escape (const char *s)
|
||||
{
|
||||
char *encoded = encode_string_maybe (s);
|
||||
char *encoded = url_escape_allow_passthrough (s);
|
||||
if (encoded != s)
|
||||
return encoded;
|
||||
else
|
||||
@ -233,7 +235,7 @@ encode_string (const char *s)
|
||||
allocated storage. */
|
||||
|
||||
#define ENCODE(ptr) do { \
|
||||
char *e_new = encode_string_maybe (ptr); \
|
||||
char *e_new = url_escape_allow_passthrough (ptr); \
|
||||
if (e_new != ptr) \
|
||||
{ \
|
||||
xfree (ptr); \
|
||||
@ -258,7 +260,7 @@ decide_copy_method (const char *p)
|
||||
char preempt = (XCHAR_TO_XDIGIT (*(p + 1)) << 4) +
|
||||
XCHAR_TO_XDIGIT (*(p + 2));
|
||||
|
||||
if (UNSAFE_CHAR (preempt) || RESERVED_CHAR (preempt))
|
||||
if (URL_UNSAFE_CHAR (preempt) || URL_RESERVED_CHAR (preempt))
|
||||
return CM_PASSTHROUGH;
|
||||
else
|
||||
return CM_DECODE;
|
||||
@ -267,20 +269,20 @@ decide_copy_method (const char *p)
|
||||
/* Garbled %.. sequence: encode `%'. */
|
||||
return CM_ENCODE;
|
||||
}
|
||||
else if (UNSAFE_CHAR (*p) && !RESERVED_CHAR (*p))
|
||||
else if (URL_UNSAFE_CHAR (*p) && !URL_RESERVED_CHAR (*p))
|
||||
return CM_ENCODE;
|
||||
else
|
||||
return CM_PASSTHROUGH;
|
||||
}
|
||||
|
||||
/* Translate a %-quoting (but possibly non-conformant) input string S
|
||||
into a %-quoting (and conformant) output string. If no characters
|
||||
/* Translate a %-escaped (but possibly non-conformant) input string S
|
||||
into a %-escaped (and conformant) output string. If no characters
|
||||
are encoded or decoded, return the same string S; otherwise, return
|
||||
a freshly allocated string with the new contents.
|
||||
|
||||
After a URL has been run through this function, the protocols that
|
||||
use `%' as the quote character can use the resulting string as-is,
|
||||
while those that don't call decode_string() to get to the intended
|
||||
while those that don't call url_unescape() to get to the intended
|
||||
data. This function is also stable: after an input string is
|
||||
transformed the first time, all further transformations of the
|
||||
result yield the same result string.
|
||||
@ -293,20 +295,21 @@ decide_copy_method (const char *p)
|
||||
|
||||
GET /abc%20def HTTP/1.0
|
||||
|
||||
So it appears that the unsafe chars need to be quoted, as with
|
||||
encode_string. But what if we're requested to download
|
||||
`abc%20def'? Remember that %-encoding is valid URL syntax, so what
|
||||
the user meant was a literal space, and he was kind enough to quote
|
||||
it. In that case, Wget should obviously leave the `%20' as is, and
|
||||
send the same request as above. So in this case we may not call
|
||||
encode_string.
|
||||
It appears that the unsafe chars need to be quoted, for example
|
||||
with url_escape. But what if we're requested to download
|
||||
`abc%20def'? url_escape transforms "%" to "%25", which would leave
|
||||
us with `abc%2520def'. This is incorrect -- since %-escapes are
|
||||
part of URL syntax, "%20" is the correct way to denote a literal
|
||||
space on the Wget command line. This leads us to the conclusion
|
||||
that in that case Wget should not call url_escape, but leave the
|
||||
`%20' as is.
|
||||
|
||||
But what if the requested URI is `abc%20 def'? If we call
|
||||
encode_string, we end up with `/abc%2520%20def', which is almost
|
||||
certainly not intended. If we don't call encode_string, we are
|
||||
left with the embedded space and cannot send the request. What the
|
||||
And what if the requested URI is `abc%20 def'? If we call
|
||||
url_escape, we end up with `/abc%2520%20def', which is almost
|
||||
certainly not intended. If we don't call url_escape, we are left
|
||||
with the embedded space and cannot complete the request. What the
|
||||
user meant was for Wget to request `/abc%20%20def', and this is
|
||||
where reencode_string kicks in.
|
||||
where reencode_escapes kicks in.
|
||||
|
||||
Wget used to solve this by first decoding %-quotes, and then
|
||||
encoding all the "unsafe" characters found in the resulting string.
|
||||
@ -317,7 +320,7 @@ decide_copy_method (const char *p)
|
||||
is inevitable because by the second step we would lose information
|
||||
on whether the `+' was originally encoded or not. Both results
|
||||
were wrong because in CGI parameters + means space, while %2B means
|
||||
literal plus. reencode_string correctly translates the above to
|
||||
literal plus. reencode_escapes correctly translates the above to
|
||||
"a%2B+b", i.e. returns the original string.
|
||||
|
||||
This function uses an algorithm proposed by Anon Sricharoenchai:
|
||||
@ -352,7 +355,7 @@ decide_copy_method (const char *p)
|
||||
"foo%2b+bar" -> "foo%2b+bar" */
|
||||
|
||||
static char *
|
||||
reencode_string (const char *s)
|
||||
reencode_escapes (const char *s)
|
||||
{
|
||||
const char *p1;
|
||||
char *newstr, *p2;
|
||||
@ -417,12 +420,12 @@ reencode_string (const char *s)
|
||||
return newstr;
|
||||
}
|
||||
|
||||
/* Run PTR_VAR through reencode_string. If a new string is consed,
|
||||
/* Run PTR_VAR through reencode_escapes. If a new string is consed,
|
||||
free PTR_VAR and make it point to the new storage. Obviously,
|
||||
PTR_VAR needs to be an lvalue. */
|
||||
|
||||
#define REENCODE(ptr_var) do { \
|
||||
char *rf_new = reencode_string (ptr_var); \
|
||||
char *rf_new = reencode_escapes (ptr_var); \
|
||||
if (rf_new != ptr_var) \
|
||||
{ \
|
||||
xfree (ptr_var); \
|
||||
@ -544,9 +547,9 @@ parse_uname (const char *str, int len, char **user, char **passwd)
|
||||
(*user)[len] = '\0';
|
||||
|
||||
if (*user)
|
||||
decode_string (*user);
|
||||
url_unescape (*user);
|
||||
if (*passwd)
|
||||
decode_string (*passwd);
|
||||
url_unescape (*passwd);
|
||||
|
||||
return 1;
|
||||
}
|
||||
@ -611,6 +614,10 @@ rewrite_shorthand_url (const char *url)
|
||||
|
||||
static void parse_path PARAMS ((const char *, char **, char **));
|
||||
|
||||
/* Like strpbrk, with the exception that it returns the pointer to the
|
||||
terminating zero (end-of-string aka "eos") if no matching character
|
||||
is found. */
|
||||
|
||||
static char *
|
||||
strpbrk_or_eos (const char *s, const char *accept)
|
||||
{
|
||||
@ -825,7 +832,7 @@ url_parse (const char *url, int *error)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
url_encoded = reencode_string (url);
|
||||
url_encoded = reencode_escapes (url);
|
||||
p = url_encoded;
|
||||
|
||||
p += strlen (supported_schemes[scheme].leading_string);
|
||||
@ -1032,13 +1039,13 @@ url_error (int error_code)
|
||||
return parse_errors[error_code];
|
||||
}
|
||||
|
||||
static void
|
||||
parse_path (const char *quoted_path, char **dir, char **file)
|
||||
{
|
||||
char *path, *last_slash;
|
||||
/* Parse PATH into dir and file. PATH is extracted from the URL and
|
||||
is URL-escaped. The function returns unescaped DIR and FILE. */
|
||||
|
||||
STRDUP_ALLOCA (path, quoted_path);
|
||||
decode_string (path);
|
||||
static void
|
||||
parse_path (const char *path, char **dir, char **file)
|
||||
{
|
||||
char *last_slash;
|
||||
|
||||
last_slash = strrchr (path, '/');
|
||||
if (!last_slash)
|
||||
@ -1051,6 +1058,8 @@ parse_path (const char *quoted_path, char **dir, char **file)
|
||||
*dir = strdupdelim (path, last_slash);
|
||||
*file = xstrdup (last_slash + 1);
|
||||
}
|
||||
url_unescape (*dir);
|
||||
url_unescape (*file);
|
||||
}
|
||||
|
||||
/* Note: URL's "full path" is the path with the query string and
|
||||
@ -1303,8 +1312,6 @@ rotate_backups(const char *fname)
|
||||
{
|
||||
sprintf (from, "%s.%d", fname, i - 1);
|
||||
sprintf (to, "%s.%d", fname, i);
|
||||
/* #### This will fail on machines without the rename() system
|
||||
call. */
|
||||
rename (from, to);
|
||||
}
|
||||
|
||||
@ -1323,11 +1330,14 @@ mkalldirs (const char *path)
|
||||
int res;
|
||||
|
||||
p = path + strlen (path);
|
||||
for (; *p != '/' && p != path; p--);
|
||||
for (; *p != '/' && p != path; p--)
|
||||
;
|
||||
|
||||
/* Don't create if it's just a file. */
|
||||
if ((p == path) && (*p != '/'))
|
||||
return 0;
|
||||
t = strdupdelim (path, p);
|
||||
|
||||
/* Check whether the directory exists. */
|
||||
if ((stat (t, &st) == 0))
|
||||
{
|
||||
@ -1360,194 +1370,302 @@ mkalldirs (const char *path)
|
||||
xfree (t);
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Functions for constructing the file name out of URL components. */
|
||||
|
||||
static int
|
||||
count_slashes (const char *s)
|
||||
/* A growable string structure, used by url_file_name and friends.
   This should perhaps be moved to utils.c.

   The idea is to have an easy way to construct a string by having
   various functions append data to it.  Instead of passing the
   obligatory BASEVAR, SIZEVAR and TAILPOS to all the functions in
   question, we pass the pointer to this struct.  */

struct growable {
  char *base;                   /* start of the string storage */
  int size;                     /* allocated size, maintained by GROW */
  int tail;                     /* offset from BASE where the next
                                   character will be appended */
};

/* Ensure that the string can accept APPEND_SIZE more characters past
   the current TAIL position.  If necessary, this will grow the string
   and update its allocated size.  If the string is already large
   enough to take TAIL+APPEND_SIZE characters, this does nothing.

   G is evaluated exactly once.  APPEND_SIZE is parenthesized so that
   an argument containing a low-precedence operator (e.g. `a ? b : c')
   is added to TAIL as a whole.  */
#define GROW(g, append_size) do {                                       \
  struct growable *G_ = (g);                                            \
  DO_REALLOC (G_->base, G_->size, G_->tail + (append_size), char);      \
} while (0)

/* Return the tail position of the string, i.e. the address at which
   the next character will be written.  */
#define TAIL(r) ((r)->base + (r)->tail)

/* Move the tail position by APPEND_COUNT characters.  */
#define TAIL_INCR(r, append_count) ((r)->tail += (append_count))
|
||||
|
||||
/* Append the string STR to DEST. NOTICE: the string in DEST is not
|
||||
terminated. */
|
||||
|
||||
static void
|
||||
append_string (const char *str, struct growable *dest)
|
||||
{
|
||||
int i = 0;
|
||||
while (*s)
|
||||
if (*s++ == '/')
|
||||
++i;
|
||||
return i;
|
||||
int l = strlen (str);
|
||||
GROW (dest, l);
|
||||
memcpy (TAIL (dest), str, l);
|
||||
TAIL_INCR (dest, l);
|
||||
}
|
||||
|
||||
/* Return the path name of the URL-equivalent file name, with a
|
||||
remote-like structure of directories. */
|
||||
static char *
|
||||
mkstruct (const struct url *u)
|
||||
{
|
||||
char *dir, *file;
|
||||
char *res, *dirpref;
|
||||
int l;
|
||||
/* Append CH to DEST. For example, append_char (0, DEST)
|
||||
zero-terminates DEST. */
|
||||
|
||||
if (opt.cut_dirs)
|
||||
static void
|
||||
append_char (char ch, struct growable *dest)
|
||||
{
|
||||
char *ptr = u->dir + (*u->dir == '/');
|
||||
int slash_count = 1 + count_slashes (ptr);
|
||||
int cut = MINVAL (opt.cut_dirs, slash_count);
|
||||
for (; cut && *ptr; ptr++)
|
||||
if (*ptr == '/')
|
||||
--cut;
|
||||
STRDUP_ALLOCA (dir, ptr);
|
||||
GROW (dest, 1);
|
||||
*TAIL (dest) = ch;
|
||||
TAIL_INCR (dest, 1);
|
||||
}
|
||||
|
||||
/* Flags stored in filechr_table.  They are OR-ed together into a
   mask by file_unsafe_char, so every flag must occupy a distinct
   bit; otherwise selecting one class of characters would silently
   select another one as well.  */
enum {
  filechr_unsafe_always  = 1,   /* always unsafe, e.g. / or \0 */
  filechr_unsafe_shell   = 2,   /* unsafe for shell use, e.g. control chars */
  filechr_unsafe_windows = 4    /* disallowed on Windows file system */
};

/* Non-zero if the table entry for character C has any of the bits in
   MASK set.  C is converted to unsigned char so that chars with the
   high bit set index the table correctly.  */
#define FILE_CHAR_TEST(c, mask) (filechr_table[(unsigned char)(c)] & (mask))

/* Shorthands for the table: */
#define A filechr_unsafe_always
#define S filechr_unsafe_shell
#define W filechr_unsafe_windows

/* Forbidden chars:

   always: \0, /
   Unix shell: 0-31, 128-159
   Windows: \, |, /, <, >, ?, :, *, "

   Arguably we could also claim `%' to be unsafe, since we use it as
   the escape character.  If we ever want to be able to reliably
   translate file name back to URL, this would become crucial.  Right
   now, it's better to be minimal in escaping.  */

static const unsigned char filechr_table[256] =
{
  A, S, S, S, S, S, S, S,   /* NUL SOH STX ETX EOT ENQ ACK BEL */
  S, S, S, S, S, S, S, S,   /* BS  HT  LF  VT  FF  CR  SO  SI  */
  S, S, S, S, S, S, S, S,   /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */
  S, S, S, S, S, S, S, S,   /* CAN EM  SUB ESC FS  GS  RS  US  */
  0, 0, W, 0, 0, 0, 0, 0,   /* SP  !   "   #   $   %   &   '   */
  0, 0, W, 0, 0, 0, 0, A,   /* (   )   *   +   ,   -   .   /   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* 0   1   2   3   4   5   6   7   */
  0, 0, W, 0, W, 0, W, W,   /* 8   9   :   ;   <   =   >   ?   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* @   A   B   C   D   E   F   G   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* H   I   J   K   L   M   N   O   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* P   Q   R   S   T   U   V   W   */
  0, 0, 0, 0, W, 0, 0, 0,   /* X   Y   Z   [   \   ]   ^   _   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* `   a   b   c   d   e   f   g   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* h   i   j   k   l   m   n   o   */
  0, 0, 0, 0, 0, 0, 0, 0,   /* p   q   r   s   t   u   v   w   */
  0, 0, 0, 0, W, 0, 0, 0,   /* x   y   z   {   |   }   ~   DEL */

  S, S, S, S,  S, S, S, S,  S, S, S, S,  S, S, S, S, /* 128-143 */
  S, S, S, S,  S, S, S, S,  S, S, S, S,  S, S, S, S, /* 144-159 */
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,

  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
};

/* The one-letter shorthands are only meant for the table above; do
   not let them leak into the rest of the file.  */
#undef A
#undef S
#undef W
|
||||
|
||||
/* Return non-zero if character CH is unsafe for use in file or
|
||||
directory name. Called by append_uri_pathel. */
|
||||
|
||||
static inline int
|
||||
file_unsafe_char (char ch, int restrict)
|
||||
{
|
||||
int mask = filechr_unsafe_always;
|
||||
if (restrict == restrict_shell)
|
||||
mask |= filechr_unsafe_shell;
|
||||
else if (restrict == restrict_windows)
|
||||
mask |= (filechr_unsafe_shell | filechr_unsafe_windows);
|
||||
return FILE_CHAR_TEST (ch, mask);
|
||||
}
|
||||
|
||||
/* FN_PORT_SEP is the separator between host and port in file names
|
||||
for non-standard port numbers. On Unix this is normally ':', as in
|
||||
"www.xemacs.org:4001/index.html". Under Windows, we set it to +
|
||||
because Windows can't handle ':' in file names. */
|
||||
#define FN_PORT_SEP (opt.restrict_file_names != restrict_windows ? ':' : '+')
|
||||
|
||||
/* FN_QUERY_SEP is the separator between the file name and the URL
|
||||
query, normally '?'. Since Windows cannot handle '?' as part of
|
||||
file name, we use '@' instead there. */
|
||||
#define FN_QUERY_SEP (opt.restrict_file_names != restrict_windows ? '?' : '@')
|
||||
|
||||
/* Quote path element, characters in [b, e), as file name, and append
|
||||
the quoted string to DEST. Each character is quoted as per
|
||||
file_unsafe_char and the corresponding table. */
|
||||
|
||||
static void
|
||||
append_uri_pathel (const char *b, const char *e, struct growable *dest)
|
||||
{
|
||||
char *pathel;
|
||||
int pathlen;
|
||||
|
||||
const char *p;
|
||||
int quoted, outlen;
|
||||
|
||||
/* The restriction mode is selected at run time through
|
||||
opt.restrict_file_names, so users who download files to Windows
|
||||
partitions can say --restrict-file-names=windows to make Wget escape
|
||||
characters invalid on Windows. Similar run-time restrictions for
|
||||
other file systems can be implemented. */
|
||||
const int restrict = opt.restrict_file_names;
|
||||
|
||||
/* Copy [b, e) to PATHEL and URL-unescape it. */
|
||||
BOUNDED_TO_ALLOCA (b, e, pathel);
|
||||
url_unescape (pathel);
|
||||
pathlen = strlen (pathel);
|
||||
|
||||
/* Go through PATHEL and check how many characters we'll need to
|
||||
add for file quoting. */
|
||||
quoted = 0;
|
||||
for (p = pathel; *p; p++)
|
||||
if (file_unsafe_char (*p, restrict))
|
||||
++quoted;
|
||||
|
||||
/* p - pathel is the string length. Each quoted char means two
|
||||
additional characters in the string, hence 2*quoted. */
|
||||
outlen = (p - pathel) + (2 * quoted);
|
||||
GROW (dest, outlen);
|
||||
|
||||
if (!quoted)
|
||||
{
|
||||
/* If there's nothing to quote, we don't need to go through the
|
||||
string the second time. */
|
||||
memcpy (TAIL (dest), pathel, outlen);
|
||||
}
|
||||
else
|
||||
dir = u->dir + (*u->dir == '/');
|
||||
|
||||
/* Check for the true name (or at least a consistent name for saving
|
||||
to directory) of HOST, reusing the hlist if possible. */
|
||||
if (opt.add_hostdir)
|
||||
{
|
||||
/* Add dir_prefix and hostname (if required) to the beginning of
|
||||
dir. */
|
||||
dirpref = (char *)alloca (strlen (opt.dir_prefix) + 1
|
||||
+ strlen (u->host)
|
||||
+ 1 + numdigit (u->port)
|
||||
+ 1);
|
||||
if (!DOTP (opt.dir_prefix))
|
||||
sprintf (dirpref, "%s/%s", opt.dir_prefix, u->host);
|
||||
char *q = TAIL (dest);
|
||||
for (p = pathel; *p; p++)
|
||||
{
|
||||
if (!file_unsafe_char (*p, restrict))
|
||||
*q++ = *p;
|
||||
else
|
||||
strcpy (dirpref, u->host);
|
||||
|
||||
if (u->port != scheme_default_port (u->scheme))
|
||||
{
|
||||
int len = strlen (dirpref);
|
||||
dirpref[len] = ':';
|
||||
number_to_string (dirpref + len + 1, u->port);
|
||||
unsigned char ch = *p;
|
||||
*q++ = '%';
|
||||
*q++ = XDIGIT_TO_XCHAR (ch >> 4);
|
||||
*q++ = XDIGIT_TO_XCHAR (ch & 0xf);
|
||||
}
|
||||
}
|
||||
else /* not add_hostdir */
|
||||
assert (q - TAIL (dest) == outlen);
|
||||
}
|
||||
TAIL_INCR (dest, outlen);
|
||||
}
|
||||
|
||||
/* Append to DEST the directory structure that corresponds to the
|
||||
directory part of URL's path. For example, if the URL is
|
||||
http://server/dir1/dir2/file, this appends "/dir1/dir2".
|
||||
|
||||
Each path element ("dir1" and "dir2" in the above example) is
|
||||
examined, url-unescaped, and re-escaped as file name element.
|
||||
|
||||
Additionally, it cuts as many directories from the path as
|
||||
specified by opt.cut_dirs. For example, if opt.cut_dirs is 1, it
|
||||
will produce "dir2" for the above example. For 2 or more, it will
|
||||
produce "".
|
||||
|
||||
Each component of the path is quoted for use as file name. */
|
||||
|
||||
static void
|
||||
append_dir_structure (const struct url *u, struct growable *dest)
|
||||
{
|
||||
if (!DOTP (opt.dir_prefix))
|
||||
dirpref = opt.dir_prefix;
|
||||
else
|
||||
dirpref = "";
|
||||
}
|
||||
char *pathel, *next;
|
||||
int cut = opt.cut_dirs;
|
||||
|
||||
/* If there is a prefix, prepend it. */
|
||||
if (*dirpref)
|
||||
/* Go through the path components, de-URL-quote them, and quote them
|
||||
(if necessary) as file names. */
|
||||
|
||||
pathel = u->path;
|
||||
for (; (next = strchr (pathel, '/')) != NULL; pathel = next + 1)
|
||||
{
|
||||
char *newdir = (char *)alloca (strlen (dirpref) + 1 + strlen (dir) + 2);
|
||||
sprintf (newdir, "%s%s%s", dirpref, *dir == '/' ? "" : "/", dir);
|
||||
dir = newdir;
|
||||
}
|
||||
if (cut-- > 0)
|
||||
continue;
|
||||
if (pathel == next)
|
||||
/* Ignore empty pathels. path_simplify should remove
|
||||
occurrences of "//" from the path, but it has special cases
|
||||
for starting / which generates an empty pathel here. */
|
||||
continue;
|
||||
|
||||
l = strlen (dir);
|
||||
if (l && dir[l - 1] == '/')
|
||||
dir[l - 1] = '\0';
|
||||
|
||||
if (!*u->file)
|
||||
file = "index.html";
|
||||
else
|
||||
file = u->file;
|
||||
|
||||
/* Finally, construct the full name. */
|
||||
res = (char *)xmalloc (strlen (dir) + 1 + strlen (file)
|
||||
+ 1);
|
||||
sprintf (res, "%s%s%s", dir, *dir ? "/" : "", file);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Compose a file name out of BASE, an unescaped file name, and QUERY,
|
||||
an escaped query string. The trick is to make sure that unsafe
|
||||
characters in BASE are escaped, and that slashes in QUERY are also
|
||||
escaped. */
|
||||
|
||||
static char *
|
||||
compose_file_name (char *base, char *query)
|
||||
{
|
||||
char result[256];
|
||||
char *from;
|
||||
char *to = result;
|
||||
|
||||
/* Copy BASE to RESULT and encode all unsafe characters. */
|
||||
from = base;
|
||||
while (*from && to - result < sizeof (result))
|
||||
{
|
||||
if (UNSAFE_CHAR (*from))
|
||||
{
|
||||
unsigned char c = *from++;
|
||||
*to++ = '%';
|
||||
*to++ = XDIGIT_TO_XCHAR (c >> 4);
|
||||
*to++ = XDIGIT_TO_XCHAR (c & 0xf);
|
||||
}
|
||||
else
|
||||
*to++ = *from++;
|
||||
}
|
||||
|
||||
if (query && to - result < sizeof (result))
|
||||
{
|
||||
*to++ = '?';
|
||||
|
||||
/* Copy QUERY to RESULT and encode all '/' characters. */
|
||||
from = query;
|
||||
while (*from && to - result < sizeof (result))
|
||||
{
|
||||
if (*from == '/')
|
||||
{
|
||||
*to++ = '%';
|
||||
*to++ = '2';
|
||||
*to++ = 'F';
|
||||
++from;
|
||||
}
|
||||
else
|
||||
*to++ = *from++;
|
||||
if (dest->tail)
|
||||
append_char ('/', dest);
|
||||
append_uri_pathel (pathel, next, dest);
|
||||
}
|
||||
}
|
||||
|
||||
if (to - result < sizeof (result))
|
||||
*to = '\0';
|
||||
else
|
||||
/* Truncate input which is too long, presumably due to a huge
|
||||
query string. */
|
||||
result[sizeof (result) - 1] = '\0';
|
||||
/* Return a unique file name that matches the given URL as well as
|
||||
possible. Does not create directories on the file system. */
|
||||
|
||||
return xstrdup (result);
|
||||
}
|
||||
|
||||
/* Create a unique filename, corresponding to a given URL. Calls
|
||||
mkstruct if necessary. Does *not* actually create any directories. */
|
||||
char *
|
||||
url_filename (const struct url *u)
|
||||
url_file_name (const struct url *u)
|
||||
{
|
||||
char *file, *name;
|
||||
struct growable fnres;
|
||||
|
||||
char *query = u->query && *u->query ? u->query : NULL;
|
||||
char *u_file, *u_query;
|
||||
char *fname, *unique;
|
||||
|
||||
fnres.base = NULL;
|
||||
fnres.size = 0;
|
||||
fnres.tail = 0;
|
||||
|
||||
/* Start with the directory prefix, if specified. */
|
||||
if (!DOTP (opt.dir_prefix))
|
||||
append_string (opt.dir_prefix, &fnres);
|
||||
|
||||
/* If "dirstruct" is turned on (typically the case with -r), add
|
||||
the host and port (unless those have been turned off) and
|
||||
directory structure. */
|
||||
if (opt.dirstruct)
|
||||
{
|
||||
char *base = mkstruct (u);
|
||||
file = compose_file_name (base, query);
|
||||
xfree (base);
|
||||
}
|
||||
else
|
||||
if (opt.add_hostdir)
|
||||
{
|
||||
char *base = *u->file ? u->file : "index.html";
|
||||
file = compose_file_name (base, query);
|
||||
|
||||
/* Check whether the prefix directory is something other than "."
|
||||
before prepending it. */
|
||||
if (!DOTP (opt.dir_prefix))
|
||||
if (fnres.tail)
|
||||
append_char ('/', &fnres);
|
||||
append_string (u->host, &fnres);
|
||||
if (u->port != scheme_default_port (u->scheme))
|
||||
{
|
||||
/* #### should just realloc FILE and prepend dir_prefix. */
|
||||
char *nfile = (char *)xmalloc (strlen (opt.dir_prefix)
|
||||
+ 1 + strlen (file) + 1);
|
||||
sprintf (nfile, "%s/%s", opt.dir_prefix, file);
|
||||
xfree (file);
|
||||
file = nfile;
|
||||
char portstr[24];
|
||||
number_to_string (portstr, u->port);
|
||||
append_char (FN_PORT_SEP, &fnres);
|
||||
append_string (portstr, &fnres);
|
||||
}
|
||||
}
|
||||
|
||||
/* DOS-ish file systems don't like `%' signs in them; we change it
|
||||
to `@'. */
|
||||
#ifdef WINDOWS
|
||||
{
|
||||
char *p = file;
|
||||
for (p = file; *p; p++)
|
||||
if (*p == '%')
|
||||
*p = '@';
|
||||
append_dir_structure (u, &fnres);
|
||||
}
|
||||
#endif /* WINDOWS */
|
||||
|
||||
/* Add the file name. */
|
||||
if (fnres.tail)
|
||||
append_char ('/', &fnres);
|
||||
u_file = *u->file ? u->file : "index.html";
|
||||
append_uri_pathel (u_file, u_file + strlen (u_file), &fnres);
|
||||
|
||||
/* Append "?query" to the file name. */
|
||||
u_query = u->query && *u->query ? u->query : NULL;
|
||||
if (u_query)
|
||||
{
|
||||
append_char (FN_QUERY_SEP, &fnres);
|
||||
append_uri_pathel (u_query, u_query + strlen (u_query), &fnres);
|
||||
}
|
||||
|
||||
/* Zero-terminate the file name. */
|
||||
append_char ('\0', &fnres);
|
||||
|
||||
fname = fnres.base;
|
||||
|
||||
/* Check the cases in which the unique extensions are not used:
|
||||
1) Clobbering is turned off (-nc).
|
||||
@ -1557,17 +1675,18 @@ url_filename (const struct url *u)
|
||||
|
||||
The exception is the case when file does exist and is a
|
||||
directory (actually support for bad httpd-s). */
|
||||
|
||||
if ((opt.noclobber || opt.always_rest || opt.timestamping || opt.dirstruct)
|
||||
&& !(file_exists_p (file) && !file_non_directory_p (file)))
|
||||
return file;
|
||||
&& !(file_exists_p (fname) && !file_non_directory_p (fname)))
|
||||
return fnres.base;
|
||||
|
||||
/* Find a unique name. */
|
||||
name = unique_name (file);
|
||||
xfree (file);
|
||||
return name;
|
||||
unique = unique_name (fname);
|
||||
xfree (fname);
|
||||
return unique;
|
||||
}
|
||||
|
||||
/* Return the langth of URL's path. Path is considered to be
|
||||
/* Return the length of URL's path. Path is considered to be
|
||||
terminated by one of '?', ';', '#', or by the end of the
|
||||
string. */
|
||||
static int
|
||||
@ -1680,8 +1799,10 @@ path_simplify (char *path)
|
||||
else if (*p == '/')
|
||||
{
|
||||
/* Remove empty path elements. Not mandated by rfc1808 et
|
||||
al, but empty path elements are not all that useful, and
|
||||
the rest of Wget might not deal with them well. */
|
||||
al, but it seems like a good idea to get rid of them.
|
||||
Supporting them properly is hard (in which directory do
|
||||
you save http://x.com///y.html?) and they don't seem to
|
||||
bring much gain. */
|
||||
char *q = p;
|
||||
while (*q == '/')
|
||||
++q;
|
||||
@ -1964,13 +2085,13 @@ url_string (const struct url *url, int hide_password)
|
||||
/* Make sure the user name and password are quoted. */
|
||||
if (url->user)
|
||||
{
|
||||
quoted_user = encode_string_maybe (url->user);
|
||||
quoted_user = url_escape_allow_passthrough (url->user);
|
||||
if (url->passwd)
|
||||
{
|
||||
if (hide_password)
|
||||
quoted_passwd = HIDDEN_PASSWORD;
|
||||
else
|
||||
quoted_passwd = encode_string_maybe (url->passwd);
|
||||
quoted_passwd = url_escape_allow_passthrough (url->passwd);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -130,7 +130,7 @@ typedef enum
|
||||
|
||||
/* Function declarations */
|
||||
|
||||
char *encode_string PARAMS ((const char *));
|
||||
char *url_escape PARAMS ((const char *));
|
||||
|
||||
struct url *url_parse PARAMS ((const char *, int *));
|
||||
const char *url_error PARAMS ((int));
|
||||
@ -157,7 +157,7 @@ char *uri_merge PARAMS ((const char *, const char *));
|
||||
|
||||
void rotate_backups PARAMS ((const char *));
|
||||
int mkalldirs PARAMS ((const char *));
|
||||
char *url_filename PARAMS ((const struct url *));
|
||||
char *url_file_name PARAMS ((const struct url *));
|
||||
|
||||
char *getproxy PARAMS ((struct url *));
|
||||
int no_proxy_match PARAMS ((const char *, const char **));
|
||||
|
Loading…
Reference in New Issue
Block a user