1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Made sync_path more resilient to pathological values of u->file and u->dir.

This commit is contained in:
hniksic 2003-09-16 17:18:52 -07:00
parent 4b1afddab3
commit d4281f04b2
2 changed files with 110 additions and 68 deletions

View File

@ -1,3 +1,12 @@
2003-09-17 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (url_escape_1): New function.
(url_escape): Use it.
(sync_path): Handle pathological cases where u->file and u->dir
contain really strange characters.
(ENCODE): Deleted.
(REENCODE): Deleted.
2003-09-16 Hrvoje Niksic <hniksic@xemacs.org> 2003-09-16 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (url_file_name): Don't reallocate FNAME if the file * url.c (url_file_name): Don't reallocate FNAME if the file

169
src/url.c
View File

@ -145,11 +145,14 @@ const static unsigned char urlchr_table[256] =
#undef U #undef U
#undef RU #undef RU
/* Decodes the forms %xy in a URL to the character the hexadecimal /* URL-unescape the string S.
code of which is xy. xy are hexadecimal digits from
[0123456789ABCDEF] (case-insensitive). If x or y are not This is done by transforming the sequences "%HH" to the character
hex-digits or `%' precedes `\0', the sequence is inserted represented by the hexadecimal digits HH. If % is not followed by
literally. */ two hexadecimal digits, it is inserted literally.
The transformation is done in place. If you need the original
string intact, make a copy before calling this function. */
static void static void
url_unescape (char *s) url_unescape (char *s)
@ -177,10 +180,15 @@ url_unescape (char *s)
*t = '\0'; *t = '\0';
} }
/* Like url_escape, but return S if there are no unsafe chars. */ /* The core of url_escape_* functions. Escapes the characters that
match the provided mask in urlchr_table.
If ALLOW_PASSTHROUGH is non-zero, a string with no unsafe chars
will be returned unchanged. If ALLOW_PASSTHROUGH is zero, a
freshly allocated string will be returned in all cases. */
static char * static char *
url_escape_allow_passthrough (const char *s) url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
{ {
const char *p1; const char *p1;
char *p2, *newstr; char *p2, *newstr;
@ -188,11 +196,11 @@ url_escape_allow_passthrough (const char *s)
int addition = 0; int addition = 0;
for (p1 = s; *p1; p1++) for (p1 = s; *p1; p1++)
if (URL_UNSAFE_CHAR (*p1)) if (urlchr_test (*p1, mask))
addition += 2; /* Two more characters (hex digits) */ addition += 2; /* Two more characters (hex digits) */
if (!addition) if (!addition)
return (char *)s; return allow_passthrough ? (char *)s : xstrdup (s);
newlen = (p1 - s) + addition; newlen = (p1 - s) + addition;
newstr = (char *)xmalloc (newlen + 1); newstr = (char *)xmalloc (newlen + 1);
@ -201,7 +209,8 @@ url_escape_allow_passthrough (const char *s)
p2 = newstr; p2 = newstr;
while (*p1) while (*p1)
{ {
if (URL_UNSAFE_CHAR (*p1)) /* Quote the characters that match the test mask. */
if (urlchr_test (*p1, mask))
{ {
unsigned char c = *p1++; unsigned char c = *p1++;
*p2++ = '%'; *p2++ = '%';
@ -211,37 +220,29 @@ url_escape_allow_passthrough (const char *s)
else else
*p2++ = *p1++; *p2++ = *p1++;
} }
*p2 = '\0';
assert (p2 - newstr == newlen); assert (p2 - newstr == newlen);
*p2 = '\0';
return newstr; return newstr;
} }
/* Encode the unsafe characters (as determined by URL_UNSAFE_CHAR) in a /* URL-escape the unsafe characters (see urlchr_table) in a given
given string, returning a malloc-ed %XX encoded string. */ string, returning a freshly allocated string. */
char * char *
url_escape (const char *s) url_escape (const char *s)
{ {
char *encoded = url_escape_allow_passthrough (s); return url_escape_1 (s, urlchr_unsafe, 0);
if (encoded != s)
return encoded;
else
return xstrdup (s);
} }
/* Encode unsafe characters in PTR to %xx. If such encoding is done, /* URL-escape the unsafe characters (see urlchr_table) in a given
the old value of PTR is freed and PTR is made to point to the newly string. If no characters are unsafe, S is returned. */
allocated storage. */
#define ENCODE(ptr) do { \ static char *
char *e_new = url_escape_allow_passthrough (ptr); \ url_escape_allow_passthrough (const char *s)
if (e_new != ptr) \ {
{ \ return url_escape_1 (s, urlchr_unsafe, 1);
xfree (ptr); \ }
ptr = e_new; \
} \
} while (0)
enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH }; enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH };
@ -419,19 +420,6 @@ reencode_escapes (const char *s)
assert (p2 - newstr == newlen); assert (p2 - newstr == newlen);
return newstr; return newstr;
} }
/* Run PTR_VAR through reencode_escapes. If a new string is consed,
free PTR_VAR and make it point to the new storage. Obviously,
PTR_VAR needs to be an lvalue. */
#define REENCODE(ptr_var) do { \
char *rf_new = reencode_escapes (ptr_var); \
if (rf_new != ptr_var) \
{ \
xfree (ptr_var); \
ptr_var = rf_new; \
} \
} while (0)
/* Returns the scheme type if the scheme is supported, or /* Returns the scheme type if the scheme is supported, or
SCHEME_INVALID if not. */ SCHEME_INVALID if not. */
@ -1145,38 +1133,83 @@ url_full_path (const struct url *url)
return full_path; return full_path;
} }
/* Sync u->path and u->url with u->dir and u->file. */ /* Escape unsafe and reserved characters, except for the slash
characters. */
static char *
url_escape_dir (const char *dir)
{
char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
char *h, *t;
if (newdir == dir)
return (char *)dir;
/* Unescape slashes in NEWDIR. */
h = newdir; /* hare */
t = newdir; /* tortoise */
for (; *h; h++, t++)
{
if (*h == '%' && h[1] == '2' && h[2] == 'F')
{
*t = '/';
h += 2;
}
else
*t = *h;
}
*t = '\0';
return newdir;
}
/* Sync u->path and u->url with u->dir and u->file. Called after
u->file or u->dir have been changed, typically by the FTP code. */
static void static void
sync_path (struct url *url) sync_path (struct url *u)
{ {
char *newpath; char *newpath, *efile, *edir;
xfree (url->path); xfree (u->path);
if (!*url->dir) /* u->dir and u->file are not escaped. URL-escape them before
{ reassembling them into u->path. That way, if they contain
newpath = xstrdup (url->file); separators like '?' or even if u->file contains slashes, the
REENCODE (newpath); path will be correctly assembled. (u->file can contain slashes
} if the URL specifies it with %2f, or if an FTP server returns
it.) */
edir = url_escape_dir (u->dir);
efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1);
if (!*edir)
newpath = xstrdup (efile);
else else
{ {
int dirlen = strlen (url->dir); int dirlen = strlen (edir);
int filelen = strlen (url->file); int filelen = strlen (efile);
newpath = xmalloc (dirlen + 1 + filelen + 1); /* Copy "DIR/FILE" to newpath. */
memcpy (newpath, url->dir, dirlen); char *p = newpath = xmalloc (dirlen + 1 + filelen + 1);
newpath[dirlen] = '/'; memcpy (p, edir, dirlen);
memcpy (newpath + dirlen + 1, url->file, filelen); p += dirlen;
newpath[dirlen + 1 + filelen] = '\0'; *p++ = '/';
REENCODE (newpath); memcpy (p, efile, filelen);
p += filelen;
*p++ = '\0';
} }
url->path = newpath; u->path = newpath;
/* Synchronize u->url. */ if (edir != u->dir)
xfree (url->url); xfree (edir);
url->url = url_string (url, 0); if (efile != u->file)
xfree (efile);
/* Regenerate u->url as well. */
xfree (u->url);
u->url = url_string (u, 0);
} }
/* Mutators. Code in ftp.c insists on changing u->dir and u->file. /* Mutators. Code in ftp.c insists on changing u->dir and u->file.
@ -1396,10 +1429,10 @@ mkalldirs (const char *path)
/* A growable string structure, used by url_file_name and friends. /* A growable string structure, used by url_file_name and friends.
This should perhaps be moved to utils.c. This should perhaps be moved to utils.c.
The idea is to have an easy way to construct a string by having The idea is to have a convenient and efficient way to construct a
various functions append data to it. Instead of passing the string by having various functions append data to it. Instead of
obligatory BASEVAR, SIZEVAR and TAILPOS to all the functions in passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the
questions, we pass the pointer to this struct. */ functions in questions, we pass the pointer to this struct. */
struct growable { struct growable {
char *base; char *base;