1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Parse Content-Disposition better. Implement memrchr where missing.

This commit is contained in:
hniksic 2006-02-28 11:27:52 -08:00
parent aa07e689f2
commit c90e4f9c1a
7 changed files with 127 additions and 36 deletions

View File

@ -1,3 +1,7 @@
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* configure.in: Check for memrchr.
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org> 2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
* configure.in: Check for uintptr_t. * configure.in: Check for uintptr_t.

View File

@ -206,7 +206,7 @@ AC_FUNC_ALLOCA
AC_FUNC_MMAP AC_FUNC_MMAP
AC_FUNC_FSEEKO AC_FUNC_FSEEKO
AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48) AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48)
AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp) AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp memrchr)
dnl We expect to have these functions on Unix-like systems configure dnl We expect to have these functions on Unix-like systems configure
dnl runs on. The defines are provided to get them in config.h.in so dnl runs on. The defines are provided to get them in config.h.in so

View File

@ -1,3 +1,14 @@
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (find_last_char): Define in terms of memrchr.
* cmpt.c (memrchr): Define it on systems that don't have it.
* http.c (extract_param): New function for parsing header values
with parameters.
(parse_content_disposition): Use it. Don't allow slashes and
backslashes in the file name.
2006-02-27 Hrvoje Niksic <hniksic@xemacs.org> 2006-02-27 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (path_simplify): Don't preserve ".." at beginning of path. * url.c (path_simplify): Don't preserve ".." at beginning of path.

View File

@ -111,10 +111,28 @@ strncasecmp (const char *s1, const char *s2, size_t n)
return c1 - c2; return c1 - c2;
} }
#endif /* not HAVE_STRNCASECMP */ #endif /* not HAVE_STRNCASECMP */
#ifndef HAVE_MEMRCHR
/* memrchr is a GNU extension.  It is like the memchr function, except
   that it searches backwards from the end of the N bytes pointed to
   by S instead of forwards from the front.

   Returns a pointer to the last byte in [S, S + N) that equals C, or
   NULL if there is no such byte (always NULL when N is 0).

   As with memchr, C is converted to unsigned char and the bytes of S
   are examined as unsigned char.  Comparing a plain (possibly signed)
   char against the int C directly would never match byte values
   above 0x7f when the caller passes them as 0x80..0xff.  */
void *
memrchr (const void *s, int c, size_t n)
{
  const unsigned char *b = s;
  const unsigned char *e = b + n;
  const unsigned char ch = (unsigned char) c;

  while (e > b)
    if (*--e == ch)
      return (void *) e;
  return NULL;
}
#endif
/* strptime is required by POSIX, but it is missing from Windows, /* strptime is required by POSIX, but it is missing from Windows,
which means we must keep a fallback implementation. It is which means we must keep a fallback implementation. It is
reportedly missing or broken on many older systems as well. */ reportedly missing or broken on many older Unix systems as well, so
it's good to have around. */
#ifndef HAVE_STRPTIME #ifndef HAVE_STRPTIME
/* From GNU libc 2.1.3. */ /* From GNU libc 2.1.3. */

View File

@ -894,37 +894,101 @@ extract_param_value_delim (const char *begin, const char *end,
return false; return false;
} }
/* Parse the `Content-Disposition' header and extract the information it typedef struct {
contains. Returns true if successful, false otherwise. */ /* A token consists of characters in the [b, e) range. */
const char *b, *e;
} param_token;
/* Extract a parameter from the HTTP header at *SOURCE and advance
*SOURCE to the next parameter. Return false when there are no more
parameters to extract. The name of the parameter is returned in
NAME, and the value in VALUE. If the parameter has no value, the
token's value is zeroed out.
For example, if *SOURCE points to the string "attachment;
filename=\"foo bar\"", the first call to this function will return
the token named "attachment" and no value, and the second call will
return the token named "filename" and value "foo bar". The third
call will return false, indicating no more valid tokens. */
static bool static bool
parse_content_disposition (const char *hdrval, char **filename) extract_param (const char **source, param_token *name, param_token *value)
{ {
const char *b = hdrval; /* b - begin */ const char *p = *source;
const char *e = hdrval; /* e - end */
assert (hdrval); while (ISSPACE (*p)) ++p;
assert (filename); if (!*p)
return false; /* nothing more to extract */
for (; *e; ++e) /* Extract name. */
name->b = p;
while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p;
name->e = p;
while (ISSPACE (*p)) ++p;
if (*p == ';' || !*p) /* no value */
{ {
if (*e == ';' xzero (*value);
&& e > b) if (*p == ';') ++p;
{ *source = p;
/* process chars b->e-1 */ return true;
if (true == extract_param_value_delim (b, e - 1, "filename", filename))
return true;
b = e + 1;
}
} }
if (*p != '=')
return false; /* error */
if (b != e) /* *p is '=', extract value */
++p;
while (ISSPACE (*p)) ++p;
if (*p == '"') /* quoted */
{ {
/* process chars b->e */ value->b = ++p;
if (true == extract_param_value_delim (b, e, "filename", filename)) while (*p && *p != '"') ++p;
if (!*p)
return false;
value->e = p++;
/* Currently at closing quote; find the end of param. */
while (ISSPACE (*p)) ++p;
while (*p && *p != ';') ++p;
if (*p == ';')
++p;
else if (*p)
/* garbage after closed quote, e.g. foo="bar"baz */
return false;
}
else /* unquoted */
{
value->b = p;
while (*p && *p != ';') ++p;
value->e = p;
while (value->e != value->b && ISSPACE (value->e[-1]))
--value->e;
if (*p == ';') ++p;
}
*source = p;
return true;
}
/* Larger of P and Q.  Both arguments are evaluated more than once, so
   pass only side-effect-free expressions.  */
#undef MAX
#define MAX(p, q) ((p) > (q) ? (p) : (q))
/* Scan the Content-Disposition header value HDR for a usable
   "filename" parameter.

   Parameters are pulled one at a time with extract_param.  The first
   parameter named "filename" (matched with BOUNDED_EQUAL_NO_CASE --
   presumably case-insensitive, confirm against its definition) that
   carries a value and still has a non-empty name after directory
   components are stripped is copied into *FILENAME with strdupdelim,
   and true is returned.  NOTE(review): strdupdelim presumably returns
   a freshly allocated string that the caller must free -- confirm.

   Returns false, leaving *FILENAME untouched, when extract_param runs
   out of parameters without producing such a file name.  */
static bool
parse_content_disposition (const char *hdr, char **filename)
{
param_token name, value;
while (extract_param (&hdr, &name, &value))
/* value.b is NULL when the parameter was given without a value. */
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
{
/* Make the file name begin just after the last slash or backslash,
   so that no directory component supplied by the server survives. */
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
if (last_slash && last_bs)
value.b = 1 + MAX (last_slash, last_bs);
else if (last_slash || last_bs)
value.b = 1 + (last_slash ? last_slash : last_bs);
/* Nothing left after stripping (e.g. value ended in a slash): skip
   this parameter and keep looking. */
if (value.b == value.e)
continue;
*filename = strdupdelim (value.b, value.e);
return true; return true;
} }
return false; return false;
} }
@ -1687,7 +1751,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
{ {
/* Honor Content-Disposition whenever possible. */ /* Honor Content-Disposition whenever possible. */
if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
|| false == parse_content_disposition (hdrval, &hs->local_file)) || !parse_content_disposition (hdrval, &hs->local_file))
{ {
/* Choose filename according to URL name. */ /* Choose filename according to URL name. */
hs->local_file = url_file_name (u); hs->local_file = url_file_name (u);

View File

@ -135,6 +135,9 @@ char *strptime ();
# include <time.h> # include <time.h>
time_t timegm (struct tm *); time_t timegm (struct tm *);
#endif #endif
#ifndef HAVE_MEMRCHR
/* Declared here only when configure did not find memrchr in the
   system library; the fallback definition lives in cmpt.c.  */
void *memrchr (const void *, int, size_t);
#endif
/* These are defined in snprintf.c. It would be nice to have an /* These are defined in snprintf.c. It would be nice to have an
snprintf.h, though. */ snprintf.h, though. */

View File

@ -1582,17 +1582,8 @@ path_end (const char *url)
} }
/* Find the last occurrence of character C in the range [b, e), or /* Find the last occurrence of character C in the range [b, e), or
NULL, if none are present. We might want to use memrchr (a GNU NULL, if none are present. */
extension) under GNU libc. */ #define find_last_char(b, e, c) memrchr ((b), (c), (e) - (b))
static const char *
find_last_char (const char *b, const char *e, char c)
{
for (; e > b; e--)
if (*e == c)
return e;
return NULL;
}
/* Merge BASE with LINK and return the resulting URI. /* Merge BASE with LINK and return the resulting URI.