mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Parse Content-Disposition better. Implement memrchr where missing.
This commit is contained in:
parent
aa07e689f2
commit
c90e4f9c1a
@ -1,3 +1,7 @@
|
|||||||
|
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* configure.in: Check for memrchr.
|
||||||
|
|
||||||
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
|
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* configure.in: Check for uintptr_t.
|
* configure.in: Check for uintptr_t.
|
||||||
|
@ -206,7 +206,7 @@ AC_FUNC_ALLOCA
|
|||||||
AC_FUNC_MMAP
|
AC_FUNC_MMAP
|
||||||
AC_FUNC_FSEEKO
|
AC_FUNC_FSEEKO
|
||||||
AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48)
|
AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48)
|
||||||
AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp)
|
AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp memrchr)
|
||||||
|
|
||||||
dnl We expect to have these functions on Unix-like systems configure
|
dnl We expect to have these functions on Unix-like systems configure
|
||||||
dnl runs on. The defines are provided to get them in config.h.in so
|
dnl runs on. The defines are provided to get them in config.h.in so
|
||||||
|
@ -1,3 +1,14 @@
|
|||||||
|
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* url.c (find_last_char): Define in terms of memrchr.
|
||||||
|
|
||||||
|
* cmpt.c (memrchr): Define it on systems that don't have it.
|
||||||
|
|
||||||
|
* http.c (extract_param): New function for parsing header values
|
||||||
|
with parameters.
|
||||||
|
(parse_content_disposition): Use it. Don't allow slashes and
|
||||||
|
backslashes in the file name.
|
||||||
|
|
||||||
2006-02-27 Hrvoje Niksic <hniksic@xemacs.org>
|
2006-02-27 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* url.c (path_simplify): Don't preserve ".." at beginning of path.
|
* url.c (path_simplify): Don't preserve ".." at beginning of path.
|
||||||
|
20
src/cmpt.c
20
src/cmpt.c
@ -111,10 +111,28 @@ strncasecmp (const char *s1, const char *s2, size_t n)
|
|||||||
return c1 - c2;
|
return c1 - c2;
|
||||||
}
|
}
|
||||||
#endif /* not HAVE_STRNCASECMP */
|
#endif /* not HAVE_STRNCASECMP */
|
||||||
|
|
||||||
|
#ifndef HAVE_MEMRCHR
/* memrchr is a GNU extension.  It is like the memchr function, except
   that it searches backwards from the end of the N bytes pointed to
   by S instead of forwards from the front.

   As with memchr, C is converted to unsigned char and each byte of S
   is examined as an unsigned char, so the result does not depend on
   whether plain char is signed on the host.  Returns a pointer to the
   last byte equal to C, or NULL if C does not occur in the first N
   bytes (which includes the case N == 0).  */

void *
memrchr (const void *s, int c, size_t n)
{
  const unsigned char *b = s;
  const unsigned char *e = b + n;
  /* Reduce C to a byte value once; comparing a (possibly signed) char
     against the raw int would miss bytes >= 0x80 passed as 128..255.  */
  const unsigned char ch = (unsigned char) c;

  while (e > b)
    if (*--e == ch)
      return (void *) e;
  return NULL;
}
#endif /* not HAVE_MEMRCHR */
|
||||||
|
|
||||||
/* strptime is required by POSIX, but it is missing from Windows,
|
/* strptime is required by POSIX, but it is missing from Windows,
|
||||||
which means we must keep a fallback implementation. It is
|
which means we must keep a fallback implementation. It is
|
||||||
reportedly missing or broken on many older systems as well. */
|
reportedly missing or broken on many older Unix systems as well, so
|
||||||
|
it's good to have around. */
|
||||||
|
|
||||||
#ifndef HAVE_STRPTIME
|
#ifndef HAVE_STRPTIME
|
||||||
/* From GNU libc 2.1.3. */
|
/* From GNU libc 2.1.3. */
|
||||||
|
110
src/http.c
110
src/http.c
@ -894,37 +894,101 @@ extract_param_value_delim (const char *begin, const char *end,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse the `Content-Disposition' header and extract the information it
|
typedef struct {
|
||||||
contains. Returns true if successful, false otherwise. */
|
/* A token consists of characters in the [b, e) range. */
|
||||||
|
const char *b, *e;
|
||||||
|
} param_token;
|
||||||
|
|
||||||
|
/* Extract a parameter from the HTTP header at *SOURCE and advance
|
||||||
|
*SOURCE to the next parameter. Return false when there are no more
|
||||||
|
parameters to extract. The name of the parameter is returned in
|
||||||
|
NAME, and the value in VALUE. If the parameter has no value, the
|
||||||
|
token's value is zeroed out.
|
||||||
|
|
||||||
|
For example, if *SOURCE points to the string "attachment;
|
||||||
|
filename=\"foo bar\"", the first call to this function will return
|
||||||
|
the token named "attachment" and no value, and the second call will
|
||||||
|
return the token named "filename" and value "foo bar". The third
|
||||||
|
call will return false, indicating no more valid tokens. */
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
parse_content_disposition (const char *hdrval, char **filename)
|
extract_param (const char **source, param_token *name, param_token *value)
|
||||||
{
|
{
|
||||||
const char *b = hdrval; /* b - begin */
|
const char *p = *source;
|
||||||
const char *e = hdrval; /* e - end */
|
|
||||||
|
|
||||||
assert (hdrval);
|
while (ISSPACE (*p)) ++p;
|
||||||
assert (filename);
|
if (!*p)
|
||||||
|
return false; /* nothing more to extract */
|
||||||
|
|
||||||
for (; *e; ++e)
|
/* Extract name. */
|
||||||
|
name->b = p;
|
||||||
|
while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p;
|
||||||
|
name->e = p;
|
||||||
|
while (ISSPACE (*p)) ++p;
|
||||||
|
if (*p == ';' || !*p) /* no value */
|
||||||
{
|
{
|
||||||
if (*e == ';'
|
xzero (*value);
|
||||||
&& e > b)
|
if (*p == ';') ++p;
|
||||||
{
|
*source = p;
|
||||||
/* process chars b->e-1 */
|
return true;
|
||||||
if (true == extract_param_value_delim (b, e - 1, "filename", filename))
|
|
||||||
return true;
|
|
||||||
|
|
||||||
b = e + 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
if (*p != '=')
|
||||||
|
return false; /* error */
|
||||||
|
|
||||||
if (b != e)
|
/* *p is '=', extract value */
|
||||||
|
++p;
|
||||||
|
while (ISSPACE (*p)) ++p;
|
||||||
|
if (*p == '"') /* quoted */
|
||||||
{
|
{
|
||||||
/* process chars b->e */
|
value->b = ++p;
|
||||||
if (true == extract_param_value_delim (b, e, "filename", filename))
|
while (*p && *p != '"') ++p;
|
||||||
|
if (!*p)
|
||||||
|
return false;
|
||||||
|
value->e = p++;
|
||||||
|
/* Currently at closing quote; find the end of param. */
|
||||||
|
while (ISSPACE (*p)) ++p;
|
||||||
|
while (*p && *p != ';') ++p;
|
||||||
|
if (*p == ';')
|
||||||
|
++p;
|
||||||
|
else if (*p)
|
||||||
|
/* garbage after closed quote, e.g. foo="bar"baz */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
else /* unquoted */
|
||||||
|
{
|
||||||
|
value->b = p;
|
||||||
|
while (*p && *p != ';') ++p;
|
||||||
|
value->e = p;
|
||||||
|
while (value->e != value->b && ISSPACE (value->e[-1]))
|
||||||
|
--value->e;
|
||||||
|
if (*p == ';') ++p;
|
||||||
|
}
|
||||||
|
*source = p;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
#undef MAX
|
||||||
|
#define MAX(p, q) ((p) > (q) ? (p) : (q))
|
||||||
|
|
||||||
|
static bool
|
||||||
|
parse_content_disposition (const char *hdr, char **filename)
|
||||||
|
{
|
||||||
|
param_token name, value;
|
||||||
|
while (extract_param (&hdr, &name, &value))
|
||||||
|
if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL)
|
||||||
|
{
|
||||||
|
/* Make the file name begin at the last slash or backslash. */
|
||||||
|
const char *last_slash = memrchr (value.b, '/', value.e - value.b);
|
||||||
|
const char *last_bs = memrchr (value.b, '\\', value.e - value.b);
|
||||||
|
if (last_slash && last_bs)
|
||||||
|
value.b = 1 + MAX (last_slash, last_bs);
|
||||||
|
else if (last_slash || last_bs)
|
||||||
|
value.b = 1 + (last_slash ? last_slash : last_bs);
|
||||||
|
if (value.b == value.e)
|
||||||
|
continue;
|
||||||
|
*filename = strdupdelim (value.b, value.e);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1687,7 +1751,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
{
|
{
|
||||||
/* Honor Content-Disposition whenever possible. */
|
/* Honor Content-Disposition whenever possible. */
|
||||||
if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
|
if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval))
|
||||||
|| false == parse_content_disposition (hdrval, &hs->local_file))
|
|| !parse_content_disposition (hdrval, &hs->local_file))
|
||||||
{
|
{
|
||||||
/* Choose filename according to URL name. */
|
/* Choose filename according to URL name. */
|
||||||
hs->local_file = url_file_name (u);
|
hs->local_file = url_file_name (u);
|
||||||
|
@ -135,6 +135,9 @@ char *strptime ();
|
|||||||
# include <time.h>
|
# include <time.h>
|
||||||
time_t timegm (struct tm *);
|
time_t timegm (struct tm *);
|
||||||
#endif
|
#endif
|
||||||
|
#ifndef HAVE_MEMRCHR
|
||||||
|
void *memrchr (const void *, int, size_t);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* These are defined in snprintf.c. It would be nice to have an
|
/* These are defined in snprintf.c. It would be nice to have an
|
||||||
snprintf.h, though. */
|
snprintf.h, though. */
|
||||||
|
13
src/url.c
13
src/url.c
@ -1582,17 +1582,8 @@ path_end (const char *url)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Find the last occurrence of character C in the range [b, e), or
|
/* Find the last occurrence of character C in the range [b, e), or
|
||||||
NULL, if none are present. We might want to use memrchr (a GNU
|
NULL, if none are present. */
|
||||||
extension) under GNU libc. */
|
#define find_last_char(b, e, c) memrchr ((b), (c), (e) - (b))
|
||||||
|
|
||||||
static const char *
|
|
||||||
find_last_char (const char *b, const char *e, char c)
|
|
||||||
{
|
|
||||||
for (; e > b; e--)
|
|
||||||
if (*e == c)
|
|
||||||
return e;
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Merge BASE with LINK and return the resulting URI.
|
/* Merge BASE with LINK and return the resulting URI.
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user