From c90e4f9c1a291e9d45ca0f473a3dd0d16a9319f8 Mon Sep 17 00:00:00 2001 From: hniksic Date: Tue, 28 Feb 2006 11:27:52 -0800 Subject: [PATCH] [svn] Parse Content-Disposition better. Implement memrchr where missing. --- ChangeLog | 4 ++ configure.in | 2 +- src/ChangeLog | 11 +++++ src/cmpt.c | 20 ++++++++- src/http.c | 110 +++++++++++++++++++++++++++++++++++++++----------- src/sysdep.h | 3 ++ src/url.c | 13 +----- 7 files changed, 127 insertions(+), 36 deletions(-) diff --git a/ChangeLog b/ChangeLog index 56ee8ac4..a2ba9aa5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2006-02-28 Hrvoje Niksic + + * configure.in: Check for memrchr. + 2005-11-19 Hrvoje Niksic * configure.in: Check for uintptr_t. diff --git a/configure.in b/configure.in index 413309e1..436efc99 100644 --- a/configure.in +++ b/configure.in @@ -206,7 +206,7 @@ AC_FUNC_ALLOCA AC_FUNC_MMAP AC_FUNC_FSEEKO AC_CHECK_FUNCS(strptime timegm snprintf vsnprintf vasprintf drand48) -AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp) +AC_CHECK_FUNCS(strtoll strtoimax usleep ftello sigblock sigsetjmp memrchr) dnl We expect to have these functions on Unix-like systems configure dnl runs on. The defines are provided to get them in config.h.in so diff --git a/src/ChangeLog b/src/ChangeLog index 46a550fe..54452b5b 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,14 @@ +2006-02-28 Hrvoje Niksic + + * url.c (find_last_char): Define in terms of memrchr. + + * cmpt.c (memrchr): Define it on systems that don't have it. + + * http.c (extract_param): New function for parsing header values + with parameters. + (parse_content_disposition): Use it. Don't allow slashes and + backslashes in the file name. + 2006-02-27 Hrvoje Niksic * url.c (path_simplify): Don't preserve ".." at beginning of path. diff --git a/src/cmpt.c b/src/cmpt.c index 03ff3ceb..d1cff3b7 100644 --- a/src/cmpt.c +++ b/src/cmpt.c @@ -111,10 +111,28 @@ strncasecmp (const char *s1, const char *s2, size_t n) return c1 - c2; } #endif /* not HAVE_STRNCASECMP */ + +#ifndef HAVE_MEMRCHR +/* memrchr is a GNU extension. It is like the memchr function, except + that it searches backwards from the end of the n bytes pointed to + by s instead of forwards from the front. */ + +void * +memrchr (const void *s, int c, size_t n) +{ + const char *b = s; + const char *e = b + n; + while (e > b) + if (*--e == c) + return (void *) e; + return NULL; +} +#endif /* strptime is required by POSIX, but it is missing from Windows, which means we must keep a fallback implementation. It is - reportedly missing or broken on many older systems as well. */ + reportedly missing or broken on many older Unix systems as well, so + it's good to have around. */ #ifndef HAVE_STRPTIME /* From GNU libc 2.1.3. */ diff --git a/src/http.c b/src/http.c index c5827275..9668d504 100644 --- a/src/http.c +++ b/src/http.c @@ -894,37 +894,101 @@ extract_param_value_delim (const char *begin, const char *end, return false; } -/* Parse the `Content-Disposition' header and extract the information it - contains. Returns true if successful, false otherwise. */ +typedef struct { + /* A token consists of characters in the [b, e) range. */ + const char *b, *e; +} param_token; + +/* Extract a parameter from the HTTP header at *SOURCE and advance + *SOURCE to the next parameter. Return false when there are no more + parameters to extract. The name of the parameter is returned in + NAME, and the value in VALUE. If the parameter has no value, the + token's value is zeroed out. + + For example, if *SOURCE points to the string "attachment; + filename=\"foo bar\"", the first call to this function will return + the token named "attachment" and no value, and the second call will + return the token named "filename" and value "foo bar". The third + call will return false, indicating no more valid tokens. */ + static bool -parse_content_disposition (const char *hdrval, char **filename) +extract_param (const char **source, param_token *name, param_token *value) { - const char *b = hdrval; /* b - begin */ - const char *e = hdrval; /* e - end */ + const char *p = *source; - assert (hdrval); - assert (filename); + while (ISSPACE (*p)) ++p; + if (!*p) + return false; /* nothing more to extract */ - for (; *e; ++e) + /* Extract name. */ + name->b = p; + while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p; + name->e = p; + while (ISSPACE (*p)) ++p; + if (*p == ';' || !*p) /* no value */ { - if (*e == ';' - && e > b) - { - /* process chars b->e-1 */ - if (true == extract_param_value_delim (b, e - 1, "filename", filename)) - return true; - - b = e + 1; - } + xzero (*value); + if (*p == ';') ++p; + *source = p; + return true; } + if (*p != '=') + return false; /* error */ - if (b != e) + /* *p is '=', extract value */ + ++p; + while (ISSPACE (*p)) ++p; + if (*p == '"') /* quoted */ { - /* process chars b->e */ - if (true == extract_param_value_delim (b, e, "filename", filename)) + value->b = ++p; + while (*p && *p != '"') ++p; + if (!*p) + return false; + value->e = p++; + /* Currently at closing quote; find the end of param. */ + while (ISSPACE (*p)) ++p; + while (*p && *p != ';') ++p; + if (*p == ';') + ++p; + else if (*p) + /* garbage after closed quote, e.g. foo="bar"baz */ + return false; + } + else /* unquoted */ + { + value->b = p; + while (*p && *p != ';') ++p; + value->e = p; + while (value->e != value->b && ISSPACE (value->e[-1])) + --value->e; + if (*p == ';') ++p; + } + *source = p; + return true; +} + +#undef MAX +#define MAX(p, q) ((p) > (q) ? (p) : (q)) + +static bool +parse_content_disposition (const char *hdr, char **filename) +{ + param_token name, value; + while (extract_param (&hdr, &name, &value)) + if (BOUNDED_EQUAL_NO_CASE (name.b, name.e, "filename") && value.b != NULL) + { + /* Make the file name begin at the last slash or backslash. */ + const char *last_slash = memrchr (value.b, '/', value.e - value.b); + const char *last_bs = memrchr (value.b, '\\', value.e - value.b); + if (last_slash && last_bs) + value.b = 1 + MAX (last_slash, last_bs); + else if (last_slash || last_bs) + value.b = 1 + (last_slash ? last_slash : last_bs); + if (value.b == value.e) + continue; + *filename = strdupdelim (value.b, value.e); return true; - } - + } return false; } @@ -1687,7 +1751,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { /* Honor Content-Disposition whether possible. */ if (!resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)) - || false == parse_content_disposition (hdrval, &hs->local_file)) + || !parse_content_disposition (hdrval, &hs->local_file)) { /* Choose filename according to URL name. */ hs->local_file = url_file_name (u); diff --git a/src/sysdep.h b/src/sysdep.h index 50c0d14d..38963901 100644 --- a/src/sysdep.h +++ b/src/sysdep.h @@ -135,6 +135,9 @@ char *strptime (); # include time_t timegm (struct tm *); #endif +#ifndef HAVE_MEMRCHR +void *memrchr (const void *, int, size_t); +#endif /* These are defined in snprintf.c. It would be nice to have an snprintf.h, though. */ diff --git a/src/url.c b/src/url.c index f97a3180..30828c2a 100644 --- a/src/url.c +++ b/src/url.c @@ -1582,17 +1582,8 @@ path_end (const char *url) } /* Find the last occurrence of character C in the range [b, e), or - NULL, if none are present. We might want to use memrchr (a GNU - extension) under GNU libc. */ - -static const char * -find_last_char (const char *b, const char *e, char c) -{ - for (; e > b; e--) - if (*e == c) - return e; - return NULL; -} + NULL, if none are present. */ +#define find_last_char(b, e, c) memrchr ((b), (c), (e) - (b)) /* Merge BASE with LINK and return the resulting URI.