1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Use extract_params to parse the Set-Cookies header.

This commit is contained in:
hniksic 2006-02-28 12:50:37 -08:00
parent 66ee895734
commit 6d2dc0a1b3
4 changed files with 158 additions and 333 deletions

View File

@ -1,3 +1,9 @@
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* http.c (extract_param): Declare extern so it can be used from
other files.
(extract_param): Return error for empty name.
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (find_last_char): Define in terms of memrchr.

View File

@ -319,72 +319,71 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
/* Functions for parsing the `Set-Cookie' header, and creating new
cookies from the wire. */
#define NAME_IS(string_literal) \
BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
#define TOKEN_IS(token, string_literal) \
BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
#define VALUE_EXISTS (value_b && value_e)
#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
/* Parse the contents of the `Set-Cookie' header. The header looks
like this:
/* Update the appropriate cookie field. [name_b, name_e) are expected
to delimit the attribute name, while [value_b, value_e) (optional)
should delimit the attribute value.
name1=value1; name2=value2; ...
When called the first time, it will set the cookie's attribute name
and value. After that, it will check the attribute name for
special fields such as `domain', `path', etc. Where appropriate,
it will parse the values of the fields it recognizes and fill the
corresponding fields in COOKIE.
Trailing semicolon is optional; spaces are allowed between all
tokens. Additionally, values may be quoted.
Returns true on success. Returns false in case a syntax error is
found; such a cookie should be discarded. */
A new cookie is returned upon success, NULL otherwise.
static bool
update_cookie_field (struct cookie *cookie,
const char *name_b, const char *name_e,
const char *value_b, const char *value_e)
The first name-value pair will be used to set the cookie's
attribute name and value. Subsequent parameters will be checked
against field names such as `domain', `path', etc. Recognized
fields will be parsed and the corresponding members of COOKIE
filled. */
static struct cookie *
parse_set_cookie (const char *set_cookie, bool silent)
{
assert (name_b != NULL && name_e != NULL);
const char *ptr = set_cookie;
struct cookie *cookie = cookie_new ();
param_token name, value;
if (!cookie->attr)
{
if (!VALUE_EXISTS)
return false;
cookie->attr = strdupdelim (name_b, name_e);
cookie->value = strdupdelim (value_b, value_e);
return true;
}
if (!extract_param (&ptr, &name, &value))
goto error;
if (!value.b)
goto error;
cookie->attr = strdupdelim (name.b, name.e);
cookie->value = strdupdelim (value.b, value.e);
if (NAME_IS ("domain"))
while (extract_param (&ptr, &name, &value))
{
if (!VALUE_NON_EMPTY)
return false;
if (TOKEN_IS (name, "domain"))
{
if (!TOKEN_NON_EMPTY (value))
goto error;
xfree_null (cookie->domain);
/* Strictly speaking, we should set cookie->domain_exact if the
domain doesn't begin with a dot. But many sites set the
domain to "foo.com" and expect "subhost.foo.com" to get the
cookie, and it apparently works. */
if (*value_b == '.')
++value_b;
cookie->domain = strdupdelim (value_b, value_e);
return true;
cookie, and it apparently works in browsers. */
if (*value.b == '.')
++value.b;
cookie->domain = strdupdelim (value.b, value.e);
}
else if (NAME_IS ("path"))
else if (TOKEN_IS (name, "path"))
{
if (!VALUE_NON_EMPTY)
return false;
if (!TOKEN_NON_EMPTY (value))
goto error;
xfree_null (cookie->path);
cookie->path = strdupdelim (value_b, value_e);
return true;
cookie->path = strdupdelim (value.b, value.e);
}
else if (NAME_IS ("expires"))
else if (TOKEN_IS (name, "expires"))
{
char *value_copy;
time_t expires;
if (!VALUE_NON_EMPTY)
return false;
BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
if (!TOKEN_NON_EMPTY (value))
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
expires = http_atotm (value_copy);
if (expires != (time_t) -1)
@ -402,22 +401,20 @@ update_cookie_field (struct cookie *cookie,
requested. */
if (cookie->expiry_time < cookies_now)
cookie->discard_requested = 1;
return true;
}
else if (NAME_IS ("max-age"))
else if (TOKEN_IS (name, "max-age"))
{
double maxage = -1;
char *value_copy;
if (!VALUE_NON_EMPTY)
return false;
BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
if (!TOKEN_NON_EMPTY (value))
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
sscanf (value_copy, "%lf", &maxage);
if (maxage == -1)
/* something went wrong. */
return false;
goto error;
cookie->permanent = 1;
cookie->expiry_time = cookies_now + maxage;
@ -425,218 +422,34 @@ update_cookie_field (struct cookie *cookie,
discarding of a matching cookie is requested. */
if (maxage == 0)
cookie->discard_requested = 1;
return true;
}
else if (NAME_IS ("secure"))
else if (TOKEN_IS (name, "secure"))
{
/* ignore value completely */
cookie->secure = 1;
return true;
}
else
/* Unrecognized attribute; ignore it. */
return true;
}
/* Ignore unrecognized attribute. */
;
}
if (*ptr)
/* extract_param has encountered a syntax error */
goto error;
#undef NAME_IS
/* Returns true for characters that are legal in the name of an
attribute. This used to allow only alphanumerics, '-', and '_',
but we need to be more lenient because a number of sites wants to
use weirder attribute names. rfc2965 "informally specifies"
attribute name (token) as "a sequence of non-special, non-white
space characters". So we allow everything except the stuff we know
could harm us. */
#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
&& (c) != '"' && (c) != '=' \
&& (c) != ';' && (c) != ',')
/* Parse the contents of the `Set-Cookie' header. The header looks
like this:
name1=value1; name2=value2; ...
Trailing semicolon is optional; spaces are allowed between all
tokens. Additionally, values may be quoted.
A new cookie is returned upon success, NULL otherwise. The
specified CALLBACK function (normally `update_cookie_field' is used
to update the fields of the newly created cookie structure. */
static struct cookie *
parse_set_cookies (const char *sc,
bool (*callback) (struct cookie *,
const char *, const char *,
const char *, const char *),
bool silent)
{
struct cookie *cookie = cookie_new ();
/* #### Hand-written DFAs are no fun to debug. We'de be better off
to rewrite this as an inline parser. */
enum { S_START, S_NAME, S_NAME_POST,
S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
S_ATTR_ACTION, S_DONE, S_ERROR
} state = S_START;
const char *p = sc;
char c;
const char *name_b = NULL, *name_e = NULL;
const char *value_b = NULL, *value_e = NULL;
c = *p;
while (state != S_DONE && state != S_ERROR)
{
switch (state)
{
case S_START:
if (!c)
state = S_DONE;
else if (ISSPACE (c))
/* Strip all whitespace preceding the name. */
c = *++p;
else if (ATTR_NAME_CHAR (c))
{
name_b = p;
state = S_NAME;
}
else
/* empty attr name not allowed */
state = S_ERROR;
break;
case S_NAME:
if (!c || c == ';' || c == '=' || ISSPACE (c))
{
name_e = p;
state = S_NAME_POST;
}
else if (ATTR_NAME_CHAR (c))
c = *++p;
else
state = S_ERROR;
break;
case S_NAME_POST:
if (!c || c == ';')
{
value_b = value_e = NULL;
if (c == ';')
c = *++p;
state = S_ATTR_ACTION;
}
else if (c == '=')
{
c = *++p;
state = S_VALUE_PRE;
}
else if (ISSPACE (c))
/* Ignore space and keep the state. */
c = *++p;
else
state = S_ERROR;
break;
case S_VALUE_PRE:
if (!c || c == ';')
{
value_b = value_e = p;
if (c == ';')
c = *++p;
state = S_ATTR_ACTION;
}
else if (c == '"')
{
c = *++p;
value_b = p;
state = S_QUOTED_VALUE;
}
else if (ISSPACE (c))
c = *++p;
else
{
value_b = p;
value_e = NULL;
state = S_VALUE;
}
break;
case S_VALUE:
if (!c || c == ';' || ISSPACE (c))
{
value_e = p;
state = S_VALUE_TRAILSPACE;
}
else
{
value_e = NULL; /* no trailing space */
c = *++p;
}
break;
case S_QUOTED_VALUE:
if (c == '"')
{
value_e = p;
c = *++p;
state = S_VALUE_TRAILSPACE;
}
else if (!c)
state = S_ERROR;
else
c = *++p;
break;
case S_VALUE_TRAILSPACE:
if (c == ';')
{
c = *++p;
state = S_ATTR_ACTION;
}
else if (!c)
state = S_ATTR_ACTION;
else if (ISSPACE (c))
c = *++p;
else
state = S_VALUE;
break;
case S_ATTR_ACTION:
{
bool legal = callback (cookie, name_b, name_e, value_b, value_e);
if (!legal)
{
if (!silent)
{
char *name;
BOUNDED_TO_ALLOCA (name_b, name_e, name);
logprintf (LOG_NOTQUIET,
_("Error in Set-Cookie, field `%s'"),
escnonprint (name));
}
state = S_ERROR;
break;
}
state = S_START;
}
break;
case S_DONE:
case S_ERROR:
/* handled by loop condition */
break;
}
}
if (state == S_DONE)
/* The cookie has been successfully constructed; return it. */
return cookie;
delete_cookie (cookie);
if (state != S_ERROR)
abort ();
error:
if (!silent)
logprintf (LOG_NOTQUIET,
_("Syntax error in Set-Cookie: %s at position %d.\n"),
escnonprint (sc), (int) (p - sc));
escnonprint (set_cookie), (int) (ptr - set_cookie));
delete_cookie (cookie);
return NULL;
}
#undef TOKEN_IS
#undef TOKEN_NON_EMPTY
/* Sanity checks. These are important, otherwise it is possible for
mailcious attackers to destroy important cookie information and/or
@ -849,7 +662,7 @@ cookie_handle_set_cookie (struct cookie_jar *jar,
simply prepend slash to PATH. */
PREPEND_SLASH (path);
cookie = parse_set_cookies (set_cookie, update_cookie_field, false);
cookie = parse_set_cookie (set_cookie, false);
if (!cookie)
goto out;
@ -1510,27 +1323,14 @@ cookie_jar_delete (struct cookie_jar *jar)
from main. */
#ifdef TEST_COOKIES
int test_count;
char *test_results[10];
static bool test_parse_cookies_callback (struct cookie *ignored,
const char *nb, const char *ne,
const char *vb, const char *ve)
{
test_results[test_count++] = strdupdelim (nb, ne);
test_results[test_count++] = strdupdelim (vb, ve);
return true;
}
void
test_cookies (void)
{
/* Tests expected to succeed: */
static struct {
char *data;
char *results[10];
const char *data;
const char *results[10];
} tests_succ[] = {
{ "", {NULL} },
{ "arg=value", {"arg", "value", NULL} },
{ "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
{ "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
@ -1554,39 +1354,51 @@ test_cookies (void)
for (i = 0; i < countof (tests_succ); i++)
{
int ind;
char *data = tests_succ[i].data;
char **expected = tests_succ[i].results;
const char *data = tests_succ[i].data;
const char **expected = tests_succ[i].results;
struct cookie *c;
test_count = 0;
c = parse_set_cookies (data, test_parse_cookies_callback, true);
c = parse_set_cookie (data, true);
if (!c)
{
printf ("NULL cookie returned for valid data: %s\n", data);
continue;
}
for (ind = 0; ind < test_count; ind += 2)
/* Test whether extract_param handles these cases correctly. */
{
if (!expected[ind])
param_token name, value;
const char *ptr = data;
int j = 0;
while (extract_param (&ptr, &name, &value))
{
char *n = strdupdelim (name.b, name.e);
char *v = strdupdelim (value.b, value.e);
if (!expected[j])
{
printf ("Too many parameters for '%s'\n", data);
break;
if (0 != strcmp (expected[ind], test_results[ind]))
printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
ind / 2 + 1, data, expected[ind], test_results[ind]);
if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
}
if (ind < test_count || expected[ind])
printf ("Unmatched number of results: %s\n", data);
if (0 != strcmp (expected[j], n))
printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
j / 2 + 1, data, expected[j], n);
if (0 != strcmp (expected[j + 1], v))
printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
j / 2 + 1, data, expected[j + 1], v);
j += 2;
free (n);
free (v);
}
if (expected[j])
printf ("Too few parameters for '%s'\n", data);
}
}
for (i = 0; i < countof (tests_fail); i++)
{
struct cookie *c;
char *data = tests_fail[i];
test_count = 0;
c = parse_set_cookies (data, test_parse_cookies_callback, 1);
c = parse_set_cookie (data, true);
if (c)
printf ("Failed to report error on invalid data: %s\n", data);
}

View File

@ -855,11 +855,6 @@ skip_short_body (int fd, wgint contlen)
return true;
}
typedef struct {
/* A token consists of characters in the [b, e) range. */
const char *b, *e;
} param_token;
/* Extract a parameter from the HTTP header at *SOURCE and advance
*SOURCE to the next parameter. Return false when there are no more
parameters to extract. The name of the parameter is returned in
@ -872,19 +867,24 @@ typedef struct {
return the token named "filename" and value "foo bar". The third
call will return false, indicating no more valid tokens. */
static bool
bool
extract_param (const char **source, param_token *name, param_token *value)
{
const char *p = *source;
while (ISSPACE (*p)) ++p;
if (!*p)
return false; /* nothing more to extract */
{
*source = p;
return false; /* no error; nothing more to extract */
}
/* Extract name. */
name->b = p;
while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p;
name->e = p;
if (name->b == name->e)
return false; /* empty name: error */
while (ISSPACE (*p)) ++p;
if (*p == ';' || !*p) /* no value */
{

View File

@ -38,4 +38,11 @@ void save_cookies (void);
void http_cleanup (void);
time_t http_atotm (const char *);
typedef struct {
/* A token consists of characters in the [b, e) range. */
const char *b, *e;
} param_token;
bool extract_param (const char **, param_token *, param_token *);
#endif /* HTTP_H */