1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Use extract_params to parse the Set-Cookies header.

This commit is contained in:
hniksic 2006-02-28 12:50:37 -08:00
parent 66ee895734
commit 6d2dc0a1b3
4 changed files with 158 additions and 333 deletions

View File

@ -1,3 +1,9 @@
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* http.c (extract_param): Declare extern so it can be used from
other files.
(extract_param): Return error for empty name.
2006-02-28 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (find_last_char): Define in terms of memrchr.

View File

@ -319,139 +319,10 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
/* Functions for parsing the `Set-Cookie' header, and creating new
cookies from the wire. */
#define NAME_IS(string_literal) \
BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
#define TOKEN_IS(token, string_literal) \
BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
#define VALUE_EXISTS (value_b && value_e)
#define VALUE_NON_EMPTY (VALUE_EXISTS && (value_b != value_e))
/* Update the appropriate cookie field. [name_b, name_e) are expected
to delimit the attribute name, while [value_b, value_e) (optional)
should delimit the attribute value.
When called the first time, it will set the cookie's attribute name
and value. After that, it will check the attribute name for
special fields such as `domain', `path', etc. Where appropriate,
it will parse the values of the fields it recognizes and fill the
corresponding fields in COOKIE.
Returns true on success. Returns false in case a syntax error is
found; such a cookie should be discarded. */
static bool
update_cookie_field (struct cookie *cookie,
const char *name_b, const char *name_e,
const char *value_b, const char *value_e)
{
assert (name_b != NULL && name_e != NULL);
if (!cookie->attr)
{
if (!VALUE_EXISTS)
return false;
cookie->attr = strdupdelim (name_b, name_e);
cookie->value = strdupdelim (value_b, value_e);
return true;
}
if (NAME_IS ("domain"))
{
if (!VALUE_NON_EMPTY)
return false;
xfree_null (cookie->domain);
/* Strictly speaking, we should set cookie->domain_exact if the
domain doesn't begin with a dot. But many sites set the
domain to "foo.com" and expect "subhost.foo.com" to get the
cookie, and it apparently works. */
if (*value_b == '.')
++value_b;
cookie->domain = strdupdelim (value_b, value_e);
return true;
}
else if (NAME_IS ("path"))
{
if (!VALUE_NON_EMPTY)
return false;
xfree_null (cookie->path);
cookie->path = strdupdelim (value_b, value_e);
return true;
}
else if (NAME_IS ("expires"))
{
char *value_copy;
time_t expires;
if (!VALUE_NON_EMPTY)
return false;
BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
expires = http_atotm (value_copy);
if (expires != (time_t) -1)
{
cookie->permanent = 1;
cookie->expiry_time = expires;
}
else
/* Error in expiration spec. Assume default (cookie doesn't
expire, but valid only for this session.) */
;
/* According to netscape's specification, expiry time in the
past means that discarding of a matching cookie is
requested. */
if (cookie->expiry_time < cookies_now)
cookie->discard_requested = 1;
return true;
}
else if (NAME_IS ("max-age"))
{
double maxage = -1;
char *value_copy;
if (!VALUE_NON_EMPTY)
return false;
BOUNDED_TO_ALLOCA (value_b, value_e, value_copy);
sscanf (value_copy, "%lf", &maxage);
if (maxage == -1)
/* something went wrong. */
return false;
cookie->permanent = 1;
cookie->expiry_time = cookies_now + maxage;
/* According to rfc2109, a cookie with max-age of 0 means that
discarding of a matching cookie is requested. */
if (maxage == 0)
cookie->discard_requested = 1;
return true;
}
else if (NAME_IS ("secure"))
{
/* ignore value completely */
cookie->secure = 1;
return true;
}
else
/* Unrecognized attribute; ignore it. */
return true;
}
#undef NAME_IS
/* Returns true for characters that are legal in the name of an
attribute. This used to allow only alphanumerics, '-', and '_',
but we need to be more lenient because a number of sites wants to
use weirder attribute names. rfc2965 "informally specifies"
attribute name (token) as "a sequence of non-special, non-white
space characters". So we allow everything except the stuff we know
could harm us. */
#define ATTR_NAME_CHAR(c) ((c) > 32 && (c) < 127 \
&& (c) != '"' && (c) != '=' \
&& (c) != ';' && (c) != ',')
#define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
/* Parse the contents of the `Set-Cookie' header. The header looks
like this:
@ -461,182 +332,124 @@ update_cookie_field (struct cookie *cookie,
Trailing semicolon is optional; spaces are allowed between all
tokens. Additionally, values may be quoted.
A new cookie is returned upon success, NULL otherwise. The
specified CALLBACK function (normally `update_cookie_field' is used
to update the fields of the newly created cookie structure. */
A new cookie is returned upon success, NULL otherwise.
The first name-value pair will be used to set the cookie's
attribute name and value. Subsequent parameters will be checked
against field names such as `domain', `path', etc. Recognized
fields will be parsed and the corresponding members of COOKIE
filled. */
static struct cookie *
parse_set_cookies (const char *sc,
bool (*callback) (struct cookie *,
const char *, const char *,
const char *, const char *),
bool silent)
parse_set_cookie (const char *set_cookie, bool silent)
{
const char *ptr = set_cookie;
struct cookie *cookie = cookie_new ();
param_token name, value;
/* #### Hand-written DFAs are no fun to debug. We'de be better off
to rewrite this as an inline parser. */
if (!extract_param (&ptr, &name, &value))
goto error;
if (!value.b)
goto error;
cookie->attr = strdupdelim (name.b, name.e);
cookie->value = strdupdelim (value.b, value.e);
enum { S_START, S_NAME, S_NAME_POST,
S_VALUE_PRE, S_VALUE, S_QUOTED_VALUE, S_VALUE_TRAILSPACE,
S_ATTR_ACTION, S_DONE, S_ERROR
} state = S_START;
const char *p = sc;
char c;
const char *name_b = NULL, *name_e = NULL;
const char *value_b = NULL, *value_e = NULL;
c = *p;
while (state != S_DONE && state != S_ERROR)
while (extract_param (&ptr, &name, &value))
{
switch (state)
if (TOKEN_IS (name, "domain"))
{
case S_START:
if (!c)
state = S_DONE;
else if (ISSPACE (c))
/* Strip all whitespace preceding the name. */
c = *++p;
else if (ATTR_NAME_CHAR (c))
{
name_b = p;
state = S_NAME;
}
else
/* empty attr name not allowed */
state = S_ERROR;
break;
case S_NAME:
if (!c || c == ';' || c == '=' || ISSPACE (c))
{
name_e = p;
state = S_NAME_POST;
}
else if (ATTR_NAME_CHAR (c))
c = *++p;
else
state = S_ERROR;
break;
case S_NAME_POST:
if (!c || c == ';')
{
value_b = value_e = NULL;
if (c == ';')
c = *++p;
state = S_ATTR_ACTION;
}
else if (c == '=')
{
c = *++p;
state = S_VALUE_PRE;
}
else if (ISSPACE (c))
/* Ignore space and keep the state. */
c = *++p;
else
state = S_ERROR;
break;
case S_VALUE_PRE:
if (!c || c == ';')
{
value_b = value_e = p;
if (c == ';')
c = *++p;
state = S_ATTR_ACTION;
}
else if (c == '"')
{
c = *++p;
value_b = p;
state = S_QUOTED_VALUE;
}
else if (ISSPACE (c))
c = *++p;
else
{
value_b = p;
value_e = NULL;
state = S_VALUE;
}
break;
case S_VALUE:
if (!c || c == ';' || ISSPACE (c))
{
value_e = p;
state = S_VALUE_TRAILSPACE;
}
else
{
value_e = NULL; /* no trailing space */
c = *++p;
}
break;
case S_QUOTED_VALUE:
if (c == '"')
{
value_e = p;
c = *++p;
state = S_VALUE_TRAILSPACE;
}
else if (!c)
state = S_ERROR;
else
c = *++p;
break;
case S_VALUE_TRAILSPACE:
if (c == ';')
{
c = *++p;
state = S_ATTR_ACTION;
}
else if (!c)
state = S_ATTR_ACTION;
else if (ISSPACE (c))
c = *++p;
else
state = S_VALUE;
break;
case S_ATTR_ACTION:
{
bool legal = callback (cookie, name_b, name_e, value_b, value_e);
if (!legal)
{
if (!silent)
{
char *name;
BOUNDED_TO_ALLOCA (name_b, name_e, name);
logprintf (LOG_NOTQUIET,
_("Error in Set-Cookie, field `%s'"),
escnonprint (name));
}
state = S_ERROR;
break;
}
state = S_START;
}
break;
case S_DONE:
case S_ERROR:
/* handled by loop condition */
break;
if (!TOKEN_NON_EMPTY (value))
goto error;
xfree_null (cookie->domain);
/* Strictly speaking, we should set cookie->domain_exact if the
domain doesn't begin with a dot. But many sites set the
domain to "foo.com" and expect "subhost.foo.com" to get the
cookie, and it apparently works in browsers. */
if (*value.b == '.')
++value.b;
cookie->domain = strdupdelim (value.b, value.e);
}
else if (TOKEN_IS (name, "path"))
{
if (!TOKEN_NON_EMPTY (value))
goto error;
xfree_null (cookie->path);
cookie->path = strdupdelim (value.b, value.e);
}
else if (TOKEN_IS (name, "expires"))
{
char *value_copy;
time_t expires;
if (!TOKEN_NON_EMPTY (value))
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
expires = http_atotm (value_copy);
if (expires != (time_t) -1)
{
cookie->permanent = 1;
cookie->expiry_time = expires;
}
else
/* Error in expiration spec. Assume default (cookie doesn't
expire, but valid only for this session.) */
;
/* According to netscape's specification, expiry time in the
past means that discarding of a matching cookie is
requested. */
if (cookie->expiry_time < cookies_now)
cookie->discard_requested = 1;
}
else if (TOKEN_IS (name, "max-age"))
{
double maxage = -1;
char *value_copy;
if (!TOKEN_NON_EMPTY (value))
goto error;
BOUNDED_TO_ALLOCA (value.b, value.e, value_copy);
sscanf (value_copy, "%lf", &maxage);
if (maxage == -1)
/* something went wrong. */
goto error;
cookie->permanent = 1;
cookie->expiry_time = cookies_now + maxage;
/* According to rfc2109, a cookie with max-age of 0 means that
discarding of a matching cookie is requested. */
if (maxage == 0)
cookie->discard_requested = 1;
}
else if (TOKEN_IS (name, "secure"))
{
/* ignore value completely */
cookie->secure = 1;
}
else
/* Ignore unrecognized attribute. */
;
}
if (state == S_DONE)
return cookie;
if (*ptr)
/* extract_param has encountered a syntax error */
goto error;
delete_cookie (cookie);
if (state != S_ERROR)
abort ();
/* The cookie has been successfully constructed; return it. */
return cookie;
error:
if (!silent)
logprintf (LOG_NOTQUIET,
_("Syntax error in Set-Cookie: %s at position %d.\n"),
escnonprint (sc), (int) (p - sc));
escnonprint (set_cookie), (int) (ptr - set_cookie));
delete_cookie (cookie);
return NULL;
}
#undef TOKEN_IS
#undef TOKEN_NON_EMPTY
/* Sanity checks. These are important, otherwise it is possible for
mailcious attackers to destroy important cookie information and/or
@ -849,7 +662,7 @@ cookie_handle_set_cookie (struct cookie_jar *jar,
simply prepend slash to PATH. */
PREPEND_SLASH (path);
cookie = parse_set_cookies (set_cookie, update_cookie_field, false);
cookie = parse_set_cookie (set_cookie, false);
if (!cookie)
goto out;
@ -1510,27 +1323,14 @@ cookie_jar_delete (struct cookie_jar *jar)
from main. */
#ifdef TEST_COOKIES
int test_count;
char *test_results[10];
static bool test_parse_cookies_callback (struct cookie *ignored,
const char *nb, const char *ne,
const char *vb, const char *ve)
{
test_results[test_count++] = strdupdelim (nb, ne);
test_results[test_count++] = strdupdelim (vb, ve);
return true;
}
void
test_cookies (void)
{
/* Tests expected to succeed: */
static struct {
char *data;
char *results[10];
const char *data;
const char *results[10];
} tests_succ[] = {
{ "", {NULL} },
{ "arg=value", {"arg", "value", NULL} },
{ "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
{ "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
@ -1554,39 +1354,51 @@ test_cookies (void)
for (i = 0; i < countof (tests_succ); i++)
{
int ind;
char *data = tests_succ[i].data;
char **expected = tests_succ[i].results;
const char *data = tests_succ[i].data;
const char **expected = tests_succ[i].results;
struct cookie *c;
test_count = 0;
c = parse_set_cookies (data, test_parse_cookies_callback, true);
c = parse_set_cookie (data, true);
if (!c)
{
printf ("NULL cookie returned for valid data: %s\n", data);
continue;
}
for (ind = 0; ind < test_count; ind += 2)
{
if (!expected[ind])
break;
if (0 != strcmp (expected[ind], test_results[ind]))
printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
ind / 2 + 1, data, expected[ind], test_results[ind]);
if (0 != strcmp (expected[ind + 1], test_results[ind + 1]))
printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
ind / 2 + 1, data, expected[ind + 1], test_results[ind + 1]);
}
if (ind < test_count || expected[ind])
printf ("Unmatched number of results: %s\n", data);
/* Test whether extract_param handles these cases correctly. */
{
param_token name, value;
const char *ptr = data;
int j = 0;
while (extract_param (&ptr, &name, &value))
{
char *n = strdupdelim (name.b, name.e);
char *v = strdupdelim (value.b, value.e);
if (!expected[j])
{
printf ("Too many parameters for '%s'\n", data);
break;
}
if (0 != strcmp (expected[j], n))
printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
j / 2 + 1, data, expected[j], n);
if (0 != strcmp (expected[j + 1], v))
printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
j / 2 + 1, data, expected[j + 1], v);
j += 2;
free (n);
free (v);
}
if (expected[j])
printf ("Too few parameters for '%s'\n", data);
}
}
for (i = 0; i < countof (tests_fail); i++)
{
struct cookie *c;
char *data = tests_fail[i];
test_count = 0;
c = parse_set_cookies (data, test_parse_cookies_callback, 1);
c = parse_set_cookie (data, true);
if (c)
printf ("Failed to report error on invalid data: %s\n", data);
}

View File

@ -855,11 +855,6 @@ skip_short_body (int fd, wgint contlen)
return true;
}
typedef struct {
/* A token consists of characters in the [b, e) range. */
const char *b, *e;
} param_token;
/* Extract a parameter from the HTTP header at *SOURCE and advance
*SOURCE to the next parameter. Return false when there are no more
parameters to extract. The name of the parameter is returned in
@ -872,19 +867,24 @@ typedef struct {
return the token named "filename" and value "foo bar". The third
call will return false, indicating no more valid tokens. */
static bool
bool
extract_param (const char **source, param_token *name, param_token *value)
{
const char *p = *source;
while (ISSPACE (*p)) ++p;
if (!*p)
return false; /* nothing more to extract */
{
*source = p;
return false; /* no error; nothing more to extract */
}
/* Extract name. */
name->b = p;
while (*p && !ISSPACE (*p) && *p != '=' && *p != ';') ++p;
name->e = p;
if (name->b == name->e)
return false; /* empty name: error */
while (ISSPACE (*p)) ++p;
if (*p == ';' || !*p) /* no value */
{

View File

@ -38,4 +38,11 @@ void save_cookies (void);
void http_cleanup (void);
time_t http_atotm (const char *);
typedef struct {
/* A token consists of characters in the [b, e) range. */
const char *b, *e;
} param_token;
bool extract_param (const char **, param_token *, param_token *);
#endif /* HTTP_H */