1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Match hosts against domains per Netscape cookie "specification".

Published in <sxszo53v8by.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-11-30 21:08:03 -08:00
parent f4d019a423
commit 92b269acad
4 changed files with 50 additions and 70 deletions

View File

@ -1,3 +1,8 @@
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
* cookies.c (check_domain_match): Reimplement to match Netscape's
"preliminary specification" for cookies.
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com> 2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c (replace_attr_refresh_hack): New function. * url.c (replace_attr_refresh_hack): New function.

View File

@ -670,97 +670,71 @@ numeric_address_p (const char *addr)
} }
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST. /* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
This check is compliant with rfc2109. */ Originally I tried to make the check compliant with rfc2109, but
the sites deviated too often, so I had to fall back to "tail
matching", as defined by Netscape's original cookie spec. */
static int static int
check_domain_match (const char *cookie_domain, const char *host) check_domain_match (const char *cookie_domain, const char *host)
{ {
int headlen; static char *special_toplevel_domains[] = {
const char *tail; ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
};
int i, required_dots;
DEBUGP (("cdm: 1")); DEBUGP (("cdm: 1"));
/* Numeric address requires exact match. It also requires HOST to /* Numeric address requires exact match. It also requires HOST to
be an IP address. I suppose we *could* resolve HOST with be an IP address. */
store_hostaddress (it would hit the hash table), but rfc2109
doesn't require it, and it doesn't seem very useful, so we
don't. */
if (numeric_address_p (cookie_domain)) if (numeric_address_p (cookie_domain))
return !strcmp (cookie_domain, host); return 0 == strcmp (cookie_domain, host);
DEBUGP ((" 2")); DEBUGP ((" 2"));
/* The domain must contain at least one embedded dot. */
{
const char *rest = cookie_domain;
int len = strlen (rest);
if (*rest == '.')
++rest, --len; /* ignore first dot */
if (len <= 0)
return 0;
if (rest[len - 1] == '.')
--len; /* ignore last dot */
if (!memchr (rest, '.', len))
/* No dots. */
return 0;
}
DEBUGP ((" 3"));
/* For the sake of efficiency, check for exact match first. */ /* For the sake of efficiency, check for exact match first. */
if (!strcasecmp (cookie_domain, host)) if (!strcasecmp (cookie_domain, host))
return 1; return 1;
DEBUGP ((" 3"));
required_dots = 3;
for (i = 0; i < ARRAY_SIZE (special_toplevel_domains); i++)
if (match_tail (cookie_domain, special_toplevel_domains[i]))
{
required_dots = 2;
break;
}
/* If the domain does not start with '.', require one less dot.
This is so that domains like "altavista.com" (which should be
".altavista.com") are accepted. */
if (*cookie_domain != '.')
--required_dots;
if (count_char (cookie_domain, '.') < required_dots)
return 0;
DEBUGP ((" 4")); DEBUGP ((" 4"));
/* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN. if (!match_tail (host, cookie_domain))
This means that COOKIE_DOMAIN needs to start with `.' and be an
FQDN, and that HOST must end with COOKIE_DOMAIN. */
if (*cookie_domain != '.')
return 0; return 0;
DEBUGP ((" 5")); DEBUGP ((" 5"));
/* To proceed, we need to examine two parts of HOST: its head and /* Don't allow domain "bar.com" to match host "foobar.com". */
its tail. Head and tail are defined in terms of the length of if (*cookie_domain != '.')
the domain, like this: {
int dlen = strlen (cookie_domain);
HHHHTTTTTTTTTTTTTTT <- host int hlen = strlen (host);
DDDDDDDDDDDDDDD <- domain /* hostname.foobar.com */
/* bar.com */
That is, "head" is the part of the host before (dlen - hlen), and /* ^ <-- must be '.' for host */
"tail" is what follows. if (hlen > dlen && host[hlen - dlen - 1] != '.')
For the domain to match, two conditions need to be true:
1. Tail must equal DOMAIN.
2. Head must not contain an embedded dot. */
headlen = strlen (host) - strlen (cookie_domain);
if (headlen <= 0)
/* DOMAIN must be a proper subset of HOST. */
return 0; return 0;
tail = host + headlen; }
DEBUGP ((" 6")); DEBUGP ((" 6"));
/* (1) */
if (strcasecmp (tail, cookie_domain))
return 0;
DEBUGP ((" 7"));
/* Test (2) is not part of the "domain-match" itself, but is
recommended by rfc2109 for reasons of privacy. */
/* (2) */
if (memchr (host, '.', headlen))
return 0;
DEBUGP ((" 8"));
return 1; return 1;
} }

View File

@ -854,8 +854,8 @@ accdir (const char *directory, enum accd flags)
match_backwards ("abc", "bc") -> 1 match_backwards ("abc", "bc") -> 1
match_backwards ("abc", "ab") -> 0 match_backwards ("abc", "ab") -> 0
match_backwards ("abc", "abc") -> 1 */ match_backwards ("abc", "abc") -> 1 */
static int int
match_backwards (const char *string, const char *pattern) match_tail (const char *string, const char *pattern)
{ {
int i, j; int i, j;
@ -870,7 +870,7 @@ match_backwards (const char *string, const char *pattern)
} }
/* Checks whether string S matches each element of ACCEPTS. A list /* Checks whether string S matches each element of ACCEPTS. A list
element is matched either with fnmatch() or match_backwards(), element is matched either with fnmatch() or match_tail(),
according to whether the element contains wildcards or not. according to whether the element contains wildcards or not.
If BACKWARD is 0, don't do backward comparison -- just compare If BACKWARD is 0, don't do backward comparison -- just compare
@ -891,7 +891,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
{ {
if (backward) if (backward)
{ {
if (match_backwards (s, *accepts)) if (match_tail (s, *accepts))
return 1; return 1;
} }
else else

View File

@ -69,6 +69,7 @@ char *file_merge PARAMS ((const char *, const char *));
int acceptable PARAMS ((const char *)); int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd)); int accdir PARAMS ((const char *s, enum accd));
char *suffix PARAMS ((const char *s)); char *suffix PARAMS ((const char *s));
int match_tail PARAMS ((const char *, const char *));
char *read_whole_line PARAMS ((FILE *)); char *read_whole_line PARAMS ((FILE *));
struct file_memory *read_file PARAMS ((const char *)); struct file_memory *read_file PARAMS ((const char *));