mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Match hosts against domains per Netscape cookie "specification".
Published in <sxszo53v8by.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
f4d019a423
commit
92b269acad
@ -1,3 +1,8 @@
|
|||||||
|
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* cookies.c (check_domain_match): Reimplement to match Netscape's
|
||||||
|
"preliminary specification" for cookies.
|
||||||
|
|
||||||
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* url.c (replace_attr_refresh_hack): New function.
|
* url.c (replace_attr_refresh_hack): New function.
|
||||||
|
104
src/cookies.c
104
src/cookies.c
@ -670,97 +670,71 @@ numeric_address_p (const char *addr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
|
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
|
||||||
This check is compliant with rfc2109. */
|
Originally I tried to make the check compliant with rfc2109, but
|
||||||
|
the sites deviated too often, so I had to fall back to "tail
|
||||||
|
matching", as defined by Netscape's original cookie spec. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
check_domain_match (const char *cookie_domain, const char *host)
|
check_domain_match (const char *cookie_domain, const char *host)
|
||||||
{
|
{
|
||||||
int headlen;
|
static char *special_toplevel_domains[] = {
|
||||||
const char *tail;
|
".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
|
||||||
|
};
|
||||||
|
int i, required_dots;
|
||||||
|
|
||||||
DEBUGP (("cdm: 1"));
|
DEBUGP (("cdm: 1"));
|
||||||
|
|
||||||
/* Numeric address requires exact match. It also requires HOST to
|
/* Numeric address requires exact match. It also requires HOST to
|
||||||
be an IP address. I suppose we *could* resolve HOST with
|
be an IP address. */
|
||||||
store_hostaddress (it would hit the hash table), but rfc2109
|
|
||||||
doesn't require it, and it doesn't seem very useful, so we
|
|
||||||
don't. */
|
|
||||||
if (numeric_address_p (cookie_domain))
|
if (numeric_address_p (cookie_domain))
|
||||||
return !strcmp (cookie_domain, host);
|
return 0 == strcmp (cookie_domain, host);
|
||||||
|
|
||||||
DEBUGP ((" 2"));
|
DEBUGP ((" 2"));
|
||||||
|
|
||||||
/* The domain must contain at least one embedded dot. */
|
|
||||||
{
|
|
||||||
const char *rest = cookie_domain;
|
|
||||||
int len = strlen (rest);
|
|
||||||
if (*rest == '.')
|
|
||||||
++rest, --len; /* ignore first dot */
|
|
||||||
if (len <= 0)
|
|
||||||
return 0;
|
|
||||||
if (rest[len - 1] == '.')
|
|
||||||
--len; /* ignore last dot */
|
|
||||||
|
|
||||||
if (!memchr (rest, '.', len))
|
|
||||||
/* No dots. */
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
DEBUGP ((" 3"));
|
|
||||||
|
|
||||||
/* For the sake of efficiency, check for exact match first. */
|
/* For the sake of efficiency, check for exact match first. */
|
||||||
if (!strcasecmp (cookie_domain, host))
|
if (!strcasecmp (cookie_domain, host))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
DEBUGP ((" 3"));
|
||||||
|
|
||||||
|
required_dots = 3;
|
||||||
|
for (i = 0; i < ARRAY_SIZE (special_toplevel_domains); i++)
|
||||||
|
if (match_tail (cookie_domain, special_toplevel_domains[i]))
|
||||||
|
{
|
||||||
|
required_dots = 2;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If the domain does not start with '.', require one less dot.
|
||||||
|
This is so that domains like "altavista.com" (which should be
|
||||||
|
".altavista.com") are accepted. */
|
||||||
|
if (*cookie_domain != '.')
|
||||||
|
--required_dots;
|
||||||
|
|
||||||
|
if (count_char (cookie_domain, '.') < required_dots)
|
||||||
|
return 0;
|
||||||
|
|
||||||
DEBUGP ((" 4"));
|
DEBUGP ((" 4"));
|
||||||
|
|
||||||
/* In rfc2109 terminology, HOST needs domain-match COOKIE_DOMAIN.
|
if (!match_tail (host, cookie_domain))
|
||||||
This means that COOKIE_DOMAIN needs to start with `.' and be an
|
|
||||||
FQDN, and that HOST must end with COOKIE_DOMAIN. */
|
|
||||||
if (*cookie_domain != '.')
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
DEBUGP ((" 5"));
|
DEBUGP ((" 5"));
|
||||||
|
|
||||||
/* To proceed, we need to examine two parts of HOST: its head and
|
/* Don't allow domain "bar.com" to match host "foobar.com". */
|
||||||
its tail. Head and tail are defined in terms of the length of
|
if (*cookie_domain != '.')
|
||||||
the domain, like this:
|
{
|
||||||
|
int dlen = strlen (cookie_domain);
|
||||||
HHHHTTTTTTTTTTTTTTT <- host
|
int hlen = strlen (host);
|
||||||
DDDDDDDDDDDDDDD <- domain
|
/* hostname.foobar.com */
|
||||||
|
/* bar.com */
|
||||||
That is, "head" is the part of the host before (dlen - hlen), and
|
/* ^ <-- must be '.' for host */
|
||||||
"tail" is what follows.
|
if (hlen > dlen && host[hlen - dlen - 1] != '.')
|
||||||
|
|
||||||
For the domain to match, two conditions need to be true:
|
|
||||||
|
|
||||||
1. Tail must equal DOMAIN.
|
|
||||||
2. Head must not contain an embedded dot. */
|
|
||||||
|
|
||||||
headlen = strlen (host) - strlen (cookie_domain);
|
|
||||||
|
|
||||||
if (headlen <= 0)
|
|
||||||
/* DOMAIN must be a proper subset of HOST. */
|
|
||||||
return 0;
|
return 0;
|
||||||
tail = host + headlen;
|
}
|
||||||
|
|
||||||
DEBUGP ((" 6"));
|
DEBUGP ((" 6"));
|
||||||
|
|
||||||
/* (1) */
|
|
||||||
if (strcasecmp (tail, cookie_domain))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
DEBUGP ((" 7"));
|
|
||||||
|
|
||||||
/* Test (2) is not part of the "domain-match" itself, but is
|
|
||||||
recommended by rfc2109 for reasons of privacy. */
|
|
||||||
|
|
||||||
/* (2) */
|
|
||||||
if (memchr (host, '.', headlen))
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
DEBUGP ((" 8"));
|
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -854,8 +854,8 @@ accdir (const char *directory, enum accd flags)
|
|||||||
match_backwards ("abc", "bc") -> 1
|
match_backwards ("abc", "bc") -> 1
|
||||||
match_backwards ("abc", "ab") -> 0
|
match_backwards ("abc", "ab") -> 0
|
||||||
match_backwards ("abc", "abc") -> 1 */
|
match_backwards ("abc", "abc") -> 1 */
|
||||||
static int
|
int
|
||||||
match_backwards (const char *string, const char *pattern)
|
match_tail (const char *string, const char *pattern)
|
||||||
{
|
{
|
||||||
int i, j;
|
int i, j;
|
||||||
|
|
||||||
@ -870,7 +870,7 @@ match_backwards (const char *string, const char *pattern)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Checks whether string S matches each element of ACCEPTS. A list
|
/* Checks whether string S matches each element of ACCEPTS. A list
|
||||||
element is matched either with fnmatch() or match_backwards(),
|
element is matched either with fnmatch() or match_tail(),
|
||||||
according to whether the element contains wildcards or not.
|
according to whether the element contains wildcards or not.
|
||||||
|
|
||||||
If BACKWARD is 0, don't do backward comparison -- just compare
|
If BACKWARD is 0, don't do backward comparison -- just compare
|
||||||
@ -891,7 +891,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
|
|||||||
{
|
{
|
||||||
if (backward)
|
if (backward)
|
||||||
{
|
{
|
||||||
if (match_backwards (s, *accepts))
|
if (match_tail (s, *accepts))
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -69,6 +69,7 @@ char *file_merge PARAMS ((const char *, const char *));
|
|||||||
int acceptable PARAMS ((const char *));
|
int acceptable PARAMS ((const char *));
|
||||||
int accdir PARAMS ((const char *s, enum accd));
|
int accdir PARAMS ((const char *s, enum accd));
|
||||||
char *suffix PARAMS ((const char *s));
|
char *suffix PARAMS ((const char *s));
|
||||||
|
int match_tail PARAMS ((const char *, const char *));
|
||||||
|
|
||||||
char *read_whole_line PARAMS ((FILE *));
|
char *read_whole_line PARAMS ((FILE *));
|
||||||
struct file_memory *read_file PARAMS ((const char *));
|
struct file_memory *read_file PARAMS ((const char *));
|
||||||
|
Loading…
Reference in New Issue
Block a user