1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Implement better sorting and matching of cookies.

Fix previously broken command-line options.
Published in <sxspuek9255.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-04-10 09:04:18 -07:00
parent 579d051d57
commit 5fa53b5a1d
4 changed files with 179 additions and 73 deletions

View File

@ -1,3 +1,15 @@
2001-04-10 Hrvoje Niksic <hniksic@arsdigita.com>
* cookies.c (build_cookies_request): Use and sort cookies from all
matching domains.
(build_cookies_request): Check for duplicates before generating
the `Cookies' header.
* main.c (main): Don't load cookies here.
(main): Make loadcookies and savecookies call the correct command.
* http.c (http_loop): Load cookies on-demand.
2001-04-09 Hrvoje Niksic <hniksic@arsdigita.com>
* http.c (gethttp): Fix indentation of SSL ifdef.

View File

@ -646,7 +646,7 @@ parse_set_cookies (const char *sc)
delete_cookie (cookie);
if (state == S_ERROR)
logprintf (LOG_NOTQUIET, _("Error in Set-Cookie, at character `%c'.\n"), c);
logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie at character `%c'.\n"), c);
else
abort ();
return NULL;
@ -654,7 +654,7 @@ parse_set_cookies (const char *sc)
eof:
delete_cookie (cookie);
logprintf (LOG_NOTQUIET,
_("Error in Set-Cookie: premature end of string.\n"));
_("Syntax error in Set-Cookie: premature end of string.\n"));
return NULL;
}
@ -864,24 +864,39 @@ count_char (const char *string, char chr)
return count;
}
/* Return the head of the cookie chain that matches HOST. */
/* Store CHAIN to STORE if there is room in STORE. If not, inrecement
COUNT anyway, so that when the function is done, we end up with the
exact count of how much place we actually need. */
static struct cookie *
find_cookie_chain (const char *host, int port)
#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \
if (st_count < st_size) \
store[st_count] = st_chain; \
++st_count; \
} while (0)
/* Store cookie chains that match HOST, PORT. Since more than one
chain can match, the matches are written to STORE. No more than
SIZE matches are written; if more matches are present, return the
number of chains that would have been written. */
int
find_matching_chains (const char *host, int port,
struct cookie *store[], int size)
{
struct cookie *chain;
int dot_count;
char *hash_key;
struct cookie *chain = NULL;
int count = 0;
if (!cookies_hash_table)
return NULL;
return 0;
SET_HOSTPORT (host, port, hash_key);
/* Exact match. */
chain = hash_table_get (cookies_hash_table, hash_key);
if (chain)
return chain;
STORE_CHAIN (chain, store, size, count);
dot_count = count_char (host, '.');
@ -896,10 +911,10 @@ find_cookie_chain (const char *host, int port)
assert (p != NULL);
chain = hash_table_get (cookies_hash_table, p);
if (chain)
return chain;
STORE_CHAIN (chain, store, size, count);
hash_key = p + 1;
}
return NULL;
return count;
}
/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
@ -917,19 +932,71 @@ path_matches (const char *full_path, const char *prefix)
return len;
}
static int
matching_cookie (const struct cookie *cookie, const char *path,
int connection_secure_p, int *path_goodness)
{
int pg;
if (cookie->expiry_time < cookies_now)
/* Ignore stale cookies. There is no need to unchain the cookie
at this point -- Wget is a relatively short-lived application,
and stale cookies will not be saved by `save_cookies'. */
return 0;
if (cookie->secure && !connection_secure_p)
/* Don't transmit secure cookies over an insecure connection. */
return 0;
pg = path_matches (path, cookie->path);
if (!pg)
return 0;
if (path_goodness)
/* If the caller requested path_goodness, we return it. This is
an optimization, so that the caller doesn't need to call
path_matches() again. */
*path_goodness = pg;
return 1;
}
struct weighed_cookie {
struct cookie *cookie;
int domain_goodness;
int path_goodness;
};
/* Comparator used for uniquifying the list. */
static int
equality_comparator (const void *p1, const void *p2)
{
struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
/* We only really care whether both name and value are equal. We
return them in this order only for consistency... */
return namecmp ? namecmp : valuecmp;
}
/* Comparator used for sorting by quality. */
static int
goodness_comparator (const void *p1, const void *p2)
{
struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
/* It's goodness2-goodness1 because we want a sort in *decreasing*
order of goodness. */
return wc2->path_goodness - wc1->path_goodness;
/* Subtractions take `wc2' as the first argument becauase we want a
sort in *decreasing* order of goodness. */
int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
int pgdiff = wc2->path_goodness - wc1->path_goodness;
/* Sort by domain goodness; if these are the same, sort by path
goodness. (The sorting order isn't really specified; maybe it
should be the other way around.) */
return dgdiff ? dgdiff : pgdiff;
}
/* Build a `Cookies' header for a request that goes to HOST:PORT and
@ -942,70 +1009,97 @@ char *
build_cookies_request (const char *host, int port, const char *path,
int connection_secure_p)
{
struct cookie *chain = find_cookie_chain (host, port);
struct cookie *chain_default_store[20];
struct cookie **all_chains = chain_default_store;
int chain_store_size = ARRAY_SIZE (chain_default_store);
int chain_count;
struct cookie *cookie;
struct weighed_cookie *outgoing;
int count, i;
int count, i, ocnt;
char *result;
int result_size, pos;
if (!chain)
again:
chain_count = find_matching_chains (host, port, all_chains, chain_store_size);
if (chain_count > chain_store_size)
{
/* It's extremely unlikely that more than 20 chains will ever
match. But in this case it's easy to not have the
limitation, so we don't. */
all_chains = alloca (chain_count * sizeof (struct cookie *));
goto again;
}
if (!chain_count)
return NULL;
cookies_now = time (NULL);
/* Count the number of cookies whose path matches. */
count = 0;
result_size = 0;
for (cookie = chain; cookie; cookie = cookie->next)
{
if (cookie->expiry_time < cookies_now)
/* Ignore stale cookies. There is no need to unchain the
cookie at this point -- Wget is a relatively short-lived
application, and stale cookies will not be saved by
`save_cookies'. */
continue;
if (cookie->secure && !connection_secure_p)
/* Don't transmit secure cookies over an insecure
connection. */
continue;
if (path_matches (path, cookie->path))
{
++count;
/* name=value */
result_size += strlen (cookie->attr) + 1 + strlen (cookie->value);
}
}
for (i = 0; i < chain_count; i++)
for (cookie = all_chains[i]; cookie; cookie = cookie->next)
if (matching_cookie (cookie, path, connection_secure_p, NULL))
++count;
if (!count)
/* No matching cookies. */
return NULL;
/* Allocate the array. */
outgoing = alloca (count * sizeof (struct weighed_cookie));
i = 0;
for (cookie = chain; cookie; cookie = cookie->next)
{
int goodness;
/* #### These two if's are repeated verbatim from the loop
above. Should I put them in a separate function? */
if (cookie->expiry_time < cookies_now)
continue;
if (cookie->secure && !connection_secure_p)
/* Don't transmit secure cookies over an insecure
connection. */
continue;
goodness = path_matches (path, cookie->path);
if (!goodness)
continue;
outgoing[i].cookie = cookie;
outgoing[i].path_goodness = goodness;
++i;
}
assert (i == count);
/* Sort the array so that paths that match our path better come
first. */
ocnt = 0;
for (i = 0; i < chain_count; i++)
for (cookie = all_chains[i]; cookie; cookie = cookie->next)
{
int pg;
if (!matching_cookie (cookie, path, connection_secure_p, &pg))
continue;
outgoing[ocnt].cookie = cookie;
outgoing[ocnt].domain_goodness = strlen (cookie->domain);
outgoing[ocnt].path_goodness = pg;
++ocnt;
}
assert (ocnt == count);
/* Eliminate duplicate cookies; that is, those whose name and value
are the same. We do it by first sorting the array, and then
uniq'ing it. */
qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
for (i = 0; i < count - 1; i++)
{
struct cookie *c1 = outgoing[i].cookie;
struct cookie *c2 = outgoing[i + 1].cookie;
if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
{
/* c1 and c2 are the same; get rid of c2. */
if (count > i + 1)
/* move all ptrs from positions [i + 1, count) to i. */
memmove (outgoing + i, outgoing + i + 1,
(count - (i + 1)) * sizeof (struct weighed_cookie));
/* We decrement i to counter the ++i above. Remember that
we've just removed the element in front of us; we need to
remain in place to check whether outgoing[i] what used to
be outgoing[i + 2]. */
--i;
--count;
}
}
/* Sort the array so that best-matching domains come first, and
that, within one domain, best-matching paths come first. */
qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
/* Count the space the name=value pairs will take. */
result_size = 0;
for (i = 0; i < count; i++)
{
struct cookie *c = outgoing[i].cookie;
/* name=value */
result_size += strlen (c->attr) + 1 + strlen (c->value);
}
/* Allocate output buffer:
"Cookie: " -- 8
name=value pairs -- result_size
@ -1252,6 +1346,8 @@ save_cookies (const char *file)
/* no cookies stored; nothing to do. */
return;
DEBUGP (("Saving cookies to %s.\n", file));
cookies_now = time (NULL);
fp = fopen (file, "w");
@ -1275,6 +1371,8 @@ save_cookies (const char *file)
if (fclose (fp) < 0)
logprintf (LOG_NOTQUIET, _("Error closing `%s': %s\n"),
file, strerror (errno));
DEBUGP (("Done saving cookies.\n", file));
}
static int

View File

@ -78,6 +78,7 @@ extern int h_errno;
# endif
#endif
static int cookies_loaded_p;
#define TEXTHTML_S "text/html"
#define HTTP_ACCEPT "*/*"
@ -1374,6 +1375,12 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
struct http_stat hstat; /* HTTP status */
struct stat st;
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
FTP or whatever. */
if (opt.cookies && opt.cookies_input && !cookies_loaded_p)
load_cookies (opt.cookies_input);
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use

View File

@ -290,7 +290,6 @@ main (int argc, char *const *argv)
{ "base", required_argument, NULL, 'B' },
{ "bind-address", required_argument, NULL, 155 },
{ "cache", required_argument, NULL, 'C' },
{ "cookie-file", required_argument, NULL, 161 },
{ "cut-dirs", required_argument, NULL, 145 },
{ "directory-prefix", required_argument, NULL, 'P' },
{ "domains", required_argument, NULL, 'D' },
@ -308,7 +307,7 @@ main (int argc, char *const *argv)
{ "include-directories", required_argument, NULL, 'I' },
{ "input-file", required_argument, NULL, 'i' },
{ "level", required_argument, NULL, 'l' },
{ "load-cookies", required_argument, NULL, 162 },
{ "load-cookies", required_argument, NULL, 161 },
{ "no", required_argument, NULL, 'n' },
{ "output-document", required_argument, NULL, 'O' },
{ "output-file", required_argument, NULL, 'o' },
@ -317,7 +316,7 @@ main (int argc, char *const *argv)
{ "proxy-user", required_argument, NULL, 143 },
{ "quota", required_argument, NULL, 'Q' },
{ "reject", required_argument, NULL, 'R' },
{ "save-cookies", required_argument, NULL, 163 },
{ "save-cookies", required_argument, NULL, 162 },
{ "timeout", required_argument, NULL, 'T' },
{ "tries", required_argument, NULL, 't' },
{ "user-agent", required_argument, NULL, 'U' },
@ -531,17 +530,10 @@ GNU General Public License for more details.\n"));
setval ("cookies", "on");
break;
case 161:
setval ("cookies", "on");
setval ("cookiein", optarg);
setval ("cookieout", optarg);
setval ("loadcookies", optarg);
break;
case 162:
setval ("cookies", "on");
setval ("cookiein", optarg);
break;
case 163:
setval ("cookies", "on");
setval ("cookieout", optarg);
setval ("savecookies", optarg);
break;
case 157:
setval ("referer", optarg);
@ -792,9 +784,6 @@ Can't timestamp and not clobber old files at the same time.\n"));
ws_startup ();
#endif
if (opt.cookies_input)
load_cookies (opt.cookies_input);
/* Setup the signal handler to redirect output when hangup is
received. */
#ifdef HAVE_SIGNAL