[svn] Added support for cookies.

This commit is contained in:
hniksic 2001-04-08 15:25:24 -07:00
parent 7827420378
commit 2cfb2d2ef6
13 changed files with 1508 additions and 28 deletions

10
NEWS
View File

@ -11,8 +11,14 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
use the `--with-ssl' configure flag. You need to have OpenSSL
installed.
** "Keep-alive" (persistent) HTTP connections are now supported. This
means that multiple requests from the same hosts are now much faster.
** Cookies are now supported. Wget will accept cookies sent by the
server and return them in later requests. Additionally, it can load
and save cookies to disk, in the same format that Netscape uses.
** "Keep-alive" (persistent) HTTP connections are now supported.
Using keep-alive allows Wget to share one TCP/IP connection for
many retrievals, making multiple-file downloads faster and less
stressing for the server and the network.
** Wget now recognizes FTP directory listings generated by NT and VMS
servers.

2
TODO
View File

@ -131,8 +131,6 @@ changes.
* Implement correct RFC1808 URL parsing.
* Implement HTTP cookies.
* Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.)
* Add a "rollback" option to have --continue throw away a configurable number of

View File

@ -1,3 +1,20 @@
2001-04-08 Hrvoje Niksic <hniksic@arsdigita.com>
* init.c: Include cookie-related options.
* main.c (main): Include cookie-specific options.
(main): Load cookies before download is started.
(main): Save cookies when done.
* http.c (gethttp): Process the `Set-Cookie' header.
(gethttp): Include cookies in the request.
* cookies.c: New file.
2001-04-08 Hrvoje Niksic <hniksic@arsdigita.com>
* utils.c (datetime_str): New function.
2001-04-08 Jan Prikryl <prikryl@cg.tuwien.ac.at>
* ftp-ls.c (ftp_parse_winnt_ls): The AM/PM change did assume

View File

@ -59,11 +59,11 @@ MD5_OBJ = @MD5_OBJ@
OPIE_OBJ = @OPIE_OBJ@
SSL_OBJ = @SSL_OBJ@
OBJ = $(ALLOCA) cmpt$o connect$o fnmatch$o ftp$o ftp-basic$o \
ftp-ls$o $(OPIE_OBJ) getopt$o hash$o headers$o host$o \
html-parse$o html-url$o http$o init$o log$o main$o \
$(MD5_OBJ) netrc$o rbuf$o recur$o retr$o snprintf$o \
$(SSL_OBJ) url$o utils$o version$o safe-ctype$o
OBJ = $(ALLOCA) cmpt$o connect$o cookies$o fnmatch$o ftp$o \
ftp-basic$o ftp-ls$o $(OPIE_OBJ) getopt$o hash$o \
headers$o host$o html-parse$o html-url$o http$o init$o \
log$o main$o $(MD5_OBJ) netrc$o rbuf$o recur$o retr$o \
snprintf$o $(SSL_OBJ) url$o utils$o version$o safe-ctype$o
.SUFFIXES:
.SUFFIXES: .c .o ._c ._o
@ -139,6 +139,7 @@ TAGS: *.c *.h
cmpt$o: wget.h
connect$o: wget.h connect.h host.h
cookies$o: wget.h cookies.h hash.h url.h utils.h
fnmatch$o: wget.h fnmatch.h
ftp-basic$o: wget.h utils.h rbuf.h connect.h host.h
ftp-ls$o: wget.h utils.h ftp.h url.h
@ -151,10 +152,10 @@ host$o: wget.h utils.h host.h url.h hash.h
html-parse$o: wget.h html-parse.h
html-url$o: wget.h html-parse.h url.h utils.h
html$o: wget.h url.h utils.h ftp.h
http$o: wget.h utils.h url.h host.h rbuf.h retr.h headers.h connect.h fnmatch.h netrc.h md5.h
http$o: wget.h utils.h url.h host.h rbuf.h retr.h headers.h cookies.h connect.h fnmatch.h netrc.h md5.h
init$o: wget.h utils.h init.h host.h recur.h netrc.h
log$o: wget.h utils.h
main$o: wget.h utils.h getopt.h init.h retr.h recur.h host.h
main$o: wget.h utils.h getopt.h init.h retr.h recur.h host.h cookies.h
md5$o: wget.h md5.h
mswindows$o: wget.h url.h
netrc$o: wget.h utils.h netrc.h init.h

1312
src/cookies.c Normal file

File diff suppressed because it is too large Load Diff

28
src/cookies.h Normal file
View File

@ -0,0 +1,28 @@
/* Support for cookies.
Copyright (C) 2001 Free Software Foundation, Inc.
This file is part of Wget.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef COOKIES_H
#define COOKIES_H

/* struct cookie is not exported; this file only exports functions for
   manipulating cookie contents.

   NOTE(review): cleanup() in init.c also calls cookies_cleanup(),
   which is not declared here -- confirm where its prototype lives.  */

/* Header-processing callback for `Set-Cookie'.  gethttp() passes it
   to header_process() with the struct urlinfo of the URL being
   retrieved as the closure (second) argument.  */
int set_cookie_header_cb PARAMS ((const char *, void *));

/* Build the cookie text to be sent with a request.  gethttp() calls
   this with the URL's host, port and path, plus a flag that is
   non-zero for HTTPS URLs.  A non-NULL result is inserted verbatim
   among the request headers and then released by the caller with
   FREE_MAYBE.  (Presumably NULL is returned when no cookie applies;
   confirm in cookies.c.)  */
char *build_cookies_request PARAMS ((const char *, int, const char *, int));

/* Load cookies from, and save cookies to, the named file.  main()
   calls these with opt.cookies_input before the downloads and with
   opt.cookies_output after them, respectively.  */
void load_cookies PARAMS ((const char *));
void save_cookies PARAMS ((const char *));

#endif /* COOKIES_H */

View File

@ -65,6 +65,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifdef HAVE_SSL
# include "gen_sslfunc.h"
#endif /* HAVE_SSL */
#include "cookies.h"
extern char *version_string;
@ -482,7 +483,7 @@ static char *basic_authentication_encode PARAMS ((const char *, const char *,
const char *));
static int known_authentication_scheme_p PARAMS ((const char *));
static time_t http_atotm PARAMS ((char *));
time_t http_atotm PARAMS ((char *));
#define BEGINS_WITH(line, string_constant) \
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
@ -524,6 +525,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
static SSL_CTX *ssl_ctx = NULL;
SSL *ssl = NULL;
#endif /* HAVE_SSL */
char *cookies = NULL;
/* Whether this connection will be kept alive after the HTTP request
is done. */
@ -592,6 +594,10 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0;
if (opt.cookies)
cookies = build_cookies_request (u->host, u->port, u->path,
u->proto == URLHTTPS);
/* Initialize certain elements of struct http_stat. */
hs->len = 0L;
hs->contlen = -1;
@ -805,6 +811,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
+ (request_keep_alive
? strlen (request_keep_alive) : 0)
+ (referer ? strlen (referer) : 0)
+ (cookies ? strlen (cookies) : 0)
+ (wwwauth ? strlen (wwwauth) : 0)
+ (proxyauth ? strlen (proxyauth) : 0)
+ (range ? strlen (range) : 0)
@ -817,12 +824,13 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
User-Agent: %s\r\n\
Host: %s%s\r\n\
Accept: %s\r\n\
%s%s%s%s%s%s%s\r\n",
%s%s%s%s%s%s%s%s\r\n",
command, path, useragent, remhost,
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
referer ? referer : "",
cookies ? cookies : "",
wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "",
range ? range : "",
@ -832,6 +840,7 @@ Accept: %s\r\n\
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
FREE_MAYBE (proxyauth);
FREE_MAYBE (cookies);
/* Send the request to server. */
#ifdef HAVE_SSL
@ -989,6 +998,10 @@ Accept: %s\r\n\
if (header_process (hdr, "Last-Modified", header_strdup,
&hs->remote_time))
goto done_header;
/* Try getting cookies. */
if (opt.cookies)
if (header_process (hdr, "Set-Cookie", set_cookie_header_cb, u))
goto done_header;
/* Try getting www-authentication. */
if (!authenticate_h)
if (header_process (hdr, "WWW-Authenticate", header_strdup,
@ -1858,7 +1871,7 @@ check_end (const char *p)
Marcus Hennecke's atotm(), which is forgiving, fast, to-the-point,
and does not use strptime(). atotm() is to be found in the sources
of `phttpd', a little-known HTTP server written by Peter Erikson. */
static time_t
time_t
http_atotm (char *time_string)
{
struct tm t;
@ -1901,6 +1914,10 @@ http_atotm (char *time_string)
/* RFC850: Thursday, 29-Jan-98 22:12:57 */
if (check_end (strptime (time_string, "%A, %d-%b-%y %T", &t)))
return mktime_from_utc (&t);
/* pseudo-RFC850: Thu, 29-Jan-1998 22:12:57
(google.com uses this for their cookies.)*/
if (check_end (strptime (time_string, "%a, %d-%b-%Y %T", &t)))
return mktime_from_utc (&t);
/* asctime: Thu Jan 29 22:12:57 1998 */
if (check_end (strptime (time_string, "%a %b %d %T %Y", &t)))
return mktime_from_utc (&t);

View File

@ -99,6 +99,7 @@ static struct {
{ "cache", &opt.proxy_cache, cmd_boolean },
{ "continue", &opt.always_rest, cmd_boolean },
{ "convertlinks", &opt.convert_links, cmd_boolean },
{ "cookies", &opt.cookies, cmd_boolean },
{ "cutdirs", &opt.cut_dirs, cmd_number },
#ifdef DEBUG
{ "debug", &opt.debug, cmd_boolean },
@ -131,6 +132,7 @@ static struct {
{ "includedirectories", &opt.includes, cmd_directory_vector },
{ "input", &opt.input_filename, cmd_string },
{ "killlonger", &opt.kill_longer, cmd_boolean },
{ "loadcookies", &opt.cookies_input, cmd_string },
{ "logfile", &opt.lfilename, cmd_string },
{ "login", &opt.ftp_acc, cmd_string },
{ "mirror", NULL, cmd_spec_mirror },
@ -155,6 +157,7 @@ static struct {
{ "removelisting", &opt.remove_listing, cmd_boolean },
{ "retrsymlinks", &opt.retr_symlinks, cmd_boolean },
{ "robots", &opt.use_robots, cmd_boolean },
{ "savecookies", &opt.cookies_output, cmd_string },
{ "saveheaders", &opt.save_headers, cmd_boolean },
{ "serverresponse", &opt.server_response, cmd_boolean },
{ "simplehostcheck", &opt.simple_check, cmd_boolean },
@ -209,6 +212,8 @@ defaults (void)
of the implementors' worries. */
memset (&opt, 0, sizeof (opt));
opt.cookies = 1;
opt.verbose = -1;
opt.dir_prefix = xstrdup (".");
opt.ntry = 20;
@ -1007,6 +1012,7 @@ cleanup (void)
fclose (opt.dfp);
cleanup_html_url ();
downloaded_files_free ();
cookies_cleanup ();
FREE_MAYBE (opt.lfilename);
xfree (opt.dir_prefix);
FREE_MAYBE (opt.input_filename);
@ -1034,4 +1040,6 @@ cleanup (void)
FREE_MAYBE (opt.sslcertfile);
#endif /* HAVE_SSL */
FREE_MAYBE (opt.bind_address);
FREE_MAYBE (opt.cookies_input);
FREE_MAYBE (opt.cookies_output);
}

View File

@ -49,6 +49,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#include "retr.h"
#include "recur.h"
#include "host.h"
#include "cookies.h"
#ifndef PATH_SEPARATOR
# define PATH_SEPARATOR '/'
@ -193,6 +194,9 @@ HTTP options:\n\
--referer=URL include `Referer: URL\' header in HTTP request.\n\
-s, --save-headers save the HTTP headers to file.\n\
-U, --user-agent=AGENT identify as AGENT instead of Wget/VERSION.\n\
--cookies=no don't use cookies.\n\
--load-cookies=FILE load cookies from FILE before session.\n\
--save-cookies=FILE save cookies to FILE after session.\n\
\n"), stdout);
fputs (_("\
FTP options:\n\
@ -242,9 +246,10 @@ main (int argc, char *const *argv)
{
/* Options without arguments: */
{ "background", no_argument, NULL, 'b' },
{ "backup-converted", no_argument, NULL, 'K' },
{ "continue", no_argument, NULL, 'c' },
{ "convert-links", no_argument, NULL, 'k' },
{ "backup-converted", no_argument, NULL, 'K' },
{ "cookies", no_argument, NULL, 160 },
{ "debug", no_argument, NULL, 'd' },
{ "delete-after", no_argument, NULL, 136 },
{ "dont-remove-listing", no_argument, NULL, 149 },
@ -285,6 +290,7 @@ main (int argc, char *const *argv)
{ "base", required_argument, NULL, 'B' },
{ "bind-address", required_argument, NULL, 155 },
{ "cache", required_argument, NULL, 'C' },
{ "cookie-file", required_argument, NULL, 161 },
{ "cut-dirs", required_argument, NULL, 145 },
{ "directory-prefix", required_argument, NULL, 'P' },
{ "domains", required_argument, NULL, 'D' },
@ -302,6 +308,7 @@ main (int argc, char *const *argv)
{ "include-directories", required_argument, NULL, 'I' },
{ "input-file", required_argument, NULL, 'i' },
{ "level", required_argument, NULL, 'l' },
{ "load-cookies", required_argument, NULL, 162 },
{ "no", required_argument, NULL, 'n' },
{ "output-document", required_argument, NULL, 'O' },
{ "output-file", required_argument, NULL, 'o' },
@ -310,6 +317,7 @@ main (int argc, char *const *argv)
{ "proxy-user", required_argument, NULL, 143 },
{ "quota", required_argument, NULL, 'Q' },
{ "reject", required_argument, NULL, 'R' },
{ "save-cookies", required_argument, NULL, 163 },
{ "timeout", required_argument, NULL, 'T' },
{ "tries", required_argument, NULL, 't' },
{ "user-agent", required_argument, NULL, 'U' },
@ -519,6 +527,22 @@ GNU General Public License for more details.\n"));
case 153:
setval ("followtags", optarg);
break;
case 160:
setval ("cookies", "on");
break;
case 161:
setval ("cookies", "on");
setval ("cookiein", optarg);
setval ("cookieout", optarg);
break;
case 162:
setval ("cookies", "on");
setval ("cookiein", optarg);
break;
case 163:
setval ("cookies", "on");
setval ("cookieout", optarg);
break;
case 157:
setval ("referer", optarg);
break;
@ -744,6 +768,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
DEBUGP (("DEBUG output created by Wget %s on %s.\n\n", version_string,
OS_TYPE));
/* Open the output filename if necessary. */
if (opt.output_document)
{
@ -767,6 +792,9 @@ Can't timestamp and not clobber old files at the same time.\n"));
ws_startup ();
#endif
if (opt.cookies_input)
load_cookies (opt.cookies_input);
/* Setup the signal handler to redirect output when hangup is
received. */
#ifdef HAVE_SIGNAL
@ -831,6 +859,10 @@ Can't timestamp and not clobber old files at the same time.\n"));
_("Download quota (%s bytes) EXCEEDED!\n"),
legible (opt.quota));
}
if (opt.cookies_output)
save_cookies (opt.cookies_output);
if (opt.convert_links && !opt.delete_after)
{
convert_all_links ();

View File

@ -160,6 +160,10 @@ struct options
(if not internal) included in the
certfile. */
#endif /* HAVE_SSL */
int cookies;
char *cookies_input;
char *cookies_output;
};
#ifndef OPTIONS_DEFINED_HERE

View File

@ -352,25 +352,56 @@ sepstring (const char *s)
}
/* Return pointer to a static char[] buffer in which zero-terminated
   string-representation of the current local time (in form hh:mm:ss)
   is printed.  The buffer is overwritten by every call, so the
   function is not reentrant.

   If TM is non-NULL, the time_t of the current time will be stored
   there.  Note that TM is strictly an output argument, as with
   time(2): it is never used as the time to be printed.

   If the current time cannot be obtained or converted to local time,
   the empty string is returned.  */
char *
time_str (time_t *tm)
{
  static char output[15];
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start from the empty string so that every failure path below
     returns something printable.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime() is allowed to fail and return NULL.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  snprintf (output, sizeof (output), "%02d:%02d:%02d",
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
/* Return pointer to a static char[] buffer holding the current local
   date and time in the form "YYYY-MM-DD hh:mm:ss".  The buffer is
   overwritten by every call, so the function is not reentrant.

   If TM is non-NULL, the time_t of the current time will be stored
   there; TM is an output argument only and is never used as the time
   to be printed.

   If the current time cannot be obtained or converted to local time,
   the empty string is returned.  */
char *
datetime_str (time_t *tm)
{
  static char output[20];	/* "YYYY-MM-DD hh:mm:ss" + \0 */
  struct tm *ptm;
  time_t secs = time (tm);

  /* Start from the empty string so that every failure path below
     returns something printable.  */
  *output = '\0';
  if (secs == (time_t) -1)
    return output;

  /* localtime() is allowed to fail and return NULL.  */
  ptm = localtime (&secs);
  if (ptm == NULL)
    return output;

  /* The buffer is sized exactly for a four-digit year; bound the
     write so that an out-of-range year truncates instead of
     overflowing.  */
  snprintf (output, sizeof (output), "%04d-%02d-%02d %02d:%02d:%02d",
            ptm->tm_year + 1900, ptm->tm_mon + 1, ptm->tm_mday,
            ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
  return output;
}
/* Returns an error message for ERRNUM. #### This requires more work.

View File

@ -40,6 +40,8 @@ struct file_memory {
};
char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *));
const char *uerrmsg PARAMS ((uerr_t));
#ifdef DEBUG_MALLOC

View File

@ -148,6 +148,30 @@ char *xstrdup_debug PARAMS ((const char *, const char *, int));
#define ARRAY_SIZE(array) (sizeof (array) / sizeof (*(array)))
/* Copy the data delimited with BEG and END to alloca-allocated
   storage, zero-terminate it, and store the resulting pointer in
   PLACE.  BEG and END are evaluated only once, in that order.  PLACE
   must be an lvalue; it is evaluated several times, so it must be
   free of side effects.  END must not precede BEG.

   Being alloca-based, the copy lives only until the calling function
   returns.  */
#define BOUNDED_TO_ALLOCA(beg, end, place) do {         \
  const char *DTA_beg = (beg);                          \
  int DTA_len = (end) - DTA_beg;                        \
  (place) = alloca (DTA_len + 1);                       \
  memcpy ((place), DTA_beg, DTA_len);                   \
  (place)[DTA_len] = '\0';                              \
} while (0)
/* Evaluate to non-zero when the bytes in the range [BEG, END) are
   exactly the characters of STRING_LITERAL: same length and same
   contents, compared case-sensitively.  STRING_LITERAL must be a
   real string literal (or char array), because its length is taken
   with sizeof.  BEG may be evaluated more than once.  */
#define BOUNDED_EQUAL(beg, end, string_literal)                 \
  (sizeof (string_literal) - 1 == (end) - (beg)                 \
   && memcmp ((beg), (string_literal),                          \
              sizeof (string_literal) - 1) == 0)
/* Evaluate to non-zero when the bytes in the range [BEG, END) match
   STRING_LITERAL in length and, ignoring letter case, in contents.
   STRING_LITERAL must be a real string literal (or char array),
   because its length is taken with sizeof.  BEG may be evaluated
   more than once.  */
#define BOUNDED_EQUAL_NO_CASE(beg, end, string_literal)         \
  (sizeof (string_literal) - 1 == (end) - (beg)                 \
   && strncasecmp ((beg), (string_literal),                     \
                   sizeof (string_literal) - 1) == 0)
/* Note that this much more elegant definition cannot be used:
#define STRDUP_ALLOCA(str) (strcpy ((char *)alloca (strlen (str) + 1), str))