mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Major refactoring of HTTP code. Added support for Content-Disposition header.
This commit is contained in:
parent
3e145c6018
commit
d40086ba5f
@ -1,3 +1,8 @@
|
|||||||
|
2005-11-23 Mauro Tortonesi <mauro@ferrara.linux.it>
|
||||||
|
|
||||||
|
* http.c: Refactored HTTP code. Added support for
|
||||||
|
Content-Disposition header.
|
||||||
|
|
||||||
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
|
2005-11-19 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* hash.c (INVALID_PTR): Use uintptr_t instead of unsigned long.
|
* hash.c (INVALID_PTR): Use uintptr_t instead of unsigned long.
|
||||||
|
416
src/http.c
416
src/http.c
@ -1083,7 +1083,14 @@ struct http_stat
|
|||||||
wgint rd_size; /* amount of data read from socket */
|
wgint rd_size; /* amount of data read from socket */
|
||||||
double dltime; /* time it took to download the data */
|
double dltime; /* time it took to download the data */
|
||||||
const char *referer; /* value of the referer header. */
|
const char *referer; /* value of the referer header. */
|
||||||
char **local_file; /* local file. */
|
char *local_file; /* local file name. */
|
||||||
|
bool timestamp_checked; /* true if pre-download time-stamping checks
|
||||||
|
* have already been performed */
|
||||||
|
char *orig_file_name; /* name of file to compare for time-stamping
|
||||||
|
* (might be != local_file if -K is set) */
|
||||||
|
wgint orig_file_size; /* size of file to compare for time-stamping */
|
||||||
|
time_t orig_file_tstamp; /* time-stamp of file to compare for
|
||||||
|
* time-stamping */
|
||||||
};
|
};
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -1093,6 +1100,8 @@ free_hstat (struct http_stat *hs)
|
|||||||
xfree_null (hs->remote_time);
|
xfree_null (hs->remote_time);
|
||||||
xfree_null (hs->error);
|
xfree_null (hs->error);
|
||||||
xfree_null (hs->rderrmsg);
|
xfree_null (hs->rderrmsg);
|
||||||
|
xfree_null (hs->local_file);
|
||||||
|
xfree_null (hs->orig_file_name);
|
||||||
|
|
||||||
/* Guard against being called twice. */
|
/* Guard against being called twice. */
|
||||||
hs->newloc = NULL;
|
hs->newloc = NULL;
|
||||||
@ -1105,6 +1114,7 @@ static char *create_authorization_line (const char *, const char *,
|
|||||||
const char *, bool *);
|
const char *, bool *);
|
||||||
static char *basic_authentication_encode (const char *, const char *);
|
static char *basic_authentication_encode (const char *, const char *);
|
||||||
static bool known_authentication_scheme_p (const char *, const char *);
|
static bool known_authentication_scheme_p (const char *, const char *);
|
||||||
|
static void load_cookies (void);
|
||||||
|
|
||||||
#define BEGINS_WITH(line, string_constant) \
|
#define BEGINS_WITH(line, string_constant) \
|
||||||
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
|
(!strncasecmp (line, string_constant, sizeof (string_constant) - 1) \
|
||||||
@ -1190,6 +1200,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
|
|
||||||
bool host_lookup_failed = false;
|
bool host_lookup_failed = false;
|
||||||
|
|
||||||
|
DEBUGP(("in gethttp 1\n"));
|
||||||
|
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
if (u->scheme == SCHEME_HTTPS)
|
if (u->scheme == SCHEME_HTTPS)
|
||||||
{
|
{
|
||||||
@ -1205,10 +1217,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
}
|
}
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
|
|
||||||
if (!head_only)
|
DEBUGP(("in gethttp 2\n"));
|
||||||
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
DEBUGP(("in gethttp 3\n"));
|
||||||
know the local filename so we can save to it. */
|
|
||||||
assert (*hs->local_file != NULL);
|
|
||||||
|
|
||||||
/* Initialize certain elements of struct http_stat. */
|
/* Initialize certain elements of struct http_stat. */
|
||||||
hs->len = 0;
|
hs->len = 0;
|
||||||
@ -1599,6 +1609,105 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
print_server_response (resp, " ");
|
print_server_response (resp, " ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEBUGP(("in gethttp 4\n"));
|
||||||
|
|
||||||
|
/* Determine the local filename if needed. Notice that if -O is used
|
||||||
|
* hstat.local_file is set by http_loop to the argument of -O. */
|
||||||
|
if (!hs->local_file)
|
||||||
|
{
|
||||||
|
if (resp_header_copy (resp, "Content-Disposition", hdrval, sizeof (hdrval)))
|
||||||
|
/* Honor Content-Disposition. */
|
||||||
|
{
|
||||||
|
hs->local_file = xstrdup (hdrval);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* Choose filename according to URL name. */
|
||||||
|
{
|
||||||
|
hs->local_file = url_file_name (u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
DEBUGP(("in gethttp 5\n"));
|
||||||
|
|
||||||
|
/* TODO: perform this check only once. */
|
||||||
|
if (opt.noclobber && file_exists_p (hs->local_file))
|
||||||
|
{
|
||||||
|
/* If opt.noclobber is turned on and file already exists, do not
|
||||||
|
retrieve the file */
|
||||||
|
logprintf (LOG_VERBOSE, _("\
|
||||||
|
File `%s' already there; not retrieving.\n\n"), hs->local_file);
|
||||||
|
/* If the file is there, we suppose it's retrieved OK. */
|
||||||
|
*dt |= RETROKF;
|
||||||
|
|
||||||
|
/* #### Bogusness alert. */
|
||||||
|
/* If its suffix is "html" or "htm" or similar, assume text/html. */
|
||||||
|
if (has_html_suffix_p (hs->local_file))
|
||||||
|
*dt |= TEXTHTML;
|
||||||
|
|
||||||
|
return RETROK;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Support timestamping */
|
||||||
|
/* TODO: move this code out of gethttp. */
|
||||||
|
if (opt.timestamping && !hs->timestamp_checked)
|
||||||
|
{
|
||||||
|
size_t filename_len = strlen (hs->local_file);
|
||||||
|
char *filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
||||||
|
bool local_dot_orig_file_exists = false;
|
||||||
|
char *local_filename = NULL;
|
||||||
|
struct_stat st;
|
||||||
|
|
||||||
|
if (opt.backup_converted)
|
||||||
|
/* If -K is specified, we'll act on the assumption that it was specified
|
||||||
|
last time these files were downloaded as well, and instead of just
|
||||||
|
comparing local file X against server file X, we'll compare local
|
||||||
|
file X.orig (if extant, else X) against server file X. If -K
|
||||||
|
_wasn't_ specified last time, or the server contains files called
|
||||||
|
*.orig, -N will be back to not operating correctly with -k. */
|
||||||
|
{
|
||||||
|
/* Would a single s[n]printf() call be faster? --dan
|
||||||
|
|
||||||
|
Definitely not. sprintf() is horribly slow. It's a
|
||||||
|
different question whether the difference between the two
|
||||||
|
affects a program. Usually I'd say "no", but at one
|
||||||
|
point I profiled Wget, and found that a measurable and
|
||||||
|
non-negligible amount of time was lost calling sprintf()
|
||||||
|
in url.c. Replacing sprintf with inline calls to
|
||||||
|
strcpy() and number_to_string() made a difference.
|
||||||
|
--hniksic */
|
||||||
|
memcpy (filename_plus_orig_suffix, hs->local_file, filename_len);
|
||||||
|
memcpy (filename_plus_orig_suffix + filename_len,
|
||||||
|
".orig", sizeof (".orig"));
|
||||||
|
|
||||||
|
/* Try to stat() the .orig file. */
|
||||||
|
if (stat (filename_plus_orig_suffix, &st) == 0)
|
||||||
|
{
|
||||||
|
local_dot_orig_file_exists = 1;
|
||||||
|
local_filename = filename_plus_orig_suffix;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!local_dot_orig_file_exists)
|
||||||
|
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
||||||
|
if (stat (hs->local_file, &st) == 0)
|
||||||
|
local_filename = hs->local_file;
|
||||||
|
|
||||||
|
if (local_filename != NULL)
|
||||||
|
/* There was a local file, so we'll check later to see if the version
|
||||||
|
the server has is the same version we already have, allowing us to
|
||||||
|
skip a download. */
|
||||||
|
{
|
||||||
|
hs->orig_file_name = xstrdup (local_filename);
|
||||||
|
hs->orig_file_size = st.st_size;
|
||||||
|
hs->orig_file_tstamp = st.st_mtime;
|
||||||
|
#ifdef WINDOWS
|
||||||
|
/* Modification time granularity is 2 seconds for Windows, so
|
||||||
|
increase local time by 1 second for later comparison. */
|
||||||
|
++hs->orig_file_tstamp;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (!opt.ignore_length
|
if (!opt.ignore_length
|
||||||
&& resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
|
&& resp_header_copy (resp, "Content-Length", hdrval, sizeof (hdrval)))
|
||||||
{
|
{
|
||||||
@ -1791,27 +1900,27 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
||||||
already the file's suffix, tack on ".html". */
|
already the file's suffix, tack on ".html". */
|
||||||
{
|
{
|
||||||
char *last_period_in_local_filename = strrchr (*hs->local_file, '.');
|
char *last_period_in_local_filename = strrchr (hs->local_file, '.');
|
||||||
|
|
||||||
if (last_period_in_local_filename == NULL
|
if (last_period_in_local_filename == NULL
|
||||||
|| !(0 == strcasecmp (last_period_in_local_filename, ".htm")
|
|| !(0 == strcasecmp (last_period_in_local_filename, ".htm")
|
||||||
|| 0 == strcasecmp (last_period_in_local_filename, ".html")))
|
|| 0 == strcasecmp (last_period_in_local_filename, ".html")))
|
||||||
{
|
{
|
||||||
int local_filename_len = strlen (*hs->local_file);
|
int local_filename_len = strlen (hs->local_file);
|
||||||
/* Resize the local file, allowing for ".html" preceded by
|
/* Resize the local file, allowing for ".html" preceded by
|
||||||
optional ".NUMBER". */
|
optional ".NUMBER". */
|
||||||
*hs->local_file = xrealloc (*hs->local_file,
|
hs->local_file = xrealloc (hs->local_file,
|
||||||
local_filename_len + 24 + sizeof (".html"));
|
local_filename_len + 24 + sizeof (".html"));
|
||||||
strcpy(*hs->local_file + local_filename_len, ".html");
|
strcpy(hs->local_file + local_filename_len, ".html");
|
||||||
/* If clobbering is not allowed and the file, as named,
|
/* If clobbering is not allowed and the file, as named,
|
||||||
exists, tack on ".NUMBER.html" instead. */
|
exists, tack on ".NUMBER.html" instead. */
|
||||||
if (!ALLOW_CLOBBER)
|
if (!ALLOW_CLOBBER)
|
||||||
{
|
{
|
||||||
int ext_num = 1;
|
int ext_num = 1;
|
||||||
do
|
do
|
||||||
sprintf (*hs->local_file + local_filename_len,
|
sprintf (hs->local_file + local_filename_len,
|
||||||
".%d.html", ext_num++);
|
".%d.html", ext_num++);
|
||||||
while (file_exists_p (*hs->local_file));
|
while (file_exists_p (hs->local_file));
|
||||||
}
|
}
|
||||||
*dt |= ADDED_HTML_EXTENSION;
|
*dt |= ADDED_HTML_EXTENSION;
|
||||||
}
|
}
|
||||||
@ -1907,16 +2016,16 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
/* Open the local file. */
|
/* Open the local file. */
|
||||||
if (!output_stream)
|
if (!output_stream)
|
||||||
{
|
{
|
||||||
mkalldirs (*hs->local_file);
|
mkalldirs (hs->local_file);
|
||||||
if (opt.backups)
|
if (opt.backups)
|
||||||
rotate_backups (*hs->local_file);
|
rotate_backups (hs->local_file);
|
||||||
if (hs->restval)
|
if (hs->restval)
|
||||||
fp = fopen (*hs->local_file, "ab");
|
fp = fopen (hs->local_file, "ab");
|
||||||
else if (ALLOW_CLOBBER)
|
else if (ALLOW_CLOBBER)
|
||||||
fp = fopen (*hs->local_file, "wb");
|
fp = fopen (hs->local_file, "wb");
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
fp = fopen_excl (*hs->local_file, true);
|
fp = fopen_excl (hs->local_file, true);
|
||||||
if (!fp && errno == EEXIST)
|
if (!fp && errno == EEXIST)
|
||||||
{
|
{
|
||||||
/* We cannot just invent a new name and use it (which is
|
/* We cannot just invent a new name and use it (which is
|
||||||
@ -1925,14 +2034,14 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
|||||||
Instead, return and retry the download. */
|
Instead, return and retry the download. */
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("%s has sprung into existence.\n"),
|
_("%s has sprung into existence.\n"),
|
||||||
*hs->local_file);
|
hs->local_file);
|
||||||
CLOSE_INVALIDATE (sock);
|
CLOSE_INVALIDATE (sock);
|
||||||
return FOPEN_EXCL_ERR;
|
return FOPEN_EXCL_ERR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!fp)
|
if (!fp)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
|
logprintf (LOG_NOTQUIET, "%s: %s\n", hs->local_file, strerror (errno));
|
||||||
CLOSE_INVALIDATE (sock);
|
CLOSE_INVALIDATE (sock);
|
||||||
return FOPENERR;
|
return FOPENERR;
|
||||||
}
|
}
|
||||||
@ -1988,153 +2097,62 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
|||||||
int *dt, struct url *proxy)
|
int *dt, struct url *proxy)
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
bool use_ts, got_head = false;/* time-stamping info */
|
bool got_head = false; /* used for time-stamping */
|
||||||
char *filename_plus_orig_suffix;
|
char *tms;
|
||||||
char *local_filename = NULL;
|
|
||||||
char *tms, *locf;
|
|
||||||
const char *tmrate;
|
const char *tmrate;
|
||||||
uerr_t err;
|
uerr_t err;
|
||||||
time_t tml = -1, tmr = -1; /* local and remote time-stamps */
|
time_t tmr = -1; /* remote time-stamp */
|
||||||
wgint local_size = 0; /* the size of the local file */
|
wgint local_size = 0; /* the size of the local file */
|
||||||
size_t filename_len;
|
|
||||||
struct http_stat hstat; /* HTTP status */
|
struct http_stat hstat; /* HTTP status */
|
||||||
struct_stat st;
|
struct_stat st;
|
||||||
char *dummy = NULL;
|
|
||||||
|
DEBUGP(("in http_loop\n"));
|
||||||
|
|
||||||
|
/* Assert that no value for *LOCAL_FILE was passed. */
|
||||||
|
assert (local_file == NULL || *local_file == NULL);
|
||||||
|
|
||||||
|
/* Set LOCAL_FILE parameter. */
|
||||||
|
if (local_file && opt.output_document)
|
||||||
|
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
||||||
|
|
||||||
|
/* Reset NEWLOC parameter. */
|
||||||
|
*newloc = NULL;
|
||||||
|
|
||||||
/* This used to be done in main(), but it's a better idea to do it
|
/* This used to be done in main(), but it's a better idea to do it
|
||||||
here so that we don't go through the hoops if we're just using
|
here so that we don't go through the hoops if we're just using
|
||||||
FTP or whatever. */
|
FTP or whatever. */
|
||||||
if (opt.cookies)
|
if (opt.cookies)
|
||||||
{
|
load_cookies();
|
||||||
if (!wget_cookie_jar)
|
|
||||||
wget_cookie_jar = cookie_jar_new ();
|
|
||||||
if (opt.cookies_input && !cookies_loaded_p)
|
|
||||||
{
|
|
||||||
cookie_jar_load (wget_cookie_jar, opt.cookies_input);
|
|
||||||
cookies_loaded_p = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*newloc = NULL;
|
|
||||||
|
|
||||||
/* Warn on (likely bogus) wildcard usage in HTTP. */
|
/* Warn on (likely bogus) wildcard usage in HTTP. */
|
||||||
if (opt.ftp_glob && has_wildcards_p (u->path))
|
if (opt.ftp_glob && has_wildcards_p (u->path))
|
||||||
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
||||||
|
|
||||||
|
/* Setup hstat struct. */
|
||||||
xzero (hstat);
|
xzero (hstat);
|
||||||
|
|
||||||
/* Determine the local filename. */
|
|
||||||
if (local_file && *local_file)
|
|
||||||
hstat.local_file = local_file;
|
|
||||||
else if (local_file && !opt.output_document)
|
|
||||||
{
|
|
||||||
*local_file = url_file_name (u);
|
|
||||||
hstat.local_file = local_file;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
dummy = url_file_name (u);
|
|
||||||
hstat.local_file = &dummy;
|
|
||||||
/* be honest about where we will save the file */
|
|
||||||
if (local_file && opt.output_document)
|
|
||||||
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!opt.output_document)
|
|
||||||
locf = *hstat.local_file;
|
|
||||||
else
|
|
||||||
locf = opt.output_document;
|
|
||||||
|
|
||||||
hstat.referer = referer;
|
hstat.referer = referer;
|
||||||
|
|
||||||
filename_len = strlen (*hstat.local_file);
|
if (opt.output_document)
|
||||||
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
hstat.local_file = xstrdup (opt.output_document);
|
||||||
|
|
||||||
if (opt.noclobber && file_exists_p (*hstat.local_file))
|
|
||||||
{
|
|
||||||
/* If opt.noclobber is turned on and file already exists, do not
|
|
||||||
retrieve the file */
|
|
||||||
logprintf (LOG_VERBOSE, _("\
|
|
||||||
File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|
||||||
/* If the file is there, we suppose it's retrieved OK. */
|
|
||||||
*dt |= RETROKF;
|
|
||||||
|
|
||||||
/* #### Bogusness alert. */
|
|
||||||
/* If its suffix is "html" or "htm" or similar, assume text/html. */
|
|
||||||
if (has_html_suffix_p (*hstat.local_file))
|
|
||||||
*dt |= TEXTHTML;
|
|
||||||
|
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
|
||||||
}
|
|
||||||
|
|
||||||
use_ts = false;
|
|
||||||
if (opt.timestamping)
|
|
||||||
{
|
|
||||||
bool local_dot_orig_file_exists = false;
|
|
||||||
|
|
||||||
if (opt.backup_converted)
|
|
||||||
/* If -K is specified, we'll act on the assumption that it was specified
|
|
||||||
last time these files were downloaded as well, and instead of just
|
|
||||||
comparing local file X against server file X, we'll compare local
|
|
||||||
file X.orig (if extant, else X) against server file X. If -K
|
|
||||||
_wasn't_ specified last time, or the server contains files called
|
|
||||||
*.orig, -N will be back to not operating correctly with -k. */
|
|
||||||
{
|
|
||||||
/* Would a single s[n]printf() call be faster? --dan
|
|
||||||
|
|
||||||
Definitely not. sprintf() is horribly slow. It's a
|
|
||||||
different question whether the difference between the two
|
|
||||||
affects a program. Usually I'd say "no", but at one
|
|
||||||
point I profiled Wget, and found that a measurable and
|
|
||||||
non-negligible amount of time was lost calling sprintf()
|
|
||||||
in url.c. Replacing sprintf with inline calls to
|
|
||||||
strcpy() and number_to_string() made a difference.
|
|
||||||
--hniksic */
|
|
||||||
memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
|
|
||||||
memcpy (filename_plus_orig_suffix + filename_len,
|
|
||||||
".orig", sizeof (".orig"));
|
|
||||||
|
|
||||||
/* Try to stat() the .orig file. */
|
|
||||||
if (stat (filename_plus_orig_suffix, &st) == 0)
|
|
||||||
{
|
|
||||||
local_dot_orig_file_exists = 1;
|
|
||||||
local_filename = filename_plus_orig_suffix;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!local_dot_orig_file_exists)
|
|
||||||
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
|
||||||
if (stat (*hstat.local_file, &st) == 0)
|
|
||||||
local_filename = *hstat.local_file;
|
|
||||||
|
|
||||||
if (local_filename != NULL)
|
|
||||||
/* There was a local file, so we'll check later to see if the version
|
|
||||||
the server has is the same version we already have, allowing us to
|
|
||||||
skip a download. */
|
|
||||||
{
|
|
||||||
use_ts = true;
|
|
||||||
tml = st.st_mtime;
|
|
||||||
#ifdef WINDOWS
|
|
||||||
/* Modification time granularity is 2 seconds for Windows, so
|
|
||||||
increase local time by 1 second for later comparison. */
|
|
||||||
tml++;
|
|
||||||
#endif
|
|
||||||
local_size = st.st_size;
|
|
||||||
got_head = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Reset the counter. */
|
/* Reset the counter. */
|
||||||
count = 0;
|
count = 0;
|
||||||
|
|
||||||
|
/* Reset the document type. */
|
||||||
*dt = 0;
|
*dt = 0;
|
||||||
|
|
||||||
/* THE loop */
|
/* THE loop */
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
DEBUGP(("in http_loop LOOP\n"));
|
||||||
|
|
||||||
/* Increment the pass counter. */
|
/* Increment the pass counter. */
|
||||||
++count;
|
++count;
|
||||||
sleep_between_retrievals (count);
|
sleep_between_retrievals (count);
|
||||||
|
|
||||||
/* Get the current time string. */
|
/* Get the current time string. */
|
||||||
tms = time_str (NULL);
|
tms = time_str (NULL);
|
||||||
|
|
||||||
/* Print fetch message, if opt.verbose. */
|
/* Print fetch message, if opt.verbose. */
|
||||||
if (opt.verbose)
|
if (opt.verbose)
|
||||||
{
|
{
|
||||||
@ -2143,8 +2161,8 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
strcpy (tmp, " ");
|
strcpy (tmp, " ");
|
||||||
if (count > 1)
|
if (count > 1)
|
||||||
sprintf (tmp, _("(try:%2d)"), count);
|
sprintf (tmp, _("(try:%2d)"), count);
|
||||||
logprintf (LOG_VERBOSE, "--%s-- %s\n %s => `%s'\n",
|
logprintf (LOG_VERBOSE, "--%s-- %s\n %s\n",
|
||||||
tms, hurl, tmp, locf);
|
tms, hurl, tmp);
|
||||||
#ifdef WINDOWS
|
#ifdef WINDOWS
|
||||||
ws_changetitle (hurl);
|
ws_changetitle (hurl);
|
||||||
#endif
|
#endif
|
||||||
@ -2154,14 +2172,14 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
/* Default document type is empty. However, if spider mode is
|
/* Default document type is empty. However, if spider mode is
|
||||||
on or time-stamping is employed, HEAD_ONLY commands is
|
on or time-stamping is employed, HEAD_ONLY commands is
|
||||||
encoded within *dt. */
|
encoded within *dt. */
|
||||||
if (opt.spider || (use_ts && !got_head))
|
if (opt.spider || (opt.timestamping && !got_head))
|
||||||
*dt |= HEAD_ONLY;
|
*dt |= HEAD_ONLY;
|
||||||
else
|
else
|
||||||
*dt &= ~HEAD_ONLY;
|
*dt &= ~HEAD_ONLY;
|
||||||
|
|
||||||
/* Decide whether or not to restart. */
|
/* Decide whether or not to restart. */
|
||||||
if (opt.always_rest
|
if (opt.always_rest
|
||||||
&& stat (locf, &st) == 0
|
&& stat (hstat.local_file, &st) == 0
|
||||||
&& S_ISREG (st.st_mode))
|
&& S_ISREG (st.st_mode))
|
||||||
/* When -c is used, continue from on-disk size. (Can't use
|
/* When -c is used, continue from on-disk size. (Can't use
|
||||||
hstat.len even if count>1 because we don't want a failed
|
hstat.len even if count>1 because we don't want a failed
|
||||||
@ -2189,18 +2207,13 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
/* Try fetching the document, or at least its head. */
|
/* Try fetching the document, or at least its head. */
|
||||||
err = gethttp (u, &hstat, dt, proxy);
|
err = gethttp (u, &hstat, dt, proxy);
|
||||||
|
|
||||||
/* It's unfortunate that wget determines the local filename before finding
|
|
||||||
out the Content-Type of the file. Barring a major restructuring of the
|
|
||||||
code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
|
||||||
*hstat.local_file to tack on ".html". */
|
|
||||||
if (!opt.output_document)
|
|
||||||
locf = *hstat.local_file;
|
|
||||||
|
|
||||||
/* Time? */
|
/* Time? */
|
||||||
tms = time_str (NULL);
|
tms = time_str (NULL);
|
||||||
|
|
||||||
/* Get the new location (with or without the redirection). */
|
/* Get the new location (with or without the redirection). */
|
||||||
if (hstat.newloc)
|
if (hstat.newloc)
|
||||||
*newloc = xstrdup (hstat.newloc);
|
*newloc = xstrdup (hstat.newloc);
|
||||||
|
|
||||||
switch (err)
|
switch (err)
|
||||||
{
|
{
|
||||||
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
|
case HERR: case HEOF: case CONSOCKERR: case CONCLOSED:
|
||||||
@ -2209,70 +2222,41 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
/* Non-fatal errors continue executing the loop, which will
|
/* Non-fatal errors continue executing the loop, which will
|
||||||
bring them to "while" statement at the end, to judge
|
bring them to "while" statement at the end, to judge
|
||||||
whether the number of tries was exceeded. */
|
whether the number of tries was exceeded. */
|
||||||
free_hstat (&hstat);
|
//free_hstat (&hstat);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
if (err == FOPEN_EXCL_ERR)
|
|
||||||
{
|
|
||||||
/* Re-determine the file name. */
|
|
||||||
if (local_file && *local_file)
|
|
||||||
{
|
|
||||||
xfree (*local_file);
|
|
||||||
*local_file = url_file_name (u);
|
|
||||||
hstat.local_file = local_file;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
xfree (dummy);
|
|
||||||
dummy = url_file_name (u);
|
|
||||||
hstat.local_file = &dummy;
|
|
||||||
}
|
|
||||||
/* be honest about where we will save the file */
|
|
||||||
if (local_file && opt.output_document)
|
|
||||||
*local_file = HYPHENP (opt.output_document) ? NULL : xstrdup (opt.output_document);
|
|
||||||
if (!opt.output_document)
|
|
||||||
locf = *hstat.local_file;
|
|
||||||
else
|
|
||||||
locf = opt.output_document;
|
|
||||||
}
|
|
||||||
continue;
|
continue;
|
||||||
case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
|
case HOSTERR: case CONIMPOSSIBLE: case PROXERR: case AUTHFAILED:
|
||||||
case SSLINITFAILED: case CONTNOTSUPPORTED:
|
case SSLINITFAILED: case CONTNOTSUPPORTED:
|
||||||
/* Fatal errors just return from the function. */
|
/* Fatal errors just return from the function. */
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return err;
|
return err;
|
||||||
case FWRITEERR: case FOPENERR:
|
case FWRITEERR: case FOPENERR:
|
||||||
/* Another fatal error. */
|
/* Another fatal error. */
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
||||||
*hstat.local_file, strerror (errno));
|
hstat.local_file, strerror (errno));
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return err;
|
return err;
|
||||||
case CONSSLERR:
|
case CONSSLERR:
|
||||||
/* Another fatal error. */
|
/* Another fatal error. */
|
||||||
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return err;
|
return err;
|
||||||
case NEWLOCATION:
|
case NEWLOCATION:
|
||||||
/* Return the new location to the caller. */
|
/* Return the new location to the caller. */
|
||||||
if (!hstat.newloc)
|
if (!*newloc)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("ERROR: Redirection (%d) without location.\n"),
|
_("ERROR: Redirection (%d) without location.\n"),
|
||||||
hstat.statcode);
|
hstat.statcode);
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return NEWLOCATION;
|
return NEWLOCATION;
|
||||||
case RETRUNNEEDED:
|
case RETRUNNEEDED:
|
||||||
/* The file was already fully retrieved. */
|
/* The file was already fully retrieved. */
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
return RETROK;
|
||||||
case RETRFINISHED:
|
case RETRFINISHED:
|
||||||
/* Deal with you later. */
|
/* Deal with you later. */
|
||||||
@ -2281,6 +2265,7 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
/* All possibilities should have been exhausted. */
|
/* All possibilities should have been exhausted. */
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!(*dt & RETROKF))
|
if (!(*dt & RETROKF))
|
||||||
{
|
{
|
||||||
if (!opt.verbose)
|
if (!opt.verbose)
|
||||||
@ -2294,7 +2279,6 @@ File `%s' already there; not retrieving.\n\n"), *hstat.local_file);
|
|||||||
tms, hstat.statcode, escnonprint (hstat.error));
|
tms, hstat.statcode, escnonprint (hstat.error));
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2317,13 +2301,12 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* The time-stamping section. */
|
/* The time-stamping section. */
|
||||||
if (use_ts)
|
if (opt.timestamping && !got_head)
|
||||||
{
|
{
|
||||||
got_head = true;
|
got_head = true; /* no more time-stamping */
|
||||||
*dt &= ~HEAD_ONLY;
|
*dt &= ~HEAD_ONLY;
|
||||||
use_ts = false; /* no more time-stamping */
|
count = 0; /* the retrieve count for HEAD is reset */
|
||||||
count = 0; /* the retrieve count for HEAD is
|
|
||||||
reset */
|
|
||||||
if (hstat.remote_time && tmr != (time_t) (-1))
|
if (hstat.remote_time && tmr != (time_t) (-1))
|
||||||
{
|
{
|
||||||
/* Now time-stamping can be used validly. Time-stamping
|
/* Now time-stamping can be used validly. Time-stamping
|
||||||
@ -2331,27 +2314,33 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|
|||||||
match, and local file is newer than the remote file,
|
match, and local file is newer than the remote file,
|
||||||
it will not be retrieved. Otherwise, the normal
|
it will not be retrieved. Otherwise, the normal
|
||||||
download procedure is resumed. */
|
download procedure is resumed. */
|
||||||
if (tml >= tmr &&
|
if (hstat.orig_file_tstamp >= tmr)
|
||||||
(hstat.contlen == -1 || local_size == hstat.contlen))
|
{
|
||||||
|
if (hstat.contlen == -1 || hstat.orig_file_size == hstat.contlen)
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
||||||
local_filename);
|
hstat.orig_file_name);
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (tml >= tmr)
|
else
|
||||||
|
{
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
The sizes do not match (local %s) -- retrieving.\n"),
|
The sizes do not match (local %s) -- retrieving.\n"),
|
||||||
number_to_static_string (local_size));
|
number_to_static_string (local_size));
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
logputs (LOG_VERBOSE,
|
logputs (LOG_VERBOSE,
|
||||||
_("Remote file is newer, retrieving.\n"));
|
_("Remote file is newer, retrieving.\n"));
|
||||||
}
|
}
|
||||||
free_hstat (&hstat);
|
|
||||||
|
//free_hstat (&hstat);
|
||||||
|
hstat.timestamp_checked = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ((tmr != (time_t) (-1))
|
if ((tmr != (time_t) (-1))
|
||||||
&& !opt.spider
|
&& !opt.spider
|
||||||
&& ((hstat.len == hstat.contlen) ||
|
&& ((hstat.len == hstat.contlen) ||
|
||||||
@ -2366,7 +2355,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
fl = opt.output_document;
|
fl = opt.output_document;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fl = *hstat.local_file;
|
fl = hstat.local_file;
|
||||||
if (fl)
|
if (fl)
|
||||||
touch (fl, tmr);
|
touch (fl, tmr);
|
||||||
}
|
}
|
||||||
@ -2376,7 +2365,6 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
|
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode,
|
||||||
escnonprint (hstat.error));
|
escnonprint (hstat.error));
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2389,7 +2377,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE,
|
logprintf (LOG_VERBOSE,
|
||||||
_("%s (%s) - `%s' saved [%s/%s]\n\n"),
|
_("%s (%s) - `%s' saved [%s/%s]\n\n"),
|
||||||
tms, tmrate, locf,
|
tms, tmrate, hstat.local_file,
|
||||||
number_to_static_string (hstat.len),
|
number_to_static_string (hstat.len),
|
||||||
number_to_static_string (hstat.contlen));
|
number_to_static_string (hstat.contlen));
|
||||||
logprintf (LOG_NONVERBOSE,
|
logprintf (LOG_NONVERBOSE,
|
||||||
@ -2397,19 +2385,18 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
tms, u->url,
|
tms, u->url,
|
||||||
number_to_static_string (hstat.len),
|
number_to_static_string (hstat.len),
|
||||||
number_to_static_string (hstat.contlen),
|
number_to_static_string (hstat.contlen),
|
||||||
locf, count);
|
hstat.local_file, count);
|
||||||
}
|
}
|
||||||
++opt.numurls;
|
++opt.numurls;
|
||||||
total_downloaded_bytes += hstat.len;
|
total_downloaded_bytes += hstat.len;
|
||||||
|
|
||||||
/* Remember that we downloaded the file for later ".orig" code. */
|
/* Remember that we downloaded the file for later ".orig" code. */
|
||||||
if (*dt & ADDED_HTML_EXTENSION)
|
if (*dt & ADDED_HTML_EXTENSION)
|
||||||
downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
|
||||||
else
|
else
|
||||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
|
||||||
|
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (hstat.res == 0) /* No read error */
|
else if (hstat.res == 0) /* No read error */
|
||||||
@ -2421,24 +2408,23 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE,
|
logprintf (LOG_VERBOSE,
|
||||||
_("%s (%s) - `%s' saved [%s]\n\n"),
|
_("%s (%s) - `%s' saved [%s]\n\n"),
|
||||||
tms, tmrate, locf,
|
tms, tmrate, hstat.local_file,
|
||||||
number_to_static_string (hstat.len));
|
number_to_static_string (hstat.len));
|
||||||
logprintf (LOG_NONVERBOSE,
|
logprintf (LOG_NONVERBOSE,
|
||||||
"%s URL:%s [%s] -> \"%s\" [%d]\n",
|
"%s URL:%s [%s] -> \"%s\" [%d]\n",
|
||||||
tms, u->url, number_to_static_string (hstat.len),
|
tms, u->url, number_to_static_string (hstat.len),
|
||||||
locf, count);
|
hstat.local_file, count);
|
||||||
}
|
}
|
||||||
++opt.numurls;
|
++opt.numurls;
|
||||||
total_downloaded_bytes += hstat.len;
|
total_downloaded_bytes += hstat.len;
|
||||||
|
|
||||||
/* Remember that we downloaded the file for later ".orig" code. */
|
/* Remember that we downloaded the file for later ".orig" code. */
|
||||||
if (*dt & ADDED_HTML_EXTENSION)
|
if (*dt & ADDED_HTML_EXTENSION)
|
||||||
downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, locf);
|
downloaded_file(FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED, hstat.local_file);
|
||||||
else
|
else
|
||||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
downloaded_file(FILE_DOWNLOADED_NORMALLY, hstat.local_file);
|
||||||
|
|
||||||
free_hstat (&hstat);
|
free_hstat (&hstat);
|
||||||
xfree_null (dummy);
|
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
||||||
@ -2448,7 +2434,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
_("%s (%s) - Connection closed at byte %s. "),
|
_("%s (%s) - Connection closed at byte %s. "),
|
||||||
tms, tmrate, number_to_static_string (hstat.len));
|
tms, tmrate, number_to_static_string (hstat.len));
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
free_hstat (&hstat);
|
//free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -2456,7 +2442,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
requested with content-length, which we never do. */
|
requested with content-length, which we never do. */
|
||||||
abort ();
|
abort ();
|
||||||
}
|
}
|
||||||
else /* now hstat.res can only be -1 */
|
else /* from now on hstat.res can only be -1 */
|
||||||
{
|
{
|
||||||
if (hstat.contlen == -1)
|
if (hstat.contlen == -1)
|
||||||
{
|
{
|
||||||
@ -2465,7 +2451,7 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
tms, tmrate, number_to_static_string (hstat.len),
|
tms, tmrate, number_to_static_string (hstat.len),
|
||||||
hstat.rderrmsg);
|
hstat.rderrmsg);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
free_hstat (&hstat);
|
//free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else /* hstat.res == -1 and contlen is given */
|
else /* hstat.res == -1 and contlen is given */
|
||||||
@ -2477,13 +2463,14 @@ The sizes do not match (local %s) -- retrieving.\n"),
|
|||||||
number_to_static_string (hstat.contlen),
|
number_to_static_string (hstat.contlen),
|
||||||
hstat.rderrmsg);
|
hstat.rderrmsg);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
free_hstat (&hstat);
|
//free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* not reached */
|
/* not reached */
|
||||||
}
|
}
|
||||||
while (!opt.ntry || (count < opt.ntry));
|
while (!opt.ntry || (count < opt.ntry));
|
||||||
|
|
||||||
return TRYLIMEXC;
|
return TRYLIMEXC;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2878,6 +2865,18 @@ create_authorization_line (const char *au, const char *user,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
load_cookies (void)
|
||||||
|
{
|
||||||
|
if (!wget_cookie_jar)
|
||||||
|
wget_cookie_jar = cookie_jar_new ();
|
||||||
|
if (opt.cookies_input && !cookies_loaded_p)
|
||||||
|
{
|
||||||
|
cookie_jar_load (wget_cookie_jar, opt.cookies_input);
|
||||||
|
cookies_loaded_p = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
save_cookies (void)
|
save_cookies (void)
|
||||||
{
|
{
|
||||||
@ -2892,3 +2891,8 @@ http_cleanup (void)
|
|||||||
if (wget_cookie_jar)
|
if (wget_cookie_jar)
|
||||||
cookie_jar_delete (wget_cookie_jar);
|
cookie_jar_delete (wget_cookie_jar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* vim: et ts=2 sw=2
|
||||||
|
*/
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user