1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Implemented the item I formerly had in the TODO: When -K and -N are used

together, we compare local file X.orig (if extant) against server file X.
Previously -k and -N were worthless in combination because the local converted
files always differed from the server versions.
This commit is contained in:
dan 2000-03-01 22:33:48 -08:00
parent c33c857eb2
commit 4331c39c9a
10 changed files with 196 additions and 30 deletions

View File

@ -1,3 +1,9 @@
2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
* NEWS (-K): Now possible to use -N with -k thanks to this option.
* TODO: Removed the -K / -N interaction item.
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
* NEWS (-K / --backup-converted): Mentioned this new option.

3
NEWS
View File

@ -8,7 +8,8 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
* Changes in Wget 1.5.3+dev
** New -K / --backup-converted / backup_converted = on option causes files
modified due to -k to be saved with a .orig prefix before being changed.
modified due to -k to be saved with a .orig prefix before being changed. When
using -N as well, it is these .orig files that are compared against the server.
* Wget 1.5.3 is a bugfix release with no user-visible changes.

3
TODO
View File

@ -76,6 +76,3 @@ particular order. Not all of them are user-visible changes.
* Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.)
* Support SSL encryption through SSLeay or OpenSSL.
* When -K is used with -N, check local file X.orig (if extant) against server
file X.

View File

@ -1,3 +1,35 @@
2000-03-01 Dan Harkless <dan-wget@dilvish.speed.net>
* ftp.c (ftp_loop_internal): Call new downloaded_file() function,
even though we don't do conversion on HTML files retrieved via
FTP, so _current_ usage of downloaded_file() makes this call unneeded.
(ftp_retrieve_list): Added a comment saying where we need to
stat() a .orig file if FTP'd HTML file conversion is ever implemented.
(ftp_retrieve_list): "Local file '%s' is more recent," is sometimes
a lie -- reworded as "Server file no newer than local file '%s' --".
* http.c (http_loop): Fixed a typo and clarified a comment.
(http_loop): When -K and -N are specified together, compare size
and timestamp of server file X against local file X.orig (if
extant) rather than converted local file X.
(http_loop): "Local file '%s' is more recent," is sometimes a lie
-- reworded as "Server file no newer than local file '%s' --".
(http_loop): Call new downloaded_file() function to prevent
wrongful overwriting of .orig file when -N is specified.
* url.c (convert_links): When -K specified, only rename X to
X.orig if downloaded_file() returns TRUE. Otherwise when we skip
file X due to -N, we clobber an X.orig from a previous invocation.
(convert_links): Call the failsafe xstrdup(), not the real strdup().
(convert_links): Added a note asking anyone who understands how
multiple URLs can correspond to a single file to comment it.
(downloaded_file): Added this new function.
* url.h (downloaded_file): Added prototype for this new function
as well as its downloaded_file_t enum type.
* wget.h (boolean): Added this new typedef and TRUE and FALSE #defines.
2000-02-29 Dan Harkless <dan-wget@dilvish.speed.net>
* version.c: Upped version to developer-only "1.5.3+dev".

View File

@ -947,6 +947,11 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Not as great. */
abort ();
}
/* If we get out of the switch above without continue'ing, we've
successfully downloaded a file. Remember this fact. */
downloaded_file(ADD_FILE, locf);
if (con->st & ON_YOUR_OWN)
{
CLOSE (RBUF_FD (&con->rbuf));
@ -1086,6 +1091,11 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
if (opt.timestamping && f->type == FT_PLAINFILE)
{
struct stat st;
/* If conversion of HTML files retrieved via FTP is ever implemented,
we'll need to stat() <file>.orig here when -K has been specified.
I'm not implementing it now since files on an FTP server are much
more likely than files on an HTTP server to legitimately have a
.orig suffix. */
if (!stat (u->local, &st))
{
/* Else, get it from the file. */
@ -1094,13 +1104,13 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
if (local_size == f->size && tml >= f->tstamp)
{
logprintf (LOG_VERBOSE, _("\
Local file `%s' is more recent, not retrieving.\n\n"), u->local);
Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
dlthis = 0;
}
else if (local_size != f->size)
{
logprintf (LOG_VERBOSE, _("\
The sizes do not match (local %ld), retrieving.\n"), local_size);
The sizes do not match (local %ld) -- retrieving.\n"), local_size);
}
}
} /* opt.timestamping && f->type == FT_PLAINFILE */

View File

@ -840,6 +840,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
static int first_retrieval = 1;
int count;
int local_dot_orig_file_exists = FALSE;
int use_ts, got_head = 0; /* time-stamping info */
char *tms, *suf, *locf, *tmrate;
uerr_t err;
@ -851,7 +852,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
*newloc = NULL;
/* Warn on (likely bogus) wildcard usage in HTTP. Don't use
has_wildcards_p because it would also warn on `?', and we that
has_wildcards_p because it would also warn on `?', and we know that
shows up in CGI paths a *lot*. */
if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
@ -888,7 +889,43 @@ File `%s' already there, will not retrieve.\n"), u->local);
use_ts = 0;
if (opt.timestamping)
{
if (stat (u->local, &st) == 0)
boolean local_file_exists = FALSE;
if (opt.backup_converted)
/* If -K is specified, we'll act on the assumption that it was specified
last time these files were downloaded as well, and instead of just
comparing local file X against server file X, we'll compare local
file X.orig (if extant, else X) against server file X. If -K
_wasn't_ specified last time, or the server contains files called
*.orig, -N will be back to not operating correctly with -k. */
{
size_t filename_len = strlen(u->local);
char* filename_plus_orig_suffix = malloc(filename_len +
sizeof(".orig"));
/* Would a single s[n]printf() call be faster? */
strcpy(filename_plus_orig_suffix, u->local);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
/* Try to stat() the .orig file. */
if (stat(filename_plus_orig_suffix, &st) == 0)
{
local_file_exists = TRUE;
local_dot_orig_file_exists = TRUE;
}
free(filename_plus_orig_suffix);
}
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
if (stat (u->local, &st) == 0)
local_file_exists = TRUE;
if (local_file_exists)
/* There was a local file, so we'll check later to see if the version
the server has is the same version we already have, allowing us to
skip a download. */
{
use_ts = 1;
tml = st.st_mtime;
@ -1051,14 +1088,26 @@ Last-modified header invalid -- time-stamp ignored.\n"));
if (tml >= tmr &&
(hstat.contlen == -1 || local_size == hstat.contlen))
{
logprintf (LOG_VERBOSE, _("\
Local file `%s' is more recent, not retrieving.\n\n"), u->local);
if (local_dot_orig_file_exists)
/* We can't collapse this down into just one logprintf()
call with a variable set to u->local or the .orig
filename because we have to malloc() space for the
latter, and because there are multiple returns above (a
coding style no-no by many measures, for reasons such as
this) we'd have to remember to free() the string at each
one to avoid a memory leak. */
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
u->local);
else
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
FREEHSTAT (hstat);
return RETROK;
}
else if (tml >= tmr)
logprintf (LOG_VERBOSE, _("\
The sizes do not match (local %ld), retrieving.\n"), local_size);
The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
@ -1103,12 +1152,13 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
}
++opt.numurls;
opt.downloaded += hstat.len;
downloaded_file(ADD_FILE, locf);
return RETROK;
}
else if (hstat.res == 0) /* No read error */
{
if (hstat.contlen == -1) /* We don't know how much we were
supposed to get, so... */
if (hstat.contlen == -1) /* We don't know how much we were supposed
to get, so assume we succeeded. */
{
if (*dt & RETROKF)
{
@ -1121,6 +1171,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
}
++opt.numurls;
opt.downloaded += hstat.len;
downloaded_file(ADD_FILE, locf);
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
@ -1142,6 +1193,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
tms, u->url, hstat.len, hstat.contlen, locf, count);
++opt.numurls;
opt.downloaded += hstat.len;
downloaded_file(ADD_FILE, locf);
return RETROK;
}
else /* the same, but not accepted */

View File

@ -350,7 +350,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (result != URLOK || u->proto != URLHTTP)
{
if (u->proto == URLHTTP)
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg (result));
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
@ -455,7 +455,8 @@ retrieve_from_file (const char *file, int html, int *count)
}
status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
if (opt.recursive && status == RETROK && (dt & TEXTHTML))
status = recursive_retrieve (filename, new_file ? new_file : cur_url->url);
status = recursive_retrieve (filename, new_file ? new_file
: cur_url->url);
if (filename && opt.delete_after && file_exists_p (filename))
{

View File

@ -1366,10 +1366,9 @@ no_proxy_match (const char *host, const char **no_proxy)
void
convert_links (const char *file, urlpos *l)
{
FILE *fp;
char *buf, *p, *p2;
long size;
static slist* converted_files = NULL;
FILE *fp;
char *buf, *p, *p2;
long size;
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
/* Read from the file.... */
@ -1383,28 +1382,34 @@ convert_links (const char *file, urlpos *l)
/* ...to a buffer. */
load_file (fp, &buf, &size);
fclose (fp);
if (opt.backup_converted)
if (opt.backup_converted && downloaded_file(CHECK_FOR_FILE, file))
/* Rather than just writing over the original .html file with the converted
version, save the former to *.orig. */
version, save the former to *.orig. Note we only do this for files we've
_successfully_ downloaded, so we don't clobber .orig files sitting around
from previous invocations. */
{
/* Construct the backup filename as the original name plus ".orig". */
size_t filename_len = strlen(file);
char* filename_plus_orig_suffix = malloc(filename_len + sizeof(".orig"));
int already_wrote_backup_file = 0;
slist* converted_file_ptr;
size_t filename_len = strlen(file);
char* filename_plus_orig_suffix = malloc(filename_len +
sizeof(".orig"));
boolean already_wrote_backup_file = FALSE;
slist* converted_file_ptr;
static slist* converted_files = NULL;
/* Would a single s[n]printf() call be faster? */
strcpy(filename_plus_orig_suffix, file);
strcpy(filename_plus_orig_suffix + filename_len, ".orig");
/* We can get called twice on the same URL thanks to the
convert_all_links() call in main(). If we write the .orig file each
time in such a case, it'll end up containing the first-pass conversion,
not the original file. */
not the original file. So, see if we've already been called on this
file. */
converted_file_ptr = converted_files;
while (converted_file_ptr != NULL)
if (strcmp(converted_file_ptr->string, file) == 0)
{
already_wrote_backup_file = 1;
already_wrote_backup_file = TRUE;
break;
}
else
@ -1421,10 +1426,13 @@ convert_links (const char *file, urlpos *l)
Note that we never free this memory since we need it till the
convert_all_links() call, which is one of the last things the
program does before terminating. BTW, I'm not sure if it would be
safe to just set converted_file_ptr->string to file below, rather
than making a copy of the string... */
safe to just set 'converted_file_ptr->string' to 'file' below,
rather than making a copy of the string... Another note is that I
thought I could just add a field to the urlpos structure saying
that we'd written a .orig file for this URL, but that didn't work,
so I had to make this separate list. */
converted_file_ptr = malloc(sizeof(slist));
converted_file_ptr->string = strdup(file);
converted_file_ptr->string = xstrdup(file); /* die on out-of-mem. */
converted_file_ptr->next = converted_files;
converted_files = converted_file_ptr;
}
@ -1440,6 +1448,8 @@ convert_links (const char *file, urlpos *l)
free (buf);
return;
}
/* [If someone understands why multiple URLs can correspond to one local file,
can they please add a comment here...?] */
for (p = buf; l; l = l->next)
{
if (l->pos >= size)
@ -1547,3 +1557,44 @@ add_url (urlpos *l, const char *url, const char *file)
t->next = l;
return t;
}
/* Remembers which files have been downloaded. Should be called with
add_or_check == ADD_FILE for each file we actually download successfully
(i.e. not for ones we have failures on or that we skip due to -N). If you
just want to check if a file has been previously added without adding it,
call with add_or_check == CHECK_FOR_FILE. Please be sure to call this
function with local filenames, not remote URLs -- by some means that isn't
commented well enough for me understand, multiple remote URLs can apparently
correspond to a single local file. */
boolean
downloaded_file (downloaded_file_t add_or_check, const char* file)
{
boolean found_file = FALSE;
static slist* downloaded_files = NULL;
slist* rover = downloaded_files;
while (rover != NULL)
if (strcmp(rover->string, file) == 0)
{
found_file = TRUE;
break;
}
else
rover = rover->next;
if (found_file)
return TRUE; /* file had already been downloaded */
else
{
if (add_or_check == ADD_FILE)
{
rover = malloc(sizeof(slist));
rover->string = xstrdup(file); /* die on out-of-mem. */
rover->next = downloaded_files;
downloaded_files = rover;
}
return FALSE; /* file had not already been downloaded */
}
}

View File

@ -62,6 +62,12 @@ typedef struct _urlpos
struct _urlpos *next; /* Next struct in list */
} urlpos;
/* Controls how downloaded_file() behaves. */
typedef enum
{
ADD_FILE,
CHECK_FOR_FILE
} downloaded_file_t;
/* Function declarations */
@ -95,4 +101,6 @@ int no_proxy_match PARAMS ((const char *, const char **));
void convert_links PARAMS ((const char *, urlpos *));
urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
boolean downloaded_file PARAMS ((downloaded_file_t, const char *));
#endif /* URL_H */

View File

@ -198,4 +198,12 @@ typedef enum
ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED, QUOTEXC, WRITEFAILED
} uerr_t;
typedef unsigned char boolean;
#ifndef FALSE
#define FALSE 0
#endif
#ifndef TRUE
#define TRUE 1
#endif
#endif /* WGET_H */