[svn] Implemented the item I formerly had in the TODO: when -K and -N are
used together, we compare local file X.orig (if extant) against server file X.
Previously -k and -N were worthless in combination, because the locally
converted files always differed from the server versions.
dan 2000-03-01 22:33:48 -08:00
parent c33c857eb2
commit 4331c39c9a
10 changed files with 196 additions and 30 deletions
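In outline, the new timestamping logic chooses which local file to compare as
follows. This is a minimal standalone sketch assuming POSIX stat(); the helper
name local_version_to_compare and the main() driver are illustrative, not
names from the patch:

    /* sketch.c -- illustrative only, not wget source. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/stat.h>

    /* Pick the local file to size/timestamp-check against the server's copy:
       with -K (backup_converted), prefer X.orig -- the unconverted original
       -- over the -k-converted X.  Returns nonzero if some local version
       exists, filling *st; *used_orig says which file was consulted. */
    static int
    local_version_to_compare (const char *local, int backup_converted,
                              struct stat *st, int *used_orig)
    {
      *used_orig = 0;
      if (backup_converted)
        {
          size_t len = strlen (local);
          char *orig = malloc (len + sizeof (".orig"));
          if (orig != NULL)
            {
              strcpy (orig, local);
              strcpy (orig + len, ".orig");
              if (stat (orig, st) == 0)
                *used_orig = 1;
              free (orig);
            }
          if (*used_orig)
            return 1;
        }
      return stat (local, st) == 0;   /* no X.orig (or no -K): fall back to X */
    }

    int
    main (void)
    {
      struct stat st;
      int used_orig;
      if (local_version_to_compare ("index.html", 1, &st, &used_orig))
        printf ("comparing %s (size %ld, mtime %ld) against server copy\n",
                used_orig ? "index.html.orig" : "index.html",
                (long) st.st_size, (long) st.st_mtime);
      return 0;
    }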

ChangeLog

@@ -1,3 +1,9 @@
+2000-03-01  Dan Harkless  <dan-wget@dilvish.speed.net>
+
+	* NEWS (-K): Now possible to use -N with -k thanks to this option.
+
+	* TODO: Removed the -K / -N interaction item.
+
 2000-02-29  Dan Harkless  <dan-wget@dilvish.speed.net>

 	* NEWS (-K / --backup-converted): Mentioned this new option.

NEWS

@@ -8,7 +8,8 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.

 * Changes in Wget 1.5.3+dev

 ** New -K / --backup-converted / backup_converted = on option causes files
-   modified due to -k to be saved with a .orig prefix before being changed.
+   modified due to -k to be saved with a .orig prefix before being changed.  When
+   using -N as well, it is these .orig files that are compared against the server.

 * Wget 1.5.3 is a bugfix release with no user-visible changes.
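For example (an illustrative invocation, not part of this commit), a repeated
mirror such as

    wget -r -N -k -K http://www.example.com/

can now skip unchanged pages on the second run: the -N size/timestamp check
consults each page's unconverted .orig backup instead of the -k-converted
copy, which would always differ from the server's version.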

TODO

@@ -76,6 +76,3 @@ particular order.  Not all of them are user-visible changes.
 * Implement more HTTP/1.1 bells and whistles (ETag, Content-MD5 etc.)

 * Support SSL encryption through SSLeay or OpenSSL.
-
-* When -K is used with -N, check local file X.orig (if extant) against server
-  file X.

src/ChangeLog

@@ -1,3 +1,35 @@
+2000-03-01  Dan Harkless  <dan-wget@dilvish.speed.net>
+
+	* ftp.c (ftp_loop_internal): Call new downloaded_file() function,
+	even though we don't do conversion on HTML files retrieved via
+	FTP, so _current_ usage of downloaded_file() makes this call unneeded.
+	(ftp_retrieve_list): Added a comment saying where we need to
+	stat() a .orig file if FTP'd HTML file conversion is ever implemented.
+	(ftp_retrieve_list): "Local file '%s' is more recent," is sometimes
+	a lie -- reworded as "Server file no newer than local file '%s' --".
+
+	* http.c (http_loop): Fixed a typo and clarified a comment.
+	(http_loop): When -K and -N are specified together, compare size
+	and timestamp of server file X against local file X.orig (if
+	extant) rather than converted local file X.
+	(http_loop): "Local file '%s' is more recent," is sometimes a lie
+	-- reworded as "Server file no newer than local file '%s' --".
+	(http_loop): Call new downloaded_file() function to prevent
+	wrongful overwriting of .orig file when -N is specified.
+
+	* url.c (convert_links): When -K specified, only rename X to
+	X.orig if downloaded_file() returns TRUE.  Otherwise when we skip
+	file X due to -N, we clobber an X.orig from a previous invocation.
+	(convert_links): Call the failsafe xstrdup(), not the real strdup().
+	(convert_links): Added a note asking anyone who understands how
+	multiple URLs can correspond to a single file to comment it.
+	(downloaded_file): Added this new function.
+
+	* url.h (downloaded_file): Added prototype for this new function
+	as well as its downloaded_file_t enum type.
+
+	* wget.h (boolean): Added this new typedef and TRUE and FALSE #defines.
+
 2000-02-29  Dan Harkless  <dan-wget@dilvish.speed.net>

 	* version.c: Upped version to developer-only "1.5.3+dev".

src/ftp.c

@@ -947,6 +947,11 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
 	  /* Not as great. */
 	  abort ();
 	}
+
+      /* If we get out of the switch above without continue'ing, we've
+	 successfully downloaded a file.  Remember this fact. */
+      downloaded_file(ADD_FILE, locf);
+
       if (con->st & ON_YOUR_OWN)
 	{
 	  CLOSE (RBUF_FD (&con->rbuf));
@@ -1086,6 +1091,11 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
       if (opt.timestamping && f->type == FT_PLAINFILE)
 	{
 	  struct stat st;
+	  /* If conversion of HTML files retrieved via FTP is ever implemented,
+	     we'll need to stat() <file>.orig here when -K has been specified.
+	     I'm not implementing it now since files on an FTP server are much
+	     more likely than files on an HTTP server to legitimately have a
+	     .orig suffix. */
 	  if (!stat (u->local, &st))
 	    {
 	      /* Else, get it from the file. */
@@ -1094,13 +1104,13 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
 	      if (local_size == f->size && tml >= f->tstamp)
 		{
 		  logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
 		  dlthis = 0;
 		}
 	      else if (local_size != f->size)
 		{
 		  logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
 		}
 	    }
 	} /* opt.timestamping && f->type == FT_PLAINFILE */

src/http.c

@@ -840,6 +840,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
   static int first_retrieval = 1;

   int count;
+  int local_dot_orig_file_exists = FALSE;
   int use_ts, got_head = 0;	/* time-stamping info */
   char *tms, *suf, *locf, *tmrate;
   uerr_t err;
@@ -851,7 +852,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
   *newloc = NULL;

   /* Warn on (likely bogus) wildcard usage in HTTP.  Don't use
-     has_wildcards_p because it would also warn on `?', and we that
+     has_wildcards_p because it would also warn on `?', and we know that
      shows up in CGI paths a *lot*.  */
   if (strchr (u->url, '*'))
     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
@@ -888,7 +889,43 @@ File `%s' already there, will not retrieve.\n"), u->local);
   use_ts = 0;
   if (opt.timestamping)
     {
-      if (stat (u->local, &st) == 0)
+      boolean local_file_exists = FALSE;
+
+      if (opt.backup_converted)
+	/* If -K is specified, we'll act on the assumption that it was specified
+	   last time these files were downloaded as well, and instead of just
+	   comparing local file X against server file X, we'll compare local
+	   file X.orig (if extant, else X) against server file X.  If -K
+	   _wasn't_ specified last time, or the server contains files called
+	   *.orig, -N will be back to not operating correctly with -k. */
+	{
+	  size_t filename_len = strlen(u->local);
+	  char*  filename_plus_orig_suffix = malloc(filename_len +
+						    sizeof(".orig"));
+
+	  /* Would a single s[n]printf() call be faster? */
+	  strcpy(filename_plus_orig_suffix, u->local);
+	  strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+
+	  /* Try to stat() the .orig file. */
+	  if (stat(filename_plus_orig_suffix, &st) == 0)
+	    {
+	      local_file_exists = TRUE;
+	      local_dot_orig_file_exists = TRUE;
+	    }
+
+	  free(filename_plus_orig_suffix);
+	}
+
+      if (!local_dot_orig_file_exists)
+	/* Couldn't stat() <file>.orig, so try to stat() <file>. */
+	if (stat (u->local, &st) == 0)
+	  local_file_exists = TRUE;
+
+      if (local_file_exists)
+	/* There was a local file, so we'll check later to see if the version
+	   the server has is the same version we already have, allowing us to
+	   skip a download. */
 	{
 	  use_ts = 1;
 	  tml = st.st_mtime;
@@ -1051,14 +1088,26 @@ Last-modified header invalid -- time-stamp ignored.\n"));
 	      if (tml >= tmr &&
 		  (hstat.contlen == -1 || local_size == hstat.contlen))
 		{
-		  logprintf (LOG_VERBOSE, _("\
-Local file `%s' is more recent, not retrieving.\n\n"), u->local);
+		  if (local_dot_orig_file_exists)
+		    /* We can't collapse this down into just one logprintf()
+		       call with a variable set to u->local or the .orig
+		       filename because we have to malloc() space for the
+		       latter, and because there are multiple returns above (a
+		       coding style no-no by many measures, for reasons such as
+		       this) we'd have to remember to free() the string at each
+		       one to avoid a memory leak. */
+		    logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s.orig' -- not retrieving.\n\n"),
+			       u->local);
+		  else
+		    logprintf (LOG_VERBOSE, _("\
+Server file no newer than local file `%s' -- not retrieving.\n\n"), u->local);
 		  FREEHSTAT (hstat);
 		  return RETROK;
 		}
 	      else if (tml >= tmr)
 		logprintf (LOG_VERBOSE, _("\
-The sizes do not match (local %ld), retrieving.\n"), local_size);
+The sizes do not match (local %ld) -- retrieving.\n"), local_size);
 	      else
 		logputs (LOG_VERBOSE,
 			 _("Remote file is newer, retrieving.\n"));
@@ -1103,12 +1152,13 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
 	    }
 	  ++opt.numurls;
 	  opt.downloaded += hstat.len;
+	  downloaded_file(ADD_FILE, locf);
 	  return RETROK;
 	}
       else if (hstat.res == 0) /* No read error */
 	{
-	  if (hstat.contlen == -1)  /* We don't know how much we were
-				       supposed to get, so... */
+	  if (hstat.contlen == -1)  /* We don't know how much we were supposed
+				       to get, so assume we succeeded. */
 	    {
 	      if (*dt & RETROKF)
 		{
@@ -1121,6 +1171,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
 		}
 	      ++opt.numurls;
 	      opt.downloaded += hstat.len;
+	      downloaded_file(ADD_FILE, locf);
 	      return RETROK;
 	    }
 	  else if (hstat.len < hstat.contlen) /* meaning we lost the
@@ -1142,6 +1193,7 @@ The sizes do not match (local %ld), retrieving.\n"), local_size);
 			 tms, u->url, hstat.len, hstat.contlen, locf, count);
 	      ++opt.numurls;
 	      opt.downloaded += hstat.len;
+	      downloaded_file(ADD_FILE, locf);
 	      return RETROK;
 	    }
 	  else			/* the same, but not accepted */
src/retr.c

@@ -350,7 +350,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
       if (result != URLOK || u->proto != URLHTTP)
 	{
 	  if (u->proto == URLHTTP)
-	    logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg (result));
+	    logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
 	  else
 	    logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
 	  freeurl (u, 1);
@@ -455,7 +455,8 @@ retrieve_from_file (const char *file, int html, int *count)
 	}
       status = retrieve_url (cur_url->url, &filename, &new_file, NULL, &dt);
       if (opt.recursive && status == RETROK && (dt & TEXTHTML))
-	status = recursive_retrieve (filename, new_file ? new_file : cur_url->url);
+	status = recursive_retrieve (filename, new_file ? new_file
+						        : cur_url->url);
       if (filename && opt.delete_after && file_exists_p (filename))
 	{

src/url.c

@@ -1366,10 +1366,9 @@ no_proxy_match (const char *host, const char **no_proxy)
 void
 convert_links (const char *file, urlpos *l)
 {
   FILE *fp;
   char *buf, *p, *p2;
   long size;
-  static slist* converted_files = NULL;

   logprintf (LOG_VERBOSE, _("Converting %s... "), file);

   /* Read from the file.... */
@@ -1383,28 +1382,34 @@ convert_links (const char *file, urlpos *l)
   /* ...to a buffer.  */
   load_file (fp, &buf, &size);
   fclose (fp);
-  if (opt.backup_converted)
+  if (opt.backup_converted && downloaded_file(CHECK_FOR_FILE, file))
     /* Rather than just writing over the original .html file with the converted
-       version, save the former to *.orig. */
+       version, save the former to *.orig.  Note we only do this for files we've
+       _successfully_ downloaded, so we don't clobber .orig files sitting around
+       from previous invocations. */
     {
       /* Construct the backup filename as the original name plus ".orig". */
       size_t filename_len = strlen(file);
-      char*  filename_plus_orig_suffix = malloc(filename_len + sizeof(".orig"));
-      int    already_wrote_backup_file = 0;
-      slist* converted_file_ptr;
+      char*  filename_plus_orig_suffix = malloc(filename_len +
+						sizeof(".orig"));
+      boolean already_wrote_backup_file = FALSE;
+      slist*  converted_file_ptr;
+      static slist* converted_files = NULL;

+      /* Would a single s[n]printf() call be faster? */
       strcpy(filename_plus_orig_suffix, file);
       strcpy(filename_plus_orig_suffix + filename_len, ".orig");

       /* We can get called twice on the same URL thanks to the
 	 convert_all_links() call in main().  If we write the .orig file each
 	 time in such a case, it'll end up containing the first-pass conversion,
-	 not the original file. */
+	 not the original file.  So, see if we've already been called on this
+	 file. */
       converted_file_ptr = converted_files;
       while (converted_file_ptr != NULL)
 	if (strcmp(converted_file_ptr->string, file) == 0)
 	  {
-	    already_wrote_backup_file = 1;
+	    already_wrote_backup_file = TRUE;
 	    break;
 	  }
 	else
@@ -1421,10 +1426,13 @@ convert_links (const char *file, urlpos *l)
 	     Note that we never free this memory since we need it till the
 	     convert_all_links() call, which is one of the last things the
 	     program does before terminating.  BTW, I'm not sure if it would be
-	     safe to just set converted_file_ptr->string to file below, rather
-	     than making a copy of the string... */
+	     safe to just set 'converted_file_ptr->string' to 'file' below,
+	     rather than making a copy of the string...  Another note is that I
+	     thought I could just add a field to the urlpos structure saying
+	     that we'd written a .orig file for this URL, but that didn't work,
+	     so I had to make this separate list. */
 	  converted_file_ptr = malloc(sizeof(slist));
-	  converted_file_ptr->string = strdup(file);
+	  converted_file_ptr->string = xstrdup(file);  /* die on out-of-mem. */
 	  converted_file_ptr->next = converted_files;
 	  converted_files = converted_file_ptr;
 	}
@@ -1440,6 +1448,8 @@ convert_links (const char *file, urlpos *l)
       free (buf);
       return;
     }
+  /* [If someone understands why multiple URLs can correspond to one local file,
+     can they please add a comment here...?] */
   for (p = buf; l; l = l->next)
     {
       if (l->pos >= size)
@@ -1547,3 +1557,44 @@ add_url (urlpos *l, const char *url, const char *file)
   t->next = l;
   return t;
 }
+
+/* Remembers which files have been downloaded.  Should be called with
+   add_or_check == ADD_FILE for each file we actually download successfully
+   (i.e. not for ones we have failures on or that we skip due to -N).  If you
+   just want to check if a file has been previously added without adding it,
+   call with add_or_check == CHECK_FOR_FILE.  Please be sure to call this
+   function with local filenames, not remote URLs -- by some means that isn't
+   commented well enough for me to understand, multiple remote URLs can
+   apparently correspond to a single local file. */
+boolean
+downloaded_file (downloaded_file_t add_or_check, const char* file)
+{
+  boolean found_file = FALSE;
+  static slist* downloaded_files = NULL;
+  slist* rover = downloaded_files;
+
+  while (rover != NULL)
+    if (strcmp(rover->string, file) == 0)
+      {
+	found_file = TRUE;
+	break;
+      }
+    else
+      rover = rover->next;
+
+  if (found_file)
+    return TRUE;  /* file had already been downloaded */
+  else
+    {
+      if (add_or_check == ADD_FILE)
+	{
+	  rover = malloc(sizeof(slist));
+	  rover->string = xstrdup(file);  /* die on out-of-mem. */
+	  rover->next = downloaded_files;
+	  downloaded_files = rover;
+	}
+
+      return FALSE;  /* file had not already been downloaded */
+    }
+}
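To make the ADD_FILE / CHECK_FOR_FILE contract concrete, here is a minimal
standalone model of this registry (illustrative only: plain strdup() and int
stand in for wget's xstrdup() and boolean, and the main() driver is invented):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    typedef enum { ADD_FILE, CHECK_FOR_FILE } downloaded_file_t;
    typedef struct slist { char *string; struct slist *next; } slist;

    /* Returns 1 if FILE was already recorded; with ADD_FILE, records it
       so that later CHECK_FOR_FILE calls see it. */
    static int
    downloaded_file (downloaded_file_t add_or_check, const char *file)
    {
      static slist *downloaded_files = NULL;
      slist *rover;

      for (rover = downloaded_files; rover != NULL; rover = rover->next)
        if (strcmp (rover->string, file) == 0)
          return 1;                       /* already known */

      if (add_or_check == ADD_FILE)
        {
          rover = malloc (sizeof (slist));
          rover->string = strdup (file);  /* wget would use xstrdup() */
          rover->next = downloaded_files;
          downloaded_files = rover;
        }
      return 0;
    }

    int
    main (void)
    {
      downloaded_file (ADD_FILE, "a.html");   /* fetched this run */
      /* convert_links() only backs up files recorded this run, so a stale
         a.html.orig is never clobbered by a -N-skipped b.html: */
      printf ("back up a.html? %s\n",
              downloaded_file (CHECK_FOR_FILE, "a.html") ? "yes" : "no");
      printf ("back up b.html? %s\n",
              downloaded_file (CHECK_FOR_FILE, "b.html") ? "yes" : "no");
      return 0;
    }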

src/url.h

@@ -62,6 +62,12 @@ typedef struct _urlpos
   struct _urlpos *next;		/* Next struct in list */
 } urlpos;

+/* Controls how downloaded_file() behaves. */
+typedef enum
+{
+  ADD_FILE,
+  CHECK_FOR_FILE
+} downloaded_file_t;

 /* Function declarations */
@@ -95,4 +101,6 @@ int no_proxy_match PARAMS ((const char *, const char **));
 void convert_links PARAMS ((const char *, urlpos *));
 urlpos *add_url PARAMS ((urlpos *, const char *, const char *));

+boolean downloaded_file PARAMS ((downloaded_file_t, const char *));

 #endif /* URL_H */

src/wget.h

@@ -198,4 +198,12 @@ typedef enum
   ROBOTSOK, NOROBOTS, PROXERR, AUTHFAILED, QUOTEXC, WRITEFAILED
 } uerr_t;

+typedef unsigned char boolean;
+#ifndef FALSE
+#define FALSE 0
+#endif
+#ifndef TRUE
+#define TRUE 1
+#endif

 #endif /* WGET_H */