[svn] Recursion and progress bar tweaks.

Published in <sxsd727cvc0.fsf@florida.arsdigita.de>.
2024-07-03 16:38:41 -04:00 · 2001-11-25 13:03:30 -08:00 · 2001-11-25 13:03:30 -08:00 · 3afb9c659a
commit 3afb9c659a
parent df05e7ff10
6 changed files with 78 additions and 53 deletions
--- a/src/ChangeLog
+++ b/src/ChangeLog
@ -1,3 +1,15 @@
 2001-11-25  Hrvoje Niksic  <hniksic@arsdigita.com>
 	* recur.c (descend_url_p): Be more conservative with blacklisting
 	URLs.
 	(convert_all_links): Print how many files have been converted, and
 	how long it took.
 	* progress.c (create_image): Place the number of downloaded bytes
 	right after the progress bar.
 	* utils.c (suffix): Return a pointer into the string.
 2001-11-25  Hrvoje Niksic  <hniksic@arsdigita.com>
 	* url.c (convert_links): Handle CO_NULLIFY_BASE.
--- a/src/http.c
+++ b/src/http.c
@ -1453,7 +1453,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
 	  && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
 	*dt |= TEXTHTML;
      FREE_MAYBE (suf);
      FREE_MAYBE (dummy);
      return RETROK;
    }
--- a/src/progress.c
+++ b/src/progress.c
@ -477,24 +477,24 @@ create_image (struct bar_progress *bp, long dltime)
  long size = bp->initial_length + bp->count;
  /* The progress bar should look like this:
-     xx% [=======>             ] xx KB/s nnnnn ETA 00:00
+     xx% [=======>             ] nn.nnn rrK/s ETA 00:00
     Calculate its geometry:
-     "xx% " or "100%" - percentage                - 4 chars exactly
+     "xx% " or "100%"  - percentage                - 4 chars exactly
-     "[]"             - progress bar decorations  - 2 chars exactly
+     "[]"              - progress bar decorations  - 2 chars exactly
-     "1012.56K/s "    - dl rate                   - 11 chars exactly
+     " n,nnn,nnn,nnn"  - downloaded bytes          - 14 or less chars
-     "n,nnn,nnn,nnn " - downloaded bytes          - 14 or less chars
+     " 1012.56K/s"     - dl rate                   - 11 chars exactly
-     "ETA xx:xx:xx"   - ETA                       - 12 or less chars
+     " ETA xx:xx:xx"   - ETA                       - 13 or less chars
-     "=====>..."      - progress bar content      - the rest
+     "=====>..."       - progress bar content      - the rest
  */
-  int progress_size = screen_width - (4 + 2 + 11 + 14 + 12);
+  int progress_size = screen_width - (4 + 2 + 14 + 11 + 13);
  if (progress_size < 5)
    progress_size = 0;
-  /* "xxx%" */
+  /* "xx% " */
  if (bp->total_length > 0)
    {
      int percentage = (int)(100.0 * size / bp->total_length);
@ -509,12 +509,13 @@ create_image (struct bar_progress *bp, long dltime)
    }
  else
    {
-      int i = 5;
+      *p++ = ' ';
-      while (i--)
+      *p++ = ' ';
-	*p++ = ' ';
+      *p++ = ' ';
      *p++ = ' ';
    }
-  /* The progress bar: "|====>      |" */
+  /* The progress bar: "[====>      ]" */
  if (progress_size && bp->total_length > 0)
    {
      double fraction = (double)size / bp->total_length;
@ -566,30 +567,30 @@ create_image (struct bar_progress *bp, long dltime)
      ++bp->tick;
    }
-  /* "1012.45K/s " */
+  /* " 1,234,567" */
  /* If there are 7 or less digits (9 because of "legible" comas),
     print the number in constant space.  This will prevent the rest
     of the line jerking at the beginning of download, but without
     assigning maximum width in all cases.  */
  sprintf (p, " %9s", legible (size));
  p += strlen (p);
  /* " 1012.45K/s" */
  if (dltime && bp->count)
    {
      static char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
      int units = 0;
      double dlrate = calc_rate (bp->count, dltime, &units);
-      sprintf (p, "%7.2f%s ", dlrate, short_units[units]);
+      sprintf (p, " %7.2f%s", dlrate, short_units[units]);
      p += strlen (p);
    }
  else
    {
-      strcpy (p, "  --.-- K/s ");
+      strcpy (p, "   --.--K/s");
-      p += 12;
+      p += 11;
    }
-  /* "1,234,567 " */
+  /* " ETA xx:xx:xx" */
  /* If there are 7 or less digits (9 because of "legible" comas),
     print the number in constant space.  This will prevent the "ETA"
     string from jerking as the data begins to arrive.  */
  sprintf (p, "%9s", legible (size));
  p += strlen (p);
  *p++ = ' ';
  /* "ETA xx:xx:xx" */
  if (bp->total_length > 0 && bp->count > 0)
    {
      int eta, eta_hrs, eta_min, eta_sec;
@ -605,6 +606,7 @@ create_image (struct bar_progress *bp, long dltime)
      /*printf ("\neta: %d, %d %d %d\n", eta, eta_hrs, eta_min, eta_sec);*/
      /*printf ("\n%ld %f %ld %ld\n", dltime, tm_sofar, bytes_remaining, bp->count);*/
      *p++ = ' ';
      *p++ = 'E';
      *p++ = 'T';
      *p++ = 'A';
@ -621,8 +623,8 @@ create_image (struct bar_progress *bp, long dltime)
    }
  else if (bp->total_length > 0)
    {
-      strcpy (p, "ETA --:--");
+      strcpy (p, " ETA --:--");
-      p += 9;
+      p += 10;
    }
  assert (p - bp->buffer <= screen_width);
--- a/src/recur.c
+++ b/src/recur.c
@ -149,7 +149,7 @@ url_dequeue (struct url_queue *queue,
  xfree (qel);
  return 1;
 }
-
+
 static int descend_url_p PARAMS ((const struct urlpos *, struct url *, int,
 				  struct url *, struct hash_table *));
@ -182,7 +182,8 @@ retrieve_tree (const char *start_url)
  /* The queue of URLs we need to load. */
  struct url_queue *queue = url_queue_new ();
-  /* The URLs we decided we don't want to load. */
+  /* The URLs we do not wish to enqueue, because they are already in
     the queue, but haven't been downloaded yet.  */
  struct hash_table *blacklist = make_string_hash_table (0);
  /* We'll need various components of this, so better get it over with
@ -242,9 +243,6 @@ retrieve_tree (const char *start_url)
 	       tree.  The recursion is partial in that we won't
 	       traverse any <A> or <AREA> tags, nor any <LINK> tags
 	       except for <LINK REL="stylesheet">. */
 	    /* #### This would be the place to implement the TODO
 	       entry saying that -p should do two more hops on
 	       framesets.  */
 	    dash_p_leaf_HTML = TRUE;
 	  else
 	    {
@ -348,7 +346,11 @@ retrieve_tree (const char *start_url)
 /* Based on the context provided by retrieve_tree, decide whether a
   URL is to be descended to.  This is only ever called from
-   retrieve_tree, but is in a separate function for clarity.  */
+   retrieve_tree, but is in a separate function for clarity.
   The most expensive checks (such as those for robots) are memoized
   by storing these URLs to BLACKLIST.  This may or may not help.  It
   will help if those URLs are encountered many times.  */
 static int
 descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
@ -391,7 +393,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
      && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
    {
      DEBUGP (("Not following non-HTTP schemes.\n"));
-      goto blacklist;
+      goto out;
    }
  /* 2. If it is an absolute link and they are not followed, throw it
@ -400,7 +402,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
    if (opt.relative_only && !upos->link_relative_p)
      {
 	DEBUGP (("It doesn't really look like a relative link.\n"));
-	goto blacklist;
+	goto out;
      }
  /* 3. If its domain is not to be accepted/looked-up, chuck it
@ -408,7 +410,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
  if (!accept_domain (u))
    {
      DEBUGP (("The domain was not accepted.\n"));
-      goto blacklist;
+      goto out;
    }
  /* 4. Check for parent directory.
@ -423,7 +425,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
      if (!frontcmp (parent->dir, u->dir))
 	{
 	  DEBUGP (("Trying to escape the root directory with no_parent in effect.\n"));
-	  goto blacklist;
+	  goto out;
 	}
    }
@ -435,13 +437,13 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
      if (!accdir (u->dir, ALLABS))
 	{
 	  DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
-	  goto blacklist;
+	  goto out;
 	}
    }
  /* 6. */
  {
-    char *suf = NULL;
+    char *suf;
    /* Check for acceptance/rejection rules.  We ignore these rules
       for HTML documents because they might lead to other files which
       need to be downloaded.  Of course, we don't know which
@ -466,11 +468,9 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
 	  {
 	    DEBUGP (("%s (%s) does not match acc/rej rules.\n",
 		     url, u->file));
-	    FREE_MAYBE (suf);
+	    goto out;
 	    goto blacklist;
 	  }
      }
    FREE_MAYBE (suf);
  }
  /* 7. */
@ -479,7 +479,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
      {
 	DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
 		 u->host, parent->host));
-	goto blacklist;
+	goto out;
      }
  /* 8. */
@ -509,7 +509,8 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
      if (!res_match_path (specs, u->path))
 	{
 	  DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
-	  goto blacklist;
+	  string_set_add (blacklist, url);
 	  goto out;
 	}
    }
@ -519,9 +520,6 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
  return 1;
 blacklist:
  string_set_add (blacklist, url);
 out:
  DEBUGP (("Decided NOT to load it.\n"));
@ -604,6 +602,11 @@ void
 convert_all_links (void)
 {
  slist *html;
  struct wget_timer *timer;
  long msecs;
  int file_count = 0;
  timer = wtimer_new ();
  /* Destructively reverse downloaded_html_files to get it in the right order.
     recursive_retrieve() used slist_prepend() consistently.  */
@ -675,11 +678,19 @@ convert_all_links (void)
 	      cur_url->local_name = NULL;
 	    }
 	}
      /* Convert the links in the file.  */
      convert_links (html->string, urls);
      ++file_count;
      /* Free the data.  */
      free_urlpos (urls);
    }
  msecs = wtimer_elapsed (timer);
  wtimer_delete (timer);
  logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
 	     file_count, (double)msecs / 1000);
 }
 /* Cleanup the data structures associated with recursive retrieving
--- a/src/retr.c
+++ b/src/retr.c
@ -336,7 +336,6 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 	  char *suf = suffix (u->local);
 	  if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
 	    *dt |= TEXTHTML;
 	  FREE_MAYBE (suf);
 	}
 #endif
    }
--- a/src/utils.c
+++ b/src/utils.c
@ -904,7 +904,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
  return 0;
 }
-/* Return the malloc-ed suffix of STR.  For instance:
+/* Return the location of STR's suffix (file extension).  Examples:
   suffix ("foo.bar")       -> "bar"
   suffix ("foo.bar.baz")   -> "baz"
   suffix ("/foo/bar")      -> NULL
@ -914,9 +914,11 @@ suffix (const char *str)
 {
  int i;
-  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
+  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
    ;
  if (str[i++] == '.')
-    return xstrdup (str + i);
+    return (char *)str + i;
  else
    return NULL;
 }