[svn] Minor doc update.

2024-07-03 16:38:41 -04:00 · 2001-12-12 09:01:26 -08:00 · 2001-12-12 09:01:26 -08:00 · d425985c37
commit d425985c37
parent 8817f4c1a4
2 changed files with 38 additions and 20 deletions
--- a/src/ChangeLog
+++ b/src/ChangeLog
@ -1,3 +1,8 @@
+2001-12-12  Hrvoje Niksic  <hniksic@arsdigita.com>
+	* html-url.c (append_one_url): Resurrect warning when unable to
+	resolve a relative link.
 2001-12-12  Hrvoje Niksic  <hniksic@arsdigita.com>
 	* html-url.c (collect_tags_mapper): Break into several functions.
--- a/src/html-url.c
+++ b/src/html-url.c
@ -328,9 +328,13 @@ append_one_url (const char *link_uri, int inlinep,
      if (!link_has_scheme)
 	{
-	  /* We have no base, and the link does not have a host
+	  /* Base URL is unavailable, and the link does not have a
-	     attached to it.  Nothing we can do.  */
+	     location attached to it -- we have to give up.  Since
-	  /* #### Should we print a warning here?  Wget 1.5.x used to.  */
+	     this can only happen when using `--force-html -i', print
+	     a warning.  */
+	  logprintf (LOG_NOTQUIET,
+		     _("%s: Cannot resolve relative link %s.\n"),
+		     ctx->document_file, link_uri);
 	  return NULL;
 	}
@ -364,6 +368,8 @@ append_one_url (const char *link_uri, int inlinep,
      xfree (complete_uri);
    }
  DEBUGP (("appending \"%s\" to urlpos.\n", url->url));
  newel = (struct urlpos *)xmalloc (sizeof (struct urlpos));
  memset (newel, 0, sizeof (*newel));
@ -394,8 +400,8 @@ append_one_url (const char *link_uri, int inlinep,
 /* All the tag_* functions are called from collect_tags_mapper, as
   specified by KNOWN_TAGS.  */
-/* For most tags, all we want to do is harvest URLs from their
+/* Default tag handler: collect URLs from attributes specified for
-   attributes.  */
+   this tag by tag_url_attributes.  */
 static void
 tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
@ -407,7 +413,7 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
    if (tag_url_attributes[i].tagid == tagid)
      {
 	/* We've found the index of tag_url_attributes where the
-	   attributes of our tags begin.  */
+	   attributes of our tag begin.  */
 	first = i;
 	break;
      }
@ -426,25 +432,26 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
    {
      /* Find whether TAG/ATTRIND is a combination that contains a
 	 URL. */
-      char *attrvalue = tag->attrs[attrind].value;
+      char *link = tag->attrs[attrind].value;
      /* If you're cringing at the inefficiency of the nested loops,
-	 remember that the number of attributes the inner loop
+	 remember that they both iterate over a laughably small
-	 iterates over is laughably small -- three in the worst case
+	 quantity of items.  The worst-case inner loop is for the IMG
-	 (IMG).  */
+	 tag, which has three attributes.  */
      for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++)
 	{
 	  if (0 == strcasecmp (tag->attrs[attrind].name,
 			       tag_url_attributes[i].attr_name))
 	    {
 	      int flags = tag_url_attributes[i].flags;
-	      append_one_url (attrvalue, !(flags & TUA_EXTERNAL),
+	      append_one_url (link, !(flags & TUA_EXTERNAL), tag, attrind, ctx);
-			      tag, attrind, ctx);
 	    }
 	}
    }
 }
 /* Handle the BASE tag, for <base href=...>. */
 static void
 tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
 {
@ -468,6 +475,9 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
    ctx->base = xstrdup (newbase);
 }
 /* Handle the LINK tag.  It requires special handling because how its
   links will be followed in -p mode depends on the REL attribute.  */
 static void
 tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
 {
@ -484,14 +494,8 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
    }
 }
-/* Some pages use a META tag to specify that the page be refreshed by
+/* Handle the META tag.  This requires special handling because of the
-   a new page after a given number of seconds.  The general format for
+   refresh feature and because of robot exclusion.  */
-   this is:
-   <meta http-equiv=Refresh content="NUMBER; URL=index2.html">
-   So we just need to skip past the "NUMBER; URL=" garbage to get to
-   the URL.  */
 static void
 tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
@ -501,6 +505,15 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
  if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
    {
+      /* Some pages use a META tag to specify that the page be
+	 refreshed by a new page after a given number of seconds.  The
+	 general format for this is:
+	   <meta http-equiv=Refresh content="NUMBER; URL=index2.html">
+	 So we just need to skip past the "NUMBER; URL=" garbage to
+	 get to the URL.  */
      struct urlpos *entry;
      int attrind;