1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Minor doc update.

This commit is contained in:
hniksic 2001-12-12 09:01:26 -08:00
parent 8817f4c1a4
commit d425985c37
2 changed files with 38 additions and 20 deletions

View File

@@ -1,3 +1,8 @@
2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com>
* html-url.c (append_one_url): Resurrect warning when unable to
resolve a relative link.
2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com> 2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com>
* html-url.c (collect_tags_mapper): Break into several functions. * html-url.c (collect_tags_mapper): Break into several functions.

View File

@@ -328,9 +328,13 @@ append_one_url (const char *link_uri, int inlinep,
if (!link_has_scheme) if (!link_has_scheme)
{ {
/* We have no base, and the link does not have a host /* Base URL is unavailable, and the link does not have a
attached to it. Nothing we can do. */ location attached to it -- we have to give up. Since
/* #### Should we print a warning here? Wget 1.5.x used to. */ this can only happen when using `--force-html -i', print
a warning. */
logprintf (LOG_NOTQUIET,
_("%s: Cannot resolve relative link %s.\n"),
ctx->document_file, link_uri);
return NULL; return NULL;
} }
@@ -364,6 +368,8 @@ append_one_url (const char *link_uri, int inlinep,
xfree (complete_uri); xfree (complete_uri);
} }
DEBUGP (("appending \"%s\" to urlpos.\n", url->url));
newel = (struct urlpos *)xmalloc (sizeof (struct urlpos)); newel = (struct urlpos *)xmalloc (sizeof (struct urlpos));
memset (newel, 0, sizeof (*newel)); memset (newel, 0, sizeof (*newel));
@@ -394,8 +400,8 @@ append_one_url (const char *link_uri, int inlinep,
/* All the tag_* functions are called from collect_tags_mapper, as /* All the tag_* functions are called from collect_tags_mapper, as
specified by KNOWN_TAGS. */ specified by KNOWN_TAGS. */
/* For most tags, all we want to do is harvest URLs from their /* Default tag handler: collect URLs from attributes specified for
attributes. */ this tag by tag_url_attributes. */
static void static void
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx) tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
@@ -407,7 +413,7 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
if (tag_url_attributes[i].tagid == tagid) if (tag_url_attributes[i].tagid == tagid)
{ {
/* We've found the index of tag_url_attributes where the /* We've found the index of tag_url_attributes where the
attributes of our tags begin. */ attributes of our tag begin. */
first = i; first = i;
break; break;
} }
@@ -426,25 +432,26 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
{ {
/* Find whether TAG/ATTRIND is a combination that contains a /* Find whether TAG/ATTRIND is a combination that contains a
URL. */ URL. */
char *attrvalue = tag->attrs[attrind].value; char *link = tag->attrs[attrind].value;
/* If you're cringing at the inefficiency of the nested loops, /* If you're cringing at the inefficiency of the nested loops,
remember that the number of attributes the inner loop remember that they both iterate over a laughably small
iterates over is laughably small -- three in the worst case quantity of items. The worst-case inner loop is for the IMG
(IMG). */ tag, which has three attributes. */
for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++) for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++)
{ {
if (0 == strcasecmp (tag->attrs[attrind].name, if (0 == strcasecmp (tag->attrs[attrind].name,
tag_url_attributes[i].attr_name)) tag_url_attributes[i].attr_name))
{ {
int flags = tag_url_attributes[i].flags; int flags = tag_url_attributes[i].flags;
append_one_url (attrvalue, !(flags & TUA_EXTERNAL), append_one_url (link, !(flags & TUA_EXTERNAL), tag, attrind, ctx);
tag, attrind, ctx);
} }
} }
} }
} }
/* Handle the BASE tag, for <base href=...>. */
static void static void
tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx) tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
{ {
@@ -468,6 +475,9 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
ctx->base = xstrdup (newbase); ctx->base = xstrdup (newbase);
} }
/* Handle the LINK tag. It requires special handling because how its
links will be followed in -p mode depends on the REL attribute. */
static void static void
tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx) tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
{ {
@@ -484,14 +494,8 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
} }
} }
/* Some pages use a META tag to specify that the page be refreshed by /* Handle the META tag. This requires special handling because of the
a new page after a given number of seconds. The general format for refresh feature and because of robot exclusion. */
this is:
<meta http-equiv=Refresh content="NUMBER; URL=index2.html">
So we just need to skip past the "NUMBER; URL=" garbage to get to
the URL. */
static void static void
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx) tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
@@ -501,6 +505,15 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
if (http_equiv && 0 == strcasecmp (http_equiv, "refresh")) if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
{ {
/* Some pages use a META tag to specify that the page be
refreshed by a new page after a given number of seconds. The
general format for this is:
<meta http-equiv=Refresh content="NUMBER; URL=index2.html">
So we just need to skip past the "NUMBER; URL=" garbage to
get to the URL. */
struct urlpos *entry; struct urlpos *entry;
int attrind; int attrind;