mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Minor doc update.
This commit is contained in:
parent
8817f4c1a4
commit
d425985c37
@ -1,3 +1,8 @@
|
|||||||
|
2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* html-url.c (append_one_url): Resurrect warning when unable to
|
||||||
|
resolve a relative link.
|
||||||
|
|
||||||
2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-12-12 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* html-url.c (collect_tags_mapper): Break into several functions.
|
* html-url.c (collect_tags_mapper): Break into several functions.
|
||||||
|
@ -328,9 +328,13 @@ append_one_url (const char *link_uri, int inlinep,
|
|||||||
|
|
||||||
if (!link_has_scheme)
|
if (!link_has_scheme)
|
||||||
{
|
{
|
||||||
/* We have no base, and the link does not have a host
|
/* Base URL is unavailable, and the link does not have a
|
||||||
attached to it. Nothing we can do. */
|
location attached to it -- we have to give up. Since
|
||||||
/* #### Should we print a warning here? Wget 1.5.x used to. */
|
this can only happen when using `--force-html -i', print
|
||||||
|
a warning. */
|
||||||
|
logprintf (LOG_NOTQUIET,
|
||||||
|
_("%s: Cannot resolve relative link %s.\n"),
|
||||||
|
ctx->document_file, link_uri);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -364,6 +368,8 @@ append_one_url (const char *link_uri, int inlinep,
|
|||||||
xfree (complete_uri);
|
xfree (complete_uri);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
DEBUGP (("appending \"%s\" to urlpos.\n", url->url));
|
||||||
|
|
||||||
newel = (struct urlpos *)xmalloc (sizeof (struct urlpos));
|
newel = (struct urlpos *)xmalloc (sizeof (struct urlpos));
|
||||||
memset (newel, 0, sizeof (*newel));
|
memset (newel, 0, sizeof (*newel));
|
||||||
|
|
||||||
@ -394,8 +400,8 @@ append_one_url (const char *link_uri, int inlinep,
|
|||||||
/* All the tag_* functions are called from collect_tags_mapper, as
|
/* All the tag_* functions are called from collect_tags_mapper, as
|
||||||
specified by KNOWN_TAGS. */
|
specified by KNOWN_TAGS. */
|
||||||
|
|
||||||
/* For most tags, all we want to do is harvest URLs from their
|
/* Default tag handler: collect URLs from attributes specified for
|
||||||
attributes. */
|
this tag by tag_url_attributes. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
|
tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
|
||||||
@ -407,7 +413,7 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
if (tag_url_attributes[i].tagid == tagid)
|
if (tag_url_attributes[i].tagid == tagid)
|
||||||
{
|
{
|
||||||
/* We've found the index of tag_url_attributes where the
|
/* We've found the index of tag_url_attributes where the
|
||||||
attributes of our tags begin. */
|
attributes of our tag begin. */
|
||||||
first = i;
|
first = i;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -426,25 +432,26 @@ tag_find_urls (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
{
|
{
|
||||||
/* Find whether TAG/ATTRIND is a combination that contains a
|
/* Find whether TAG/ATTRIND is a combination that contains a
|
||||||
URL. */
|
URL. */
|
||||||
char *attrvalue = tag->attrs[attrind].value;
|
char *link = tag->attrs[attrind].value;
|
||||||
|
|
||||||
/* If you're cringing at the inefficiency of the nested loops,
|
/* If you're cringing at the inefficiency of the nested loops,
|
||||||
remember that the number of attributes the inner loop
|
remember that they both iterate over a laughably small
|
||||||
iterates over is laughably small -- three in the worst case
|
quantity of items. The worst-case inner loop is for the IMG
|
||||||
(IMG). */
|
tag, which has three attributes. */
|
||||||
for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++)
|
for (i = first; i < size && tag_url_attributes[i].tagid == tagid; i++)
|
||||||
{
|
{
|
||||||
if (0 == strcasecmp (tag->attrs[attrind].name,
|
if (0 == strcasecmp (tag->attrs[attrind].name,
|
||||||
tag_url_attributes[i].attr_name))
|
tag_url_attributes[i].attr_name))
|
||||||
{
|
{
|
||||||
int flags = tag_url_attributes[i].flags;
|
int flags = tag_url_attributes[i].flags;
|
||||||
append_one_url (attrvalue, !(flags & TUA_EXTERNAL),
|
append_one_url (link, !(flags & TUA_EXTERNAL), tag, attrind, ctx);
|
||||||
tag, attrind, ctx);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Handle the BASE tag, for <base href=...>. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
|
tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
|
||||||
{
|
{
|
||||||
@ -468,6 +475,9 @@ tag_handle_base (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
ctx->base = xstrdup (newbase);
|
ctx->base = xstrdup (newbase);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Handle the LINK tag. It requires special handling because how its
|
||||||
|
links will be followed in -p mode depends on the REL attribute. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
|
tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
|
||||||
{
|
{
|
||||||
@ -484,14 +494,8 @@ tag_handle_link (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Some pages use a META tag to specify that the page be refreshed by
|
/* Handle the META tag. This requires special handling because of the
|
||||||
a new page after a given number of seconds. The general format for
|
refresh feature and because of robot exclusion. */
|
||||||
this is:
|
|
||||||
|
|
||||||
<meta http-equiv=Refresh content="NUMBER; URL=index2.html">
|
|
||||||
|
|
||||||
So we just need to skip past the "NUMBER; URL=" garbage to get to
|
|
||||||
the URL. */
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
|
tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
|
||||||
@ -501,6 +505,15 @@ tag_handle_meta (int tagid, struct taginfo *tag, struct map_context *ctx)
|
|||||||
|
|
||||||
if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
|
if (http_equiv && 0 == strcasecmp (http_equiv, "refresh"))
|
||||||
{
|
{
|
||||||
|
/* Some pages use a META tag to specify that the page be
|
||||||
|
refreshed by a new page after a given number of seconds. The
|
||||||
|
general format for this is:
|
||||||
|
|
||||||
|
<meta http-equiv=Refresh content="NUMBER; URL=index2.html">
|
||||||
|
|
||||||
|
So we just need to skip past the "NUMBER; URL=" garbage to
|
||||||
|
get to the URL. */
|
||||||
|
|
||||||
struct urlpos *entry;
|
struct urlpos *entry;
|
||||||
|
|
||||||
int attrind;
|
int attrind;
|
||||||
|
Loading…
Reference in New Issue
Block a user