mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Correctly convert links in <meta http-equiv=Refresh content="...">.
Published in <sxsadx3wp49.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
4901488011
commit
f4d019a423
@ -1,3 +1,15 @@
|
|||||||
|
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* url.c (replace_attr_refresh_hack): New function.
|
||||||
|
(convert_links): Call replace_attr_refresh_hack for Refresh
|
||||||
|
links. It will add the "TMOUT; URL=" junk before the link.
|
||||||
|
|
||||||
|
* html-url.c (collect_tags_mapper): Set ID to the ID of the
|
||||||
|
"content" attribute, not "http-equiv".
|
||||||
|
(collect_tags_mapper): Don't use OFFSET to hack the raw_* values;
|
||||||
|
instead, store the information that this entry belongs to a
|
||||||
|
"refresh" link.
|
||||||
|
|
||||||
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-12-01 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* recur.c (retrieve_tree): Allow -p retrievals to exceed maximum
|
* recur.c (retrieve_tree): Allow -p retrievals to exceed maximum
|
||||||
|
@ -482,18 +482,21 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
|
|||||||
So we just need to skip past the "NUMBER; URL=" garbage
|
So we just need to skip past the "NUMBER; URL=" garbage
|
||||||
to get to the URL. */
|
to get to the URL. */
|
||||||
{
|
{
|
||||||
int id;
|
|
||||||
char *name = find_attr (tag, "name", NULL);
|
char *name = find_attr (tag, "name", NULL);
|
||||||
char *http_equiv = find_attr (tag, "http-equiv", &id);
|
char *http_equiv = find_attr (tag, "http-equiv", NULL);
|
||||||
if (http_equiv && !strcasecmp (http_equiv, "refresh"))
|
if (http_equiv && !strcasecmp (http_equiv, "refresh"))
|
||||||
{
|
{
|
||||||
char *refresh = find_attr (tag, "content", NULL);
|
struct urlpos *entry;
|
||||||
char *p = refresh;
|
|
||||||
int offset;
|
int id;
|
||||||
while (ISDIGIT (*p))
|
char *p, *refresh = find_attr (tag, "content", &id);
|
||||||
++p;
|
int timeout = 0;
|
||||||
|
|
||||||
|
for (p = refresh; ISDIGIT (*p); p++)
|
||||||
|
timeout = 10 * timeout + *p - '0';
|
||||||
if (*p++ != ';')
|
if (*p++ != ';')
|
||||||
return;
|
return;
|
||||||
|
|
||||||
while (ISSPACE (*p))
|
while (ISSPACE (*p))
|
||||||
++p;
|
++p;
|
||||||
if (!(TOUPPER (*p) == 'U'
|
if (!(TOUPPER (*p) == 'U'
|
||||||
@ -504,10 +507,13 @@ collect_tags_mapper (struct taginfo *tag, void *arg)
|
|||||||
p += 4;
|
p += 4;
|
||||||
while (ISSPACE (*p))
|
while (ISSPACE (*p))
|
||||||
++p;
|
++p;
|
||||||
offset = p - refresh;
|
|
||||||
tag->attrs[id].value_raw_beginning += offset;
|
entry = handle_link (closure, p, tag, id);
|
||||||
tag->attrs[id].value_raw_size -= offset;
|
if (entry)
|
||||||
handle_link (closure, p, tag, id);
|
{
|
||||||
|
entry->link_refresh_p = 1;
|
||||||
|
entry->refresh_timeout = timeout;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if (name && !strcasecmp (name, "robots"))
|
else if (name && !strcasecmp (name, "robots"))
|
||||||
{
|
{
|
||||||
|
35
src/url.c
35
src/url.c
@ -1698,7 +1698,10 @@ no_proxy_match (const char *host, const char **no_proxy)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void write_backup_file PARAMS ((const char *, downloaded_file_t));
|
static void write_backup_file PARAMS ((const char *, downloaded_file_t));
|
||||||
static const char *replace_attr PARAMS ((const char *, int, FILE *, const char *));
|
static const char *replace_attr PARAMS ((const char *, int, FILE *,
|
||||||
|
const char *));
|
||||||
|
static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *,
|
||||||
|
const char *, int));
|
||||||
static char *local_quote_string PARAMS ((const char *));
|
static char *local_quote_string PARAMS ((const char *));
|
||||||
|
|
||||||
/* Change the links in one HTML file. LINKS is a list of links in the
|
/* Change the links in one HTML file. LINKS is a list of links in the
|
||||||
@ -1797,7 +1800,13 @@ convert_links (const char *file, struct urlpos *links)
|
|||||||
{
|
{
|
||||||
char *newname = construct_relative (file, link->local_name);
|
char *newname = construct_relative (file, link->local_name);
|
||||||
char *quoted_newname = local_quote_string (newname);
|
char *quoted_newname = local_quote_string (newname);
|
||||||
|
|
||||||
|
if (!link->link_refresh_p)
|
||||||
p = replace_attr (p, link->size, fp, quoted_newname);
|
p = replace_attr (p, link->size, fp, quoted_newname);
|
||||||
|
else
|
||||||
|
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
|
||||||
|
link->refresh_timeout);
|
||||||
|
|
||||||
DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
|
DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
|
||||||
link->url->url, newname, link->pos, file));
|
link->url->url, newname, link->pos, file));
|
||||||
xfree (newname);
|
xfree (newname);
|
||||||
@ -1810,7 +1819,13 @@ convert_links (const char *file, struct urlpos *links)
|
|||||||
{
|
{
|
||||||
char *newlink = link->url->url;
|
char *newlink = link->url->url;
|
||||||
char *quoted_newlink = html_quote_string (newlink);
|
char *quoted_newlink = html_quote_string (newlink);
|
||||||
|
|
||||||
|
if (!link->link_refresh_p)
|
||||||
p = replace_attr (p, link->size, fp, quoted_newlink);
|
p = replace_attr (p, link->size, fp, quoted_newlink);
|
||||||
|
else
|
||||||
|
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
|
||||||
|
link->refresh_timeout);
|
||||||
|
|
||||||
DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
|
DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
|
||||||
newlink, link->pos, file));
|
newlink, link->pos, file));
|
||||||
xfree (quoted_newlink);
|
xfree (quoted_newlink);
|
||||||
@ -2014,6 +2029,24 @@ replace_attr (const char *p, int size, FILE *fp, const char *new_text)
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The same as REPLACE_ATTR, but used when replacing
|
||||||
|
<meta http-equiv=refresh content="new_text"> because we need to
|
||||||
|
prepend "timeout_value; URL=" to new_text. */
|
||||||
|
|
||||||
|
static const char *
|
||||||
|
replace_attr_refresh_hack (const char *p, int size, FILE *fp,
|
||||||
|
const char *new_text, int timeout)
|
||||||
|
{
|
||||||
|
/* "0; URL=..." */
|
||||||
|
char *new_with_timeout = (char *)alloca (numdigit (timeout)
|
||||||
|
+ 6 /* "; URL=" */
|
||||||
|
+ strlen (new_text)
|
||||||
|
+ 1);
|
||||||
|
sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text);
|
||||||
|
|
||||||
|
return replace_attr (p, size, fp, new_with_timeout);
|
||||||
|
}
|
||||||
|
|
||||||
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
|
/* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not
|
||||||
preceded by '&'. If the character is not found, return zero. If
|
preceded by '&'. If the character is not found, return zero. If
|
||||||
the character is found, return 1 and set BP and EP to point to the
|
the character is found, return 1 and set BP and EP to point to the
|
||||||
|
@ -91,6 +91,10 @@ struct urlpos {
|
|||||||
unsigned int link_base_p :1; /* was the link <base href=...> */
|
unsigned int link_base_p :1; /* was the link <base href=...> */
|
||||||
unsigned int link_inline_p :1; /* needed to render the page. */
|
unsigned int link_inline_p :1; /* needed to render the page. */
|
||||||
|
|
||||||
|
unsigned int link_refresh_p :1; /* link was received from
|
||||||
|
<meta http-equiv=refresh content=...> */
|
||||||
|
int refresh_timeout; /* for reconstructing the refresh. */
|
||||||
|
|
||||||
/* Conversion requirements: */
|
/* Conversion requirements: */
|
||||||
enum convert_options convert; /* is conversion required? */
|
enum convert_options convert; /* is conversion required? */
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user