From f4d019a423d5f3fdaec42116a8b997536a01da45 Mon Sep 17 00:00:00 2001 From: hniksic Date: Fri, 30 Nov 2001 20:18:51 -0800 Subject: [PATCH] [svn] Correctly convert links in . Published in . --- src/ChangeLog | 12 ++++++++++++ src/html-url.c | 28 +++++++++++++++++----------- src/url.c | 39 ++++++++++++++++++++++++++++++++++++--- src/url.h | 4 ++++ 4 files changed, 69 insertions(+), 14 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 9d1bc81b..45f06eab 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,15 @@ +2001-12-01 Hrvoje Niksic + + * url.c (replace_attr_refresh_hack): New function. + (convert_links): Call replace_attr_refresh_hack for Refresh + links. It will add the "TMOUT; URL=" junk before the link. + + * html-url.c (collect_tags_mapper): Set ID to the ID of the + "content" attribute, not "http-equiv". + (collect_tags_mapper): Don't use OFFSET to hack the raw_* values; + instead, store the information that this entry belongs to a + "refresh" link. + 2001-12-01 Hrvoje Niksic * recur.c (retrieve_tree): Allow -p retrievals to exceed maximum diff --git a/src/html-url.c b/src/html-url.c index 5942a49f..7b9bf29a 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -482,18 +482,21 @@ collect_tags_mapper (struct taginfo *tag, void *arg) So we just need to skip past the "NUMBER; URL=" garbage to get to the URL. */ { - int id; char *name = find_attr (tag, "name", NULL); - char *http_equiv = find_attr (tag, "http-equiv", &id); + char *http_equiv = find_attr (tag, "http-equiv", NULL); if (http_equiv && !strcasecmp (http_equiv, "refresh")) { - char *refresh = find_attr (tag, "content", NULL); - char *p = refresh; - int offset; - while (ISDIGIT (*p)) - ++p; + struct urlpos *entry; + + int id; + char *p, *refresh = find_attr (tag, "content", &id); + int timeout = 0; + + for (p = refresh; ISDIGIT (*p); p++) + timeout = 10 * timeout + *p - '0'; if (*p++ != ';') return; + while (ISSPACE (*p)) ++p; if (!(TOUPPER (*p) == 'U' @@ -504,10 +507,13 @@ collect_tags_mapper (struct taginfo *tag, void *arg) p += 4; while (ISSPACE (*p)) ++p; - offset = p - refresh; - tag->attrs[id].value_raw_beginning += offset; - tag->attrs[id].value_raw_size -= offset; - handle_link (closure, p, tag, id); + + entry = handle_link (closure, p, tag, id); + if (entry) + { + entry->link_refresh_p = 1; + entry->refresh_timeout = timeout; + } } else if (name && !strcasecmp (name, "robots")) { diff --git a/src/url.c b/src/url.c index 28e41c8c..38469418 100644 --- a/src/url.c +++ b/src/url.c @@ -1698,7 +1698,10 @@ no_proxy_match (const char *host, const char **no_proxy) } static void write_backup_file PARAMS ((const char *, downloaded_file_t)); -static const char *replace_attr PARAMS ((const char *, int, FILE *, const char *)); +static const char *replace_attr PARAMS ((const char *, int, FILE *, + const char *)); +static const char *replace_attr_refresh_hack PARAMS ((const char *, int, FILE *, + const char *, int)); static char *local_quote_string PARAMS ((const char *)); /* Change the links in one HTML file. LINKS is a list of links in the @@ -1797,7 +1800,13 @@ convert_links (const char *file, struct urlpos *links) { char *newname = construct_relative (file, link->local_name); char *quoted_newname = local_quote_string (newname); - p = replace_attr (p, link->size, fp, quoted_newname); + + if (!link->link_refresh_p) + p = replace_attr (p, link->size, fp, quoted_newname); + else + p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname, + link->refresh_timeout); + DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n", link->url->url, newname, link->pos, file)); xfree (newname); @@ -1810,7 +1819,13 @@ convert_links (const char *file, struct urlpos *links) { char *newlink = link->url->url; char *quoted_newlink = html_quote_string (newlink); - p = replace_attr (p, link->size, fp, quoted_newlink); + + if (!link->link_refresh_p) + p = replace_attr (p, link->size, fp, quoted_newlink); + else + p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink, + link->refresh_timeout); + DEBUGP (("TO_COMPLETE: to %s at position %d in %s.\n", newlink, link->pos, file)); xfree (quoted_newlink); @@ -2014,6 +2029,24 @@ replace_attr (const char *p, int size, FILE *fp, const char *new_text) return p; } +/* The same as REPLACE_ATTR, but used when replacing + because we need to + append "timeout_value; URL=" before the next_text. */ + +static const char * +replace_attr_refresh_hack (const char *p, int size, FILE *fp, + const char *new_text, int timeout) +{ + /* "0; URL=..." */ + char *new_with_timeout = (char *)alloca (numdigit (timeout) + + 6 /* "; URL=" */ + + strlen (new_text) + + 1); + sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text); + + return replace_attr (p, size, fp, new_with_timeout); +} + /* Find the first occurrence of '#' in [BEG, BEG+SIZE) that is not preceded by '&'. If the character is not found, return zero. If the character is found, return 1 and set BP and EP to point to the diff --git a/src/url.h b/src/url.h index 3c42c5aa..b99c64ce 100644 --- a/src/url.h +++ b/src/url.h @@ -91,6 +91,10 @@ struct urlpos { unsigned int link_base_p :1; /* was the link */ unsigned int link_inline_p :1; /* needed to render the page. */ + unsigned int link_refresh_p :1; /* link was received from + */ + int refresh_timeout; /* for reconstructing the refresh. */ + /* Conversion requirements: */ enum convert_options convert; /* is conversion required? */