1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Add FLAGS argument to map_html_tags.

This commit is contained in:
hniksic 2003-10-08 09:17:33 -07:00
parent a9c3c58c9f
commit ae1d264fcc
4 changed files with 37 additions and 18 deletions

View File

@ -1,3 +1,11 @@
2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
* html-url.c (get_urls_html): Pass the appropriate flags to
html-parse.c.
* html-parse.c (map_html_tags): Accept FLAGS from the caller
instead of examining OPT.
2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
* html-url.c (find_tag): Switch to binary search.

View File

@ -129,8 +129,6 @@ so, delete this exception statement from your version. */
# define ISALNUM(x) isalnum (x)
# define TOLOWER(x) tolower (x)
# define TOUPPER(x) toupper (x)
static struct options opt;
#endif /* STANDALONE */
/* Pool support. A pool is a resizable chunk of memory. It is first
@ -692,7 +690,7 @@ static int tag_backout_count;
/* Map MAPFUN over HTML tags in TEXT, which is SIZE characters long.
MAPFUN will be called with two arguments: pointer to an initialized
struct taginfo, and CLOSURE.
struct taginfo, and MAPARG.
ALLOWED_TAG_NAMES should be a NULL-terminated array of tag names to
be processed by this function. If it is NULL, all the tags are
@ -708,10 +706,10 @@ static int tag_backout_count;
void
map_html_tags (const char *text, int size,
void (*mapfun) (struct taginfo *, void *), void *maparg,
int flags,
const char **allowed_tag_names,
const char **allowed_attribute_names,
void (*mapfun) (struct taginfo *, void *),
void *closure)
const char **allowed_attribute_names)
{
/* storage for strings passed to MAPFUN callback; if 256 bytes is
too little, POOL_APPEND allocates more with malloc. */
@ -756,7 +754,7 @@ map_html_tags (const char *text, int size,
declaration). */
if (*p == '!')
{
if (!opt.strict_comments
if (!(flags & MHT_STRICT_COMMENTS)
&& p < end + 3 && p[1] == '-' && p[2] == '-')
{
/* If strict comments are not enforced and if we know
@ -893,11 +891,9 @@ map_html_tags (const char *text, int size,
goto look_for_tag;
attr_raw_value_end = p; /* <foo bar="baz"> */
/* ^ */
/* The AP_TRIM_BLANKS is there for buggy HTML
generators that generate <a href=" foo"> instead of
<a href="foo"> (Netscape ignores spaces as well.)
If you really mean space, use &#32; or %20. */
operation = AP_PROCESS_ENTITIES | AP_TRIM_BLANKS;
operation = AP_PROCESS_ENTITIES;
if (flags & MHT_TRIM_VALUES)
operation |= AP_TRIM_BLANKS;
}
else
{
@ -985,7 +981,7 @@ map_html_tags (const char *text, int size,
taginfo.start_position = tag_start_position;
taginfo.end_position = p + 1;
/* Ta-dam! */
(*mapfun) (&taginfo, closure);
(*mapfun) (&taginfo, maparg);
ADVANCE (p);
}
goto look_for_tag;

View File

@ -53,7 +53,13 @@ struct taginfo {
const char *end_position; /* end position of tag */
};
void map_html_tags PARAMS ((const char *, int, const char **, const char **,
void (*) (struct taginfo *, void *), void *));
/* Flags for map_html_tags: */
#define MHT_STRICT_COMMENTS 1 /* use strict comment interpretation */
#define MHT_TRIM_VALUES 2 /* trim attribute values, e.g. interpret
<a href=" foo "> as "foo" */
void map_html_tags PARAMS ((const char *, int,
void (*) (struct taginfo *, void *), void *,
int, const char **, const char **));
#endif /* HTML_PARSE_H */

View File

@ -643,6 +643,7 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
{
struct file_memory *fm;
struct map_context ctx;
int flags;
/* Load the file. */
fm = read_file (file);
@ -663,8 +664,16 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
if (!interesting_tags)
init_interesting ();
map_html_tags (fm->content, fm->length, interesting_tags,
interesting_attributes, collect_tags_mapper, &ctx);
/* Specify MHT_TRIM_VALUES because of buggy HTML generators that
generate <a href=" foo"> instead of <a href="foo"> (Netscape
ignores spaces as well.) If you really mean space, use &#32; or
%20. */
flags = MHT_TRIM_VALUES;
if (opt.strict_comments)
flags |= MHT_STRICT_COMMENTS;
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
interesting_tags, interesting_attributes);
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
if (meta_disallow_follow)