mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Add FLAGS argument to map_html_tags.
This commit is contained in:
parent
a9c3c58c9f
commit
ae1d264fcc
@ -1,3 +1,11 @@
|
||||
2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* html-url.c (get_urls_html): Pass the appropriate flags to
|
||||
html-parse.c.
|
||||
|
||||
* html-parse.c (map_html_tags): Accept FLAGS from the caller
|
||||
instead of examining OPT.
|
||||
|
||||
2003-10-08 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* html-url.c (find_tag): Switch to binary search.
|
||||
|
@ -129,8 +129,6 @@ so, delete this exception statement from your version. */
|
||||
# define ISALNUM(x) isalnum (x)
|
||||
# define TOLOWER(x) tolower (x)
|
||||
# define TOUPPER(x) toupper (x)
|
||||
|
||||
static struct options opt;
|
||||
#endif /* STANDALONE */
|
||||
|
||||
/* Pool support. A pool is a resizable chunk of memory. It is first
|
||||
@ -692,7 +690,7 @@ static int tag_backout_count;
|
||||
|
||||
/* Map MAPFUN over HTML tags in TEXT, which is SIZE characters long.
|
||||
MAPFUN will be called with two arguments: pointer to an initialized
|
||||
struct taginfo, and CLOSURE.
|
||||
struct taginfo, and MAPARG.
|
||||
|
||||
ALLOWED_TAG_NAMES should be a NULL-terminated array of tag names to
|
||||
be processed by this function. If it is NULL, all the tags are
|
||||
@ -708,10 +706,10 @@ static int tag_backout_count;
|
||||
|
||||
void
|
||||
map_html_tags (const char *text, int size,
|
||||
void (*mapfun) (struct taginfo *, void *), void *maparg,
|
||||
int flags,
|
||||
const char **allowed_tag_names,
|
||||
const char **allowed_attribute_names,
|
||||
void (*mapfun) (struct taginfo *, void *),
|
||||
void *closure)
|
||||
const char **allowed_attribute_names)
|
||||
{
|
||||
/* storage for strings passed to MAPFUN callback; if 256 bytes is
|
||||
too little, POOL_APPEND allocates more with malloc. */
|
||||
@ -756,7 +754,7 @@ map_html_tags (const char *text, int size,
|
||||
declaration). */
|
||||
if (*p == '!')
|
||||
{
|
||||
if (!opt.strict_comments
|
||||
if (!(flags & MHT_STRICT_COMMENTS)
|
||||
&& p < end + 3 && p[1] == '-' && p[2] == '-')
|
||||
{
|
||||
/* If strict comments are not enforced and if we know
|
||||
@ -893,11 +891,9 @@ map_html_tags (const char *text, int size,
|
||||
goto look_for_tag;
|
||||
attr_raw_value_end = p; /* <foo bar="baz"> */
|
||||
/* ^ */
|
||||
/* The AP_TRIM_BLANKS is there for buggy HTML
|
||||
generators that generate <a href=" foo"> instead of
|
||||
<a href="foo"> (Netscape ignores spaces as well.)
|
||||
If you really mean space, use &32; or %20. */
|
||||
operation = AP_PROCESS_ENTITIES | AP_TRIM_BLANKS;
|
||||
operation = AP_PROCESS_ENTITIES;
|
||||
if (flags & MHT_TRIM_VALUES)
|
||||
operation |= AP_TRIM_BLANKS;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -985,7 +981,7 @@ map_html_tags (const char *text, int size,
|
||||
taginfo.start_position = tag_start_position;
|
||||
taginfo.end_position = p + 1;
|
||||
/* Ta-dam! */
|
||||
(*mapfun) (&taginfo, closure);
|
||||
(*mapfun) (&taginfo, maparg);
|
||||
ADVANCE (p);
|
||||
}
|
||||
goto look_for_tag;
|
||||
|
@ -6,7 +6,7 @@ This file is part of GNU Wget.
|
||||
GNU Wget is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
(at your option) any later version.
|
||||
|
||||
GNU Wget is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
@ -53,7 +53,13 @@ struct taginfo {
|
||||
const char *end_position; /* end position of tag */
|
||||
};
|
||||
|
||||
void map_html_tags PARAMS ((const char *, int, const char **, const char **,
|
||||
void (*) (struct taginfo *, void *), void *));
|
||||
/* Flags for map_html_tags: */
|
||||
#define MHT_STRICT_COMMENTS 1 /* use strict comment interpretation */
|
||||
#define MHT_TRIM_VALUES 2 /* trim attribute values, e.g. interpret
|
||||
<a href=" foo "> as "foo" */
|
||||
|
||||
void map_html_tags PARAMS ((const char *, int,
|
||||
void (*) (struct taginfo *, void *), void *,
|
||||
int, const char **, const char **));
|
||||
|
||||
#endif /* HTML_PARSE_H */
|
||||
|
@ -643,6 +643,7 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
|
||||
{
|
||||
struct file_memory *fm;
|
||||
struct map_context ctx;
|
||||
int flags;
|
||||
|
||||
/* Load the file. */
|
||||
fm = read_file (file);
|
||||
@ -663,8 +664,16 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
|
||||
if (!interesting_tags)
|
||||
init_interesting ();
|
||||
|
||||
map_html_tags (fm->content, fm->length, interesting_tags,
|
||||
interesting_attributes, collect_tags_mapper, &ctx);
|
||||
/* Specify MHT_TRIM_VALUES because of buggy HTML generators that
|
||||
generate <a href=" foo"> instead of <a href="foo"> (Netscape
|
||||
ignores spaces as well.) If you really mean space, use &#32; or
|
||||
%20. */
|
||||
flags = MHT_TRIM_VALUES;
|
||||
if (opt.strict_comments)
|
||||
flags |= MHT_STRICT_COMMENTS;
|
||||
|
||||
map_html_tags (fm->content, fm->length, collect_tags_mapper, &ctx, flags,
|
||||
interesting_tags, interesting_attributes);
|
||||
|
||||
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
|
||||
if (meta_disallow_follow)
|
||||
|
Loading…
Reference in New Issue
Block a user