mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Cope better with unclosed HTML tags.
This commit is contained in:
parent
05fccaeed2
commit
1b2092fd06
2
NEWS
2
NEWS
@ -19,6 +19,8 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
|||||||
** Set new cookies after an authorization failure.
|
** Set new cookies after an authorization failure.
|
||||||
|
|
||||||
** Exit with failure if -k is specified and -O is not a regular file.
|
** Exit with failure if -k is specified and -O is not a regular file.
|
||||||
|
|
||||||
|
** Cope better with unclosed HTML tags.
|
||||||
|
|
||||||
* Changes in Wget 1.12
|
* Changes in Wget 1.12
|
||||||
|
|
||||||
|
@ -1,3 +1,9 @@
|
|||||||
|
2010-05-30 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||||
|
|
||||||
|
* html-parse.c (NAME_CHAR_P): Consider '<' an invalid character.
|
||||||
|
(advance_declaration): Close the tag if '<' is found.
|
||||||
|
(map_html_tags): Likewise.
|
||||||
|
|
||||||
2010-05-27 Giuseppe Scrivano <gscrivano@gnu.org>
|
2010-05-27 Giuseppe Scrivano <gscrivano@gnu.org>
|
||||||
|
|
||||||
* main.c (main): Exit with failure when -k is specified and -O is not
|
* main.c (main): Exit with failure when -k is specified and -O is not
|
||||||
|
@ -528,13 +528,14 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
|
|||||||
* whitespace
|
* whitespace
|
||||||
* 8-bit and control chars
|
* 8-bit and control chars
|
||||||
* characters that clearly cannot be part of name:
|
* characters that clearly cannot be part of name:
|
||||||
'=', '>', '/'.
|
'=', '<', '>', '/'.
|
||||||
|
|
||||||
This only affects attribute and tag names; attribute values allow
|
This only affects attribute and tag names; attribute values allow
|
||||||
an even greater variety of characters. */
|
an even greater variety of characters. */
|
||||||
|
|
||||||
#define NAME_CHAR_P(x) ((x) > 32 && (x) < 127 \
|
#define NAME_CHAR_P(x) ((x) > 32 && (x) < 127 \
|
||||||
&& (x) != '=' && (x) != '>' && (x) != '/')
|
&& (x) != '=' && (x) != '<' && (x) != '>' \
|
||||||
|
&& (x) != '/')
|
||||||
|
|
||||||
#ifdef STANDALONE
|
#ifdef STANDALONE
|
||||||
static int comment_backout_count;
|
static int comment_backout_count;
|
||||||
@ -619,6 +620,7 @@ advance_declaration (const char *beg, const char *end)
|
|||||||
case '\n':
|
case '\n':
|
||||||
ch = *p++;
|
ch = *p++;
|
||||||
break;
|
break;
|
||||||
|
case '<':
|
||||||
case '>':
|
case '>':
|
||||||
state = AC_S_DONE;
|
state = AC_S_DONE;
|
||||||
break;
|
break;
|
||||||
@ -926,7 +928,7 @@ map_html_tags (const char *text, int size,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (end_tag && *p != '>')
|
if (end_tag && *p != '>' && *p != '<')
|
||||||
goto backout_tag;
|
goto backout_tag;
|
||||||
|
|
||||||
if (!name_allowed (allowed_tags, tag_name_begin, tag_name_end))
|
if (!name_allowed (allowed_tags, tag_name_begin, tag_name_end))
|
||||||
@ -958,12 +960,12 @@ map_html_tags (const char *text, int size,
|
|||||||
/* ^ */
|
/* ^ */
|
||||||
ADVANCE (p);
|
ADVANCE (p);
|
||||||
SKIP_WS (p);
|
SKIP_WS (p);
|
||||||
if (*p != '>')
|
if (*p != '<' || *p != '>')
|
||||||
goto backout_tag;
|
goto backout_tag;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for end of tag definition. */
|
/* Check for end of tag definition. */
|
||||||
if (*p == '>')
|
if (*p == '<' || *p == '>')
|
||||||
break;
|
break;
|
||||||
|
|
||||||
/* Establish bounds of attribute name. */
|
/* Establish bounds of attribute name. */
|
||||||
@ -978,7 +980,8 @@ map_html_tags (const char *text, int size,
|
|||||||
|
|
||||||
/* Establish bounds of attribute value. */
|
/* Establish bounds of attribute value. */
|
||||||
SKIP_WS (p);
|
SKIP_WS (p);
|
||||||
if (NAME_CHAR_P (*p) || *p == '/' || *p == '>')
|
|
||||||
|
if (NAME_CHAR_P (*p) || *p == '/' || *p == '<' || *p == '>')
|
||||||
{
|
{
|
||||||
/* Minimized attribute syntax allows `=' to be omitted.
|
/* Minimized attribute syntax allows `=' to be omitted.
|
||||||
For example, <UL COMPACT> is a valid shorthand for <UL
|
For example, <UL COMPACT> is a valid shorthand for <UL
|
||||||
@ -1015,7 +1018,7 @@ map_html_tags (const char *text, int size,
|
|||||||
newline_seen = true;
|
newline_seen = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (newline_seen && *p == '>')
|
else if (newline_seen && (*p == '<' || *p == '>'))
|
||||||
break;
|
break;
|
||||||
ADVANCE (p);
|
ADVANCE (p);
|
||||||
}
|
}
|
||||||
@ -1040,7 +1043,7 @@ map_html_tags (const char *text, int size,
|
|||||||
violated by, for instance, `%' in `width=75%'.
|
violated by, for instance, `%' in `width=75%'.
|
||||||
We'll be liberal and allow just about anything as
|
We'll be liberal and allow just about anything as
|
||||||
an attribute value. */
|
an attribute value. */
|
||||||
while (!c_isspace (*p) && *p != '>')
|
while (!c_isspace (*p) && *p != '<' && *p != '>')
|
||||||
ADVANCE (p);
|
ADVANCE (p);
|
||||||
attr_value_end = p; /* <foo bar=baz qux=quix> */
|
attr_value_end = p; /* <foo bar=baz qux=quix> */
|
||||||
/* ^ */
|
/* ^ */
|
||||||
@ -1138,7 +1141,8 @@ map_html_tags (const char *text, int size,
|
|||||||
}
|
}
|
||||||
|
|
||||||
mapfun (&taginfo, maparg);
|
mapfun (&taginfo, maparg);
|
||||||
ADVANCE (p);
|
if (*p != '<')
|
||||||
|
ADVANCE (p);
|
||||||
}
|
}
|
||||||
goto look_for_tag;
|
goto look_for_tag;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user