1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Support XML-style empty tags.

This commit is contained in:
hniksic 2001-11-16 08:44:34 -08:00
parent b1dd1c1bad
commit 0ce7b6bffc
2 changed files with 19 additions and 2 deletions

View File

@ -1,3 +1,7 @@
2001-11-16 Hrvoje Niksic <hniksic@arsdigita.com>
* html-parse.c (map_html_tags): Support XML-style empty tags.
2001-06-26 Hrvoje Niksic <hniksic@arsdigita.com> 2001-06-26 Hrvoje Niksic <hniksic@arsdigita.com>
* wget.h (DO_REALLOC_FROM_ALLOCA): Check for do_realloc_newsize in * wget.h (DO_REALLOC_FROM_ALLOCA): Check for do_realloc_newsize in

View File

@ -638,6 +638,19 @@ map_html_tags (const char *text, int size,
SKIP_WS (p); SKIP_WS (p);
if (*p == '/')
{
/* A slash at this point means the tag is about to be
closed. This is legal in XML and has been popularized
in HTML via XHTML. */
/* <foo a=b c=d /> */
/*              ^  */
ADVANCE (p);
SKIP_WS (p);
if (*p != '>')
goto backout_tag;
}
/* Check for end of tag definition. */ /* Check for end of tag definition. */
if (*p == '>') if (*p == '>')
break; break;
@ -654,7 +667,7 @@ map_html_tags (const char *text, int size,
/* Establish bounds of attribute value. */ /* Establish bounds of attribute value. */
SKIP_WS (p); SKIP_WS (p);
if (NAME_CHAR_P (*p) || *p == '>') if (NAME_CHAR_P (*p) || *p == '/' || *p == '>')
{ {
/* Minimized attribute syntax allows `=' to be omitted. /* Minimized attribute syntax allows `=' to be omitted.
For example, <UL COMPACT> is a valid shorthand for <UL For example, <UL COMPACT> is a valid shorthand for <UL
@ -735,7 +748,7 @@ map_html_tags (const char *text, int size,
/* We skipped the whitespace and found something that is /* We skipped the whitespace and found something that is
neither `=' nor the beginning of the next attribute's neither `=' nor the beginning of the next attribute's
name. Back out. */ name. Back out. */
goto backout_tag; /* <foo bar /... */ goto backout_tag; /* <foo bar [... */
/* ^ */ /* ^ */
} }