diff --git a/src/ChangeLog b/src/ChangeLog index de743a8f..29edda04 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,7 @@ +2001-11-16 Hrvoje Niksic + + * html-parse.c (map_html_tags): Support XML-style empty tags. + 2001-06-26 Hrvoje Niksic * wget.h (DO_REALLOC_FROM_ALLOCA): Check for do_realloc_newsize in diff --git a/src/html-parse.c b/src/html-parse.c index f9a5b0f5..4ae7afe7 100644 --- a/src/html-parse.c +++ b/src/html-parse.c @@ -638,6 +638,19 @@ map_html_tags (const char *text, int size, SKIP_WS (p); + if (*p == '/') + { + /* A slash at this point means the tag is about to be + closed. This is legal in XML and has been popularized + in HTML via XHTML. */ + /* <foo a=b c=d /> */ + /* ^ */ + ADVANCE (p); + SKIP_WS (p); + if (*p != '>') + goto backout_tag; + } + /* Check for end of tag definition. */ if (*p == '>') break; @@ -654,7 +667,7 @@ map_html_tags (const char *text, int size, /* Establish bounds of attribute value. */ SKIP_WS (p); - if (NAME_CHAR_P (*p) || *p == '>') + if (NAME_CHAR_P (*p) || *p == '/' || *p == '>') { /* Minimized attribute syntax allows `=' to be omitted. For example,
    <UL COMPACT> is a valid shorthand for <UL COMPACT="compact">