1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Allow almost any character in attribute/tag names.

This commit is contained in:
hniksic 2002-05-27 08:03:35 -07:00
parent f21d888d7c
commit 89b37c7eff
2 changed files with 24 additions and 7 deletions

View File

@ -1,3 +1,7 @@
2002-05-27 Hrvoje Niksic <hniksic@arsdigita.com>
* html-parse.c (NAME_CHAR_P): Allow almost any character here.
2002-05-24 Hrvoje Niksic <hniksic@arsdigita.com> 2002-05-24 Hrvoje Niksic <hniksic@arsdigita.com>
* progress.c (bar_set_params): Fall back to dot progress if the * progress.c (bar_set_params): Fall back to dot progress if the

View File

@ -344,10 +344,23 @@ array_allowed (const char **array, const char *beg, const char *end)
return 1; return 1;
} }
/* RFC1866: name [of attribute or tag] consists of letters, digits,
   periods, or hyphens. We also allow _, for compatibility with
   brain-damaged generators. */
#define NAME_CHAR_P(x) (ISALNUM (x) || (x) == '.' || (x) == '-' || (x) == '_')
/* Originally we used to adhere to RFC1866 here, and allowed only
   letters, digits, periods, and hyphens as names (of tags or
   attributes). However, this broke too many pages which used
   proprietary or strange attributes, e.g. <img src="a.gif"
   v:shapes="whatever">.
   So now we allow any character except:
   * whitespace
   * 8-bit and control chars
   * characters that clearly cannot be part of name:
   '=', '>', '/'.
   This only affects attribute and tag names; attribute values allow
   an even greater variety of characters. */
/* Nonzero when X may appear in a tag or attribute name: its code lies
   in the range 33..126 and it is none of '=', '>' or '/'.  X is
   expanded several times, so pass an expression without side effects. */
#define NAME_CHAR_P(x) (33 <= (x) && (x) <= 126 \
&& !((x) == '=' || (x) == '>' || (x) == '/'))
/* States while advancing through comments. */ /* States while advancing through comments. */
#define AC_S_DONE 0 #define AC_S_DONE 0
@ -450,10 +463,10 @@ advance_declaration (const char *beg, const char *end)
} }
break; break;
case AC_S_DCLNAME: case AC_S_DCLNAME:
if (NAME_CHAR_P (ch)) if (ch == '-')
ch = *p++;
else if (ch == '-')
state = AC_S_DASH1; state = AC_S_DASH1;
else if (NAME_CHAR_P (ch))
ch = *p++;
else else
state = AC_S_DEFAULT; state = AC_S_DEFAULT;
break; break;