mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Fixed incorrect handling of reserved chars.
* src/iri.c (do_conversion): Call url_unescape_except_reserved instead of url_unescape. * src/url.c (url_unescape_1): New static function. (url_unescape): Call url_unescape_1 with a mask of zero; this preserves the previous behavior and only restructures the code. (url_unescape_except_reserved): New function. * src/url.h: Add prototype for url_unescape_except_reserved(). When the locale is US-ASCII, URIs that contain special characters are converted to IRIs according to RFC 3987, section 3.2, "Converting URIs to IRIs".
This commit is contained in:
parent
b6b1388fb7
commit
b0820d553b
@ -136,7 +136,7 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz
|
||||
|
||||
/* iconv() has to work on an unescaped string */
|
||||
in_save = in = xstrndup (in_org, inlen);
|
||||
url_unescape(in);
|
||||
url_unescape_except_reserved (in);
|
||||
inlen = strlen(in);
|
||||
|
||||
len = outlen = inlen * 2;
|
||||
|
40
src/url.c
40
src/url.c
@ -161,17 +161,8 @@ static const unsigned char urlchr_table[256] =
|
||||
#undef U
|
||||
#undef RU
|
||||
|
||||
/* URL-unescape the string S.
|
||||
|
||||
This is done by transforming the sequences "%HH" to the character
|
||||
represented by the hexadecimal digits HH. If % is not followed by
|
||||
two hexadecimal digits, it is inserted literally.
|
||||
|
||||
The transformation is done in place. If you need the original
|
||||
string intact, make a copy before calling this function. */
|
||||
|
||||
void
|
||||
url_unescape (char *s)
|
||||
static void
|
||||
url_unescape_1 (char *s, unsigned char mask)
|
||||
{
|
||||
char *t = s; /* t - tortoise */
|
||||
char *h = s; /* h - hare */
|
||||
@ -190,6 +181,8 @@ url_unescape (char *s)
|
||||
if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
|
||||
goto copychar;
|
||||
c = X2DIGITS_TO_NUM (h[1], h[2]);
|
||||
if (urlchr_test(c, mask))
|
||||
goto copychar;
|
||||
/* Don't unescape %00 because there is no way to insert it
|
||||
into a C string without effectively truncating it. */
|
||||
if (c == '\0')
|
||||
@ -201,6 +194,31 @@ url_unescape (char *s)
|
||||
*t = '\0';
|
||||
}
|
||||
|
||||
/* URL-unescape the string S.
|
||||
|
||||
This is done by transforming the sequences "%HH" to the character
|
||||
represented by the hexadecimal digits HH. If % is not followed by
|
||||
two hexadecimal digits, it is inserted literally.
|
||||
|
||||
The transformation is done in place. If you need the original
|
||||
string intact, make a copy before calling this function.
The work is delegated to url_unescape_1, which also declines to
unescape "%00" because that would effectively truncate the C string. */
|
||||
void
|
||||
url_unescape (char *s)
|
||||
{
|
||||
/* A mask of zero asks url_unescape_1 to keep no character class
escaped; presumably urlchr_test never matches a zero mask --
confirm against the urlchr_test macro. */
url_unescape_1 (s, 0);
|
||||
}
|
||||
|
||||
/* URL-unescape the string S.
|
||||
|
||||
This function behaves identically to url_unescape(), but does not
|
||||
convert characters from "reserved". In other words, it only converts
|
||||
"unsafe" characters. As with url_unescape(), S is modified in place.
NOTE(review): "%HH" escapes of characters that are neither reserved
nor unsafe appear to be decoded as well, since url_unescape_1 only
skips characters matching the mask -- confirm against urlchr_table. */
|
||||
void
|
||||
url_unescape_except_reserved (char *s)
|
||||
{
|
||||
/* Passing urlchr_reserved makes url_unescape_1 copy the "%HH" text
literally whenever the decoded character tests as reserved. */
url_unescape_1 (s, urlchr_reserved);
|
||||
}
|
||||
|
||||
/* The core of url_escape_* functions. Escapes the characters that
|
||||
match the provided mask in urlchr_table.
|
||||
|
||||
|
@ -106,6 +106,7 @@ struct url
|
||||
char *url_escape (const char *);
|
||||
char *url_escape_unsafe_and_reserved (const char *);
|
||||
void url_unescape (char *);
|
||||
void url_unescape_except_reserved (char *);
|
||||
|
||||
struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
|
||||
char *url_error (const char *, int);
|
||||
|
Loading…
Reference in New Issue
Block a user