mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
Fixed incorrect handling of reserved chars.
* src/iri.c (do_conversion): Call url_unescape_except_reserved, instead of url_unescape.
* src/url.c (url_unescape_1): New static function.
(url_unescape): Calls url_unescape_1 with mask zero. Preserves same behavior as before. Only code changes.
(url_unescape_except_reserved): New function.
* src/url.h: Added prototype for url_unescape_except_reserved().

When the locale is US-ASCII, URIs that contain special characters in them are converted to IRIs according to RFC 3987, section 3.2 "Converting URIs to IRIs".
This commit is contained in:
parent
b6b1388fb7
commit
b0820d553b
@ -136,7 +136,7 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz
|
|||||||
|
|
||||||
/* iconv() has to work on an unescaped string */
|
/* iconv() has to work on an unescaped string */
|
||||||
in_save = in = xstrndup (in_org, inlen);
|
in_save = in = xstrndup (in_org, inlen);
|
||||||
url_unescape(in);
|
url_unescape_except_reserved (in);
|
||||||
inlen = strlen(in);
|
inlen = strlen(in);
|
||||||
|
|
||||||
len = outlen = inlen * 2;
|
len = outlen = inlen * 2;
|
||||||
|
40
src/url.c
40
src/url.c
@ -161,17 +161,8 @@ static const unsigned char urlchr_table[256] =
|
|||||||
#undef U
|
#undef U
|
||||||
#undef RU
|
#undef RU
|
||||||
|
|
||||||
/* URL-unescape the string S.
|
static void
|
||||||
|
url_unescape_1 (char *s, unsigned char mask)
|
||||||
This is done by transforming the sequences "%HH" to the character
|
|
||||||
represented by the hexadecimal digits HH. If % is not followed by
|
|
||||||
two hexadecimal digits, it is inserted literally.
|
|
||||||
|
|
||||||
The transformation is done in place. If you need the original
|
|
||||||
string intact, make a copy before calling this function. */
|
|
||||||
|
|
||||||
void
|
|
||||||
url_unescape (char *s)
|
|
||||||
{
|
{
|
||||||
char *t = s; /* t - tortoise */
|
char *t = s; /* t - tortoise */
|
||||||
char *h = s; /* h - hare */
|
char *h = s; /* h - hare */
|
||||||
@ -190,6 +181,8 @@ url_unescape (char *s)
|
|||||||
if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
|
if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
|
||||||
goto copychar;
|
goto copychar;
|
||||||
c = X2DIGITS_TO_NUM (h[1], h[2]);
|
c = X2DIGITS_TO_NUM (h[1], h[2]);
|
||||||
|
if (urlchr_test(c, mask))
|
||||||
|
goto copychar;
|
||||||
/* Don't unescape %00 because there is no way to insert it
|
/* Don't unescape %00 because there is no way to insert it
|
||||||
into a C string without effectively truncating it. */
|
into a C string without effectively truncating it. */
|
||||||
if (c == '\0')
|
if (c == '\0')
|
||||||
@ -201,6 +194,31 @@ url_unescape (char *s)
|
|||||||
*t = '\0';
|
*t = '\0';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* URL-unescape the string S.
|
||||||
|
|
||||||
|
This is done by transforming the sequences "%HH" to the character
|
||||||
|
represented by the hexadecimal digits HH. If % is not followed by
|
||||||
|
two hexadecimal digits, it is inserted literally.
|
||||||
|
|
||||||
|
The transformation is done in place. If you need the original
|
||||||
|
string intact, make a copy before calling this function. */
|
||||||
|
void
|
||||||
|
url_unescape (char *s)
|
||||||
|
{
|
||||||
|
url_unescape_1 (s, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* URL-unescape the string S.
|
||||||
|
|
||||||
|
This function behaves identically to url_unescape(), but does not
|
||||||
|
convert characters from "reserved". In other words, it only converts
|
||||||
|
"unsafe" characters. */
|
||||||
|
void
|
||||||
|
url_unescape_except_reserved (char *s)
|
||||||
|
{
|
||||||
|
url_unescape_1 (s, urlchr_reserved);
|
||||||
|
}
|
||||||
|
|
||||||
/* The core of url_escape_* functions. Escapes the characters that
|
/* The core of url_escape_* functions. Escapes the characters that
|
||||||
match the provided mask in urlchr_table.
|
match the provided mask in urlchr_table.
|
||||||
|
|
||||||
|
@ -106,6 +106,7 @@ struct url
|
|||||||
char *url_escape (const char *);
|
char *url_escape (const char *);
|
||||||
char *url_escape_unsafe_and_reserved (const char *);
|
char *url_escape_unsafe_and_reserved (const char *);
|
||||||
void url_unescape (char *);
|
void url_unescape (char *);
|
||||||
|
void url_unescape_except_reserved (char *);
|
||||||
|
|
||||||
struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
|
struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
|
||||||
char *url_error (const char *, int);
|
char *url_error (const char *, int);
|
||||||
|
Loading…
Reference in New Issue
Block a user