1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

Fixed incorrect handling of reserved chars.

* src/iri.c (do_conversion): Call url_unescape_except_reserved,
instead of url_unescape.

* src/url.c (url_unescape_1): New static function.
(url_unescape): Now calls url_unescape_1 with a zero mask. Behavior
is unchanged; this is a pure refactoring.
(url_unescape_except_reserved): New function.

* src/url.h: Added prototype for url_unescape_except_reserved().

When the locale is US-ASCII, URIs that contain special characters
in them are converted to IRIs according to RFC 3987, section 3.2
"Converting URIs to IRIs".
This commit is contained in:
Ander Juaristi 2015-04-13 16:28:36 +02:00 committed by Tim Rühsen
parent b6b1388fb7
commit b0820d553b
3 changed files with 31 additions and 12 deletions

View File

@ -136,7 +136,7 @@ do_conversion (const char *tocode, const char *fromcode, char const *in_org, siz
/* iconv() has to work on an unescaped string */ /* iconv() has to work on an unescaped string */
in_save = in = xstrndup (in_org, inlen); in_save = in = xstrndup (in_org, inlen);
url_unescape(in); url_unescape_except_reserved (in);
inlen = strlen(in); inlen = strlen(in);
len = outlen = inlen * 2; len = outlen = inlen * 2;

View File

@ -161,17 +161,8 @@ static const unsigned char urlchr_table[256] =
#undef U #undef U
#undef RU #undef RU
/* URL-unescape the string S. static void
url_unescape_1 (char *s, unsigned char mask)
This is done by transforming the sequences "%HH" to the character
represented by the hexadecimal digits HH. If % is not followed by
two hexadecimal digits, it is inserted literally.
The transformation is done in place. If you need the original
string intact, make a copy before calling this function. */
void
url_unescape (char *s)
{ {
char *t = s; /* t - tortoise */ char *t = s; /* t - tortoise */
char *h = s; /* h - hare */ char *h = s; /* h - hare */
@ -190,6 +181,8 @@ url_unescape (char *s)
if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2]))) if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
goto copychar; goto copychar;
c = X2DIGITS_TO_NUM (h[1], h[2]); c = X2DIGITS_TO_NUM (h[1], h[2]);
if (urlchr_test(c, mask))
goto copychar;
/* Don't unescape %00 because there is no way to insert it /* Don't unescape %00 because there is no way to insert it
into a C string without effectively truncating it. */ into a C string without effectively truncating it. */
if (c == '\0') if (c == '\0')
@ -201,6 +194,31 @@ url_unescape (char *s)
*t = '\0'; *t = '\0';
} }
/* Decode the percent-escapes in S, in place.
   Every "%HH" sequence, where HH are two hexadecimal digits, is
   replaced by the single character with that value.  A '%' that is
   not followed by two hexadecimal digits is kept literally, and "%00"
   is not decoded because a NUL would truncate the C string.
   S is overwritten; callers that still need the original text must
   copy it before calling this function.  */
void
url_unescape (char *s)
{
  /* A zero mask asks url_unescape_1 to exempt no character class.  */
  const unsigned char no_exemptions = 0;

  url_unescape_1 (s, no_exemptions);
}
/* Decode the percent-escapes in S, in place, leaving reserved
   characters escaped.
   This function behaves like url_unescape (), except that a "%HH"
   sequence whose decoded value is flagged urlchr_reserved is kept in
   its escaped form.  All other valid escapes are decoded.
   S is overwritten; make a copy first if the original is needed.  */
void
url_unescape_except_reserved (char *s)
{
  /* Character class whose escapes url_unescape_1 must not decode.  */
  const unsigned char keep_escaped = urlchr_reserved;

  url_unescape_1 (s, keep_escaped);
}
/* The core of url_escape_* functions. Escapes the characters that /* The core of url_escape_* functions. Escapes the characters that
match the provided mask in urlchr_table. match the provided mask in urlchr_table.

View File

@ -106,6 +106,7 @@ struct url
char *url_escape (const char *); char *url_escape (const char *);
char *url_escape_unsafe_and_reserved (const char *); char *url_escape_unsafe_and_reserved (const char *);
void url_unescape (char *); void url_unescape (char *);
void url_unescape_except_reserved (char *);
struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode); struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
char *url_error (const char *, int); char *url_error (const char *, int);