mirror of
https://github.com/moparisthebest/curl
synced 2024-12-23 16:48:49 -05:00
curl_url_set: reject spaces in URLs w/o CURLU_ALLOW_SPACE
They were never officially allowed and slipped in only due to sloppy parsing. Spaces (ASCII 32) should be correctly encoded (to %20) before being part of a URL. The new flag bit CURLU_ALLOW_SPACE, when used while setting a full URL, makes libcurl allow spaces. Updated test 1560 to verify. Closes #7073
This commit is contained in:
parent
8f717b6cf0
commit
b67d3ba73e
@ -125,6 +125,15 @@ When set for \fBCURLUPART_URL\fP, this makes libcurl skip the normalization of
|
||||
the path. That's the procedure where curl otherwise removes sequences of
|
||||
dot-slash and dot-dot etc. The same option used for transfers is called
|
||||
\fICURLOPT_PATH_AS_IS(3)\fP.
|
||||
.IP CURLU_ALLOW_SPACE
|
||||
If set, the URL parser allows space (ASCII 32) where possible. The URL
|
||||
syntax normally does not allow spaces anywhere; they should instead be encoded as
|
||||
%20 or '+'. When spaces are allowed, they are still not allowed in the scheme.
|
||||
When space is used and allowed in a URL, it will be stored as-is unless
|
||||
\fICURLU_URLENCODE\fP is also set, which then makes libcurl URL-encode the
|
||||
space before it is stored. This affects how the URL will be constructed when
|
||||
\fIcurl_url_get(3)\fP is subsequently used to extract the full URL or
|
||||
individual parts.
|
||||
.SH RETURN VALUE
|
||||
Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went
|
||||
fine.
|
||||
|
@ -865,6 +865,7 @@ CURLUSESSL_ALL 7.17.0
|
||||
CURLUSESSL_CONTROL 7.17.0
|
||||
CURLUSESSL_NONE 7.17.0
|
||||
CURLUSESSL_TRY 7.17.0
|
||||
CURLU_ALLOW_SPACE 7.78.0
|
||||
CURLU_APPENDQUERY 7.62.0
|
||||
CURLU_DEFAULT_PORT 7.62.0
|
||||
CURLU_DEFAULT_SCHEME 7.62.0
|
||||
|
@ -79,6 +79,7 @@ typedef enum {
|
||||
#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */
|
||||
#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the
|
||||
scheme is unknown. */
|
||||
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
|
||||
|
||||
typedef struct Curl_URL CURLU;
|
||||
|
||||
|
@ -1639,7 +1639,8 @@ CURLcode Curl_follow(struct Curl_easy *data,
|
||||
DEBUGASSERT(data->state.uh);
|
||||
uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl,
|
||||
(type == FOLLOW_FAKE) ? CURLU_NON_SUPPORT_SCHEME :
|
||||
((type == FOLLOW_REDIR) ? CURLU_URLENCODE : 0) );
|
||||
((type == FOLLOW_REDIR) ? CURLU_URLENCODE : 0) |
|
||||
CURLU_ALLOW_SPACE);
|
||||
if(uc) {
|
||||
if(type != FOLLOW_FAKE)
|
||||
return Curl_uc_to_curlcode(uc);
|
||||
|
20
lib/urlapi.c
20
lib/urlapi.c
@ -131,7 +131,7 @@ static const char *find_host_sep(const char *url)
|
||||
*/
|
||||
static bool urlchar_needs_escaping(int c)
|
||||
{
|
||||
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
|
||||
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -580,7 +580,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
|
||||
}
|
||||
|
||||
/* scan for byte values <= 31 or 127 */
|
||||
static CURLUcode junkscan(const char *part)
|
||||
static bool junkscan(const char *part, unsigned int flags)
|
||||
{
|
||||
if(part) {
|
||||
static const char badbytes[]={
|
||||
@ -588,17 +588,18 @@ static CURLUcode junkscan(const char *part)
|
||||
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
||||
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x7f,
|
||||
0x00 /* null-terminate */
|
||||
0x7f, 0x00 /* null-terminate */
|
||||
};
|
||||
size_t n = strlen(part);
|
||||
size_t nfine = strcspn(part, badbytes);
|
||||
if(nfine != n)
|
||||
/* since we don't know which part is scanned, return a generic error
|
||||
code */
|
||||
return CURLUE_MALFORMED_INPUT;
|
||||
return TRUE;
|
||||
if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
|
||||
return TRUE;
|
||||
}
|
||||
return CURLUE_OK;
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
|
||||
@ -884,9 +885,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
||||
!(flags & CURLU_NON_SUPPORT_SCHEME))
|
||||
return CURLUE_UNSUPPORTED_SCHEME;
|
||||
|
||||
if(junkscan(schemep))
|
||||
if(junkscan(schemep, flags))
|
||||
return CURLUE_MALFORMED_INPUT;
|
||||
|
||||
}
|
||||
else {
|
||||
/* no scheme! */
|
||||
@ -927,7 +927,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
||||
}
|
||||
}
|
||||
|
||||
if(junkscan(path))
|
||||
if(junkscan(path, flags))
|
||||
return CURLUE_MALFORMED_INPUT;
|
||||
|
||||
if((flags & CURLU_URLENCODE) && path[0]) {
|
||||
@ -991,7 +991,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
||||
/*
|
||||
* Parse the login details and strip them out of the host name.
|
||||
*/
|
||||
if(junkscan(hostname))
|
||||
if(junkscan(hostname, flags))
|
||||
return CURLUE_MALFORMED_INPUT;
|
||||
|
||||
result = parse_hostname_login(u, &hostname, flags);
|
||||
|
@ -33,7 +33,7 @@ MQTT PUBLISH with no POSTFIELDSIZE set
|
||||
lib%TESTNUMBER
|
||||
</tool>
|
||||
<command option="binary-trace">
|
||||
"mqtt://%HOSTIP:%MQTTPORT/ "
|
||||
"mqtt://%HOSTIP:%MQTTPORT/%20"
|
||||
</command>
|
||||
</client>
|
||||
|
||||
|
@ -36,7 +36,7 @@ MQTT PUBLISH with CURLOPT_POST set (no payload)
|
||||
lib%TESTNUMBER
|
||||
</tool>
|
||||
<command option="binary-trace">
|
||||
"mqtt://%HOSTIP:%MQTTPORT/ "
|
||||
"mqtt://%HOSTIP:%MQTTPORT/%20"
|
||||
</command>
|
||||
</client>
|
||||
|
||||
|
@ -129,6 +129,37 @@ struct querycase {
|
||||
};
|
||||
|
||||
static struct testcase get_parts_list[] ={
|
||||
{"https://user:password@example.net/get?this=and what", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://user:password@example.net/ge t?this=and-what", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://user:pass word@example.net/get?this=and-what", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://u ser:password@example.net/get?this=and-what", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
/* no space allowed in scheme */
|
||||
{"htt ps://user:password@example.net/get?this=and-what", "",
|
||||
CURLU_NON_SUPPORT_SCHEME|CURLU_ALLOW_SPACE, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://user:password@example.net/get?this=and what",
|
||||
"https | user | password | [13] | example.net | [15] | /get | "
|
||||
"this=and what | [17]",
|
||||
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
{"https://user:password@example.net/ge t?this=and-what",
|
||||
"https | user | password | [13] | example.net | [15] | /ge t | "
|
||||
"this=and-what | [17]",
|
||||
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
{"https://user:pass word@example.net/get?this=and-what",
|
||||
"https | user | pass word | [13] | example.net | [15] | /get | "
|
||||
"this=and-what | [17]",
|
||||
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
{"https://u ser:password@example.net/get?this=and-what",
|
||||
"https | u ser | password | [13] | example.net | [15] | /get | "
|
||||
"this=and-what | [17]",
|
||||
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
|
||||
{"https://user:password@example.net/ge t?this=and-what",
|
||||
"https | user | password | [13] | example.net | [15] | /ge%20t | "
|
||||
"this=and-what | [17]",
|
||||
CURLU_ALLOW_SPACE | CURLU_URLENCODE, 0, CURLUE_OK},
|
||||
{"[::1]",
|
||||
"http | [11] | [12] | [13] | [::1] | [15] | / | [16] | [17]",
|
||||
CURLU_GUESS_SCHEME, 0, CURLUE_OK },
|
||||
@ -253,11 +284,9 @@ static struct testcase get_parts_list[] ={
|
||||
{"https://127abc.com",
|
||||
"https | [11] | [12] | [13] | 127abc.com | [15] | / | [16] | [17]",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
|
||||
{"https:// example.com?check",
|
||||
"",
|
||||
{"https:// example.com?check", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://e x a m p l e.com?check",
|
||||
"",
|
||||
{"https://e x a m p l e.com?check", "",
|
||||
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"https://example.com?check",
|
||||
"https | [11] | [12] | [13] | example.com | [15] | / | check | [17]",
|
||||
@ -385,8 +414,8 @@ static struct urltestcase get_url_list[] = {
|
||||
CURLU_GUESS_SCHEME, 0, CURLUE_OK},
|
||||
{"HTTP://test/", "http://test/", 0, 0, CURLUE_OK},
|
||||
{"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK},
|
||||
{"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
|
||||
{"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
|
||||
{"http:/@example.com: 123/", "", 0, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"http:/@example.com:123 /", "", 0, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
|
||||
{"http://host/file\r", "", 0, 0, CURLUE_MALFORMED_INPUT},
|
||||
{"http://host/file\n\x03", "", 0, 0, CURLUE_MALFORMED_INPUT},
|
||||
|
Loading…
Reference in New Issue
Block a user