curl_url_set: reject spaces in URLs w/o CURLU_ALLOW_SPACE

They were never officially allowed and slipped in only due to sloppy
parsing. Spaces (ascii 32) should be correctly encoded (to %20) before
being part of a URL.

The new flag bit CURLU_ALLOW_SPACE when a full URL is set, makes libcurl
allow spaces.

Updated test 1560 to verify.

Closes #7073
This commit is contained in:
Daniel Stenberg 2021-05-31 08:59:24 +02:00
parent 8f717b6cf0
commit b67d3ba73e
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
8 changed files with 60 additions and 19 deletions

View File

@ -125,6 +125,15 @@ When set for \fBCURLUPART_URL\fP, this makes libcurl skip the normalization of
the path. That's the procedure where curl otherwise removes sequences of
dot-slash and dot-dot etc. The same option used for transfers is called
\fICURLOPT_PATH_AS_IS(3)\fP.
.IP CURLU_ALLOW_SPACE
If set, a the URL parser allows space (ASCII 32) where possible. The URL
syntax does normally not allow spaces anywhere, but they should be encoded as
%20 or '+'. When spaces are allowed, they are still not allowed in the scheme.
When space is used and allowed in a URL, it will be stored as-is unless
\fICURLU_URLENCODE\fP is also set, which then makes libcurl URL-encode the
space before stored. This affects how the URL will be constructed when
\fIcurl_url_get(3)\fP is subsequently used to extract the full URL or
individual parts.
.SH RETURN VALUE
Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went
fine.

View File

@ -865,6 +865,7 @@ CURLUSESSL_ALL 7.17.0
CURLUSESSL_CONTROL 7.17.0
CURLUSESSL_NONE 7.17.0
CURLUSESSL_TRY 7.17.0
CURLU_ALLOW_SPACE 7.78.0
CURLU_APPENDQUERY 7.62.0
CURLU_DEFAULT_PORT 7.62.0
CURLU_DEFAULT_SCHEME 7.62.0

View File

@ -79,6 +79,7 @@ typedef enum {
#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */
#define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the
scheme is unknown. */
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
typedef struct Curl_URL CURLU;

View File

@ -1639,7 +1639,8 @@ CURLcode Curl_follow(struct Curl_easy *data,
DEBUGASSERT(data->state.uh);
uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl,
(type == FOLLOW_FAKE) ? CURLU_NON_SUPPORT_SCHEME :
((type == FOLLOW_REDIR) ? CURLU_URLENCODE : 0) );
((type == FOLLOW_REDIR) ? CURLU_URLENCODE : 0) |
CURLU_ALLOW_SPACE);
if(uc) {
if(type != FOLLOW_FAKE)
return Curl_uc_to_curlcode(uc);

View File

@ -131,7 +131,7 @@ static const char *find_host_sep(const char *url)
*/
static bool urlchar_needs_escaping(int c)
{
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c));
}
/*
@ -580,7 +580,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname,
}
/* scan for byte values < 31 or 127 */
static CURLUcode junkscan(const char *part)
static bool junkscan(const char *part, unsigned int flags)
{
if(part) {
static const char badbytes[]={
@ -588,17 +588,18 @@ static CURLUcode junkscan(const char *part)
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x7f,
0x00 /* null-terminate */
0x7f, 0x00 /* null-terminate */
};
size_t n = strlen(part);
size_t nfine = strcspn(part, badbytes);
if(nfine != n)
/* since we don't know which part is scanned, return a generic error
code */
return CURLUE_MALFORMED_INPUT;
return TRUE;
if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' '))
return TRUE;
}
return CURLUE_OK;
return FALSE;
}
static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
@ -884,9 +885,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
!(flags & CURLU_NON_SUPPORT_SCHEME))
return CURLUE_UNSUPPORTED_SCHEME;
if(junkscan(schemep))
if(junkscan(schemep, flags))
return CURLUE_MALFORMED_INPUT;
}
else {
/* no scheme! */
@ -927,7 +927,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
}
if(junkscan(path))
if(junkscan(path, flags))
return CURLUE_MALFORMED_INPUT;
if((flags & CURLU_URLENCODE) && path[0]) {
@ -991,7 +991,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
/*
* Parse the login details and strip them out of the host name.
*/
if(junkscan(hostname))
if(junkscan(hostname, flags))
return CURLUE_MALFORMED_INPUT;
result = parse_hostname_login(u, &hostname, flags);

View File

@ -33,7 +33,7 @@ MQTT PUBLISH with no POSTFIELDSIZE set
lib%TESTNUMBER
</tool>
<command option="binary-trace">
"mqtt://%HOSTIP:%MQTTPORT/ "
"mqtt://%HOSTIP:%MQTTPORT/%20"
</command>
</client>

View File

@ -36,7 +36,7 @@ MQTT PUBLISH with CURLOPT_POST set (no payload)
lib%TESTNUMBER
</tool>
<command option="binary-trace">
"mqtt://%HOSTIP:%MQTTPORT/ "
"mqtt://%HOSTIP:%MQTTPORT/%20"
</command>
</client>

View File

@ -129,6 +129,37 @@ struct querycase {
};
static struct testcase get_parts_list[] ={
{"https://user:password@example.net/get?this=and what", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
{"https://user:password@example.net/ge t?this=and-what", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
{"https://user:pass word@example.net/get?this=and-what", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
{"https://u ser:password@example.net/get?this=and-what", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
/* no space allowed in scheme */
{"htt ps://user:password@example.net/get?this=and-what", "",
CURLU_NON_SUPPORT_SCHEME|CURLU_ALLOW_SPACE, 0, CURLUE_MALFORMED_INPUT},
{"https://user:password@example.net/get?this=and what",
"https | user | password | [13] | example.net | [15] | /get | "
"this=and what | [17]",
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
{"https://user:password@example.net/ge t?this=and-what",
"https | user | password | [13] | example.net | [15] | /ge t | "
"this=and-what | [17]",
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
{"https://user:pass word@example.net/get?this=and-what",
"https | user | pass word | [13] | example.net | [15] | /get | "
"this=and-what | [17]",
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
{"https://u ser:password@example.net/get?this=and-what",
"https | u ser | password | [13] | example.net | [15] | /get | "
"this=and-what | [17]",
CURLU_ALLOW_SPACE, 0, CURLUE_OK},
{"https://user:password@example.net/ge t?this=and-what",
"https | user | password | [13] | example.net | [15] | /ge%20t | "
"this=and-what | [17]",
CURLU_ALLOW_SPACE | CURLU_URLENCODE, 0, CURLUE_OK},
{"[::1]",
"http | [11] | [12] | [13] | [::1] | [15] | / | [16] | [17]",
CURLU_GUESS_SCHEME, 0, CURLUE_OK },
@ -253,11 +284,9 @@ static struct testcase get_parts_list[] ={
{"https://127abc.com",
"https | [11] | [12] | [13] | 127abc.com | [15] | / | [16] | [17]",
CURLU_DEFAULT_SCHEME, 0, CURLUE_OK},
{"https:// example.com?check",
"",
{"https:// example.com?check", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
{"https://e x a m p l e.com?check",
"",
{"https://e x a m p l e.com?check", "",
CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT},
{"https://example.com?check",
"https | [11] | [12] | [13] | example.com | [15] | / | check | [17]",
@ -385,8 +414,8 @@ static struct urltestcase get_url_list[] = {
CURLU_GUESS_SCHEME, 0, CURLUE_OK},
{"HTTP://test/", "http://test/", 0, 0, CURLUE_OK},
{"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK},
{"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http:/@example.com: 123/", "", 0, 0, CURLUE_MALFORMED_INPUT},
{"http:/@example.com:123 /", "", 0, 0, CURLUE_MALFORMED_INPUT},
{"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},
{"http://host/file\r", "", 0, 0, CURLUE_MALFORMED_INPUT},
{"http://host/file\n\x03", "", 0, 0, CURLUE_MALFORMED_INPUT},