From b67d3ba73e98cba63dc6246ee0da22ea803ec3e8 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Mon, 31 May 2021 08:59:24 +0200 Subject: [PATCH] curl_url_set: reject spaces in URLs w/o CURLU_ALLOW_SPACE They were never officially allowed and slipped in only due to sloppy parsing. Spaces (ascii 32) should be correctly encoded (to %20) before being part of a URL. The new flag bit CURLU_ALLOW_SPACE when a full URL is set, makes libcurl allow spaces. Updated test 1560 to verify. Closes #7073 --- docs/libcurl/curl_url_set.3 | 9 +++++++ docs/libcurl/symbols-in-versions | 1 + include/curl/urlapi.h | 1 + lib/transfer.c | 3 ++- lib/urlapi.c | 20 ++++++++-------- tests/data/test1916 | 2 +- tests/data/test1917 | 2 +- tests/libtest/lib1560.c | 41 +++++++++++++++++++++++++++----- 8 files changed, 60 insertions(+), 19 deletions(-) diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 index 8d77a8a03..5c795bde0 100644 --- a/docs/libcurl/curl_url_set.3 +++ b/docs/libcurl/curl_url_set.3 @@ -125,6 +125,15 @@ When set for \fBCURLUPART_URL\fP, this makes libcurl skip the normalization of the path. That's the procedure where curl otherwise removes sequences of dot-slash and dot-dot etc. The same option used for transfers is called \fICURLOPT_PATH_AS_IS(3)\fP. +.IP CURLU_ALLOW_SPACE +If set, the URL parser allows space (ASCII 32) where possible. The URL +syntax does not normally allow spaces anywhere; they should be encoded as +%20 or '+'. When spaces are allowed, they are still not allowed in the scheme. +When space is used and allowed in a URL, it will be stored as-is unless +\fICURLU_URLENCODE\fP is also set, which then makes libcurl URL-encode the +space before it is stored. This affects how the URL will be constructed when +\fIcurl_url_get(3)\fP is subsequently used to extract the full URL or +individual parts. .SH RETURN VALUE Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went fine. 
diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions index 1684284cd..2dc0af2c9 100644 --- a/docs/libcurl/symbols-in-versions +++ b/docs/libcurl/symbols-in-versions @@ -865,6 +865,7 @@ CURLUSESSL_ALL 7.17.0 CURLUSESSL_CONTROL 7.17.0 CURLUSESSL_NONE 7.17.0 CURLUSESSL_TRY 7.17.0 +CURLU_ALLOW_SPACE 7.78.0 CURLU_APPENDQUERY 7.62.0 CURLU_DEFAULT_PORT 7.62.0 CURLU_DEFAULT_SCHEME 7.62.0 diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index 7343cb659..1eadc6e3a 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -79,6 +79,7 @@ typedef enum { #define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */ #define CURLU_NO_AUTHORITY (1<<10) /* Allow empty authority when the scheme is unknown. */ +#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */ typedef struct Curl_URL CURLU; diff --git a/lib/transfer.c b/lib/transfer.c index 097d38d79..76aa61c47 100644 --- a/lib/transfer.c +++ b/lib/transfer.c @@ -1639,7 +1639,8 @@ CURLcode Curl_follow(struct Curl_easy *data, DEBUGASSERT(data->state.uh); uc = curl_url_set(data->state.uh, CURLUPART_URL, newurl, (type == FOLLOW_FAKE) ? CURLU_NON_SUPPORT_SCHEME : - ((type == FOLLOW_REDIR) ? CURLU_URLENCODE : 0) ); + ((type == FOLLOW_REDIR) ? 
CURLU_URLENCODE : 0) | + CURLU_ALLOW_SPACE); if(uc) { if(type != FOLLOW_FAKE) return Curl_uc_to_curlcode(uc); diff --git a/lib/urlapi.c b/lib/urlapi.c index 6483208ec..d6d92cf77 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -131,7 +131,7 @@ static const char *find_host_sep(const char *url) */ static bool urlchar_needs_escaping(int c) { - return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); + return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); } /* @@ -580,7 +580,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname, } /* scan for byte values < 31 or 127 */ -static CURLUcode junkscan(const char *part) +static bool junkscan(const char *part, unsigned int flags) { if(part) { static const char badbytes[]={ @@ -588,17 +588,18 @@ static CURLUcode junkscan(const char *part) 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, - 0x7f, - 0x00 /* null-terminate */ + 0x7f, 0x00 /* null-terminate */ }; size_t n = strlen(part); size_t nfine = strcspn(part, badbytes); if(nfine != n) /* since we don't know which part is scanned, return a generic error code */ - return CURLUE_MALFORMED_INPUT; + return TRUE; + if(!(flags & CURLU_ALLOW_SPACE) && strchr(part, ' ')) + return TRUE; } - return CURLUE_OK; + return FALSE; } static CURLUcode hostname_check(struct Curl_URL *u, char *hostname) @@ -884,9 +885,8 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) !(flags & CURLU_NON_SUPPORT_SCHEME)) return CURLUE_UNSUPPORTED_SCHEME; - if(junkscan(schemep)) + if(junkscan(schemep, flags)) return CURLUE_MALFORMED_INPUT; - } else { /* no scheme! 
*/ @@ -927,7 +927,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) } } - if(junkscan(path)) + if(junkscan(path, flags)) return CURLUE_MALFORMED_INPUT; if((flags & CURLU_URLENCODE) && path[0]) { @@ -991,7 +991,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) /* * Parse the login details and strip them out of the host name. */ - if(junkscan(hostname)) + if(junkscan(hostname, flags)) return CURLUE_MALFORMED_INPUT; result = parse_hostname_login(u, &hostname, flags); diff --git a/tests/data/test1916 b/tests/data/test1916 index 2e495f1d0..afde1cc02 100644 --- a/tests/data/test1916 +++ b/tests/data/test1916 @@ -33,7 +33,7 @@ MQTT PUBLISH with no POSTFIELDSIZE set lib%TESTNUMBER -"mqtt://%HOSTIP:%MQTTPORT/ " +"mqtt://%HOSTIP:%MQTTPORT/%20" diff --git a/tests/data/test1917 b/tests/data/test1917 index 22b981a7d..a2cb981b7 100644 --- a/tests/data/test1917 +++ b/tests/data/test1917 @@ -36,7 +36,7 @@ MQTT PUBLISH with CURLOPT_POST set (no payload) lib%TESTNUMBER -"mqtt://%HOSTIP:%MQTTPORT/ " +"mqtt://%HOSTIP:%MQTTPORT/%20" diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 3285df0eb..b822004ad 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -129,6 +129,37 @@ struct querycase { }; static struct testcase get_parts_list[] ={ + {"https://user:password@example.net/get?this=and what", "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, + {"https://user:password@example.net/ge t?this=and-what", "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, + {"https://user:pass word@example.net/get?this=and-what", "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, + {"https://u ser:password@example.net/get?this=and-what", "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, + /* no space allowed in scheme */ + {"htt ps://user:password@example.net/get?this=and-what", "", + CURLU_NON_SUPPORT_SCHEME|CURLU_ALLOW_SPACE, 0, CURLUE_MALFORMED_INPUT}, + 
{"https://user:password@example.net/get?this=and what", + "https | user | password | [13] | example.net | [15] | /get | " + "this=and what | [17]", + CURLU_ALLOW_SPACE, 0, CURLUE_OK}, + {"https://user:password@example.net/ge t?this=and-what", + "https | user | password | [13] | example.net | [15] | /ge t | " + "this=and-what | [17]", + CURLU_ALLOW_SPACE, 0, CURLUE_OK}, + {"https://user:pass word@example.net/get?this=and-what", + "https | user | pass word | [13] | example.net | [15] | /get | " + "this=and-what | [17]", + CURLU_ALLOW_SPACE, 0, CURLUE_OK}, + {"https://u ser:password@example.net/get?this=and-what", + "https | u ser | password | [13] | example.net | [15] | /get | " + "this=and-what | [17]", + CURLU_ALLOW_SPACE, 0, CURLUE_OK}, + {"https://user:password@example.net/ge t?this=and-what", + "https | user | password | [13] | example.net | [15] | /ge%20t | " + "this=and-what | [17]", + CURLU_ALLOW_SPACE | CURLU_URLENCODE, 0, CURLUE_OK}, {"[::1]", "http | [11] | [12] | [13] | [::1] | [15] | / | [16] | [17]", CURLU_GUESS_SCHEME, 0, CURLUE_OK }, @@ -253,11 +284,9 @@ static struct testcase get_parts_list[] ={ {"https://127abc.com", "https | [11] | [12] | [13] | 127abc.com | [15] | / | [16] | [17]", CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, - {"https:// example.com?check", - "", + {"https:// example.com?check", "", CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, - {"https://e x a m p l e.com?check", - "", + {"https://e x a m p l e.com?check", "", CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, {"https://example.com?check", "https | [11] | [12] | [13] | example.com | [15] | / | check | [17]", @@ -385,8 +414,8 @@ static struct urltestcase get_url_list[] = { CURLU_GUESS_SCHEME, 0, CURLUE_OK}, {"HTTP://test/", "http://test/", 0, 0, CURLUE_OK}, {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK}, - {"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, - {"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, + {"http:/@example.com: 123/", 
"", 0, 0, CURLUE_MALFORMED_INPUT}, + {"http:/@example.com:123 /", "", 0, 0, CURLUE_MALFORMED_INPUT}, {"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, {"http://host/file\r", "", 0, 0, CURLUE_MALFORMED_INPUT}, {"http://host/file\n\x03", "", 0, 0, CURLUE_MALFORMED_INPUT},