diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 index b2b273f82..95b76bd8c 100644 --- a/docs/libcurl/curl_url_set.3 +++ b/docs/libcurl/curl_url_set.3 @@ -96,6 +96,16 @@ The query part gets space-to-plus conversion before the URL conversion. This URL encoding is charset unaware and will convert the input on a byte-by-byte manner. +.IP CURLU_DEFAULT_SCHEME +If set, will make libcurl allow the URL to be set without a scheme and then +sets that to the default scheme: HTTPS. Overrides the \fICURLU_GUESS_SCHEME\fP +option if both are set. +.IP CURLU_GUESS_SCHEME +If set, will make libcurl allow the URL to be set without a scheme and it +instead "guesses" which scheme that was intended based on the host name. If +the outermost sub-domain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP then +that scheme will be used, otherwise it picks HTTP. Conflicts with the +\fICURLU_DEFAULT_SCHEME\fP option which takes precedence if both are set. .SH RETURN VALUE Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went fine. 
diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h index b16cfce56..319de35b7 100644 --- a/include/curl/urlapi.h +++ b/include/curl/urlapi.h @@ -75,6 +75,7 @@ typedef enum { #define CURLU_URLDECODE (1<<6) /* URL decode on get */ #define CURLU_URLENCODE (1<<7) /* URL encode on set */ #define CURLU_APPENDQUERY (1<<8) /* append a form style part */ +#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */ typedef struct Curl_URL CURLU; diff --git a/lib/urlapi.c b/lib/urlapi.c index f6d911667..ef565d98d 100644 --- a/lib/urlapi.c +++ b/lib/urlapi.c @@ -554,7 +554,7 @@ static CURLUcode junkscan(char *part) static CURLUcode hostname_check(char *hostname, unsigned int flags) { - const char *l; /* accepted characters */ + const char *l = NULL; /* accepted characters */ size_t len; size_t hlen = strlen(hostname); (void)flags; @@ -564,14 +564,21 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags) l = "0123456789abcdefABCDEF::."; hlen -= 2; } - else /* % for URL escaped letters */ - l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%"; - - len = strspn(hostname, l); - if(hlen != len) - /* hostname with bad content */ - return CURLUE_MALFORMED_INPUT; + if(l) { + /* only valid letters are ok */ + len = strspn(hostname, l); + if(hlen != len) + /* hostname with bad content */ + return CURLUE_MALFORMED_INPUT; + } + else { + /* letters from the second string is not ok */ + len = strcspn(hostname, " "); + if(hlen != len) + /* hostname with bad content */ + return CURLUE_MALFORMED_INPUT; + } return CURLUE_OK; } @@ -587,7 +594,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) CURLUcode result; bool url_has_scheme = FALSE; char schemebuf[MAX_SCHEME_LEN]; - char *schemep; + char *schemep = NULL; size_t schemelen = 0; size_t urllen; const struct Curl_handler *h = NULL; @@ -723,9 +730,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) else { /* no scheme! 
*/ - if(!(flags & CURLU_DEFAULT_SCHEME)) + if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) return CURLUE_MALFORMED_INPUT; - schemep = (char *) DEFAULT_SCHEME; + if(flags & CURLU_DEFAULT_SCHEME) + schemep = (char *) DEFAULT_SCHEME; /* * The URL was badly formatted, let's try without scheme specified. @@ -744,6 +752,24 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) memcpy(hostname, hostp, len); hostname[len] = 0; + if((flags & CURLU_GUESS_SCHEME) && !schemep) { + /* legacy curl-style guess based on host name */ + if(checkprefix("ftp.", hostname)) + schemep = (char *)"ftp"; + else if(checkprefix("dict.", hostname)) + schemep = (char *)"dict"; + else if(checkprefix("ldap.", hostname)) + schemep = (char *)"ldap"; + else if(checkprefix("imap.", hostname)) + schemep = (char *)"imap"; + else if(checkprefix("smtp.", hostname)) + schemep = (char *)"smtp"; + else if(checkprefix("pop3.", hostname)) + schemep = (char *)"pop3"; + else + schemep = (char *)"http"; + } + len = strlen(p); memcpy(path, p, len); path[len] = 0; diff --git a/tests/data/test1560 b/tests/data/test1560 index 720df036f..4b6c97a53 100644 --- a/tests/data/test1560 +++ b/tests/data/test1560 @@ -16,6 +16,12 @@ none file https http +pop3 +smtp +imap +ldap +dict +ftp URL API diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c index 669ea9ada..30fb582a2 100644 --- a/tests/libtest/lib1560.c +++ b/tests/libtest/lib1560.c @@ -246,8 +246,32 @@ static struct testcase get_parts_list[] ={ }; static struct urltestcase get_url_list[] = { + {"smtp.example.com/path/html", + "smtp://smtp.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"https.example.com/path/html", + "http://https.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"dict.example.com/path/html", + "dict://dict.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"pop3.example.com/path/html", + "pop3://pop3.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + 
{"ldap.example.com/path/html", + "ldap://ldap.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"imap.example.com/path/html", + "imap://imap.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"ftp.example.com/path/html", + "ftp://ftp.example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"example.com/path/html", + "http://example.com/path/html", + CURLU_GUESS_SCHEME, 0, CURLUE_OK}, {"HTTP://test/", "http://test/", 0, 0, CURLUE_OK}, - {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT}, + {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK}, {"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, {"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, {"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER},