urlapi: guess scheme correctly even with credentials given

In the "scheme-less" parsing case, we need to strip off the credentials
before we guess the scheme based on the host name!

Assisted-by: Jay Satiro
Fixes #4856
Closes #4857
This commit is contained in:
Daniel Stenberg 2020-01-27 17:28:40 +01:00
parent 0b030a5b23
commit d3dc0a07e9
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
2 changed files with 46 additions and 32 deletions

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___ * | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____| * \___|\___/|_| \_\_____|
* *
* Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
* *
* This software is licensed as described in the file COPYING, which * This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms * you should have received as part of this distribution. The terms
@ -428,7 +428,6 @@ static char *concat_url(const char *base, const char *relurl)
* *
*/ */
static CURLUcode parse_hostname_login(struct Curl_URL *u, static CURLUcode parse_hostname_login(struct Curl_URL *u,
const struct Curl_handler *h,
char **hostname, char **hostname,
unsigned int flags) unsigned int flags)
{ {
@ -437,6 +436,7 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
char *userp = NULL; char *userp = NULL;
char *passwdp = NULL; char *passwdp = NULL;
char *optionsp = NULL; char *optionsp = NULL;
const struct Curl_handler *h = NULL;
/* At this point, we're hoping all the other special cases have /* At this point, we're hoping all the other special cases have
* been taken care of, so conn->host.name is at most * been taken care of, so conn->host.name is at most
@ -456,6 +456,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u,
* ftp://user:password@ftp.my.site:8021/README */ * ftp://user:password@ftp.my.site:8021/README */
*hostname = ++ptr; *hostname = ++ptr;
/* if this is a known scheme, get some details */
if(u->scheme)
h = Curl_builtin_scheme(u->scheme);
/* We could use the login information in the URL so extract it. Only parse /* We could use the login information in the URL so extract it. Only parse
options if the handler says we should. Note that 'h' might be NULL! */ options if the handler says we should. Note that 'h' might be NULL! */
ccode = Curl_parse_login_details(login, ptr - login - 1, ccode = Curl_parse_login_details(login, ptr - login - 1,
@ -571,7 +575,7 @@ UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, char *hostname)
} }
/* scan for byte values < 31 or 127 */ /* scan for byte values < 31 or 127 */
static CURLUcode junkscan(char *part) static CURLUcode junkscan(const char *part)
{ {
if(part) { if(part) {
static const char badbytes[]={ static const char badbytes[]={
@ -668,10 +672,9 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
CURLUcode result; CURLUcode result;
bool url_has_scheme = FALSE; bool url_has_scheme = FALSE;
char schemebuf[MAX_SCHEME_LEN + 1]; char schemebuf[MAX_SCHEME_LEN + 1];
char *schemep = NULL; const char *schemep = NULL;
size_t schemelen = 0; size_t schemelen = 0;
size_t urllen; size_t urllen;
const struct Curl_handler *h = NULL;
if(!url) if(!url)
return CURLUE_MALFORMED_INPUT; return CURLUE_MALFORMED_INPUT;
@ -798,7 +801,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME)))
return CURLUE_MALFORMED_INPUT; return CURLUE_MALFORMED_INPUT;
if(flags & CURLU_DEFAULT_SCHEME) if(flags & CURLU_DEFAULT_SCHEME)
schemep = (char *) DEFAULT_SCHEME; schemep = DEFAULT_SCHEME;
/* /*
* The URL was badly formatted, let's try without scheme specified. * The URL was badly formatted, let's try without scheme specified.
@ -820,36 +823,17 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return CURLUE_MALFORMED_INPUT; return CURLUE_MALFORMED_INPUT;
} }
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
schemep = (char *)"ftp";
else if(checkprefix("dict.", hostname))
schemep = (char *)"dict";
else if(checkprefix("ldap.", hostname))
schemep = (char *)"ldap";
else if(checkprefix("imap.", hostname))
schemep = (char *)"imap";
else if(checkprefix("smtp.", hostname))
schemep = (char *)"smtp";
else if(checkprefix("pop3.", hostname))
schemep = (char *)"pop3";
else
schemep = (char *)"http";
}
len = strlen(p); len = strlen(p);
memcpy(path, p, len); memcpy(path, p, len);
path[len] = 0; path[len] = 0;
u->scheme = strdup(schemep); if(schemep) {
if(!u->scheme) u->scheme = strdup(schemep);
return CURLUE_OUT_OF_MEMORY; if(!u->scheme)
return CURLUE_OUT_OF_MEMORY;
}
} }
/* if this is a known scheme, get some details */
h = Curl_builtin_scheme(u->scheme);
if(junkscan(path)) if(junkscan(path))
return CURLUE_MALFORMED_INPUT; return CURLUE_MALFORMED_INPUT;
@ -916,7 +900,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
if(junkscan(hostname)) if(junkscan(hostname))
return CURLUE_MALFORMED_INPUT; return CURLUE_MALFORMED_INPUT;
result = parse_hostname_login(u, h, &hostname, flags); result = parse_hostname_login(u, &hostname, flags);
if(result) if(result)
return result; return result;
@ -936,6 +920,28 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
u->host = strdup(hostname); u->host = strdup(hostname);
if(!u->host) if(!u->host)
return CURLUE_OUT_OF_MEMORY; return CURLUE_OUT_OF_MEMORY;
if((flags & CURLU_GUESS_SCHEME) && !schemep) {
/* legacy curl-style guess based on host name */
if(checkprefix("ftp.", hostname))
schemep = "ftp";
else if(checkprefix("dict.", hostname))
schemep = "dict";
else if(checkprefix("ldap.", hostname))
schemep = "ldap";
else if(checkprefix("imap.", hostname))
schemep = "imap";
else if(checkprefix("smtp.", hostname))
schemep = "smtp";
else if(checkprefix("pop3.", hostname))
schemep = "pop3";
else
schemep = "http";
u->scheme = strdup(schemep);
if(!u->scheme)
return CURLUE_OUT_OF_MEMORY;
}
} }
Curl_safefree(u->scratch); Curl_safefree(u->scratch);

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___ * | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____| * \___|\___/|_| \_\_____|
* *
* Copyright (C) 1998 - 2019, Daniel Stenberg, <daniel@haxx.se>, et al. * Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
* *
* This software is licensed as described in the file COPYING, which * This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms * you should have received as part of this distribution. The terms
@ -129,6 +129,14 @@ struct querycase {
}; };
static struct testcase get_parts_list[] ={ static struct testcase get_parts_list[] ={
{"user:moo@ftp.example.com/color/#green?no-black",
"ftp | user | moo | [13] | ftp.example.com | [15] | /color/ | [16] | "
"green?no-black",
CURLU_GUESS_SCHEME, 0, CURLUE_OK },
{"ftp.user:moo@example.com/color/#green?no-black",
"http | ftp.user | moo | [13] | example.com | [15] | /color/ | [16] | "
"green?no-black",
CURLU_GUESS_SCHEME, 0, CURLUE_OK },
#ifdef WIN32 #ifdef WIN32
{"file:/C:\\programs\\foo", {"file:/C:\\programs\\foo",
"file | [11] | [12] | [13] | [14] | [15] | C:\\programs\\foo | [16] | [17]", "file | [11] | [12] | [13] | [14] | [15] | C:\\programs\\foo | [16] | [17]",