mirror of
https://github.com/moparisthebest/curl
synced 2025-01-09 13:08:00 -05:00
urlapi: "normalize" numerical IPv4 host names
When the host name in a URL is given as an IPv4 numerical address, the address can be specified with dotted numericals in four different ways: a (32 bits), a.b (8.24 bits), a.b.c (8.8.16 bits) or a.b.c.d (8.8.8.8 bits), and each part can be specified in decimal, octal (0-prefixed) or hexadecimal (0x-prefixed). Instead of passing on the name as-is and leaving the handling to the underlying name resolver functions, which made such names fail with c-ares but work with getaddrinfo, this change makes the curl URL API itself detect and "normalize" host names specified as IPv4 numericals. The WHATWG URL Spec says this is an okay way to specify a host name in a URL. RFC 3986 does not allow them, but curl didn't prevent them before and it seems other RFC 3986-using tools have not either. Host names used like this are widely supported by other tools as well, due to the handling being done by getaddrinfo and friends. I decided to add the functionality into the URL API itself so that all users of these functions get the benefits, for example when wanting to compare two URLs. Also, it makes curl built to use c-ares support them as well, and makes curl builds more consistent. The normalization makes HTTPS and virtual hosted HTTP work fine even when curl gets the address specified using one of the "obscure" formats. Test 1560 is extended to verify. Fixes #6863 Closes #6871
This commit is contained in:
parent
2426fa49ea
commit
56a037cc0a
90
lib/urlapi.c
90
lib/urlapi.c
@ -5,7 +5,7 @@
|
|||||||
* | (__| |_| | _ <| |___
|
* | (__| |_| | _ <| |___
|
||||||
* \___|\___/|_| \_\_____|
|
* \___|\___/|_| \_\_____|
|
||||||
*
|
*
|
||||||
* Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
|
* Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
|
||||||
*
|
*
|
||||||
* This software is licensed as described in the file COPYING, which
|
* This software is licensed as described in the file COPYING, which
|
||||||
* you should have received as part of this distribution. The terms
|
* you should have received as part of this distribution. The terms
|
||||||
@ -667,6 +667,90 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
|
|||||||
|
|
||||||
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
|
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Handle partial IPv4 numerical addresses and different bases, like
|
||||||
|
* '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
|
||||||
|
*
|
||||||
|
* If the given input string is syntactically wrong or any part for example is
|
||||||
|
* too big, this function returns FALSE and doesn't create any output.
|
||||||
|
*
|
||||||
|
* Output the "normalized" version of that input string in plain quad decimal
|
||||||
|
* integers and return TRUE.
|
||||||
|
*/
|
||||||
|
static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
|
||||||
|
{
|
||||||
|
bool done = FALSE;
|
||||||
|
int n = 0;
|
||||||
|
const char *c = hostname;
|
||||||
|
unsigned long parts[4] = {0, 0, 0, 0};
|
||||||
|
|
||||||
|
while(!done) {
|
||||||
|
char *endp;
|
||||||
|
unsigned long l = strtoul(c, &endp, 0);
|
||||||
|
|
||||||
|
/* overflow or nothing parsed at all */
|
||||||
|
if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
#if SIZEOF_LONG > 4
|
||||||
|
/* a value larger than 32 bits */
|
||||||
|
if(l > UINT_MAX)
|
||||||
|
return FALSE;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
parts[n] = l;
|
||||||
|
c = endp;
|
||||||
|
|
||||||
|
switch (*c) {
|
||||||
|
case '.' :
|
||||||
|
if(n == 3)
|
||||||
|
return FALSE;
|
||||||
|
n++;
|
||||||
|
c++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case '\0':
|
||||||
|
done = TRUE;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* this is deemed a valid IPv4 numerical address */
|
||||||
|
|
||||||
|
switch(n) {
|
||||||
|
case 0: /* a -- 32 bits */
|
||||||
|
msnprintf(outp, olen, "%u.%u.%u.%u",
|
||||||
|
parts[0] >> 24, (parts[0] >> 16) & 0xff,
|
||||||
|
(parts[0] >> 8) & 0xff, parts[0] & 0xff);
|
||||||
|
break;
|
||||||
|
case 1: /* a.b -- 8.24 bits */
|
||||||
|
if((parts[0] > 0xff) || (parts[1] > 0xffffff))
|
||||||
|
return FALSE;
|
||||||
|
msnprintf(outp, olen, "%u.%u.%u.%u",
|
||||||
|
parts[0], (parts[1] >> 16) & 0xff,
|
||||||
|
(parts[1] >> 8) & 0xff, parts[1] & 0xff);
|
||||||
|
break;
|
||||||
|
case 2: /* a.b.c -- 8.8.16 bits */
|
||||||
|
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
|
||||||
|
return FALSE;
|
||||||
|
msnprintf(outp, olen, "%u.%u.%u.%u",
|
||||||
|
parts[0], parts[1], (parts[2] >> 8) & 0xff,
|
||||||
|
parts[2] & 0xff);
|
||||||
|
break;
|
||||||
|
case 3: /* a.b.c.d -- 8.8.8.8 bits */
|
||||||
|
if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
|
||||||
|
(parts[3] > 0xff))
|
||||||
|
return FALSE;
|
||||||
|
msnprintf(outp, olen, "%u.%u.%u.%u",
|
||||||
|
parts[0], parts[1], parts[2], parts[3]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
||||||
{
|
{
|
||||||
char *path;
|
char *path;
|
||||||
@ -899,6 +983,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if(hostname) {
|
if(hostname) {
|
||||||
|
char normalized_ipv4[sizeof("255.255.255.255") + 1];
|
||||||
/*
|
/*
|
||||||
* Parse the login details and strip them out of the host name.
|
* Parse the login details and strip them out of the host name.
|
||||||
*/
|
*/
|
||||||
@ -922,6 +1007,9 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
|
||||||
|
u->host = strdup(normalized_ipv4);
|
||||||
|
else
|
||||||
u->host = strdup(hostname);
|
u->host = strdup(hostname);
|
||||||
if(!u->host)
|
if(!u->host)
|
||||||
return CURLUE_OUT_OF_MEMORY;
|
return CURLUE_OUT_OF_MEMORY;
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
* | (__| |_| | _ <| |___
|
* | (__| |_| | _ <| |___
|
||||||
* \___|\___/|_| \_\_____|
|
* \___|\___/|_| \_\_____|
|
||||||
*
|
*
|
||||||
* Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
|
* Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
|
||||||
*
|
*
|
||||||
* This software is licensed as described in the file COPYING, which
|
* This software is licensed as described in the file COPYING, which
|
||||||
* you should have received as part of this distribution. The terms
|
* you should have received as part of this distribution. The terms
|
||||||
@ -323,6 +323,19 @@ static struct testcase get_parts_list[] ={
|
|||||||
};
|
};
|
||||||
|
|
||||||
static struct urltestcase get_url_list[] = {
|
static struct urltestcase get_url_list[] = {
|
||||||
|
/* IPv4 trickeries */
|
||||||
|
{"https://16843009", "https://1.1.1.1/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://0x7f.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://0177.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://0111.02.0x3", "https://73.2.0.3/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://0xff.0xff.0377.255", "https://255.255.255.255/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://1.0xffffff", "https://1.255.255.255/", 0, 0, CURLUE_OK},
|
||||||
|
/* IPv4 numerical overflows or syntax errors will not normalize */
|
||||||
|
{"https://1.0x1000000", "https://1.0x1000000/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://1.2.3.256", "https://1.2.3.256/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://1.2.3.4.5", "https://1.2.3.4.5/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://1.2.0x100.3", "https://1.2.0x100.3/", 0, 0, CURLUE_OK},
|
||||||
|
{"https://4294967296", "https://4294967296/", 0, 0, CURLUE_OK},
|
||||||
/* 40 bytes scheme is the max allowed */
|
/* 40 bytes scheme is the max allowed */
|
||||||
{"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA://hostname/path",
|
{"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA://hostname/path",
|
||||||
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa://hostname/path",
|
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa://hostname/path",
|
||||||
|
Loading…
Reference in New Issue
Block a user