1
0
mirror of https://github.com/moparisthebest/curl synced 2024-12-22 16:18:48 -05:00

urlapi: "normalize" numerical IPv4 host names

When the host name in a URL is given as an IPv4 numerical address, the
address can be specified with dotted numericals in four different ways:
a (32 bits), a.b (8.24 bits), a.b.c (8.8.16 bits) or a.b.c.d (8.8.8.8
bits), and each part can be specified in decimal, octal (0-prefixed) or
hexadecimal (0x-prefixed).

Instead of passing on the name as-is and leaving the handling to the
underlying name functions, which made them not work with c-ares but work
with getaddrinfo, this change now makes the curl URL API itself detect
and "normalize" host names specified as IPv4 numericals.

The WHATWG URL Spec says this is an okay way to specify a host name in a
URL. RFC 3986 does not allow them, but curl didn't prevent them before
and it seems other RFC 3986-using tools have not either. Host names used
like this are widely supported by other tools as well due to the
handling being done by getaddrinfo and friends.

I decided to add the functionality into the URL API itself so that all
users of these functions get the benefits, for example when wanting to
compare two URLs. Also, it makes curl built to use c-ares support them
as well, which makes curl builds more consistent.

The normalization makes HTTPS and virtual hosted HTTP work fine even
when curl gets the address specified using one of the "obscure" formats.

Test 1560 is extended to verify.

Fixes #6863
Closes #6871
This commit is contained in:
Daniel Stenberg 2021-04-19 08:34:52 +02:00
parent 2426fa49ea
commit 56a037cc0a
No known key found for this signature in database
GPG Key ID: 5CC908FDB71E12C2
2 changed files with 104 additions and 3 deletions

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -667,6 +667,90 @@ static CURLUcode hostname_check(struct Curl_URL *u, char *hostname)
#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#'))
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1', '0177.1.1.1' etc.
 *
 * hostname - null-terminated host name to inspect
 * outp     - buffer that receives the normalized address
 * olen     - size of 'outp' in bytes
 *
 * Accepted forms are a (32 bits), a.b (8.24 bits), a.b.c (8.8.16 bits) and
 * a.b.c.d (8.8.8.8 bits). Each part may be decimal, octal (0-prefixed) or
 * hexadecimal (0x-prefixed) and must start with a digit: signs and leading
 * whitespace are not part of an IPv4 numerical.
 *
 * If the given input string is syntactically wrong or any part for example is
 * too big, this function returns FALSE and leaves 'outp' untouched.
 *
 * Otherwise it writes the "normalized" version of the input to 'outp' as
 * plain quad decimal integers and returns TRUE.
 */
static bool ipv4_normalize(const char *hostname, char *outp, size_t olen)
{
  bool done = FALSE;
  int n = 0;
  const char *c = hostname;
  unsigned long parts[4] = {0, 0, 0, 0};

  while(!done) {
    char *endp;
    unsigned long l;

    /* strtoul() skips leading whitespace and accepts a '+' or '-' sign, none
       of which is valid here, so insist on a digit before calling it */
    if((*c < '0') || (*c > '9'))
      return FALSE;

    /* strtoul() only sets errno on failure; clear it so that a stale ERANGE
       from earlier code cannot be mistaken for an overflow in this call */
    errno = 0;
    l = strtoul(c, &endp, 0);

    /* overflow or nothing parsed at all */
    if(((l == ULONG_MAX) && (errno == ERANGE)) || (endp == c))
      return FALSE;

#if SIZEOF_LONG > 4
    /* a value larger than 32 bits */
    if(l > UINT_MAX)
      return FALSE;
#endif

    parts[n] = l;
    c = endp;

    switch(*c) {
    case '.':
      /* a dot after the fourth part means too many parts */
      if(n == 3)
        return FALSE;
      n++;
      c++;
      break;

    case '\0':
      done = TRUE;
      break;

    default:
      /* trailing junk after the number */
      return FALSE;
    }
  }

  /* this is deemed a valid IPv4 numerical address; 'n' is the index of the
     last part. The values are cast to unsigned int to match the %u
     conversions, as passing an unsigned long there is undefined behavior. */
  switch(n) {
  case 0: /* a -- 32 bits */
    msnprintf(outp, olen, "%u.%u.%u.%u",
              (unsigned int)(parts[0] >> 24),
              (unsigned int)((parts[0] >> 16) & 0xff),
              (unsigned int)((parts[0] >> 8) & 0xff),
              (unsigned int)(parts[0] & 0xff));
    break;

  case 1: /* a.b -- 8.24 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
      return FALSE;
    msnprintf(outp, olen, "%u.%u.%u.%u",
              (unsigned int)parts[0],
              (unsigned int)((parts[1] >> 16) & 0xff),
              (unsigned int)((parts[1] >> 8) & 0xff),
              (unsigned int)(parts[1] & 0xff));
    break;

  case 2: /* a.b.c -- 8.8.16 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
      return FALSE;
    msnprintf(outp, olen, "%u.%u.%u.%u",
              (unsigned int)parts[0],
              (unsigned int)parts[1],
              (unsigned int)((parts[2] >> 8) & 0xff),
              (unsigned int)(parts[2] & 0xff));
    break;

  case 3: /* a.b.c.d -- 8.8.8.8 bits */
    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
       (parts[3] > 0xff))
      return FALSE;
    msnprintf(outp, olen, "%u.%u.%u.%u",
              (unsigned int)parts[0],
              (unsigned int)parts[1],
              (unsigned int)parts[2],
              (unsigned int)parts[3]);
    break;
  }
  return TRUE;
}
static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
{
char *path;
@ -899,6 +983,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
}
if(hostname) {
char normalized_ipv4[sizeof("255.255.255.255") + 1];
/*
* Parse the login details and strip them out of the host name.
*/
@ -922,7 +1007,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags)
return result;
}
u->host = strdup(hostname);
if(ipv4_normalize(hostname, normalized_ipv4, sizeof(normalized_ipv4)))
u->host = strdup(normalized_ipv4);
else
u->host = strdup(hostname);
if(!u->host)
return CURLUE_OUT_OF_MEMORY;

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2020, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2021, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -323,6 +323,19 @@ static struct testcase get_parts_list[] ={
};
static struct urltestcase get_url_list[] = {
/* IPv4 trickeries */
{"https://16843009", "https://1.1.1.1/", 0, 0, CURLUE_OK},
{"https://0x7f.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
{"https://0177.1", "https://127.0.0.1/", 0, 0, CURLUE_OK},
{"https://0111.02.0x3", "https://73.2.0.3/", 0, 0, CURLUE_OK},
{"https://0xff.0xff.0377.255", "https://255.255.255.255/", 0, 0, CURLUE_OK},
{"https://1.0xffffff", "https://1.255.255.255/", 0, 0, CURLUE_OK},
/* IPv4 numerical overflows or syntax errors will not normalize */
{"https://1.0x1000000", "https://1.0x1000000/", 0, 0, CURLUE_OK},
{"https://1.2.3.256", "https://1.2.3.256/", 0, 0, CURLUE_OK},
{"https://1.2.3.4.5", "https://1.2.3.4.5/", 0, 0, CURLUE_OK},
{"https://1.2.0x100.3", "https://1.2.0x100.3/", 0, 0, CURLUE_OK},
{"https://4294967296", "https://4294967296/", 0, 0, CURLUE_OK},
/* 40 bytes scheme is the max allowed */
{"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA://hostname/path",
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa://hostname/path",