HTTP proxy: insert slash in URL if missing

curl has been accepting URLs using slightly wrong syntax for a long
time, such as when completely missing a slash "http://example.org" or
missing a slash when a query part is given
"http://example.org?q=foobar".

curl would translate these into a legitimate HTTP request to servers,
although as was shown in bug #1206 it was not adjusted properly in the
cases where an HTTP proxy was used.

Tests 1213 and 1214 were added to the test suite to verify this fix.

The test HTTP server was adjusted to allow us to specify test number in
the host name only without using any slashes in a given URL.

Bug: http://curl.haxx.se/bug/view.cgi?id=1206
Reported by: ScottJi
This commit is contained in:
Daniel Stenberg 2013-03-15 14:18:16 +01:00
parent b50285d751
commit e4b733e3f1
6 changed files with 197 additions and 9 deletions

View File

@ -3663,6 +3663,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
char protobuf[16];
const char *protop;
CURLcode result;
bool fix_slash = FALSE;
*prot_missing = FALSE;
@ -3809,12 +3810,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
memcpy(path+1, query, hostlen);
path[0]='/'; /* prepend the missing slash */
fix_slash = TRUE;
*query=0; /* now cut off the hostname at the ? */
}
else if(!path[0]) {
/* if there's no path set, use a single slash */
strcpy(path, "/");
fix_slash = TRUE;
}
/* If the URL is malformatted (missing a '/' after hostname before path) we
@ -3827,6 +3830,41 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
is bigger than the path. Use +1 to move the zero byte too. */
memmove(&path[1], path, strlen(path)+1);
path[0] = '/';
fix_slash = TRUE;
}
/*
* "fix_slash" means that the URL was malformatted so we need to generate an
* updated version with the new slash inserted at the right place! We need
* the corrected URL when communicating over HTTP proxy and we don't know at
* this point if we're using a proxy or not.
*/
if(fix_slash) {
char *reurl;
size_t plen = strlen(path); /* new path, should be 1 byte longer than
the original */
size_t urllen = strlen(data->change.url); /* original URL length */
reurl = malloc(urllen + 2); /* 2 for zerobyte + slash */
if(!reurl)
return CURLE_OUT_OF_MEMORY;
/* copy the prefix */
memcpy(reurl, data->change.url, urllen - (plen-1));
/* append the trailing piece + zerobyte */
memcpy(&reurl[urllen - (plen-1)], path, plen + 1);
/* possible free the old one */
if(data->change.url_alloc) {
Curl_safefree(data->change.url);
data->change.url_alloc = FALSE;
}
data->change.url = reurl;
data->change.url_alloc = TRUE; /* free this later */
}
/*************************************************************

View File

@ -250,6 +250,10 @@ If a CONNECT is used to the server (to emulate HTTPS etc over proxy), the port
number given in the CONNECT request will be used to identify which test that
is being run, if the proxy host name is said to start with 'test'.
If there's no non-zero test number found in the above two places, the HTTP test
server will use the number following the last dot in the given URL so that
"foo.bar.123" gets treated as test case 123.
Set type="perl" to write the test case as a perl script. It implies that
there's no memory debugging and valgrind gets shut off for this test.

View File

@ -77,7 +77,7 @@ test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 \
test1118 test1119 test1120 test1121 test1122 test1123 test1124 test1125 \
test1126 test1127 test1128 test1129 test1130 test1131 test1132 test1133 \
test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
test1208 test1209 test1210 test1211 test1212 \
test1208 test1209 test1210 test1211 test1212 test1213 test1214 \
test1220 test1221 test1222 test1223 \
test1300 test1301 test1302 test1303 test1304 test1305 \
test1306 test1307 test1308 test1309 test1310 test1311 test1312 test1313 \

53
tests/data/test1213 Normal file
View File

@ -0,0 +1,53 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
HTTP proxy
</keywords>
</info>
# Server-side
<reply>
<data>
HTTP/1.1 200 OK
Date: Thu, 09 Nov 2010 14:49:00 GMT
Server: test-server/fake
Content-Type: text/html
Funny-head: yesyes
Content-Length: 22
the content goes here
</data>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP with proxy and host-only URL
</name>
# the thing here is that this sloppy form is accepted and we convert it
# for normal server use, and we need to make sure it gets converted to
# RFC style even for proxies
<command>
-x %HOSTIP:%HTTPPORT we.want.that.site.com.1213
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET HTTP://we.want.that.site.com.1213/ HTTP/1.1
Host: we.want.that.site.com.1213
Accept: */*
Proxy-Connection: Keep-Alive
</protocol>
</verify>
</testcase>

53
tests/data/test1214 Normal file
View File

@ -0,0 +1,53 @@
<testcase>
<info>
<keywords>
HTTP
HTTP GET
HTTP proxy
</keywords>
</info>
# Server-side
<reply>
<data>
HTTP/1.1 200 OK
Date: Thu, 09 Nov 2010 14:49:00 GMT
Server: test-server/fake
Content-Type: text/html
Funny-head: yesyes
Content-Length: 22
the content goes here
</data>
</reply>
# Client-side
<client>
<server>
http
</server>
<name>
HTTP with proxy and URL with ? and no slash separator
</name>
# the thing here is that this sloppy form is accepted and we convert it
# for normal server use, and we need to make sure it gets converted to
# RFC style even for proxies
<command>
-x %HOSTIP:%HTTPPORT http://we.want.that.site.com.1214?moo=foo
</command>
</client>
# Verify data after the test has been "shot"
<verify>
<strip>
^User-Agent:.*
</strip>
<protocol>
GET http://we.want.that.site.com.1214/?moo=foo HTTP/1.1
Host: we.want.that.site.com.1214
Accept: */*
Proxy-Connection: Keep-Alive
</protocol>
</verify>
</testcase>

View File

@ -507,15 +507,24 @@ static int ProcessRequest(struct httprequest *req)
else
req->partno = 0;
sprintf(logbuf, "Requested test number %ld part %ld",
req->testno, req->partno);
logmsg("%s", logbuf);
if(req->testno) {
/* find and parse <servercmd> for this test */
parse_servercmd(req);
sprintf(logbuf, "Requested test number %ld part %ld",
req->testno, req->partno);
logmsg("%s", logbuf);
/* find and parse <servercmd> for this test */
parse_servercmd(req);
}
else
req->testno = DOCNUMBER_NOTHING;
}
else {
if(req->testno == DOCNUMBER_NOTHING) {
/* didn't find any in the first scan, try alternative test case
number placements */
if(sscanf(req->reqbuf, "CONNECT %" MAXDOCNAMELEN_TXT "s HTTP/%d.%d",
doc, &prot_major, &prot_minor) == 3) {
char *portp = NULL;
@ -563,8 +572,39 @@ static int ProcessRequest(struct httprequest *req)
parse_servercmd(req);
}
else {
logmsg("Did not find test number in PATH");
req->testno = DOCNUMBER_404;
/* there was no trailing slash and it wasn't CONNECT, then we get the
the number off the last dot instead, IE we consider the TLD to be
the test number. Test 123 can then be written as
"example.com.123". */
/* find the last dot */
ptr = strrchr(doc, '.');
/* get the number after it */
if(ptr) {
ptr++; /* skip the dot */
req->testno = strtol(ptr, &ptr, 10);
if(req->testno > 10000) {
req->partno = req->testno % 10000;
req->testno /= 10000;
}
else
req->partno = 0;
sprintf(logbuf, "Requested test number %ld part %ld (from host name)",
req->testno, req->partno);
logmsg("%s", logbuf);
}
if(!req->testno) {
logmsg("Did not find test number in PATH");
req->testno = DOCNUMBER_404;
}
else
parse_servercmd(req);
}
}
}