From e4b733e3f1a771bd1017cdcfb355fcb9caffe646 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Fri, 15 Mar 2013 14:18:16 +0100 Subject: [PATCH] HTTP proxy: insert slash in URL if missing curl has been accepting URLs using slightly wrong syntax for a long time, such as when completely missing a slash "http://example.org" or missing a slash when a query part is given "http://example.org?q=foobar". curl would translate these into a legitimate HTTP request to servers, although as was shown in bug #1206 it was not adjusted properly in the cases where an HTTP proxy was used. Tests 1213 and 1214 were added to the test suite to verify this fix. The test HTTP server was adjusted to allow us to specify test number in the host name only without using any slashes in a given URL. Bug: http://curl.haxx.se/bug/view.cgi?id=1206 Reported by: ScottJi --- lib/url.c | 38 ++++++++++++++++++++++++++++ tests/FILEFORMAT | 4 +++ tests/data/Makefile.am | 2 +- tests/data/test1213 | 53 +++++++++++++++++++++++++++++++++++++++ tests/data/test1214 | 53 +++++++++++++++++++++++++++++++++++++++ tests/server/sws.c | 56 ++++++++++++++++++++++++++++++++++++------ 6 files changed, 197 insertions(+), 9 deletions(-) create mode 100644 tests/data/test1213 create mode 100644 tests/data/test1214 diff --git a/lib/url.c b/lib/url.c index a14c0626b..e401ca363 100644 --- a/lib/url.c +++ b/lib/url.c @@ -3663,6 +3663,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, char protobuf[16]; const char *protop; CURLcode result; + bool fix_slash = FALSE; *prot_missing = FALSE; @@ -3809,12 +3810,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, memcpy(path+1, query, hostlen); path[0]='/'; /* prepend the missing slash */ + fix_slash = TRUE; *query=0; /* now cut off the hostname at the ? 
*/ } else if(!path[0]) { /* if there's no path set, use a single slash */ strcpy(path, "/"); + fix_slash = TRUE; } /* If the URL is malformatted (missing a '/' after hostname before path) we @@ -3827,6 +3830,41 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, is bigger than the path. Use +1 to move the zero byte too. */ memmove(&path[1], path, strlen(path)+1); path[0] = '/'; + fix_slash = TRUE; + } + + + /* + * "fix_slash" means that the URL was malformatted so we need to generate an + * updated version with the new slash inserted at the right place! We need + * the corrected URL when communicating over HTTP proxy and we don't know at + * this point if we're using a proxy or not. + */ + if(fix_slash) { + char *reurl; + + size_t plen = strlen(path); /* new path, should be 1 byte longer than + the original */ + size_t urllen = strlen(data->change.url); /* original URL length */ + + reurl = malloc(urllen + 2); /* 2 for zerobyte + slash */ + if(!reurl) + return CURLE_OUT_OF_MEMORY; + + /* copy the prefix */ + memcpy(reurl, data->change.url, urllen - (plen-1)); + + /* append the trailing piece + zerobyte */ + memcpy(&reurl[urllen - (plen-1)], path, plen + 1); + + /* possible free the old one */ + if(data->change.url_alloc) { + Curl_safefree(data->change.url); + data->change.url_alloc = FALSE; + } + + data->change.url = reurl; + data->change.url_alloc = TRUE; /* free this later */ } /************************************************************* diff --git a/tests/FILEFORMAT b/tests/FILEFORMAT index d79cbf76d..96cd5c807 100644 --- a/tests/FILEFORMAT +++ b/tests/FILEFORMAT @@ -250,6 +250,10 @@ If a CONNECT is used to the server (to emulate HTTPS etc over proxy), the port number given in the CONNECT request will be used to identify which test that is being run, if the proxy host name is said to start with 'test'. 
+If there's no non-zero test number found in the above two places, the HTTP test +server will use the number following the last dot in the given url so that +"foo.bar.123" gets treated as test case 123. + Set type="perl" to write the test case as a perl script. It implies that there's no memory debugging and valgrind gets shut off for this test. diff --git a/tests/data/Makefile.am b/tests/data/Makefile.am index 7c2e648f5..04a6aa631 100644 --- a/tests/data/Makefile.am +++ b/tests/data/Makefile.am @@ -77,7 +77,7 @@ test1110 test1111 test1112 test1113 test1114 test1115 test1116 test1117 \ test1118 test1119 test1120 test1121 test1122 test1123 test1124 test1125 \ test1126 test1127 test1128 test1129 test1130 test1131 test1132 test1133 \ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \ -test1208 test1209 test1210 test1211 test1212 \ +test1208 test1209 test1210 test1211 test1212 test1213 test1214 \ test1220 test1221 test1222 test1223 \ test1300 test1301 test1302 test1303 test1304 test1305 \ test1306 test1307 test1308 test1309 test1310 test1311 test1312 test1313 \ diff --git a/tests/data/test1213 b/tests/data/test1213 new file mode 100644 index 000000000..729a7034d --- /dev/null +++ b/tests/data/test1213 @@ -0,0 +1,53 @@ + + + +HTTP +HTTP GET +HTTP proxy + + + +# Server-side + + +HTTP/1.1 200 OK +Date: Thu, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Type: text/html +Funny-head: yesyes +Content-Length: 22 + +the content goes here + + + +# Client-side + + +http + + +HTTP with proxy and host-only URL + +# the thing here is that this sloppy form is accepted and we convert it +# for normal server use, and we need to make sure it gets converted to +# RFC style even for proxies + +-x %HOSTIP:%HTTPPORT we.want.that.site.com.1213 + + + +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET HTTP://we.want.that.site.com.1213/ HTTP/1.1 +Host: we.want.that.site.com.1213 +Accept: */* +Proxy-Connection: Keep-Alive + + + + diff 
--git a/tests/data/test1214 b/tests/data/test1214 new file mode 100644 index 000000000..3eeb3e3ad --- /dev/null +++ b/tests/data/test1214 @@ -0,0 +1,53 @@ + + + +HTTP +HTTP GET +HTTP proxy + + + +# Server-side + + +HTTP/1.1 200 OK +Date: Thu, 09 Nov 2010 14:49:00 GMT +Server: test-server/fake +Content-Type: text/html +Funny-head: yesyes +Content-Length: 22 + +the content goes here + + + +# Client-side + + +http + + +HTTP with proxy and URL with ? and no slash separator + +# the thing here is that this sloppy form is accepted and we convert it +# for normal server use, and we need to make sure it gets converted to +# RFC style even for proxies + +-x %HOSTIP:%HTTPPORT http://we.want.that.site.com.1214?moo=foo + + + +# Verify data after the test has been "shot" + + +^User-Agent:.* + + +GET http://we.want.that.site.com.1214/?moo=foo HTTP/1.1 +Host: we.want.that.site.com.1214 +Accept: */* +Proxy-Connection: Keep-Alive + + + + diff --git a/tests/server/sws.c b/tests/server/sws.c index a7de09f92..aef55ea96 100644 --- a/tests/server/sws.c +++ b/tests/server/sws.c @@ -507,15 +507,24 @@ static int ProcessRequest(struct httprequest *req) else req->partno = 0; - sprintf(logbuf, "Requested test number %ld part %ld", - req->testno, req->partno); - logmsg("%s", logbuf); + if(req->testno) { - /* find and parse for this test */ - parse_servercmd(req); + sprintf(logbuf, "Requested test number %ld part %ld", + req->testno, req->partno); + logmsg("%s", logbuf); + + /* find and parse for this test */ + parse_servercmd(req); + } + else + req->testno = DOCNUMBER_NOTHING; } - else { + + if(req->testno == DOCNUMBER_NOTHING) { + /* didn't find any in the first scan, try alternative test case + number placements */ + if(sscanf(req->reqbuf, "CONNECT %" MAXDOCNAMELEN_TXT "s HTTP/%d.%d", doc, &prot_major, &prot_minor) == 3) { char *portp = NULL; @@ -563,8 +572,39 @@ static int ProcessRequest(struct httprequest *req) parse_servercmd(req); } else { - logmsg("Did not find test number in PATH"); - 
req->testno = DOCNUMBER_404; + /* there was no trailing slash and it wasn't CONNECT, then we get the + the number off the last dot instead, IE we consider the TLD to be + the test number. Test 123 can then be written as + "example.com.123". */ + + /* find the last dot */ + ptr = strrchr(doc, '.'); + + /* get the number after it */ + if(ptr) { + ptr++; /* skip the dot */ + + req->testno = strtol(ptr, &ptr, 10); + + if(req->testno > 10000) { + req->partno = req->testno % 10000; + req->testno /= 10000; + } + else + req->partno = 0; + + sprintf(logbuf, "Requested test number %ld part %ld (from host name)", + req->testno, req->partno); + logmsg("%s", logbuf); + + } + + if(!req->testno) { + logmsg("Did not find test number in PATH"); + req->testno = DOCNUMBER_404; + } + else + parse_servercmd(req); } } }