mirror of
https://github.com/moparisthebest/curl
synced 2024-12-21 23:58:49 -05:00
Moved the URL concat code to Curl_follow(), and added a prototype for that
function. It is used for following Location: headers.
This commit is contained in:
parent
daea056210
commit
5f649a1649
465
lib/transfer.c
465
lib/transfer.c
@ -1228,6 +1228,238 @@ CURLcode Curl_posttransfer(struct SessionHandle *data)
|
|||||||
return CURLE_OK;
|
return CURLE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CURLcode Curl_follow(struct SessionHandle *data,
|
||||||
|
char *newurl) /* this 'newurl' is the Location: string,
|
||||||
|
and it must be malloc()ed before passed
|
||||||
|
here */
|
||||||
|
{
|
||||||
|
/* Location: redirect */
|
||||||
|
char prot[16]; /* URL protocol string storage */
|
||||||
|
char letter; /* used for a silly sscanf */
|
||||||
|
|
||||||
|
if (data->set.maxredirs &&
|
||||||
|
(data->set.followlocation >= data->set.maxredirs)) {
|
||||||
|
failf(data,"Maximum (%d) redirects followed", data->set.maxredirs);
|
||||||
|
return CURLE_TOO_MANY_REDIRECTS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mark the next request as a followed location: */
|
||||||
|
data->state.this_is_a_follow = TRUE;
|
||||||
|
|
||||||
|
data->set.followlocation++; /* count location-followers */
|
||||||
|
|
||||||
|
if(data->set.http_auto_referer) {
|
||||||
|
/* We are asked to automatically set the previous URL as the
|
||||||
|
referer when we get the next URL. We pick the ->url field,
|
||||||
|
which may or may not be 100% correct */
|
||||||
|
|
||||||
|
if(data->change.referer_alloc)
|
||||||
|
/* If we already have an allocated referer, free this first */
|
||||||
|
free(data->change.referer);
|
||||||
|
|
||||||
|
data->change.referer = strdup(data->change.url);
|
||||||
|
data->change.referer_alloc = TRUE; /* yes, free this later */
|
||||||
|
}
|
||||||
|
|
||||||
|
if(2 != sscanf(newurl, "%15[^?&/:]://%c", prot, &letter)) {
|
||||||
|
/***
|
||||||
|
*DANG* this is an RFC 2068 violation. The URL is supposed
|
||||||
|
to be absolute and this doesn't seem to be that!
|
||||||
|
***
|
||||||
|
Instead, we have to TRY to append this new path to the old URL
|
||||||
|
to the right of the host part. Oh crap, this is doomed to cause
|
||||||
|
problems in the future...
|
||||||
|
*/
|
||||||
|
char *protsep;
|
||||||
|
char *pathsep;
|
||||||
|
char *newest;
|
||||||
|
|
||||||
|
char *useurl = newurl;
|
||||||
|
|
||||||
|
/* we must make our own copy of the URL to play with, as it may
|
||||||
|
point to read-only data */
|
||||||
|
char *url_clone=strdup(data->change.url);
|
||||||
|
|
||||||
|
if(!url_clone)
|
||||||
|
return CURLE_OUT_OF_MEMORY; /* skip out of this NOW */
|
||||||
|
|
||||||
|
/* protsep points to the start of the host name */
|
||||||
|
protsep=strstr(url_clone, "//");
|
||||||
|
if(!protsep)
|
||||||
|
protsep=url_clone;
|
||||||
|
else
|
||||||
|
protsep+=2; /* pass the slashes */
|
||||||
|
|
||||||
|
if('/' != newurl[0]) {
|
||||||
|
int level=0;
|
||||||
|
|
||||||
|
/* First we need to find out if there's a ?-letter in the URL,
|
||||||
|
and cut it and the right-side of that off */
|
||||||
|
pathsep = strrchr(protsep, '?');
|
||||||
|
if(pathsep)
|
||||||
|
*pathsep=0;
|
||||||
|
|
||||||
|
/* we have a relative path to append to the last slash if
|
||||||
|
there's one available */
|
||||||
|
pathsep = strrchr(protsep, '/');
|
||||||
|
if(pathsep)
|
||||||
|
*pathsep=0;
|
||||||
|
|
||||||
|
/* Check if there's any slash after the host name, and if so,
|
||||||
|
remember that position instead */
|
||||||
|
pathsep = strchr(protsep, '/');
|
||||||
|
if(pathsep)
|
||||||
|
protsep = pathsep+1;
|
||||||
|
else
|
||||||
|
protsep = NULL;
|
||||||
|
|
||||||
|
/* now deal with one "./" or any amount of "../" in the newurl
|
||||||
|
and act accordingly */
|
||||||
|
|
||||||
|
if((useurl[0] == '.') && (useurl[1] == '/'))
|
||||||
|
useurl+=2; /* just skip the "./" */
|
||||||
|
|
||||||
|
while((useurl[0] == '.') &&
|
||||||
|
(useurl[1] == '.') &&
|
||||||
|
(useurl[2] == '/')) {
|
||||||
|
level++;
|
||||||
|
useurl+=3; /* pass the "../" */
|
||||||
|
}
|
||||||
|
|
||||||
|
if(protsep) {
|
||||||
|
while(level--) {
|
||||||
|
/* cut off one more level from the right of the original URL */
|
||||||
|
pathsep = strrchr(protsep, '/');
|
||||||
|
if(pathsep)
|
||||||
|
*pathsep=0;
|
||||||
|
else {
|
||||||
|
*protsep=0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
/* We got a new absolute path for this server, cut off from the
|
||||||
|
first slash */
|
||||||
|
pathsep = strchr(protsep, '/');
|
||||||
|
if(pathsep)
|
||||||
|
*pathsep=0;
|
||||||
|
}
|
||||||
|
|
||||||
|
newest=(char *)malloc( strlen(url_clone) +
|
||||||
|
1 + /* possible slash */
|
||||||
|
strlen(useurl) + 1/* zero byte */);
|
||||||
|
|
||||||
|
if(!newest)
|
||||||
|
return CURLE_OUT_OF_MEMORY; /* go out from this */
|
||||||
|
|
||||||
|
sprintf(newest, "%s%s%s", url_clone,
|
||||||
|
(('/' == useurl[0]) || !*protsep)?"":"/",
|
||||||
|
useurl);
|
||||||
|
free(newurl); /* newurl is the allocated pointer */
|
||||||
|
free(url_clone);
|
||||||
|
newurl = newest;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
/* This is an absolute URL, don't allow the custom port number */
|
||||||
|
data->state.allow_port = FALSE;
|
||||||
|
|
||||||
|
if(data->change.url_alloc)
|
||||||
|
free(data->change.url);
|
||||||
|
else
|
||||||
|
data->change.url_alloc = TRUE; /* the URL is allocated */
|
||||||
|
|
||||||
|
/* TBD: set the URL with curl_setopt() */
|
||||||
|
data->change.url = newurl;
|
||||||
|
newurl = NULL; /* don't free! */
|
||||||
|
|
||||||
|
infof(data, "Follows Location: to new URL: '%s'\n", data->change.url);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We get here when the HTTP code is 300-399. We need to perform
|
||||||
|
* differently based on exactly what return code there was.
|
||||||
|
* Discussed on the curl mailing list and posted about on the 26th
|
||||||
|
* of January 2001.
|
||||||
|
*/
|
||||||
|
switch(data->info.httpcode) {
|
||||||
|
case 300: /* Multiple Choices */
|
||||||
|
case 306: /* Not used */
|
||||||
|
case 307: /* Temporary Redirect */
|
||||||
|
default: /* for all unknown ones */
|
||||||
|
/* These are explicitly mention since I've checked RFC2616 and they
|
||||||
|
* seem to be OK to POST to.
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
case 301: /* Moved Permanently */
|
||||||
|
/* (quote from RFC2616, section 10.3.2):
|
||||||
|
*
|
||||||
|
* Note: When automatically redirecting a POST request after
|
||||||
|
* receiving a 301 status code, some existing HTTP/1.0 user agents
|
||||||
|
* will erroneously change it into a GET request.
|
||||||
|
*
|
||||||
|
* ----
|
||||||
|
* Warning: Because most of importants user agents do this clear
|
||||||
|
* RFC2616 violation, many webservers expect this misbehavior. So
|
||||||
|
* these servers often answers to a POST request with an error page.
|
||||||
|
* To be sure that libcurl gets the page that most user agents
|
||||||
|
* would get, libcurl has to force GET:
|
||||||
|
*/
|
||||||
|
if( data->set.httpreq == HTTPREQ_POST
|
||||||
|
|| data->set.httpreq == HTTPREQ_POST_FORM) {
|
||||||
|
infof(data,
|
||||||
|
"Violate RFC 2616/10.3.2 and switch from POST to GET\n");
|
||||||
|
data->set.httpreq = HTTPREQ_GET;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 302: /* Found */
|
||||||
|
/* (From 10.3.3)
|
||||||
|
|
||||||
|
Note: RFC 1945 and RFC 2068 specify that the client is not allowed
|
||||||
|
to change the method on the redirected request. However, most
|
||||||
|
existing user agent implementations treat 302 as if it were a 303
|
||||||
|
response, performing a GET on the Location field-value regardless
|
||||||
|
of the original request method. The status codes 303 and 307 have
|
||||||
|
been added for servers that wish to make unambiguously clear which
|
||||||
|
kind of reaction is expected of the client.
|
||||||
|
|
||||||
|
(From 10.3.4)
|
||||||
|
|
||||||
|
Note: Many pre-HTTP/1.1 user agents do not understand the 303
|
||||||
|
status. When interoperability with such clients is a concern, the
|
||||||
|
302 status code may be used instead, since most user agents react
|
||||||
|
to a 302 response as described here for 303.
|
||||||
|
*/
|
||||||
|
case 303: /* See Other */
|
||||||
|
/* Disable both types of POSTs, since doing a second POST when
|
||||||
|
* following isn't what anyone would want! */
|
||||||
|
if(data->set.httpreq != HTTPREQ_GET) {
|
||||||
|
data->set.httpreq = HTTPREQ_GET; /* enforce GET request */
|
||||||
|
infof(data, "Disables POST, goes with %s\n",
|
||||||
|
data->set.no_body?"HEAD":"GET");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case 304: /* Not Modified */
|
||||||
|
/* 304 means we did a conditional request and it was "Not modified".
|
||||||
|
* We shouldn't get any Location: header in this response!
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
case 305: /* Use Proxy */
|
||||||
|
/* (quote from RFC2616, section 10.3.6):
|
||||||
|
* "The requested resource MUST be accessed through the proxy given
|
||||||
|
* by the Location field. The Location field gives the URI of the
|
||||||
|
* proxy. The recipient is expected to repeat this single request
|
||||||
|
* via the proxy. 305 responses MUST only be generated by origin
|
||||||
|
* servers."
|
||||||
|
*/
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Curl_pgrsTime(data, TIMER_REDIRECT);
|
||||||
|
Curl_pgrsResetTimes(data);
|
||||||
|
|
||||||
|
return CURLE_OK;
|
||||||
|
}
|
||||||
|
|
||||||
CURLcode Curl_perform(struct SessionHandle *data)
|
CURLcode Curl_perform(struct SessionHandle *data)
|
||||||
{
|
{
|
||||||
CURLcode res;
|
CURLcode res;
|
||||||
@ -1299,236 +1531,11 @@ CURLcode Curl_perform(struct SessionHandle *data)
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
if((res == CURLE_OK) && newurl) {
|
if((res == CURLE_OK) && newurl) {
|
||||||
/* Location: redirect
|
res = Curl_follow(data, newurl);
|
||||||
|
if(CURLE_OK == res) {
|
||||||
This is assumed to happen for HTTP(S) only!
|
newurl = NULL;
|
||||||
*/
|
continue;
|
||||||
char prot[16]; /* URL protocol string storage */
|
|
||||||
char letter; /* used for a silly sscanf */
|
|
||||||
|
|
||||||
if (data->set.maxredirs && (data->set.followlocation >= data->set.maxredirs)) {
|
|
||||||
failf(data,"Maximum (%d) redirects followed", data->set.maxredirs);
|
|
||||||
res=CURLE_TOO_MANY_REDIRECTS;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* mark the next request as a followed location: */
|
|
||||||
data->state.this_is_a_follow = TRUE;
|
|
||||||
|
|
||||||
data->set.followlocation++; /* count location-followers */
|
|
||||||
|
|
||||||
if(data->set.http_auto_referer) {
|
|
||||||
/* We are asked to automatically set the previous URL as the
|
|
||||||
referer when we get the next URL. We pick the ->url field,
|
|
||||||
which may or may not be 100% correct */
|
|
||||||
|
|
||||||
if(data->change.referer_alloc)
|
|
||||||
/* If we already have an allocated referer, free this first */
|
|
||||||
free(data->change.referer);
|
|
||||||
|
|
||||||
data->change.referer = strdup(data->change.url);
|
|
||||||
data->change.referer_alloc = TRUE; /* yes, free this later */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(2 != sscanf(newurl, "%15[^?&/:]://%c", prot, &letter)) {
|
|
||||||
/***
|
|
||||||
*DANG* this is an RFC 2068 violation. The URL is supposed
|
|
||||||
to be absolute and this doesn't seem to be that!
|
|
||||||
***
|
|
||||||
Instead, we have to TRY to append this new path to the old URL
|
|
||||||
to the right of the host part. Oh crap, this is doomed to cause
|
|
||||||
problems in the future...
|
|
||||||
*/
|
|
||||||
char *protsep;
|
|
||||||
char *pathsep;
|
|
||||||
char *newest;
|
|
||||||
|
|
||||||
char *useurl = newurl;
|
|
||||||
|
|
||||||
/* we must make our own copy of the URL to play with, as it may
|
|
||||||
point to read-only data */
|
|
||||||
char *url_clone=strdup(data->change.url);
|
|
||||||
|
|
||||||
if(!url_clone) {
|
|
||||||
res = CURLE_OUT_OF_MEMORY;
|
|
||||||
break; /* skip out of this loop NOW */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* protsep points to the start of the host name */
|
|
||||||
protsep=strstr(url_clone, "//");
|
|
||||||
if(!protsep)
|
|
||||||
protsep=url_clone;
|
|
||||||
else
|
|
||||||
protsep+=2; /* pass the slashes */
|
|
||||||
|
|
||||||
if('/' != newurl[0]) {
|
|
||||||
int level=0;
|
|
||||||
|
|
||||||
/* First we need to find out if there's a ?-letter in the URL,
|
|
||||||
and cut it and the right-side of that off */
|
|
||||||
pathsep = strrchr(protsep, '?');
|
|
||||||
if(pathsep)
|
|
||||||
*pathsep=0;
|
|
||||||
|
|
||||||
/* we have a relative path to append to the last slash if
|
|
||||||
there's one available */
|
|
||||||
pathsep = strrchr(protsep, '/');
|
|
||||||
if(pathsep)
|
|
||||||
*pathsep=0;
|
|
||||||
|
|
||||||
/* Check if there's any slash after the host name, and if so,
|
|
||||||
remember that position instead */
|
|
||||||
pathsep = strchr(protsep, '/');
|
|
||||||
if(pathsep)
|
|
||||||
protsep = pathsep+1;
|
|
||||||
else
|
|
||||||
protsep = NULL;
|
|
||||||
|
|
||||||
/* now deal with one "./" or any amount of "../" in the newurl
|
|
||||||
and act accordingly */
|
|
||||||
|
|
||||||
if((useurl[0] == '.') && (useurl[1] == '/'))
|
|
||||||
useurl+=2; /* just skip the "./" */
|
|
||||||
|
|
||||||
while((useurl[0] == '.') &&
|
|
||||||
(useurl[1] == '.') &&
|
|
||||||
(useurl[2] == '/')) {
|
|
||||||
level++;
|
|
||||||
useurl+=3; /* pass the "../" */
|
|
||||||
}
|
|
||||||
|
|
||||||
if(protsep) {
|
|
||||||
while(level--) {
|
|
||||||
/* cut off one more level from the right of the original URL */
|
|
||||||
pathsep = strrchr(protsep, '/');
|
|
||||||
if(pathsep)
|
|
||||||
*pathsep=0;
|
|
||||||
else {
|
|
||||||
*protsep=0;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
/* We got a new absolute path for this server, cut off from the
|
|
||||||
first slash */
|
|
||||||
pathsep = strchr(protsep, '/');
|
|
||||||
if(pathsep)
|
|
||||||
*pathsep=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
newest=(char *)malloc( strlen(url_clone) +
|
|
||||||
1 + /* possible slash */
|
|
||||||
strlen(useurl) + 1/* zero byte */);
|
|
||||||
|
|
||||||
if(!newest) {
|
|
||||||
res = CURLE_OUT_OF_MEMORY;
|
|
||||||
break; /* go go go out from this loop */
|
|
||||||
}
|
|
||||||
sprintf(newest, "%s%s%s", url_clone,
|
|
||||||
(('/' == useurl[0]) || !*protsep)?"":"/",
|
|
||||||
useurl);
|
|
||||||
free(newurl); /* newurl is the allocated pointer */
|
|
||||||
free(url_clone);
|
|
||||||
newurl = newest;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
/* This is an absolute URL, don't allow the custom port number */
|
|
||||||
data->state.allow_port = FALSE;
|
|
||||||
|
|
||||||
if(data->change.url_alloc)
|
|
||||||
free(data->change.url);
|
|
||||||
else
|
|
||||||
data->change.url_alloc = TRUE; /* the URL is allocated */
|
|
||||||
|
|
||||||
/* TBD: set the URL with curl_setopt() */
|
|
||||||
data->change.url = newurl;
|
|
||||||
newurl = NULL; /* don't free! */
|
|
||||||
|
|
||||||
infof(data, "Follows Location: to new URL: '%s'\n", data->change.url);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We get here when the HTTP code is 300-399. We need to perform
|
|
||||||
* differently based on exactly what return code there was.
|
|
||||||
* Discussed on the curl mailing list and posted about on the 26th
|
|
||||||
* of January 2001.
|
|
||||||
*/
|
|
||||||
switch(data->info.httpcode) {
|
|
||||||
case 300: /* Multiple Choices */
|
|
||||||
case 306: /* Not used */
|
|
||||||
case 307: /* Temporary Redirect */
|
|
||||||
default: /* for all unknown ones */
|
|
||||||
/* These are explicitly mention since I've checked RFC2616 and they
|
|
||||||
* seem to be OK to POST to.
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
case 301: /* Moved Permanently */
|
|
||||||
/* (quote from RFC2616, section 10.3.2):
|
|
||||||
*
|
|
||||||
* Note: When automatically redirecting a POST request after
|
|
||||||
* receiving a 301 status code, some existing HTTP/1.0 user agents
|
|
||||||
* will erroneously change it into a GET request.
|
|
||||||
*
|
|
||||||
* ----
|
|
||||||
* Warning: Because most of importants user agents do this clear
|
|
||||||
* RFC2616 violation, many webservers expect this misbehavior. So
|
|
||||||
* these servers often answers to a POST request with an error page.
|
|
||||||
* To be sure that libcurl gets the page that most user agents
|
|
||||||
* would get, libcurl has to force GET:
|
|
||||||
*/
|
|
||||||
if( data->set.httpreq == HTTPREQ_POST
|
|
||||||
|| data->set.httpreq == HTTPREQ_POST_FORM) {
|
|
||||||
infof(data,
|
|
||||||
"Violate RFC 2616/10.3.2 and switch from POST to GET\n");
|
|
||||||
data->set.httpreq = HTTPREQ_GET;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 302: /* Found */
|
|
||||||
/* (From 10.3.3)
|
|
||||||
|
|
||||||
Note: RFC 1945 and RFC 2068 specify that the client is not allowed
|
|
||||||
to change the method on the redirected request. However, most
|
|
||||||
existing user agent implementations treat 302 as if it were a 303
|
|
||||||
response, performing a GET on the Location field-value regardless
|
|
||||||
of the original request method. The status codes 303 and 307 have
|
|
||||||
been added for servers that wish to make unambiguously clear which
|
|
||||||
kind of reaction is expected of the client.
|
|
||||||
|
|
||||||
(From 10.3.4)
|
|
||||||
|
|
||||||
Note: Many pre-HTTP/1.1 user agents do not understand the 303
|
|
||||||
status. When interoperability with such clients is a concern, the
|
|
||||||
302 status code may be used instead, since most user agents react
|
|
||||||
to a 302 response as described here for 303.
|
|
||||||
*/
|
|
||||||
case 303: /* See Other */
|
|
||||||
/* Disable both types of POSTs, since doing a second POST when
|
|
||||||
* following isn't what anyone would want! */
|
|
||||||
if(data->set.httpreq != HTTPREQ_GET) {
|
|
||||||
data->set.httpreq = HTTPREQ_GET; /* enforce GET request */
|
|
||||||
infof(data, "Disables POST, goes with %s\n",
|
|
||||||
data->set.no_body?"HEAD":"GET");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case 304: /* Not Modified */
|
|
||||||
/* 304 means we did a conditional request and it was "Not modified".
|
|
||||||
* We shouldn't get any Location: header in this response!
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
case 305: /* Use Proxy */
|
|
||||||
/* (quote from RFC2616, section 10.3.6):
|
|
||||||
* "The requested resource MUST be accessed through the proxy given
|
|
||||||
* by the Location field. The Location field gives the URI of the
|
|
||||||
* proxy. The recipient is expected to repeat this single request
|
|
||||||
* via the proxy. 305 responses MUST only be generated by origin
|
|
||||||
* servers."
|
|
||||||
*/
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
Curl_pgrsTime(data, TIMER_REDIRECT);
|
|
||||||
Curl_pgrsResetTimes(data);
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
break; /* it only reaches here when this shouldn't loop */
|
break; /* it only reaches here when this shouldn't loop */
|
||||||
|
@ -23,10 +23,9 @@
|
|||||||
* $Id$
|
* $Id$
|
||||||
***************************************************************************/
|
***************************************************************************/
|
||||||
CURLcode Curl_perform(struct SessionHandle *data);
|
CURLcode Curl_perform(struct SessionHandle *data);
|
||||||
|
|
||||||
CURLcode Curl_pretransfer(struct SessionHandle *data);
|
CURLcode Curl_pretransfer(struct SessionHandle *data);
|
||||||
CURLcode Curl_posttransfer(struct SessionHandle *data);
|
CURLcode Curl_posttransfer(struct SessionHandle *data);
|
||||||
|
CURLcode Curl_follow(struct SessionHandle *data, char *newurl);
|
||||||
CURLcode Curl_readwrite(struct connectdata *conn, bool *done);
|
CURLcode Curl_readwrite(struct connectdata *conn, bool *done);
|
||||||
void Curl_single_fdset(struct connectdata *conn,
|
void Curl_single_fdset(struct connectdata *conn,
|
||||||
fd_set *read_fd_set,
|
fd_set *read_fd_set,
|
||||||
|
Loading…
Reference in New Issue
Block a user