/***************************************************************************** * _ _ ____ _ * Project ___| | | | _ \| | * / __| | | | |_) | | * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * * The contents of this file are subject to the Mozilla Public License * Version 1.0 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the * License for the specific language governing rights and limitations * under the License. * * The Original Code is Curl. * * The Initial Developer of the Original Code is Daniel Stenberg. * * Portions created by the Initial Developer are Copyright (C) 1998. * All Rights Reserved. * * ------------------------------------------------------------ * Main author: * - Daniel Stenberg * * http://curl.haxx.se * * $Source$ * $Revision$ * $Date$ * $Author$ * $State$ * $Locker$ * * ------------------------------------------------------------ ****************************************************************************/ /* -- WIN32 approved -- */ #include #include #include #include #include #include #include #include #include "setup.h" #include "strequal.h" #if defined(WIN32) && !defined(__GNUC__) || defined(__MINGW32__) #include #include #include #else #ifdef HAVE_SYS_SOCKET_H #include #endif #include #include #include #ifdef HAVE_UNISTD_H #include #endif #include #ifdef HAVE_ARPA_INET_H #include #endif #ifdef HAVE_NET_IF_H #include #endif #include #include #ifdef HAVE_SYS_PARAM_H #include #endif #ifdef HAVE_SYS_SELECT_H #include #endif #ifndef HAVE_SELECT #error "We can't compile without select() support!" #endif #ifndef HAVE_SOCKET #error "We can't compile without socket() support!" #endif #endif #include "urldata.h" #include #include #include "netrc.h" #include "getenv.h" #include "hostip.h" #include "download.h" #include "sendf.h" #include "speedcheck.h" #include "getpass.h" #include "progress.h" #include "getdate.h" #include "writeout.h" #define _MPRINTF_REPLACE /* use our functions only */ #include CURLcode _Transfer(struct connectdata *c_conn) { size_t nread; /* number of bytes read */ int bytecount = 0; /* total number of bytes read */ int writebytecount = 0; /* number of bytes written */ long contentlength=0; /* size of incoming data */ struct timeval start = tvnow(); struct timeval now = start; /* current time */ bool header = TRUE; /* incoming data has HTTP header */ int headerline = 0; /* counts header lines to better track the first one */ char *hbufp; /* points at *end* of header line */ int hbuflen = 0; char *str; /* within buf */ char *str_start; /* within buf */ char *end_ptr; /* within buf */ char *p; /* within headerbuff */ bool content_range = FALSE; /* set TRUE if Content-Range: was found */ int offset = 0; /* possible resume offset read from the Content-Range: header */ int code = 0; /* error code from the 'HTTP/1.? XXX' line */ /* for the low speed checks: */ CURLcode urg; time_t timeofdoc=0; long bodywrites=0; char newurl[URL_MAX_LENGTH]; /* buffer for Location: URL */ /* the highest fd we use + 1 */ struct UrlData *data; struct connectdata *conn = (struct connectdata *)c_conn; char *buf; int maxfd; if(!conn || (conn->handle != STRUCT_CONNECT)) return CURLE_BAD_FUNCTION_ARGUMENT; data = conn->data; /* there's the root struct */ buf = data->buffer; maxfd = (conn->sockfd>conn->writesockfd?conn->sockfd:conn->writesockfd)+1; hbufp = data->headerbuff; myalarm (0); /* switch off the alarm-style timeout */ now = tvnow(); start = now; #define KEEP_READ 1 #define KEEP_WRITE 2 pgrsTime(data, TIMER_PRETRANSFER); if (!conn->getheader) { header = FALSE; if(conn->size > 0) pgrsSetDownloadSize(data, conn->size); } { fd_set readfd; fd_set writefd; fd_set rkeepfd; fd_set wkeepfd; struct timeval interval; int keepon=0; /* timeout every X second - makes a better progressmeter (i.e even when no data is read, the meter can be updated and reflect reality) - allows removal of the alarm() crap - variable timeout is easier */ FD_ZERO (&readfd); /* clear it */ if(conn->sockfd != -1) { FD_SET (conn->sockfd, &readfd); /* read socket */ keepon |= KEEP_READ; } FD_ZERO (&writefd); /* clear it */ if(conn->writesockfd != -1) { FD_SET (conn->writesockfd, &writefd); /* write socket */ keepon |= KEEP_WRITE; } /* get these in backup variables to be able to restore them on each lap in the select() loop */ rkeepfd = readfd; wkeepfd = writefd; while (keepon) { readfd = rkeepfd; /* set those every lap in the loop */ writefd = wkeepfd; interval.tv_sec = 1; interval.tv_usec = 0; switch (select (maxfd, &readfd, &writefd, NULL, &interval)) { case -1: /* select() error, stop reading */ #ifdef EINTR /* The EINTR is not serious, and it seems you might get this more ofen when using the lib in a multi-threaded environment! */ if(errno == EINTR) ; else #endif keepon = 0; /* no more read or write */ continue; case 0: /* timeout */ break; default: if((keepon & KEEP_READ) && FD_ISSET(conn->sockfd, &readfd)) { /* read! */ urg = curl_read(conn, buf, BUFSIZE -1, &nread); /* NULL terminate, allowing string ops to be used */ if (0 < (signed int) nread) buf[nread] = 0; /* if we receive 0 or less here, the server closed the connection and we bail out from this! */ else if (0 >= (signed int) nread) { keepon &= ~KEEP_READ; break; } str = buf; /* Default buffer to use when we write the buffer, it may be changed in the flow below before the actual storing is done. */ /* Since this is a two-state thing, we check if we are parsing headers at the moment or not. */ if (header) { /* we are in parse-the-header-mode */ /* header line within buffer loop */ do { int hbufp_index; str_start = str; /* str_start is start of line within buf */ end_ptr = strchr (str_start, '\n'); if (!end_ptr) { /* no more complete header lines within buffer */ /* copy what is remaining into headerbuff */ int str_length = (int)strlen(str); if (hbuflen + (int)str_length >= data->headersize) { char *newbuff; long newsize=MAX((hbuflen+str_length)*3/2, data->headersize*2); hbufp_index = hbufp - data->headerbuff; newbuff = (char *)realloc(data->headerbuff, newsize); if(!newbuff) { failf (data, "Failed to alloc memory for big header!"); return CURLE_READ_ERROR; } data->headersize=newsize; data->headerbuff = newbuff; hbufp = data->headerbuff + hbufp_index; } strcpy (hbufp, str); hbufp += strlen (str); hbuflen += strlen (str); break; /* read more and try again */ } str = end_ptr + 1; /* move just past new line */ if (hbuflen + (str - str_start) >= data->headersize) { char *newbuff; long newsize=MAX((hbuflen+(str-str_start))*3/2, data->headersize*2); hbufp_index = hbufp - data->headerbuff; newbuff = (char *)realloc(data->headerbuff, newsize); if(!newbuff) { failf (data, "Failed to alloc memory for big header!"); return CURLE_READ_ERROR; } data->headersize= newsize; data->headerbuff = newbuff; hbufp = data->headerbuff + hbufp_index; } /* copy to end of line */ strncpy (hbufp, str_start, str - str_start); hbufp += str - str_start; hbuflen += str - str_start; *hbufp = 0; p = data->headerbuff; /* we now have a full line that p points to */ if (('\n' == *p) || ('\r' == *p)) { /* Zero-length line means end of header! */ if (-1 != conn->size) /* if known */ conn->size += bytecount; /* we append the already read size */ if ('\r' == *p) p++; /* pass the \r byte */ if ('\n' == *p) p++; /* pass the \n byte */ pgrsSetDownloadSize(data, conn->size); header = FALSE; /* no more header to parse! */ /* now, only output this if the header AND body are requested: */ if (data->bits.http_include_header) { if((p - data->headerbuff) != data->fwrite (data->headerbuff, 1, p - data->headerbuff, data->out)) { failf (data, "Failed writing output"); return CURLE_WRITE_ERROR; } } if(data->writeheader) { /* obviously, the header is requested to be written to this file: */ if((p - data->headerbuff) != data->fwrite (data->headerbuff, 1, p - data->headerbuff, data->writeheader)) { failf (data, "Failed writing output"); return CURLE_WRITE_ERROR; } } break; /* exit header line loop */ } if (!headerline++) { /* This is the first header, it MUST be the error code line or else we consiser this to be the body right away! */ if (sscanf (p, " HTTP/1.%*c %3d", &code)) { /* 404 -> URL not found! */ if ( ( ((data->bits.http_follow_location) && (code >= 400)) || (!data->bits.http_follow_location && (code >= 300))) && (data->bits.http_fail_on_error)) { /* If we have been told to fail hard on HTTP-errors, here is the check for that: */ /* serious error, go home! */ failf (data, "The requested file was not found"); return CURLE_HTTP_NOT_FOUND; } data->progress.httpcode = code; } else { header = FALSE; /* this is not a header line */ break; } } /* check for Content-Length: header lines to get size */ if (strnequal("Content-Length", p, 14) && sscanf (p+14, ": %ld", &contentlength)) conn->size = contentlength; else if (strnequal("Content-Range", p, 13)) { if (sscanf (p+13, ": bytes %d-", &offset) || sscanf (p+13, ": bytes: %d-", &offset)) { /* This second format was added August 1st by Igor Khristophorov since Sun's webserver JavaWebServer/1.1.1 obviously sends the header this way! :-( */ if (data->resume_from == offset) { /* we asked for a resume and we got it */ content_range = TRUE; } } } else if(data->cookies && strnequal("Set-Cookie: ", p, 11)) { cookie_add(data->cookies, TRUE, &p[12]); } else if(strnequal("Last-Modified:", p, strlen("Last-Modified:")) && data->timecondition) { time_t secs=time(NULL); timeofdoc = curl_getdate(p+strlen("Last-Modified:"), &secs); } else if ((code >= 300 && code < 400) && (data->bits.http_follow_location) && strnequal("Location", p, 8) && sscanf (p+8, ": %" URL_MAX_LENGTH_TXT "s", newurl)) { /* this is the URL that the server advices us to get instead */ data->newurl = strdup (newurl); } if (data->bits.http_include_header) { if(hbuflen != data->fwrite (p, 1, hbuflen, data->out)) { failf (data, "Failed writing output"); return CURLE_WRITE_ERROR; } } if(data->writeheader) { /* the header is requested to be written to this file */ if(hbuflen != data->fwrite (p, 1, hbuflen, data->writeheader)) { failf (data, "Failed writing output"); return CURLE_WRITE_ERROR; } } /* reset hbufp pointer && hbuflen */ hbufp = data->headerbuff; hbuflen = 0; } while (*str); /* header line within buffer */ /* We might have reached the end of the header part here, but there might be a non-header part left in the end of the read buffer. */ if (!header) { /* the next token and forward is not part of the header! */ /* we subtract the remaining header size from the buffer */ nread -= (str - buf); } } /* end if header mode */ /* This is not an 'else if' since it may be a rest from the header parsing, where the beginning of the buffer is headers and the end is non-headers. */ if (str && !header && ((signed int)nread > 0)) { if(0 == bodywrites) { /* These checks are only made the first time we are about to write a chunk of the body */ if(conn->protocol&PROT_HTTP) { /* HTTP-only checks */ if (data->resume_from && !content_range ) { /* we wanted to resume a download, although the server doesn't seem to support this */ failf (data, "HTTP server doesn't seem to support byte ranges. Cannot resume."); return CURLE_HTTP_RANGE_ERROR; } else if (data->newurl) { /* abort after the headers if "follow Location" is set */ infof (data, "Follow to new URL: %s\n", data->newurl); return CURLE_OK; } else if(data->timecondition && !data->range) { /* A time condition has been set AND no ranges have been requested. This seems to be what chapter 13.3.4 of RFC 2616 defines to be the correct action for a HTTP/1.1 client */ if((timeofdoc > 0) && (data->timevalue > 0)) { switch(data->timecondition) { case TIMECOND_IFMODSINCE: default: if(timeofdoc < data->timevalue) { infof(data, "The requested document is not new enough"); return CURLE_OK; } break; case TIMECOND_IFUNMODSINCE: if(timeofdoc > data->timevalue) { infof(data, "The requested document is not old enough"); return CURLE_OK; } break; } /* switch */ } /* two valid time strings */ } /* we have a time condition */ } /* this is HTTP */ } /* this is the first time we write a body part */ bodywrites++; if(data->maxdownload && (bytecount + nread > data->maxdownload)) { nread = data->maxdownload - bytecount; if((signed int)nread < 0 ) /* this should be unusual */ nread = 0; keepon &= ~KEEP_READ; /* we're done reading */ } bytecount += nread; pgrsSetDownloadCounter(data, (double)bytecount); if (nread != data->fwrite (str, 1, nread, data->out)) { failf (data, "Failed writing output"); return CURLE_WRITE_ERROR; } } /* if (! header and data to read ) */ } /* if( read from socket ) */ if((keepon & KEEP_WRITE) && FD_ISSET(conn->writesockfd, &writefd)) { /* write */ char scratch[BUFSIZE * 2]; int i, si; size_t bytes_written; if(data->crlf) buf = data->buffer; /* put it back on the buffer */ nread = data->fread(buf, 1, BUFSIZE, data->in); /* the signed int typecase of nread of for systems that has unsigned size_t */ if ((signed int)nread<=0) { /* done */ keepon &= ~KEEP_WRITE; /* we're done writing */ break; } writebytecount += nread; pgrsSetUploadCounter(data, (double)writebytecount); /* convert LF to CRLF if so asked */ if (data->crlf) { for(i = 0, si = 0; i < (int)nread; i++, si++) { if (buf[i] == 0x0a) { scratch[si++] = 0x0d; scratch[si] = 0x0a; } else { scratch[si] = buf[i]; } } nread = si; buf = scratch; /* point to the new buffer */ } /* write to socket */ urg = curl_write(conn, buf, nread, &bytes_written); if(nread != bytes_written) { failf(data, "Failed uploading data"); return CURLE_WRITE_ERROR; } } break; } now = tvnow(); if(pgrsUpdate(data)) urg = CURLE_ABORTED_BY_CALLBACK; else urg = speedcheck (data, now); if (urg) return urg; if (data->timeout && (tvdiff (now, start) > data->timeout)) { failf (data, "Operation timed out with %d out of %d bytes received", bytecount, conn->size); return CURLE_OPERATION_TIMEOUTED; } } } if(!(data->bits.no_body) && contentlength && (bytecount != contentlength)) { failf(data, "transfer closed with %d bytes remaining to read", contentlength-bytecount); return CURLE_PARTIAL_FILE; } if(pgrsUpdate(data)) return CURLE_ABORTED_BY_CALLBACK; if(conn->bytecountp) *conn->bytecountp = bytecount; /* read count */ if(conn->writebytecountp) *conn->writebytecountp = writebytecount; /* write count */ return CURLE_OK; } typedef int (*func_T)(void); CURLcode curl_transfer(CURL *curl) { CURLcode res; struct UrlData *data = curl; struct connectdata *c_connect; pgrsStartNow(data); do { res = curl_connect(curl, (CURLconnect **)&c_connect); if(res == CURLE_OK) { res = curl_do(c_connect); if(res == CURLE_OK) { res = _Transfer(c_connect); /* now fetch that URL please */ if(res == CURLE_OK) res = curl_done(c_connect); } if((res == CURLE_OK) && data->newurl) { /* Location: redirect */ char prot[16]; char path[URL_MAX_LENGTH]; /* mark the next request as a followed location: */ data->bits.this_is_a_follow = TRUE; if(data->bits.http_auto_referer) { /* We are asked to automatically set the previous URL as the referer when we get the next URL. We pick the ->url field, which may or may not be 100% correct */ if(data->free_referer) { /* If we already have an allocated referer, free this first */ free(data->referer); } data->referer = strdup(data->url); data->free_referer = TRUE; /* yes, free this later */ data->bits.http_set_referer = TRUE; /* might have been false */ } if(2 != sscanf(data->newurl, "%15[^:]://%" URL_MAX_LENGTH_TXT "s", prot, path)) { /*** *DANG* this is an RFC 2068 violation. The URL is supposed to be absolute and this doesn't seem to be that! *** Instead, we have to TRY to append this new path to the old URL to the right of the host part. Oh crap, this is doomed to cause problems in the future... */ char *protsep; char *pathsep; char *newest; /* protsep points to the start of the host name */ protsep=strstr(data->url, "//"); if(!protsep) protsep=data->url; else { /* TBD: set the port with curl_setopt() */ data->port=0; /* we got a full URL and then we should reset the port number here to re-initiate it later */ protsep+=2; /* pass the slashes */ } if('/' != data->newurl[0]) { /* First we need to find out if there's a ?-letter in the URL, and cut it and the right-side of that off */ pathsep = strrchr(protsep, '?'); if(pathsep) *pathsep=0; /* we have a relative path to append to the last slash if there's one available */ pathsep = strrchr(protsep, '/'); if(pathsep) *pathsep=0; } else { /* We got a new absolute path for this server, cut off from the first slash */ pathsep = strchr(protsep, '/'); if(pathsep) *pathsep=0; } newest=(char *)malloc( strlen(data->url) + 1 + /* possible slash */ strlen(data->newurl) + 1/* zero byte */); if(!newest) return CURLE_OUT_OF_MEMORY; sprintf(newest, "%s%s%s", data->url, ('/' == data->newurl[0])?"":"/", data->newurl); free(data->newurl); data->newurl = newest; } else { /* This was an absolute URL, clear the port number! */ /* TBD: set the port with curl_setopt() */ data->port = 0; } /* TBD: set the URL with curl_setopt() */ data->url = data->newurl; data->newurl = NULL; /* don't show! */ /* Disable both types of POSTs, since doing a second POST when following isn't what anyone would want! */ data->bits.http_post = FALSE; data->bits.http_formpost = FALSE; infof(data, "Follows Location: to new URL: '%s'\n", data->url); curl_disconnect(c_connect); continue; } curl_disconnect(c_connect); } break; /* it only reaches here when this shouldn't loop */ } while(1); /* loop if Location: */ if(data->newurl) free(data->newurl); if((CURLE_OK == res) && data->writeinfo) { /* Time to output some info to stdout */ WriteOut(data); } return res; }