
[svn] * retr.c (fd_read_body): Report the amount of data *written* as
amount_read.  This is not entirely logical, but that's what the
callers expect, and it's not easy to change.

* ftp.c (ftp_loop_internal): Ditto.

* http.c (http_loop): Be smarter about assigning restval; if we're
in the nth pass of a download, simply use the information we have
about how much data has been retrieved as restval.

* ftp.c (getftp): Ditto for FTP "REST" command.

* http.c (gethttp): When the server doesn't respect range, skip
the first RESTVAL bytes of the read body.  Never truncate the
output file.

* retr.c (fd_read_body): Support skipping initial STARTPOS octets.
hniksic 2003-11-30 15:39:04 -08:00
parent 381457408a
commit 78706dc5ea
11 changed files with 191 additions and 227 deletions
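The heart of the change is that fd_read_body can now discard the first STARTPOS octets it reads when the server ignored the Range/REST request, instead of truncating the output file. The following is a standalone sketch, not part of the commit: it reproduces the write_data helper added to retr.c below and drives it with a tiny main(), pretending restval is 5. The driver and its sample buffers are illustrative only.

#include <stdio.h>

/* Write data in BUF to OUT, but first discard up to *skip bytes of the
   stream, decrementing *skip as data goes by.  Bytes actually written
   are counted in *transferred.  Mirrors the helper added to retr.c.  */
static int
write_data (FILE *out, const char *buf, int bufsize, long *skip,
            long *transferred)
{
  if (!out)
    return 1;
  if (*skip > bufsize)
    {
      *skip -= bufsize;         /* whole chunk lies inside the skipped region */
      return 1;
    }
  if (*skip)
    {
      buf += *skip;             /* drop the leading *skip bytes of this chunk */
      bufsize -= *skip;
      *skip = 0;
      if (bufsize == 0)
        return 1;
    }
  *transferred += bufsize;
  fwrite (buf, 1, bufsize, out);
  fflush (out);
  return !ferror (out);
}

int
main (void)
{
  long skip = 5, transferred = 0;                        /* pretend restval == 5 */
  write_data (stdout, "0123", 4, &skip, &transferred);   /* fully skipped */
  write_data (stdout, "456789", 6, &skip, &transferred); /* prints "56789" */
  printf ("\ntransferred = %ld\n", transferred);         /* prints 5 */
  return 0;
}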

TODO
View File

@@ -15,16 +15,6 @@ represent user-visible changes.
   file name.  If possible, try not to break `-nc' and friends when
   doing that.
-* Should allow retries with multiple downloads when using -O on
-  regular files.  As the source comment says: "A possible solution to
-  [rewind not working with multiple downloads] would be to remember
-  the file position in the output document and to seek to that
-  position, instead of rewinding."
-  But the above won't work for -O/dev/stdout, when stdout is a pipe.
-  An even better solution would be to simply keep writing to the same
-  file descriptor each time, instead of reopening it in append mode.
 * Wget shouldn't delete rejected files that were not downloaded, but
   just found on disk because of `-nc'.  For example, `wget -r -nc
   -A.gif URL' should allow the user to get all the GIFs without

View File

@@ -1,3 +1,23 @@
+2003-11-30  Hrvoje Niksic  <hniksic@xemacs.org>
+
+	* retr.c (fd_read_body): Report the amount of data *written* as
+	amount_read.  This is not entirely logical, but that's what the
+	callers expect, and it's not easy to change.
+
+	* ftp.c (ftp_loop_internal): Ditto.
+
+	* http.c (http_loop): Be smarter about assigning restval; if we're
+	in the nth pass of a download, simply use the information we have
+	about how much data has been retrieved as restval.
+
+	* ftp.c (getftp): Ditto for FTP "REST" command.
+
+	* http.c (gethttp): When the server doesn't respect range, skip
+	the first RESTVAL bytes of the read body.  Never truncate the
+	output file.
+
+	* retr.c (fd_read_body): Support skipping initial STARTPOS octets.
+
 2003-11-30  Hrvoje Niksic  <hniksic@xemacs.org>
 
 	* http.c (skip_short_body): Renamed skip_body to skip_short_body;

View File

@@ -48,6 +48,8 @@ so, delete this exception statement from your version.  */
 #include "ftp.h"
 #include "url.h"
 
+extern FILE *output_stream;
+
 /* Converts symbolic permissions to number-style ones, e.g. string
    rwxr-xr-x to 755.  For now, it knows nothing of
    setuid/setgid/sticky.  ACLs are ignored.  */
@@ -827,7 +829,7 @@ ftp_index (const char *file, struct url *u, struct fileinfo *f)
   char *upwd;
   char *htclfile;               /* HTML-clean file name */
 
-  if (!opt.dfp)
+  if (!output_stream)
     {
       fp = fopen (file, "wb");
       if (!fp)
@@ -837,7 +839,7 @@ ftp_index (const char *file, struct url *u, struct fileinfo *f)
        }
     }
   else
-    fp = opt.dfp;
+    fp = output_stream;
   if (u->user)
     {
       char *tmpu, *tmpp;        /* temporary, clean user and passwd */
@@ -919,7 +921,7 @@ ftp_index (const char *file, struct url *u, struct fileinfo *f)
     }
   fprintf (fp, "</pre>\n</body>\n</html>\n");
   xfree (upwd);
-  if (!opt.dfp)
+  if (!output_stream)
     fclose (fp);
   else
     fflush (fp);

View File

@@ -66,6 +66,9 @@ extern LARGE_INT total_downloaded_bytes;
 
 extern char ftp_last_respline[];
 
+extern FILE *output_stream;
+extern int output_stream_regular;
+
 typedef struct
 {
   int st;                       /* connection status */
@@ -241,6 +244,8 @@ getftp (struct url *u, long *len, long restval, ccon *con)
   int cmd = con->cmd;
   int pasv_mode_open = 0;
   long expected_bytes = 0L;
+  int rest_failed = 0;
+  int flags;
 
   assert (con != NULL);
   assert (con->target != NULL);
@@ -791,22 +796,8 @@ Error in server response, closing control connection.\n"));
       return err;
       break;
     case FTPRESTFAIL:
-      /* If `-c' is specified and the file already existed when
-         Wget was started, it would be a bad idea for us to start
-         downloading it from scratch, effectively truncating it.  */
-      if (opt.always_rest && (cmd & NO_TRUNCATE))
-        {
-          logprintf (LOG_NOTQUIET,
-                     _("\nREST failed; will not truncate `%s'.\n"),
-                     con->target);
-          fd_close (csock);
-          con->csock = -1;
-          fd_close (dtsock);
-          fd_close (local_sock);
-          return CONTNOTSUPPORTED;
-        }
       logputs (LOG_VERBOSE, _("\nREST failed, starting from scratch.\n"));
-      restval = 0L;
+      rest_failed = 1;
       break;
     case FTPOK:
       /* fine and dandy */
@@ -965,8 +956,8 @@ Error in server response, closing control connection.\n"));
        }
     }
 
-  /* Open the file -- if opt.dfp is set, use it instead.  */
-  if (!opt.dfp || con->cmd & DO_LIST)
+  /* Open the file -- if output_stream is set, use it instead.  */
+  if (!output_stream || con->cmd & DO_LIST)
     {
       mkalldirs (con->target);
       if (opt.backups)
@@ -986,24 +977,7 @@ Error in server response, closing control connection.\n"));
        }
     }
   else
-    {
-      extern int global_download_count;
-      fp = opt.dfp;
-
-      /* Rewind the output document if the download starts over and if
-         this is the first download.  See gethttp() for a longer
-         explanation.  */
-      if (!restval && global_download_count == 0 && opt.dfp != stdout)
-        {
-          /* This will silently fail for streams that don't correspond
-             to regular files, but that's OK.  */
-          rewind (fp);
-          /* ftruncate is needed because opt.dfp is opened in append
-             mode if opt.always_rest is set.  */
-          ftruncate (fileno (fp), 0);
-          clearerr (fp);
-        }
-    }
+    fp = output_stream;
 
   if (*len)
     {
@@ -1023,9 +997,12 @@ Error in server response, closing control connection.\n"));
     }
 
   /* Get the contents of the document.  */
+  flags = 0;
+  if (restval && rest_failed)
+    flags |= rb_skip_startpos;
   res = fd_read_body (dtsock, fp,
                       expected_bytes ? expected_bytes - restval : 0,
-                      0, restval, len, &con->dltime);
+                      restval, len, &con->dltime, flags);
   *len += restval;
 
   tms = time_str (NULL);
@@ -1039,7 +1016,7 @@ Error in server response, closing control connection.\n"));
      error here.  Checking the result of fwrite() is not enough --
      errors could go unnoticed!  */
   int flush_res;
-  if (!opt.dfp || con->cmd & DO_LIST)
+  if (!output_stream || con->cmd & DO_LIST)
     flush_res = fclose (fp);
   else
     flush_res = fflush (fp);
@@ -1105,8 +1082,8 @@ Error in server response, closing control connection.\n"));
 
   if (!(cmd & LEAVE_PENDING))
     {
-      /* I should probably send 'QUIT' and check for a reply, but this
-         is faster.  #### Is it OK, though? */
+      /* Closing the socket is faster than sending 'QUIT' and the
+         effect is the same.  */
       fd_close (csock);
       con->csock = -1;
     }
@@ -1144,7 +1121,7 @@ static uerr_t
 ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
 {
   int count, orig_lp;
-  long restval, len;
+  long restval, len = 0;
   char *tms, *locf;
   char *tmrate = NULL;
   uerr_t err;
@@ -1202,20 +1179,14 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
           con->cmd |= DO_CWD;
         }
 
-      /* Assume no restarting.  */
-      restval = 0L;
-      if ((count > 1 || opt.always_rest)
-          && !(con->cmd & DO_LIST)
-          && file_exists_p (locf))
-        if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
-          restval = st.st_size;
-
-      /* In `-c' is used, check whether the file we're writing to
-         exists and is of non-zero length.  If so, we'll refuse to
-         truncate it if the server doesn't support continued
-         downloads.  */
-      if (opt.always_rest && restval > 0)
-        con->cmd |= NO_TRUNCATE;
+      /* Decide whether or not to restart.  */
+      restval = 0;
+      if (count > 1)
+        restval = len;          /* start where the previous run left off */
+      else if (opt.always_rest
+               && stat (locf, &st) == 0
+               && S_ISREG (st.st_mode))
+        restval = st.st_size;
 
       /* Get the current time string.  */
       tms = time_str (NULL);
@@ -1601,7 +1572,7 @@ Already have correct symlink %s -> %s\n\n"),
           const char *fl = NULL;
           if (opt.output_document)
             {
-              if (opt.od_known_regular)
+              if (output_stream_regular)
                 fl = opt.output_document;
             }
           else

View File

@@ -104,9 +104,7 @@ enum wget_ftp_command
   DO_CWD        = 0x0002,       /* Change current directory.  */
   DO_RETR       = 0x0004,       /* Retrieve the file.  */
   DO_LIST       = 0x0008,       /* Retrieve the directory list.  */
-  LEAVE_PENDING = 0x0010,       /* Do not close the socket.  */
-  NO_TRUNCATE   = 0x0020        /* Don't truncate the file if REST
-                                   malfunctions. */
+  LEAVE_PENDING = 0x0010        /* Do not close the socket.  */
 };
 
 enum wget_ftp_fstatus

View File

@ -1,6 +1,5 @@
/* HTTP support. /* HTTP support.
Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2002 Copyright (C) 2003 Free Software Foundation, Inc.
Free Software Foundation, Inc.
This file is part of GNU Wget. This file is part of GNU Wget.
@ -76,6 +75,9 @@ extern int errno;
extern char *version_string; extern char *version_string;
extern LARGE_INT total_downloaded_bytes; extern LARGE_INT total_downloaded_bytes;
extern FILE *output_stream;
extern int output_stream_regular;
#ifndef MIN #ifndef MIN
# define MIN(x, y) ((x) > (y) ? (y) : (x)) # define MIN(x, y) ((x) > (y) ? (y) : (x))
#endif #endif
@ -114,6 +116,7 @@ struct cookie_jar *wget_cookie_jar;
#define HTTP_STATUS_UNAUTHORIZED 401 #define HTTP_STATUS_UNAUTHORIZED 401
#define HTTP_STATUS_FORBIDDEN 403 #define HTTP_STATUS_FORBIDDEN 403
#define HTTP_STATUS_NOT_FOUND 404 #define HTTP_STATUS_NOT_FOUND 404
#define HTTP_STATUS_RANGE_NOT_SATISFIABLE 416
/* Server errors 5xx. */ /* Server errors 5xx. */
#define HTTP_STATUS_INTERNAL 500 #define HTTP_STATUS_INTERNAL 500
@ -978,8 +981,6 @@ struct http_stat
char *error; /* textual HTTP error */ char *error; /* textual HTTP error */
int statcode; /* status code */ int statcode; /* status code */
double dltime; /* time of the download in msecs */ double dltime; /* time of the download in msecs */
int no_truncate; /* whether truncating the file is
forbidden. */
const char *referer; /* value of the referer header. */ const char *referer; /* value of the referer header. */
char **local_file; /* local file. */ char **local_file; /* local file. */
}; };
@ -1035,6 +1036,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
FILE *fp; FILE *fp;
int sock = -1; int sock = -1;
int flags;
/* Whether authorization has been already tried. */ /* Whether authorization has been already tried. */
int auth_tried_already = 0; int auth_tried_already = 0;
@ -1617,68 +1619,28 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
} }
} }
if (contrange == 0 && hs->restval > 0) if (statcode == HTTP_STATUS_RANGE_NOT_SATISFIABLE)
{ {
/* The download starts from the beginning, presumably because /* If `-c' is in use and the file has been fully downloaded (or
the server did not honor our `Range' request. Normally we'd the remote file has shrunk), Wget effectively requests bytes
just reset hs->restval and start the download from after the end of file and the server response with 416. */
scratch. */ logputs (LOG_VERBOSE, _("\
/* However, if `-c' is used, we need to be a bit more careful:
1. If `-c' is specified and the file already existed when
Wget was started, it would be a bad idea to start downloading
it from scratch, effectively truncating the file.
2. If `-c' is used on a file that is already fully
downloaded, we're requesting bytes after the end of file,
which can result in the server not honoring `Range'. If this
is the case, `Content-Length' will be equal to the length of
the file. */
if (opt.always_rest)
{
/* Check for condition #2. */
if (contlen != -1 /* we got content-length. */
&& hs->restval >= contlen /* file fully downloaded
or has shrunk. */
)
{
logputs (LOG_VERBOSE, _("\
\n The file is already fully retrieved; nothing to do.\n\n")); \n The file is already fully retrieved; nothing to do.\n\n"));
/* In case the caller inspects. */ /* In case the caller inspects. */
hs->len = contlen; hs->len = contlen;
hs->res = 0; hs->res = 0;
/* Mark as successfully retrieved. */ /* Mark as successfully retrieved. */
*dt |= RETROKF; *dt |= RETROKF;
xfree_null (type); xfree_null (type);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */ might be more bytes in the body. */
return RETRUNNEEDED; return RETRUNNEEDED;
}
/* Check for condition #1. */
if (hs->no_truncate)
{
logprintf (LOG_NOTQUIET,
_("\
\n\
Continued download failed on this file, which conflicts with `-c'.\n\
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
xfree_null (type);
CLOSE_INVALIDATE (sock); /* see above */
return CONTNOTSUPPORTED;
}
/* Fallthrough */
}
hs->restval = 0;
} }
else if (contrange != hs->restval || if ((contrange != 0 && contrange != hs->restval)
(H_PARTIAL (statcode) && contrange == -1)) || (H_PARTIAL (statcode) && !contrange))
{ {
/* This means the whole request was somehow misunderstood by the /* The Range request was somehow misunderstood by the server.
server. Bail out. */ Bail out. */
xfree_null (type); xfree_null (type);
CLOSE_INVALIDATE (sock); CLOSE_INVALIDATE (sock);
return RANGEERR; return RANGEERR;
@ -1727,7 +1689,7 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
} }
/* Open the local file. */ /* Open the local file. */
if (!opt.dfp) if (!output_stream)
{ {
mkalldirs (*hs->local_file); mkalldirs (*hs->local_file);
if (opt.backups) if (opt.backups)
@ -1736,53 +1698,27 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
if (!fp) if (!fp)
{ {
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno)); logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there CLOSE_INVALIDATE (sock);
might be more bytes in the body. */
return FOPENERR; return FOPENERR;
} }
} }
else /* opt.dfp */ else
{ fp = output_stream;
extern int global_download_count;
fp = opt.dfp;
/* To ensure that repeated "from scratch" downloads work for -O
files, we rewind the file pointer, unless restval is
non-zero. (This works only when -O is used on regular files,
but it's still a valuable feature.)
However, this loses when more than one URL is specified on /* #### This confuses the timestamping code that checks for file
the command line the second rewinds eradicates the contents size. Maybe we should save some additional information? */
of the first download. Thus we disable the above trick for
all the downloads except the very first one.
#### A possible solution to this would be to remember the
file position in the output document and to seek to that
position, instead of rewinding.
We don't truncate stdout, since that breaks
"wget -O - [...] >> foo".
*/
if (!hs->restval && global_download_count == 0 && opt.dfp != stdout)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
rewind (fp);
/* ftruncate is needed because opt.dfp is opened in append
mode if opt.always_rest is set. */
ftruncate (fileno (fp), 0);
clearerr (fp);
}
}
/* #### This confuses the code that checks for file size. There
should be some overhead information. */
if (opt.save_headers) if (opt.save_headers)
fwrite (head, 1, strlen (head), fp); fwrite (head, 1, strlen (head), fp);
/* Download the request body. */ /* Download the request body. */
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0, keep_alive, flags = 0;
hs->restval, &hs->len, &hs->dltime); if (keep_alive)
hs->len += contrange; flags |= rb_read_exactly;
if (hs->restval > 0 && contrange == 0)
flags |= rb_skip_startpos;
hs->res = fd_read_body (sock, fp, contlen != -1 ? contlen : 0,
hs->restval, &hs->len, &hs->dltime, flags);
hs->len += hs->restval;
if (hs->res >= 0) if (hs->res >= 0)
CLOSE_FINISH (sock); CLOSE_FINISH (sock);
@ -1794,7 +1730,7 @@ Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
error here. Checking the result of fwrite() is not enough -- error here. Checking the result of fwrite() is not enough --
errors could go unnoticed! */ errors could go unnoticed! */
int flush_res; int flush_res;
if (!opt.dfp) if (!output_stream)
flush_res = fclose (fp); flush_res = fclose (fp);
else else
flush_res = fflush (fp); flush_res = fflush (fp);
@ -1847,6 +1783,8 @@ http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
if (strchr (u->url, '*')) if (strchr (u->url, '*'))
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n")); logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
xzero (hstat);
/* Determine the local filename. */ /* Determine the local filename. */
if (local_file && *local_file) if (local_file && *local_file)
hstat.local_file = local_file; hstat.local_file = local_file;
@ -1947,7 +1885,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
} }
/* Reset the counter. */ /* Reset the counter. */
count = 0; count = 0;
*dt = 0 | ACCEPTRANGES; *dt = 0;
/* THE loop */ /* THE loop */
do do
{ {
@ -1979,21 +1917,15 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
*dt |= HEAD_ONLY; *dt |= HEAD_ONLY;
else else
*dt &= ~HEAD_ONLY; *dt &= ~HEAD_ONLY;
/* Assume no restarting. */
hstat.restval = 0L;
/* Decide whether or not to restart. */
if (((count > 1 && (*dt & ACCEPTRANGES)) || opt.always_rest)
/* #### this calls access() and then stat(); could be optimized. */
&& file_exists_p (locf))
if (stat (locf, &st) == 0 && S_ISREG (st.st_mode))
hstat.restval = st.st_size;
/* In `-c' is used and the file is existing and non-empty, /* Decide whether or not to restart. */
refuse to truncate it if the server doesn't support continued hstat.restval = 0;
downloads. */ if (count > 1)
hstat.no_truncate = 0; hstat.restval = hstat.len; /* continue where we left off */
if (opt.always_rest && hstat.restval) else if (opt.always_rest
hstat.no_truncate = 1; && stat (locf, &st) == 0
&& S_ISREG (st.st_mode))
hstat.restval = st.st_size;
/* Decide whether to send the no-cache directive. We send it in /* Decide whether to send the no-cache directive. We send it in
two cases: two cases:
@ -2171,7 +2103,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
const char *fl = NULL; const char *fl = NULL;
if (opt.output_document) if (opt.output_document)
{ {
if (opt.od_known_regular) if (output_stream_regular)
fl = opt.output_document; fl = opt.output_document;
} }
else else

View File

@@ -1284,8 +1284,13 @@ cleanup (void)
 {
   /* Free external resources, close files, etc. */
 
-  if (opt.dfp)
-    fclose (opt.dfp);
+  {
+    extern FILE *output_stream;
+    if (output_stream)
+      fclose (output_stream);
+    /* No need to check for error because Wget flushes its output (and
+       checks for errors) after any data arrives.  */
+  }
 
   /* We're exiting anyway so there's no real need to call free()
      hundreds of times.  Skipping the frees will make Wget exit

View File

@@ -864,19 +864,23 @@ Can't timestamp and not clobber old files at the same time.\n"));
   /* Open the output filename if necessary.  */
   if (opt.output_document)
     {
+      extern FILE *output_stream;
+      extern int output_stream_regular;
+
       if (HYPHENP (opt.output_document))
-       opt.dfp = stdout;
+       output_stream = stdout;
       else
        {
          struct stat st;
-         opt.dfp = fopen (opt.output_document, opt.always_rest ? "ab" : "wb");
-         if (opt.dfp == NULL)
+         output_stream = fopen (opt.output_document,
+                                opt.always_rest ? "ab" : "wb");
+         if (output_stream == NULL)
            {
              perror (opt.output_document);
              exit (1);
            }
-         if (fstat (fileno (opt.dfp), &st) == 0 && S_ISREG (st.st_mode))
-           opt.od_known_regular = 1;
+         if (fstat (fileno (output_stream), &st) == 0 && S_ISREG (st.st_mode))
+           output_stream_regular = 1;
        }
     }

View File

@@ -81,11 +81,6 @@ struct options
                                    FTP.  */
   char *output_document;        /* The output file to which the
                                    documents will be printed. */
-  int od_known_regular;         /* whether output_document is a
-                                   regular file we can manipulate,
-                                   i.e. not `-' or a device file. */
-  FILE *dfp;                    /* The file pointer to the output
-                                   document. */
 
   int always_rest;              /* Always use REST. */
   char *ftp_acc;                /* FTP username */

View File

@@ -63,12 +63,16 @@ so, delete this exception statement from your version.  */
 extern int errno;
 #endif
 
-/* See the comment in gethttp() why this is needed. */
-int global_download_count;
-
 /* Total size of downloaded files.  Used to enforce quota.  */
 LARGE_INT total_downloaded_bytes;
 
+/* If non-NULL, the stream to which output should be written.  This
+   stream is initialized when `-O' is used.  */
+FILE *output_stream;
+
+/* Whether output_document is a regular file we can manipulate,
+   i.e. not `-' or a device file. */
+int output_stream_regular;
+
 static struct {
   long chunk_bytes;
@@ -133,18 +137,51 @@ limit_bandwidth (long bytes, struct wget_timer *timer)
 # define MIN(i, j) ((i) <= (j) ? (i) : (j))
 #endif
 
+/* Write data in BUF to OUT.  However, if *SKIP is non-zero, skip that
+   amount of data and decrease SKIP.  Increment *TOTAL by the amount
+   of data written.  */
+
+static int
+write_data (FILE *out, const char *buf, int bufsize, long *skip,
+            long *transferred)
+{
+  if (!out)
+    return 1;
+  if (*skip > bufsize)
+    {
+      *skip -= bufsize;
+      return 1;
+    }
+  if (*skip)
+    {
+      buf += *skip;
+      bufsize -= *skip;
+      *skip = 0;
+      if (bufsize == 0)
+        return 1;
+    }
+  *transferred += bufsize;
+  fwrite (buf, 1, bufsize, out);
+  /* Immediately flush the downloaded data.  This should not hinder
+     performance: fast downloads will arrive in large 16K chunks
+     (which stdio would write out immediately anyway), and slow
+     downloads wouldn't be limited by disk speed.  */
+  fflush (out);
+  return !ferror (out);
+}
+
 /* Read the contents of file descriptor FD until it the connection
    terminates or a read error occurs.  The data is read in portions of
    up to 16K and written to OUT as it arrives.  If opt.verbose is set,
    the progress is shown.
 
    TOREAD is the amount of data expected to arrive, normally only used
-   by the progress gauge.  However, if EXACT is set, no more than
-   TOREAD octets will be read.
+   by the progress gauge.
 
    STARTPOS is the position from which the download starts, used by
    the progress gauge.  The amount of data read gets stored to
-   *AMOUNT_READ.  The time it took to download the data (in
+   *TRANSFERRED.  The time it took to download the data (in
    milliseconds) is stored to *ELAPSED.
 
   The function exits and returns the amount of data read.  In case of
@@ -152,8 +189,8 @@ limit_bandwidth (long bytes, struct wget_timer *timer)
    writing data, -2 is returned.  */
 int
-fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
-              long *amount_read, double *elapsed)
+fd_read_body (int fd, FILE *out, long toread, long startpos,
+              long *transferred, double *elapsed, int flags)
 {
   int ret = 0;
@@ -172,11 +209,22 @@ fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
      data arrives slowly.  */
   int progress_interactive = 0;
 
-  *amount_read = 0;
+  int exact = flags & rb_read_exactly;
+  long skip = 0;
+
+  /* How much data we've read.  This is used internally and is
+     unaffected by skipping STARTPOS.  */
+  long total_read = 0;
+
+  *transferred = 0;
+  if (flags & rb_skip_startpos)
+    skip = startpos;
 
   if (opt.verbose)
     {
-      progress = progress_create (startpos, toread);
+      /* If we're skipping STARTPOS bytes, hide it from
+         progress_create because the indicator can't deal with it.  */
+      progress = progress_create (skip ? 0 : startpos, toread);
       progress_interactive = progress_interactive_p (progress);
     }
@@ -203,9 +251,9 @@ fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
      means that it is unknown how much data is to arrive.  However, if
      EXACT is set, then toread==0 means what it says: that no data
      should be read.  */
-  while (!exact || (*amount_read < toread))
+  while (!exact || (total_read < toread))
     {
-      int rdsize = exact ? MIN (toread - *amount_read, dlbufsize) : dlbufsize;
+      int rdsize = exact ? MIN (toread - total_read, dlbufsize) : dlbufsize;
       double tmout = opt.read_timeout;
       if (progress_interactive)
        {
@@ -241,15 +289,10 @@ fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
          last_successful_read_tm = wtimer_read (timer);
        }
 
-      if (ret > 0 && out != NULL)
+      if (ret > 0)
        {
-         fwrite (dlbuf, 1, ret, out);
-         /* Immediately flush the downloaded data.  This should not
-            hinder performance: fast downloads will arrive in large
-            16K chunks (which stdio would write out anyway), and slow
-            downloads wouldn't be limited by disk speed.  */
-         fflush (out);
-         if (ferror (out))
+         total_read += ret;
+         if (!write_data (out, dlbuf, ret, &skip, transferred))
            {
              ret = -2;
              goto out;
@@ -259,13 +302,12 @@ fd_read_body (int fd, FILE *out, long toread, int exact, long startpos,
       if (opt.limit_rate)
        limit_bandwidth (ret, timer);
 
-      *amount_read += ret;
       if (progress)
        progress_update (progress, ret, wtimer_read (timer));
 #ifdef WINDOWS
       if (toread > 0)
        ws_percenttitle (100.0 *
-                        (startpos + *amount_read) / (startpos + toread));
+                        (startpos + total_read) / (startpos + toread));
 #endif
     }
   if (ret < -1)
@@ -713,7 +755,6 @@ retrieve_url (const char *origurl, char **file, char **newloc,
       xfree (url);
     }
 
-  ++global_download_count;
 
   RESTORE_POST_DATA;
   return result;

View File

@@ -30,13 +30,19 @@ so, delete this exception statement from your version.  */
 #ifndef RETR_H
 #define RETR_H
 
+/* Flags for fd_read_body. */
+enum {
+  rb_read_exactly  = 1,
+  rb_skip_startpos = 2
+};
+int fd_read_body PARAMS ((int, FILE *, long, long, long *, double *, int));
+
 typedef const char *(*hunk_terminator_t) PARAMS ((const char *, int, int));
 
 char *fd_read_hunk PARAMS ((int, hunk_terminator_t, int));
 char *fd_read_line PARAMS ((int));
 
-int fd_read_body PARAMS ((int, FILE *, long, int, long, long *, double *));
 uerr_t retrieve_url PARAMS ((const char *, char **, char **,
                              const char *, int *));
 uerr_t retrieve_from_file PARAMS ((const char *, int, int *));