From dfa1f4e0646f6fab5b768fd67477601126070dbc Mon Sep 17 00:00:00 2001 From: Yousong Zhou Date: Wed, 19 Mar 2014 23:42:04 +0800 Subject: [PATCH] Make wget capable of starting downloads from a specified position. This patch adds an option `--start-pos' for specifying starting position of a HTTP or FTP download. Signed-off-by: Yousong Zhou --- doc/ChangeLog | 4 ++++ doc/wget.texi | 16 ++++++++++++++++ src/ChangeLog | 7 +++++++ src/ftp.c | 2 ++ src/http.c | 2 ++ src/init.c | 4 ++++ src/main.c | 18 +++++++++++++++--- src/options.h | 1 + 8 files changed, 51 insertions(+), 3 deletions(-) diff --git a/doc/ChangeLog b/doc/ChangeLog index 58d14395..68629c69 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2014-02-10 Yousong Zhou + + * wget.texi: Add documentation for --start-pos. + 2013-12-29 Giuseppe Scrivano * wget.texi: Update to GFDL 1.3. diff --git a/doc/wget.texi b/doc/wget.texi index 6a8c6a33..11f2c404 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -701,6 +701,22 @@ Another instance where you'll get a garbled file if you try to use Note that @samp{-c} only works with @sc{ftp} servers and with @sc{http} servers that support the @code{Range} header. +@cindex offset +@cindex continue retrieval +@cindex incomplete downloads +@cindex resume download +@cindex start position +@item --start-pos=@var{OFFSET} +Start downloading at zero-based position @var{OFFSET}. Offset may be expressed +in bytes, kilobytes with the `k' suffix, or megabytes with the `m' suffix, etc. + +@samp{--start-pos} has higher precedence over @samp{--continue}. When +@samp{--start-pos} and @samp{--continue} are both specified, wget will emit a +warning then proceed as if @samp{--continue} was absent. + +Server support for continued download is required, otherwise @samp{--start-pos} +cannot help. See @samp{-c} for details. + @cindex progress indicator @cindex dot style @item --progress=@var{type} diff --git a/src/ChangeLog b/src/ChangeLog index d3ac754c..9b10ee8d 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,10 @@ +2014-03-19 Yousong Zhou + + * init.c, main.c, options.h: Add option --start-pos for specifying + start position of a download. + * http.c: Utilize opt.start_pos for HTTP download. + * ftp.c: Utilize opt.start_pos for FTP retrieval. + 2014-03-04 Giuseppe Scrivano * http.c (modify_param_value, extract_param): Aesthetic change. diff --git a/src/ftp.c b/src/ftp.c index c2522ca1..5282588d 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -1632,6 +1632,8 @@ ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con, char **local_fi /* Decide whether or not to restart. */ if (con->cmd & DO_LIST) restval = 0; + else if (opt.start_pos >= 0) + restval = opt.start_pos; else if (opt.always_rest && stat (locf, &st) == 0 && S_ISREG (st.st_mode)) diff --git a/src/http.c b/src/http.c index cd2bd15e..8bba70df 100644 --- a/src/http.c +++ b/src/http.c @@ -3121,6 +3121,8 @@ Spider mode enabled. Check if remote file exists.\n")); /* Decide whether or not to restart. */ if (force_full_retrieve) hstat.restval = hstat.len; + else if (opt.start_pos >= 0) + hstat.restval = opt.start_pos; else if (opt.always_rest && got_name && stat (hstat.local_file, &st) == 0 diff --git a/src/init.c b/src/init.c index 56fef508..9ed72b2a 100644 --- a/src/init.c +++ b/src/init.c @@ -270,6 +270,7 @@ static const struct { { "showalldnsentries", &opt.show_all_dns_entries, cmd_boolean }, { "spanhosts", &opt.spanhost, cmd_boolean }, { "spider", &opt.spider, cmd_boolean }, + { "startpos", &opt.start_pos, cmd_bytes }, { "strictcomments", &opt.strict_comments, cmd_boolean }, { "timeout", NULL, cmd_spec_timeout }, { "timestamping", &opt.timestamping, cmd_boolean }, @@ -406,6 +407,9 @@ defaults (void) opt.warc_cdx_dedup_filename = NULL; opt.warc_tempdir = NULL; opt.warc_keep_log = true; + + /* Use a negative value to mark the absence of --start-pos option */ + opt.start_pos = -1; } /* Return the user's home directory (strdup-ed), or NULL if none is diff --git a/src/main.c b/src/main.c index 3ce7583f..39fcff44 100644 --- a/src/main.c +++ b/src/main.c @@ -276,6 +276,7 @@ static struct cmdline_option option_data[] = { "server-response", 'S', OPT_BOOLEAN, "serverresponse", -1 }, { "span-hosts", 'H', OPT_BOOLEAN, "spanhosts", -1 }, { "spider", 0, OPT_BOOLEAN, "spider", -1 }, + { "start-pos", 0, OPT_VALUE, "startpos", -1 }, { "strict-comments", 0, OPT_BOOLEAN, "strictcomments", -1 }, { "timeout", 'T', OPT_VALUE, "timeout", -1 }, { "timestamping", 'N', OPT_BOOLEAN, "timestamping", -1 }, @@ -485,6 +486,8 @@ Download:\n"), existing files (overwriting them).\n"), N_("\ -c, --continue resume getting a partially-downloaded file.\n"), + N_("\ + --start-pos=OFFSET start downloading from zero-based position OFFSET.\n"), N_("\ --progress=TYPE select progress gauge type.\n"), N_("\ @@ -1323,12 +1326,13 @@ for details.\n\n")); _("WARC output does not work with --spider.\n")); exit (1); } - if (opt.always_rest) + if (opt.always_rest || opt.start_pos >= 0) { fprintf (stderr, - _("WARC output does not work with --continue, " - "--continue will be disabled.\n")); + _("WARC output does not work with --continue or" + " --start-pos, they will be disabled.\n")); opt.always_rest = false; + opt.start_pos = -1; } if (opt.warc_cdx_dedup_filename != 0 && !opt.warc_digests_enabled) { @@ -1350,6 +1354,14 @@ for details.\n\n")); exit (1); } + if (opt.start_pos >= 0 && opt.always_rest) + { + fprintf (stderr, + _("Specifying both --start-pos and --continue is not " + "recommended; --continue will be disabled.\n")); + opt.always_rest = false; + } + if (!nurl && !opt.input_filename) { /* No URL specified. */ diff --git a/src/options.h b/src/options.h index 06783cc3..b527829e 100644 --- a/src/options.h +++ b/src/options.h @@ -116,6 +116,7 @@ struct options bool ask_passwd; /* Ask for password? */ bool always_rest; /* Always use REST. */ + wgint start_pos; /* Start position of a download. */ char *ftp_user; /* FTP username */ char *ftp_passwd; /* FTP password */ bool netrc; /* Whether to read .netrc. */