mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] New option --random-wait.
Submitted by Alan Eldridge in <200111042106.fA4L63b75804@wwweasel.geeksrus.net>.
This commit is contained in:
parent
3afb9c659a
commit
2c41d783c6
@ -1,3 +1,7 @@
|
||||
2001-11-04 Alan Eldridge <alane@geeksrus.net>
|
||||
|
||||
* wget.texi: Document --random-wait, randomwait=on/off.
|
||||
|
||||
2001-11-23 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* wget.texi (Download Options): Document the new `--progress'
|
||||
|
@ -701,6 +701,26 @@ seconds per file.
|
||||
Note that this option is turned on by default in the global
|
||||
@file{wgetrc} file.
|
||||
|
||||
@cindex wait, random
|
||||
@cindex random wait
|
||||
@itemx --random-wait
|
||||
Some web sites may perform log analysis to identify retrieval programs
|
||||
such as Wget by looking for statistically significant similarities in
|
||||
the time between requests. This option causes the time between requests
|
||||
to vary between 0 and 2 * @var{wait} seconds, where @var{wait} was
|
||||
specified using the @samp{-w} or @samp{--wait} options, in order to mask
|
||||
Wget's presence from such analysis.
|
||||
|
||||
A recent article in a publication devoted to development on a popular
|
||||
consumer platform provided code to perform this analysis on the fly.
|
||||
Its author suggested blocking at the class C address level to ensure
|
||||
automated retrieval programs were blocked despite changing DHCP-supplied
|
||||
addresses.
|
||||
|
||||
The @samp{--random-wait} option was inspired by this ill-advised
|
||||
recommendation to block many unrelated users from a web site due to the
|
||||
actions of one.
|
||||
|
||||
@cindex proxy
|
||||
@item -Y on/off
|
||||
@itemx --proxy=on/off
|
||||
@ -2168,6 +2188,10 @@ Wait @var{n} seconds between retrievals---the same as @samp{-w}.
|
||||
Wait up to @var{n} seconds between retries of failed retrievals
|
||||
only---the same as @samp{--waitretry}. Note that this is turned on by
|
||||
default in the global @file{wgetrc}.
|
||||
|
||||
@item randomwait = on/off
|
||||
Turn random between-request wait times on or off. The same as
|
||||
@samp{--random-wait}.
|
||||
@end table
|
||||
|
||||
@node Sample Wgetrc, , Wgetrc Commands, Startup File
|
||||
|
@ -1,3 +1,17 @@
|
||||
2001-11-04 Alan Eldridge <alane@geeksrus.net>
|
||||
|
||||
* config.h.in: added HAVE_RANDOM.
|
||||
|
||||
* options.h: added random_wait to struct options.
|
||||
|
||||
* main.c (print_help [HAVE_RANDOM], main): added arg parsing, help
|
||||
for --random-wait.
|
||||
|
||||
* retr.c (sleep_between_retrievals) [HAVE_RANDOM]: added
|
||||
implementation of random wait times.
|
||||
|
||||
* init.c (commands): added "randomwait" keyword.
|
||||
|
||||
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* recur.c (descend_url_p): Be more conservative with blacklisting
|
||||
|
@ -160,6 +160,7 @@ static struct {
|
||||
{ "proxyuser", &opt.proxy_user, cmd_string },
|
||||
{ "quiet", &opt.quiet, cmd_boolean },
|
||||
{ "quota", &opt.quota, cmd_bytes },
|
||||
{ "randomwait", &opt.random_wait, cmd_boolean },
|
||||
{ "reclevel", &opt.reclevel, cmd_number_inf },
|
||||
{ "recursive", NULL, cmd_spec_recursive },
|
||||
{ "referer", &opt.referer, cmd_string },
|
||||
|
@ -161,9 +161,14 @@ Download:\n\
|
||||
-T, --timeout=SECONDS set the read timeout to SECONDS.\n\
|
||||
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
|
||||
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
|
||||
--random-wait wait from 0...2*WAIT secs between retrievals.\n\
|
||||
-Y, --proxy=on/off turn proxy on or off.\n\
|
||||
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
|
||||
\n"), stdout);
|
||||
#ifdef HAVE_RANDOM
|
||||
fputs (_("\
|
||||
\n"), stdout);
|
||||
#endif
|
||||
fputs (_("\
|
||||
Directories:\n\
|
||||
-nd --no-directories don\'t create directories.\n\
|
||||
@ -261,6 +266,7 @@ main (int argc, char *const *argv)
|
||||
{ "passive-ftp", no_argument, NULL, 139 },
|
||||
{ "page-requisites", no_argument, NULL, 'p' },
|
||||
{ "quiet", no_argument, NULL, 'q' },
|
||||
{ "random-wait", no_argument, NULL, 165 },
|
||||
{ "recursive", no_argument, NULL, 'r' },
|
||||
{ "relative", no_argument, NULL, 'L' },
|
||||
{ "retr-symlinks", no_argument, NULL, 137 },
|
||||
@ -395,6 +401,9 @@ hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
|
||||
case 156:
|
||||
setval ("httpkeepalive", "off");
|
||||
break;
|
||||
case 165:
|
||||
setval ("randomwait", "on");
|
||||
break;
|
||||
case 'b':
|
||||
setval ("background", "on");
|
||||
break;
|
||||
|
@ -99,6 +99,7 @@ struct options
|
||||
long timeout; /* The value of read timeout in
|
||||
seconds. */
|
||||
#endif
|
||||
int random_wait; /* vary from 0 .. wait secs by random()? */
|
||||
long wait; /* The wait period between retrievals. */
|
||||
long waitretry; /* The wait period between retries. - HEH */
|
||||
int use_robots; /* Do we heed robots.txt? */
|
||||
|
10
src/retr.c
10
src/retr.c
@ -560,8 +560,18 @@ sleep_between_retrievals (int count)
|
||||
sleep (opt.waitretry);
|
||||
}
|
||||
else if (opt.wait)
|
||||
{
|
||||
/* Otherwise, check if opt.wait is specified. If so, sleep. */
|
||||
if (count > 1 || !opt.random_wait)
|
||||
sleep (opt.wait);
|
||||
else
|
||||
{
|
||||
int waitsecs = random() % (opt.wait * 2 + 1);
|
||||
DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n",
|
||||
opt.wait, waitsecs - opt.wait, waitsecs));
|
||||
sleep(waitsecs);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (first_retrieval)
|
||||
first_retrieval = 0;
|
||||
|
Loading…
Reference in New Issue
Block a user