[svn] New option --random-wait.

Submitted by Alan Eldridge in
<200111042106.fA4L63b75804@wwweasel.geeksrus.net>.
This commit is contained in:
hniksic 2001-11-25 13:23:15 -08:00
parent 3afb9c659a
commit 2c41d783c6
7 changed files with 65 additions and 2 deletions

View File

@ -1,3 +1,7 @@
2001-11-04 Alan Eldridge <alane@geeksrus.net>
* wget.texi: Document --random-wait, randomwait=on/off.
2001-11-23 Hrvoje Niksic <hniksic@arsdigita.com>
* wget.texi (Download Options): Document the new `--progress'

View File

@ -701,6 +701,26 @@ seconds per file.
Note that this option is turned on by default in the global
@file{wgetrc} file.
@cindex wait, random
@cindex random wait
@itemx --random-wait
Some web sites may perform log analysis to identify retrieval programs
such as Wget by looking for statistically significant similarities in
the time between requests. This option causes the time between requests
to vary between 0 and 2 * @var{wait} seconds, where @var{wait} was
specified using the @samp{-w} or @samp{--wait} options, in order to mask
Wget's presence from such analysis.
A recent article in a publication devoted to development on a popular
consumer platform provided code to perform this analysis on the fly.
Its author suggested blocking at the class C address level to ensure
automated retrieval programs were blocked despite changing DHCP-supplied
addresses.
The @samp{--random-wait} option was inspired by this ill-advised
recommendation to block many unrelated users from a web site due to the
actions of one.
@cindex proxy
@item -Y on/off
@itemx --proxy=on/off
@ -2168,6 +2188,10 @@ Wait @var{n} seconds between retrievals---the same as @samp{-w}.
Wait up to @var{n} seconds between retries of failed retrievals
only---the same as @samp{--waitretry}. Note that this is turned on by
default in the global @file{wgetrc}.
@item randomwait = on/off
Turn random between-request wait times on or off. The same as
@samp{--random-wait}.
@end table
@node Sample Wgetrc, , Wgetrc Commands, Startup File

View File

@ -1,3 +1,17 @@
2001-11-04 Alan Eldridge <alane@geeksrus.net>
* config.h.in: added HAVE_RANDOM.
* options.h: added random_wait to struct options.
* main.c (print_help [HAVE_RANDOM], main): added arg parsing, help
for --random-wait.
* retr.c (sleep_between_retrievals) [HAVE_RANDOM]: added
implementation of random wait times.
* init.c (commands): added "randomwait" keyword.
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
* recur.c (descend_url_p): Be more conservative with blacklisting

View File

@ -160,6 +160,7 @@ static struct {
{ "proxyuser", &opt.proxy_user, cmd_string },
{ "quiet", &opt.quiet, cmd_boolean },
{ "quota", &opt.quota, cmd_bytes },
{ "randomwait", &opt.random_wait, cmd_boolean },
{ "reclevel", &opt.reclevel, cmd_number_inf },
{ "recursive", NULL, cmd_spec_recursive },
{ "referer", &opt.referer, cmd_string },

View File

@ -161,9 +161,14 @@ Download:\n\
-T, --timeout=SECONDS set the read timeout to SECONDS.\n\
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
--random-wait wait from 0...2*WAIT secs between retrievals.\n\
-Y, --proxy=on/off turn proxy on or off.\n\
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
\n"), stdout);
#ifdef HAVE_RANDOM
fputs (_("\
\n"), stdout);
#endif
fputs (_("\
Directories:\n\
-nd --no-directories don\'t create directories.\n\
@ -261,6 +266,7 @@ main (int argc, char *const *argv)
{ "passive-ftp", no_argument, NULL, 139 },
{ "page-requisites", no_argument, NULL, 'p' },
{ "quiet", no_argument, NULL, 'q' },
{ "random-wait", no_argument, NULL, 165 },
{ "recursive", no_argument, NULL, 'r' },
{ "relative", no_argument, NULL, 'L' },
{ "retr-symlinks", no_argument, NULL, 137 },
@ -395,6 +401,9 @@ hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
case 156:
setval ("httpkeepalive", "off");
break;
case 165:
setval ("randomwait", "on");
break;
case 'b':
setval ("background", "on");
break;

View File

@ -99,6 +99,7 @@ struct options
long timeout; /* The value of read timeout in
seconds. */
#endif
int random_wait; /* vary from 0 .. wait secs by random()? */
long wait; /* The wait period between retrievals. */
long waitretry; /* The wait period between retries. - HEH */
int use_robots; /* Do we heed robots.txt? */

View File

@ -560,8 +560,18 @@ sleep_between_retrievals (int count)
sleep (opt.waitretry);
}
else if (opt.wait)
/* Otherwise, check if opt.wait is specified. If so, sleep. */
sleep (opt.wait);
{
/* Otherwise, check if opt.wait is specified. If so, sleep. */
if (count > 1 || !opt.random_wait)
sleep (opt.wait);
else
{
int waitsecs = random() % (opt.wait * 2 + 1);
DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n",
opt.wait, waitsecs - opt.wait, waitsecs));
sleep(waitsecs);
}
}
}
if (first_retrieval)
first_retrieval = 0;