mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] New option --random-wait.
Submitted by Alan Eldridge in <200111042106.fA4L63b75804@wwweasel.geeksrus.net>.
This commit is contained in:
parent
3afb9c659a
commit
2c41d783c6
@ -1,3 +1,7 @@
|
|||||||
|
2001-11-04 Alan Eldridge <alane@geeksrus.net>
|
||||||
|
|
||||||
|
* wget.texi: Document --random-wait, randomwait=on/off.
|
||||||
|
|
||||||
2001-11-23 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-11-23 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* wget.texi (Download Options): Document the new `--progress'
|
* wget.texi (Download Options): Document the new `--progress'
|
||||||
|
@ -701,6 +701,26 @@ seconds per file.
|
|||||||
Note that this option is turned on by default in the global
|
Note that this option is turned on by default in the global
|
||||||
@file{wgetrc} file.
|
@file{wgetrc} file.
|
||||||
|
|
||||||
|
@cindex wait, random
|
||||||
|
@cindex random wait
|
||||||
|
@itemx --random-wait
|
||||||
|
Some web sites may perform log analysis to identify retrieval programs
|
||||||
|
such as Wget by looking for statistically significant similarities in
|
||||||
|
the time between requests. This option causes the time between requests
|
||||||
|
to vary between 0 and 2 * @var{wait} seconds, where @var{wait} was
|
||||||
|
specified using the @samp{-w} or @samp{--wait} option, in order to mask
|
||||||
|
Wget's presence from such analysis.
|
||||||
|
|
||||||
|
A recent article in a publication devoted to development on a popular
|
||||||
|
consumer platform provided code to perform this analysis on the fly.
|
||||||
|
Its author suggested blocking at the class C address level to ensure
|
||||||
|
automated retrieval programs were blocked despite changing DHCP-supplied
|
||||||
|
addresses.
|
||||||
|
|
||||||
|
The @samp{--random-wait} option was inspired by this ill-advised
|
||||||
|
recommendation to block many unrelated users from a web site due to the
|
||||||
|
actions of one.
|
||||||
|
|
||||||
@cindex proxy
|
@cindex proxy
|
||||||
@item -Y on/off
|
@item -Y on/off
|
||||||
@itemx --proxy=on/off
|
@itemx --proxy=on/off
|
||||||
@ -2168,6 +2188,10 @@ Wait @var{n} seconds between retrievals---the same as @samp{-w}.
|
|||||||
Wait up to @var{n} seconds between retries of failed retrievals
|
Wait up to @var{n} seconds between retries of failed retrievals
|
||||||
only---the same as @samp{--waitretry}. Note that this is turned on by
|
only---the same as @samp{--waitretry}. Note that this is turned on by
|
||||||
default in the global @file{wgetrc}.
|
default in the global @file{wgetrc}.
|
||||||
|
|
||||||
|
@item randomwait = on/off
|
||||||
|
Turn random between-request wait times on or off. The same as
|
||||||
|
@samp{--random-wait}.
|
||||||
@end table
|
@end table
|
||||||
|
|
||||||
@node Sample Wgetrc, , Wgetrc Commands, Startup File
|
@node Sample Wgetrc, , Wgetrc Commands, Startup File
|
||||||
|
@ -1,3 +1,17 @@
|
|||||||
|
2001-11-04 Alan Eldridge <alane@geeksrus.net>
|
||||||
|
|
||||||
|
* config.h.in: Added HAVE_RANDOM.
|
||||||
|
|
||||||
|
* options.h: Added random_wait to struct options.
|
||||||
|
|
||||||
|
* main.c (print_help [HAVE_RANDOM], main): Added arg parsing, help
|
||||||
|
for --random-wait.
|
||||||
|
|
||||||
|
* retr.c (sleep_between_retrievals) [HAVE_RANDOM]: Added
|
||||||
|
implementation of random wait times.
|
||||||
|
|
||||||
|
* init.c (commands): Added "randomwait" keyword.
|
||||||
|
|
||||||
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* recur.c (descend_url_p): Be more conservative with blacklisting
|
* recur.c (descend_url_p): Be more conservative with blacklisting
|
||||||
|
@ -160,6 +160,7 @@ static struct {
|
|||||||
{ "proxyuser", &opt.proxy_user, cmd_string },
|
{ "proxyuser", &opt.proxy_user, cmd_string },
|
||||||
{ "quiet", &opt.quiet, cmd_boolean },
|
{ "quiet", &opt.quiet, cmd_boolean },
|
||||||
{ "quota", &opt.quota, cmd_bytes },
|
{ "quota", &opt.quota, cmd_bytes },
|
||||||
|
{ "randomwait", &opt.random_wait, cmd_boolean },
|
||||||
{ "reclevel", &opt.reclevel, cmd_number_inf },
|
{ "reclevel", &opt.reclevel, cmd_number_inf },
|
||||||
{ "recursive", NULL, cmd_spec_recursive },
|
{ "recursive", NULL, cmd_spec_recursive },
|
||||||
{ "referer", &opt.referer, cmd_string },
|
{ "referer", &opt.referer, cmd_string },
|
||||||
|
@ -161,9 +161,14 @@ Download:\n\
|
|||||||
-T, --timeout=SECONDS set the read timeout to SECONDS.\n\
|
-T, --timeout=SECONDS set the read timeout to SECONDS.\n\
|
||||||
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
|
-w, --wait=SECONDS wait SECONDS between retrievals.\n\
|
||||||
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
|
--waitretry=SECONDS wait 1...SECONDS between retries of a retrieval.\n\
|
||||||
|
--random-wait wait from 0...2*WAIT secs between retrievals.\n\
|
||||||
-Y, --proxy=on/off turn proxy on or off.\n\
|
-Y, --proxy=on/off turn proxy on or off.\n\
|
||||||
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
|
-Q, --quota=NUMBER set retrieval quota to NUMBER.\n\
|
||||||
\n"), stdout);
|
\n"), stdout);
|
||||||
|
#ifdef HAVE_RANDOM
|
||||||
|
fputs (_("\
|
||||||
|
\n"), stdout);
|
||||||
|
#endif
|
||||||
fputs (_("\
|
fputs (_("\
|
||||||
Directories:\n\
|
Directories:\n\
|
||||||
-nd --no-directories don\'t create directories.\n\
|
-nd --no-directories don\'t create directories.\n\
|
||||||
@ -261,6 +266,7 @@ main (int argc, char *const *argv)
|
|||||||
{ "passive-ftp", no_argument, NULL, 139 },
|
{ "passive-ftp", no_argument, NULL, 139 },
|
||||||
{ "page-requisites", no_argument, NULL, 'p' },
|
{ "page-requisites", no_argument, NULL, 'p' },
|
||||||
{ "quiet", no_argument, NULL, 'q' },
|
{ "quiet", no_argument, NULL, 'q' },
|
||||||
|
{ "random-wait", no_argument, NULL, 165 },
|
||||||
{ "recursive", no_argument, NULL, 'r' },
|
{ "recursive", no_argument, NULL, 'r' },
|
||||||
{ "relative", no_argument, NULL, 'L' },
|
{ "relative", no_argument, NULL, 'L' },
|
||||||
{ "retr-symlinks", no_argument, NULL, 137 },
|
{ "retr-symlinks", no_argument, NULL, 137 },
|
||||||
@ -395,6 +401,9 @@ hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:",
|
|||||||
case 156:
|
case 156:
|
||||||
setval ("httpkeepalive", "off");
|
setval ("httpkeepalive", "off");
|
||||||
break;
|
break;
|
||||||
|
case 165:
|
||||||
|
setval ("randomwait", "on");
|
||||||
|
break;
|
||||||
case 'b':
|
case 'b':
|
||||||
setval ("background", "on");
|
setval ("background", "on");
|
||||||
break;
|
break;
|
||||||
|
@ -99,6 +99,7 @@ struct options
|
|||||||
long timeout; /* The value of read timeout in
|
long timeout; /* The value of read timeout in
|
||||||
seconds. */
|
seconds. */
|
||||||
#endif
|
#endif
|
||||||
|
int random_wait; /* vary from 0 .. 2*wait secs by random()? */
|
||||||
long wait; /* The wait period between retrievals. */
|
long wait; /* The wait period between retrievals. */
|
||||||
long waitretry; /* The wait period between retries. - HEH */
|
long waitretry; /* The wait period between retries. - HEH */
|
||||||
int use_robots; /* Do we heed robots.txt? */
|
int use_robots; /* Do we heed robots.txt? */
|
||||||
|
14
src/retr.c
14
src/retr.c
@ -560,8 +560,18 @@ sleep_between_retrievals (int count)
|
|||||||
sleep (opt.waitretry);
|
sleep (opt.waitretry);
|
||||||
}
|
}
|
||||||
else if (opt.wait)
|
else if (opt.wait)
|
||||||
/* Otherwise, check if opt.wait is specified. If so, sleep. */
|
{
|
||||||
sleep (opt.wait);
|
/* Otherwise, check if opt.wait is specified. If so, sleep. */
|
||||||
|
if (count > 1 || !opt.random_wait)
|
||||||
|
sleep (opt.wait);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int waitsecs = random() % (opt.wait * 2 + 1);
|
||||||
|
DEBUGP(("sleep_between_retrievals: norm=%ld,random=%ld,sleep=%d\n",
|
||||||
|
opt.wait, waitsecs - opt.wait, waitsecs));
|
||||||
|
sleep(waitsecs);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (first_retrieval)
|
if (first_retrieval)
|
||||||
first_retrieval = 0;
|
first_retrieval = 0;
|
||||||
|
Loading…
Reference in New Issue
Block a user