mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Recursion and progress bar tweaks.
Published in <sxsd727cvc0.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
df05e7ff10
commit
3afb9c659a
@ -1,3 +1,15 @@
|
|||||||
|
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* recur.c (descend_url_p): Be more conservative with blacklisting
|
||||||
|
URLs.
|
||||||
|
(convert_all_links): Print how many files have been converted, and
|
||||||
|
how long it took.
|
||||||
|
|
||||||
|
* progress.c (create_image): Place the number of downloaded bytes
|
||||||
|
right after the progress bar.
|
||||||
|
|
||||||
|
* utils.c (suffix): Return a pointer into the string.
|
||||||
|
|
||||||
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-11-25 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* url.c (convert_links): Handle CO_NULLIFY_BASE.
|
* url.c (convert_links): Handle CO_NULLIFY_BASE.
|
||||||
|
@ -1453,7 +1453,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
|
|||||||
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
||||||
*dt |= TEXTHTML;
|
*dt |= TEXTHTML;
|
||||||
|
|
||||||
FREE_MAYBE (suf);
|
|
||||||
FREE_MAYBE (dummy);
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
@ -477,24 +477,24 @@ create_image (struct bar_progress *bp, long dltime)
|
|||||||
long size = bp->initial_length + bp->count;
|
long size = bp->initial_length + bp->count;
|
||||||
|
|
||||||
/* The progress bar should look like this:
|
/* The progress bar should look like this:
|
||||||
xx% [=======> ] xx KB/s nnnnn ETA 00:00
|
xx% [=======> ] nn.nnn rrK/s ETA 00:00
|
||||||
|
|
||||||
Calculate its geometry:
|
Calculate its geometry:
|
||||||
|
|
||||||
"xx% " or "100%" - percentage - 4 chars exactly
|
"xx% " or "100%" - percentage - 4 chars exactly
|
||||||
"[]" - progress bar decorations - 2 chars exactly
|
"[]" - progress bar decorations - 2 chars exactly
|
||||||
"1012.56K/s " - dl rate - 11 chars exactly
|
" n,nnn,nnn,nnn" - downloaded bytes - 14 or less chars
|
||||||
"n,nnn,nnn,nnn " - downloaded bytes - 14 or less chars
|
" 1012.56K/s" - dl rate - 11 chars exactly
|
||||||
"ETA xx:xx:xx" - ETA - 12 or less chars
|
" ETA xx:xx:xx" - ETA - 13 or less chars
|
||||||
|
|
||||||
"=====>..." - progress bar content - the rest
|
"=====>..." - progress bar content - the rest
|
||||||
*/
|
*/
|
||||||
int progress_size = screen_width - (4 + 2 + 11 + 14 + 12);
|
int progress_size = screen_width - (4 + 2 + 14 + 11 + 13);
|
||||||
|
|
||||||
if (progress_size < 5)
|
if (progress_size < 5)
|
||||||
progress_size = 0;
|
progress_size = 0;
|
||||||
|
|
||||||
/* "xxx%" */
|
/* "xx% " */
|
||||||
if (bp->total_length > 0)
|
if (bp->total_length > 0)
|
||||||
{
|
{
|
||||||
int percentage = (int)(100.0 * size / bp->total_length);
|
int percentage = (int)(100.0 * size / bp->total_length);
|
||||||
@ -509,12 +509,13 @@ create_image (struct bar_progress *bp, long dltime)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int i = 5;
|
*p++ = ' ';
|
||||||
while (i--)
|
*p++ = ' ';
|
||||||
*p++ = ' ';
|
*p++ = ' ';
|
||||||
|
*p++ = ' ';
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The progress bar: "|====> |" */
|
/* The progress bar: "[====> ]" */
|
||||||
if (progress_size && bp->total_length > 0)
|
if (progress_size && bp->total_length > 0)
|
||||||
{
|
{
|
||||||
double fraction = (double)size / bp->total_length;
|
double fraction = (double)size / bp->total_length;
|
||||||
@ -566,30 +567,30 @@ create_image (struct bar_progress *bp, long dltime)
|
|||||||
++bp->tick;
|
++bp->tick;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "1012.45K/s " */
|
/* " 1,234,567" */
|
||||||
|
/* If there are 7 or fewer digits (9 because of "legible" commas),
|
||||||
|
print the number in constant space. This will prevent the rest
|
||||||
|
of the line jerking at the beginning of download, but without
|
||||||
|
assigning maximum width in all cases. */
|
||||||
|
sprintf (p, " %9s", legible (size));
|
||||||
|
p += strlen (p);
|
||||||
|
|
||||||
|
/* " 1012.45K/s" */
|
||||||
if (dltime && bp->count)
|
if (dltime && bp->count)
|
||||||
{
|
{
|
||||||
static char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
|
static char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
|
||||||
int units = 0;
|
int units = 0;
|
||||||
double dlrate = calc_rate (bp->count, dltime, &units);
|
double dlrate = calc_rate (bp->count, dltime, &units);
|
||||||
sprintf (p, "%7.2f%s ", dlrate, short_units[units]);
|
sprintf (p, " %7.2f%s", dlrate, short_units[units]);
|
||||||
p += strlen (p);
|
p += strlen (p);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
strcpy (p, " --.-- K/s ");
|
strcpy (p, " --.--K/s");
|
||||||
p += 12;
|
p += 11;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "1,234,567 " */
|
/* " ETA xx:xx:xx" */
|
||||||
/* If there are 7 or fewer digits (9 because of "legible" commas),
|
|
||||||
print the number in constant space. This will prevent the "ETA"
|
|
||||||
string from jerking as the data begins to arrive. */
|
|
||||||
sprintf (p, "%9s", legible (size));
|
|
||||||
p += strlen (p);
|
|
||||||
*p++ = ' ';
|
|
||||||
|
|
||||||
/* "ETA xx:xx:xx" */
|
|
||||||
if (bp->total_length > 0 && bp->count > 0)
|
if (bp->total_length > 0 && bp->count > 0)
|
||||||
{
|
{
|
||||||
int eta, eta_hrs, eta_min, eta_sec;
|
int eta, eta_hrs, eta_min, eta_sec;
|
||||||
@ -605,6 +606,7 @@ create_image (struct bar_progress *bp, long dltime)
|
|||||||
/*printf ("\neta: %d, %d %d %d\n", eta, eta_hrs, eta_min, eta_sec);*/
|
/*printf ("\neta: %d, %d %d %d\n", eta, eta_hrs, eta_min, eta_sec);*/
|
||||||
/*printf ("\n%ld %f %ld %ld\n", dltime, tm_sofar, bytes_remaining, bp->count);*/
|
/*printf ("\n%ld %f %ld %ld\n", dltime, tm_sofar, bytes_remaining, bp->count);*/
|
||||||
|
|
||||||
|
*p++ = ' ';
|
||||||
*p++ = 'E';
|
*p++ = 'E';
|
||||||
*p++ = 'T';
|
*p++ = 'T';
|
||||||
*p++ = 'A';
|
*p++ = 'A';
|
||||||
@ -621,8 +623,8 @@ create_image (struct bar_progress *bp, long dltime)
|
|||||||
}
|
}
|
||||||
else if (bp->total_length > 0)
|
else if (bp->total_length > 0)
|
||||||
{
|
{
|
||||||
strcpy (p, "ETA --:--");
|
strcpy (p, " ETA --:--");
|
||||||
p += 9;
|
p += 10;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert (p - bp->buffer <= screen_width);
|
assert (p - bp->buffer <= screen_width);
|
||||||
|
51
src/recur.c
51
src/recur.c
@ -149,7 +149,7 @@ url_dequeue (struct url_queue *queue,
|
|||||||
xfree (qel);
|
xfree (qel);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int descend_url_p PARAMS ((const struct urlpos *, struct url *, int,
|
static int descend_url_p PARAMS ((const struct urlpos *, struct url *, int,
|
||||||
struct url *, struct hash_table *));
|
struct url *, struct hash_table *));
|
||||||
|
|
||||||
@ -182,7 +182,8 @@ retrieve_tree (const char *start_url)
|
|||||||
/* The queue of URLs we need to load. */
|
/* The queue of URLs we need to load. */
|
||||||
struct url_queue *queue = url_queue_new ();
|
struct url_queue *queue = url_queue_new ();
|
||||||
|
|
||||||
/* The URLs we decided we don't want to load. */
|
/* The URLs we do not wish to enqueue, because they are already in
|
||||||
|
the queue, but haven't been downloaded yet. */
|
||||||
struct hash_table *blacklist = make_string_hash_table (0);
|
struct hash_table *blacklist = make_string_hash_table (0);
|
||||||
|
|
||||||
/* We'll need various components of this, so better get it over with
|
/* We'll need various components of this, so better get it over with
|
||||||
@ -242,9 +243,6 @@ retrieve_tree (const char *start_url)
|
|||||||
tree. The recursion is partial in that we won't
|
tree. The recursion is partial in that we won't
|
||||||
traverse any <A> or <AREA> tags, nor any <LINK> tags
|
traverse any <A> or <AREA> tags, nor any <LINK> tags
|
||||||
except for <LINK REL="stylesheet">. */
|
except for <LINK REL="stylesheet">. */
|
||||||
/* #### This would be the place to implement the TODO
|
|
||||||
entry saying that -p should do two more hops on
|
|
||||||
framesets. */
|
|
||||||
dash_p_leaf_HTML = TRUE;
|
dash_p_leaf_HTML = TRUE;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -348,7 +346,11 @@ retrieve_tree (const char *start_url)
|
|||||||
|
|
||||||
/* Based on the context provided by retrieve_tree, decide whether a
|
/* Based on the context provided by retrieve_tree, decide whether a
|
||||||
URL is to be descended to. This is only ever called from
|
URL is to be descended to. This is only ever called from
|
||||||
retrieve_tree, but is in a separate function for clarity. */
|
retrieve_tree, but is in a separate function for clarity.
|
||||||
|
|
||||||
|
The most expensive checks (such as those for robots) are memoized
|
||||||
|
by storing these URLs to BLACKLIST. This may or may not help. It
|
||||||
|
will help if those URLs are encountered many times. */
|
||||||
|
|
||||||
static int
|
static int
|
||||||
descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
||||||
@ -391,7 +393,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
&& !(u->scheme == SCHEME_FTP && opt.follow_ftp))
|
&& !(u->scheme == SCHEME_FTP && opt.follow_ftp))
|
||||||
{
|
{
|
||||||
DEBUGP (("Not following non-HTTP schemes.\n"));
|
DEBUGP (("Not following non-HTTP schemes.\n"));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 2. If it is an absolute link and they are not followed, throw it
|
/* 2. If it is an absolute link and they are not followed, throw it
|
||||||
@ -400,7 +402,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
if (opt.relative_only && !upos->link_relative_p)
|
if (opt.relative_only && !upos->link_relative_p)
|
||||||
{
|
{
|
||||||
DEBUGP (("It doesn't really look like a relative link.\n"));
|
DEBUGP (("It doesn't really look like a relative link.\n"));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 3. If its domain is not to be accepted/looked-up, chuck it
|
/* 3. If its domain is not to be accepted/looked-up, chuck it
|
||||||
@ -408,7 +410,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
if (!accept_domain (u))
|
if (!accept_domain (u))
|
||||||
{
|
{
|
||||||
DEBUGP (("The domain was not accepted.\n"));
|
DEBUGP (("The domain was not accepted.\n"));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 4. Check for parent directory.
|
/* 4. Check for parent directory.
|
||||||
@ -423,7 +425,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
if (!frontcmp (parent->dir, u->dir))
|
if (!frontcmp (parent->dir, u->dir))
|
||||||
{
|
{
|
||||||
DEBUGP (("Trying to escape the root directory with no_parent in effect.\n"));
|
DEBUGP (("Trying to escape the root directory with no_parent in effect.\n"));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -435,13 +437,13 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
if (!accdir (u->dir, ALLABS))
|
if (!accdir (u->dir, ALLABS))
|
||||||
{
|
{
|
||||||
DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
|
DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 6. */
|
/* 6. */
|
||||||
{
|
{
|
||||||
char *suf = NULL;
|
char *suf;
|
||||||
/* Check for acceptance/rejection rules. We ignore these rules
|
/* Check for acceptance/rejection rules. We ignore these rules
|
||||||
for HTML documents because they might lead to other files which
|
for HTML documents because they might lead to other files which
|
||||||
need to be downloaded. Of course, we don't know which
|
need to be downloaded. Of course, we don't know which
|
||||||
@ -466,11 +468,9 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
{
|
{
|
||||||
DEBUGP (("%s (%s) does not match acc/rej rules.\n",
|
DEBUGP (("%s (%s) does not match acc/rej rules.\n",
|
||||||
url, u->file));
|
url, u->file));
|
||||||
FREE_MAYBE (suf);
|
goto out;
|
||||||
goto blacklist;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
FREE_MAYBE (suf);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 7. */
|
/* 7. */
|
||||||
@ -479,7 +479,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
{
|
{
|
||||||
DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
|
DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
|
||||||
u->host, parent->host));
|
u->host, parent->host));
|
||||||
goto blacklist;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 8. */
|
/* 8. */
|
||||||
@ -509,7 +509,8 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
if (!res_match_path (specs, u->path))
|
if (!res_match_path (specs, u->path))
|
||||||
{
|
{
|
||||||
DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
|
DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
|
||||||
goto blacklist;
|
string_set_add (blacklist, url);
|
||||||
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -519,9 +520,6 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
|
|||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
blacklist:
|
|
||||||
string_set_add (blacklist, url);
|
|
||||||
|
|
||||||
out:
|
out:
|
||||||
DEBUGP (("Decided NOT to load it.\n"));
|
DEBUGP (("Decided NOT to load it.\n"));
|
||||||
|
|
||||||
@ -604,6 +602,11 @@ void
|
|||||||
convert_all_links (void)
|
convert_all_links (void)
|
||||||
{
|
{
|
||||||
slist *html;
|
slist *html;
|
||||||
|
struct wget_timer *timer;
|
||||||
|
long msecs;
|
||||||
|
int file_count = 0;
|
||||||
|
|
||||||
|
timer = wtimer_new ();
|
||||||
|
|
||||||
/* Destructively reverse downloaded_html_files to get it in the right order.
|
/* Destructively reverse downloaded_html_files to get it in the right order.
|
||||||
recursive_retrieve() used slist_prepend() consistently. */
|
recursive_retrieve() used slist_prepend() consistently. */
|
||||||
@ -675,11 +678,19 @@ convert_all_links (void)
|
|||||||
cur_url->local_name = NULL;
|
cur_url->local_name = NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert the links in the file. */
|
/* Convert the links in the file. */
|
||||||
convert_links (html->string, urls);
|
convert_links (html->string, urls);
|
||||||
|
++file_count;
|
||||||
|
|
||||||
/* Free the data. */
|
/* Free the data. */
|
||||||
free_urlpos (urls);
|
free_urlpos (urls);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
msecs = wtimer_elapsed (timer);
|
||||||
|
wtimer_delete (timer);
|
||||||
|
logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
|
||||||
|
file_count, (double)msecs / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Cleanup the data structures associated with recursive retrieving
|
/* Cleanup the data structures associated with recursive retrieving
|
||||||
|
@ -336,7 +336,6 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
char *suf = suffix (u->local);
|
char *suf = suffix (u->local);
|
||||||
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
||||||
*dt |= TEXTHTML;
|
*dt |= TEXTHTML;
|
||||||
FREE_MAYBE (suf);
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -904,7 +904,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return the malloc-ed suffix of STR. For instance:
|
/* Return the location of STR's suffix (file extension). Examples:
|
||||||
suffix ("foo.bar") -> "bar"
|
suffix ("foo.bar") -> "bar"
|
||||||
suffix ("foo.bar.baz") -> "baz"
|
suffix ("foo.bar.baz") -> "baz"
|
||||||
suffix ("/foo/bar") -> NULL
|
suffix ("/foo/bar") -> NULL
|
||||||
@ -914,9 +914,11 @@ suffix (const char *str)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
|
for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
|
||||||
|
;
|
||||||
|
|
||||||
if (str[i++] == '.')
|
if (str[i++] == '.')
|
||||||
return xstrdup (str + i);
|
return (char *)str + i;
|
||||||
else
|
else
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user