mirror of https://github.com/moparisthebest/wget

[svn] Recursion and progress bar tweaks.

Published in <sxsd727cvc0.fsf@florida.arsdigita.de>.

parent df05e7ff10
commit 3afb9c659a
src/ChangeLog

@@ -1,3 +1,15 @@
+2001-11-25  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+	* recur.c (descend_url_p): Be more conservative with blacklisting
+	URLs.
+	(convert_all_links): Print how many files have been converted, and
+	how long it took.
+
+	* progress.c (create_image): Place the number of downloaded bytes
+	right after the progress bar.
+
+	* utils.c (suffix): Return a pointer into the string.
+
 2001-11-25  Hrvoje Niksic  <hniksic@arsdigita.com>
 
 	* url.c (convert_links): Handle CO_NULLIFY_BASE.
src/http.c

@@ -1453,7 +1453,6 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file);
 	      && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
 	    *dt |= TEXTHTML;
 
-	  FREE_MAYBE (suf);
 	  FREE_MAYBE (dummy);
 	  return RETROK;
 	}
src/progress.c

@@ -477,24 +477,24 @@ create_image (struct bar_progress *bp, long dltime)
   long size = bp->initial_length + bp->count;
 
   /* The progress bar should look like this:
-     xx% [=======>             ] xx KB/s nnnnn ETA 00:00
+     xx% [=======>             ] nn.nnn rrK/s ETA 00:00
 
      Calculate its geometry:
 
-     "xx% " or "100%"  - percentage               - 4 chars exactly
-     "[]"              - progress bar decorations - 2 chars exactly
-     "1012.56K/s "     - dl rate                  - 11 chars exactly
-     "n,nnn,nnn,nnn "  - downloaded bytes         - 14 or less chars
-     "ETA xx:xx:xx"    - ETA                      - 12 or less chars
+     "xx% " or "100%"  - percentage               - 4 chars exactly
+     "[]"              - progress bar decorations - 2 chars exactly
+     " n,nnn,nnn,nnn"  - downloaded bytes         - 14 or less chars
+     " 1012.56K/s"     - dl rate                  - 11 chars exactly
+     " ETA xx:xx:xx"   - ETA                      - 13 or less chars
 
-     "=====>..."       - progress bar content     - the rest
+     "=====>..."       - progress bar content     - the rest
   */
-  int progress_size = screen_width - (4 + 2 + 11 + 14 + 12);
+  int progress_size = screen_width - (4 + 2 + 14 + 11 + 13);
 
   if (progress_size < 5)
     progress_size = 0;
 
-  /* "xxx%" */
+  /* "xx% " */
   if (bp->total_length > 0)
     {
       int percentage = (int)(100.0 * size / bp->total_length);
@@ -509,12 +509,13 @@ create_image (struct bar_progress *bp, long dltime)
     }
   else
     {
-      int i = 5;
-      while (i--)
-	*p++ = ' ';
+      *p++ = ' ';
+      *p++ = ' ';
+      *p++ = ' ';
+      *p++ = ' ';
     }
 
-  /* The progress bar: "|====>      |" */
+  /* The progress bar: "[====>      ]" */
   if (progress_size && bp->total_length > 0)
     {
       double fraction = (double)size / bp->total_length;
@@ -566,30 +567,30 @@ create_image (struct bar_progress *bp, long dltime)
 	  ++bp->tick;
 	}
 
-  /* "1012.45K/s " */
+  /* " 1,234,567" */
+  /* If there are 7 or less digits (9 because of "legible" comas),
+     print the number in constant space.  This will prevent the rest
+     of the line jerking at the beginning of download, but without
+     assigning maximum width in all cases.  */
+  sprintf (p, " %9s", legible (size));
+  p += strlen (p);
+
+  /* " 1012.45K/s" */
   if (dltime && bp->count)
     {
       static char *short_units[] = { "B/s", "K/s", "M/s", "G/s" };
       int units = 0;
       double dlrate = calc_rate (bp->count, dltime, &units);
-      sprintf (p, "%7.2f%s ", dlrate, short_units[units]);
+      sprintf (p, " %7.2f%s", dlrate, short_units[units]);
       p += strlen (p);
     }
   else
     {
-      strcpy (p, " --.-- K/s ");
-      p += 12;
+      strcpy (p, "   --.--K/s");
+      p += 11;
     }
 
-  /* "1,234,567 " */
-  /* If there are 7 or less digits (9 because of "legible" comas),
-     print the number in constant space.  This will prevent the "ETA"
-     string from jerking as the data begins to arrive.  */
-  sprintf (p, "%9s", legible (size));
-  p += strlen (p);
-  *p++ = ' ';
 
-  /* "ETA xx:xx:xx" */
+  /* " ETA xx:xx:xx" */
   if (bp->total_length > 0 && bp->count > 0)
     {
       int eta, eta_hrs, eta_min, eta_sec;
@@ -605,6 +606,7 @@ create_image (struct bar_progress *bp, long dltime)
       /*printf ("\neta: %d, %d %d %d\n", eta, eta_hrs, eta_min, eta_sec);*/
       /*printf ("\n%ld %f %ld %ld\n", dltime, tm_sofar, bytes_remaining, bp->count);*/
 
+      *p++ = ' ';
       *p++ = 'E';
       *p++ = 'T';
       *p++ = 'A';
@@ -621,8 +623,8 @@ create_image (struct bar_progress *bp, long dltime)
     }
   else if (bp->total_length > 0)
     {
-      strcpy (p, "ETA --:--");
-      p += 9;
+      strcpy (p, " ETA --:--");
+      p += 10;
     }
 
   assert (p - bp->buffer <= screen_width);
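The point of the reshuffled geometry is that every element except the bar has a constant width, so nothing on the line jitters as the numbers grow; the bar simply absorbs whatever columns remain. Below is a minimal standalone sketch of that layout idea, not wget's actual code: sketch_image, rate_kbs and the exact field widths are illustrative assumptions.

#include <stdio.h>

/* Sketch: fixed-width fields everywhere, bar takes the leftover. */
static void
sketch_image (char *buf, int screen_width, double done, double total,
              double rate_kbs, int eta_min, int eta_sec)
{
  /* 4 (percentage) + 2 ("[]") + 14 (bytes) + 11 (rate) + 13 (ETA) */
  int bar_size = screen_width - (4 + 2 + 14 + 11 + 13);
  int filled = (int) (bar_size * (done / total));
  char *p = buf;
  int i;

  p += sprintf (p, "%3d%%", (int) (100.0 * done / total));
  *p++ = '[';
  for (i = 0; i < bar_size; i++)
    *p++ = i < filled ? '=' : ' ';
  *p++ = ']';
  /* Downloaded bytes now come right after the bar; rate and ETA
     follow, each padded to a constant width.  */
  p += sprintf (p, " %13.0f", done);
  p += sprintf (p, " %7.2fK/s", rate_kbs);
  p += sprintf (p, " ETA %02d:%02d", eta_min, eta_sec);
  *p = '\0';
}

int
main (void)
{
  char line[128];
  sketch_image (line, 76, 350000, 1000000, 42.5, 1, 23);
  puts (line);
  return 0;
}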
src/recur.c
@@ -149,7 +149,7 @@ url_dequeue (struct url_queue *queue,
   xfree (qel);
   return 1;
 }
 
 static int descend_url_p PARAMS ((const struct urlpos *, struct url *, int,
 				  struct url *, struct hash_table *));
 
@@ -182,7 +182,8 @@ retrieve_tree (const char *start_url)
   /* The queue of URLs we need to load. */
   struct url_queue *queue = url_queue_new ();
 
-  /* The URLs we decided we don't want to load. */
+  /* The URLs we do not wish to enqueue, because they are already in
+     the queue, but haven't been downloaded yet.  */
   struct hash_table *blacklist = make_string_hash_table (0);
 
   /* We'll need various components of this, so better get it over with
@@ -242,9 +243,6 @@ retrieve_tree (const char *start_url)
 		 tree.  The recursion is partial in that we won't
 		 traverse any <A> or <AREA> tags, nor any <LINK> tags
 		 except for <LINK REL="stylesheet">. */
-	      /* #### This would be the place to implement the TODO
-		 entry saying that -p should do two more hops on
-		 framesets.  */
 	      dash_p_leaf_HTML = TRUE;
 	    else
 	      {
@@ -348,7 +346,11 @@ retrieve_tree (const char *start_url)
 
 /* Based on the context provided by retrieve_tree, decide whether a
    URL is to be descended to.  This is only ever called from
-   retrieve_tree, but is in a separate function for clarity.  */
+   retrieve_tree, but is in a separate function for clarity.
+
+   The most expensive checks (such as those for robots) are memoized
+   by storing these URLs to BLACKLIST.  This may or may not help.  It
+   will help if those URLs are encountered many times.  */
 
 static int
 descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
@@ -391,7 +393,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
       && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
     {
       DEBUGP (("Not following non-HTTP schemes.\n"));
-      goto blacklist;
+      goto out;
     }
 
   /* 2. If it is an absolute link and they are not followed, throw it
@@ -400,7 +402,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
   if (opt.relative_only && !upos->link_relative_p)
     {
       DEBUGP (("It doesn't really look like a relative link.\n"));
-      goto blacklist;
+      goto out;
     }
 
   /* 3. If its domain is not to be accepted/looked-up, chuck it
@@ -408,7 +410,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
   if (!accept_domain (u))
     {
       DEBUGP (("The domain was not accepted.\n"));
-      goto blacklist;
+      goto out;
     }
 
   /* 4. Check for parent directory.
@@ -423,7 +425,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
       if (!frontcmp (parent->dir, u->dir))
 	{
 	  DEBUGP (("Trying to escape the root directory with no_parent in effect.\n"));
-	  goto blacklist;
+	  goto out;
 	}
     }
 
@@ -435,13 +437,13 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
       if (!accdir (u->dir, ALLABS))
 	{
 	  DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
-	  goto blacklist;
+	  goto out;
 	}
     }
 
   /* 6. */
   {
-    char *suf = NULL;
+    char *suf;
     /* Check for acceptance/rejection rules.  We ignore these rules
       for HTML documents because they might lead to other files which
      need to be downloaded.  Of course, we don't know which
@@ -466,11 +468,9 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
 	{
 	  DEBUGP (("%s (%s) does not match acc/rej rules.\n",
		   url, u->file));
-	  FREE_MAYBE (suf);
-	  goto blacklist;
+	  goto out;
 	}
     }
-    FREE_MAYBE (suf);
   }
 
   /* 7. */
@@ -479,7 +479,7 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
     {
       DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
	       u->host, parent->host));
-      goto blacklist;
+      goto out;
     }
 
   /* 8. */
@@ -509,7 +509,8 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
       if (!res_match_path (specs, u->path))
 	{
 	  DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
-	  goto blacklist;
+	  string_set_add (blacklist, url);
+	  goto out;
 	}
     }
 
@@ -519,9 +520,6 @@ descend_url_p (const struct urlpos *upos, struct url *parent, int depth,
 
   return 1;
 
- blacklist:
-  string_set_add (blacklist, url);
-
  out:
   DEBUGP (("Decided NOT to load it.\n"));
 
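The net effect of these hunks: cheap rejections (wrong scheme, non-relative link, rejected domain, acc/rej rules) now jump straight to out without touching the blacklist, and only the expensive robots.txt verdict is memoized. A toy sketch of that pattern follows; string_set_add mirrors wget's name, but the linear-scan set, descend_p_sketch, is_http and robots_allow are all our illustrative stand-ins.

#include <stdio.h>
#include <string.h>

/* Toy stand-in for wget's string hash table keyed by URL. */
struct string_set { const char *items[64]; int n; };

static void
string_set_add (struct string_set *s, const char *url)
{
  if (s->n < 64)
    s->items[s->n++] = url;
}

static int
string_set_contains (const struct string_set *s, const char *url)
{
  int i;
  for (i = 0; i < s->n; i++)
    if (!strcmp (s->items[i], url))
      return 1;
  return 0;
}

static int
descend_p_sketch (const char *url, int is_http, int robots_allow,
                  struct string_set *blacklist)
{
  if (string_set_contains (blacklist, url))
    return 0;                       /* memoized rejection */

  if (!is_http)
    goto out;                       /* cheap check: NOT memoized */

  if (!robots_allow)
    {
      string_set_add (blacklist, url); /* expensive check: memoize */
      goto out;
    }

  return 1;                         /* decided to load it */

 out:
  printf ("Decided NOT to load %s.\n", url);
  return 0;
}

int
main (void)
{
  struct string_set bl = { { 0 }, 0 };
  descend_p_sketch ("ftp://example.com/a", 0, 1, &bl); /* cheap, no blacklist */
  descend_p_sketch ("http://example.com/b", 1, 0, &bl); /* robots, blacklisted */
  return 0;
}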
@@ -604,6 +602,11 @@ void
 convert_all_links (void)
 {
   slist *html;
+  struct wget_timer *timer;
+  long msecs;
+  int file_count = 0;
+
+  timer = wtimer_new ();
 
   /* Destructively reverse downloaded_html_files to get it in the right order.
      recursive_retrieve() used slist_prepend() consistently. */
@@ -675,11 +678,19 @@ convert_all_links (void)
 	      cur_url->local_name = NULL;
 	    }
 	}
+
       /* Convert the links in the file.  */
       convert_links (html->string, urls);
+      ++file_count;
+
       /* Free the data.  */
       free_urlpos (urls);
     }
+
+  msecs = wtimer_elapsed (timer);
+  wtimer_delete (timer);
+  logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
+	     file_count, (double)msecs / 1000);
 }
 
 /* Cleanup the data structures associated with recursive retrieving
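convert_all_links now counts conversions and times the whole pass with wget's wtimer interface (wtimer_new, wtimer_elapsed, wtimer_delete, as shown above). The same pattern in portable C, a rough sketch using standard clock() in place of wtimer and with the conversion work elided:

#include <stdio.h>
#include <time.h>

static void
convert_sketch (int n_files)
{
  clock_t start = clock ();
  long msecs;
  int file_count = 0;
  int i;

  for (i = 0; i < n_files; i++)
    {
      /* ... convert_links (file, urls) would run here ... */
      ++file_count;
    }

  msecs = (long) ((clock () - start) * 1000 / CLOCKS_PER_SEC);
  printf ("Converted %d files in %.2f seconds.\n",
          file_count, (double) msecs / 1000);
}

int
main (void)
{
  convert_sketch (3);
  return 0;
}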
src/retr.c

@@ -336,7 +336,6 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 	  char *suf = suffix (u->local);
 	  if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
 	    *dt |= TEXTHTML;
-	  FREE_MAYBE (suf);
 	}
 #endif
     }
src/utils.c

@@ -904,7 +904,7 @@ in_acclist (const char *const *accepts, const char *s, int backward)
   return 0;
 }
 
-/* Return the malloc-ed suffix of STR.  For instance:
+/* Return the location of STR's suffix (file extension).  Examples:
    suffix ("foo.bar")       -> "bar"
    suffix ("foo.bar.baz")   -> "baz"
    suffix ("/foo/bar")      -> NULL
@@ -914,9 +914,11 @@ suffix (const char *str)
 {
   int i;
 
-  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--);
+  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
+    ;
 
   if (str[i++] == '.')
-    return xstrdup (str + i);
+    return (char *)str + i;
   else
     return NULL;
 }
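This is a contract change, not just a cleanup: suffix() now returns a pointer into the caller's string instead of a fresh malloc, which is why every FREE_MAYBE (suf) disappears from the callers above, and why the result is only valid as long as the original string is. A self-contained demonstration; suffix_sketch mirrors the new logic, the name is ours:

#include <stdio.h>
#include <string.h>

static char *
suffix_sketch (const char *str)   /* same logic as the new suffix() */
{
  int i;

  for (i = strlen (str); i && str[i] != '/' && str[i] != '.'; i--)
    ;

  if (str[i++] == '.')
    return (char *) str + i;      /* points into STR: do not free */
  else
    return NULL;
}

int
main (void)
{
  const char *name = "index.html";
  char *suf = suffix_sketch (name);

  if (suf && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
    printf ("%s looks like HTML (suffix \"%s\")\n", name, suf);
  /* No FREE_MAYBE (suf) needed anymore. */
  return 0;
}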