
[svn] Use sets/hash-tables instead of "slists". Remove slist implementation from utils.c.
hniksic 2005-03-30 11:27:34 -08:00
parent 52aafa6d15
commit 30e4a33756
5 changed files with 62 additions and 148 deletions

ChangeLog

@@ -1,3 +1,11 @@
+2005-03-30  Hrvoje Niksic  <hniksic@xemacs.org>
+
+	* utils.c (string_set_to_array): New function.
+
+	* convert.c: Replace the use of "slists" with sets/hash-tables,
+	which in fact suit the intended purpose much better.
+	downloaded_html_list is removed altogether.
+
 2005-03-29  Hrvoje Niksic  <hniksic@xemacs.org>
 
 	* ftp.h (enum): Rename GLOBALL, GETALL, and GETONE to
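
For readers unfamiliar with the string-set helpers this entry refers to, the following sketch shows how the utils.c interface is meant to be used. It is illustrative only: the functions (make_string_hash_table, string_set_add, string_set_contains, string_set_free) are the ones declared in the utils.h hunk at the bottom of this diff, while the wrapper names and the include lines are assumptions, not part of the commit.

#include "wget.h"    /* assumed include order for wget internals */
#include "utils.h"
#include "hash.h"

static struct hash_table *seen_files;   /* a string set, created lazily */

/* Record FILE in the set.  string_set_add keeps its own copy of the
   key, and adding a key that is already present is harmless, which is
   why register_html below no longer needs an explicit membership test. */
static void
remember_file (const char *file)
{
  if (!seen_files)
    seen_files = make_string_hash_table (0);   /* 0 = default initial size */
  string_set_add (seen_files, file);
}

/* Membership test; the set may legitimately not exist yet. */
static int
seen_file_p (const char *file)
{
  return seen_files && string_set_contains (seen_files, file);
}

/* Release the set together with the key strings it duplicated. */
static void
forget_files (void)
{
  if (seen_files)
    string_set_free (seen_files);
  seen_files = NULL;
}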

convert.c

@@ -1,5 +1,5 @@
 /* Conversion of links to local files.
-   Copyright (C) 1996, 1997, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Wget.
@@ -53,11 +53,8 @@ so, delete this exception statement from your version. */
 static struct hash_table *dl_file_url_map;
 struct hash_table *dl_url_file_map;
 
-/* List of HTML files downloaded in this Wget run, used for link
-   conversion after Wget is done.  The list and the set contain the
-   same information, except the list maintains the order.  Perhaps I
-   should get rid of the list, it's there for historical reasons.  */
-static slist *downloaded_html_list;
+/* Set of HTML files downloaded in this Wget run, used for link
+   conversion after Wget is done.  */
 struct hash_table *downloaded_html_set;
 
 static void convert_links PARAMS ((const char *, struct urlpos *));
@@ -80,21 +77,28 @@ static void convert_links PARAMS ((const char *, struct urlpos *));
 void
 convert_all_links (void)
 {
-  slist *html;
-  long msecs;
+  int i;
+  double secs;
   int file_count = 0;
 
   struct wget_timer *timer = wtimer_new ();
 
-  /* Destructively reverse downloaded_html_files to get it in the right order.
-     recursive_retrieve() used slist_prepend() consistently.  */
-  downloaded_html_list = slist_nreverse (downloaded_html_list);
+  int cnt;
+  char **file_array;
+
+  cnt = 0;
+  if (downloaded_html_set)
+    cnt = hash_table_count (downloaded_html_set);
+  if (cnt == 0)
+    return;
+  file_array = alloca_array (char *, cnt);
+  string_set_to_array (downloaded_html_set, file_array);
 
-  for (html = downloaded_html_list; html; html = html->next)
+  for (i = 0; i < cnt; i++)
     {
       struct urlpos *urls, *cur_url;
       char *url;
-      char *file = html->string;
+      char *file = file_array[i];
 
       /* Determine the URL of the HTML file.  get_urls_html will need
          it.  */
@@ -167,10 +171,10 @@ convert_all_links (void)
     }
 
   wtimer_update (timer);
-  msecs = wtimer_read (timer);
+  secs = wtimer_read (timer) / 1000;
   wtimer_delete (timer);
-  logprintf (LOG_VERBOSE, _("Converted %d files in %.2f seconds.\n"),
-             file_count, (double)msecs / 1000);
+  logprintf (LOG_VERBOSE, _("Converted %d files in %.*f seconds.\n"),
+             file_count, secs < 10 ? 3 : 1, secs);
 }
 
 static void write_backup_file PARAMS ((const char *, downloaded_file_t));
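
Taken out of context, the new convert_all_links iteration shown above is a generic "snapshot the set, then walk the snapshot" pattern: query the element count, fill a stack array via string_set_to_array, and loop by index. A condensed sketch of just that pattern, assuming the same helpers used in this diff; for_each_downloaded_html and process_file are illustrative names, not part of the commit.

/* Walk every key of downloaded_html_set, calling PROCESS_FILE on each.
   The array snapshot gives a plain indexed loop and keeps the walk
   independent of the hash table's internal layout. */
static void
for_each_downloaded_html (void (*process_file) (const char *))
{
  int i, cnt = 0;
  char **file_array;

  if (downloaded_html_set)
    cnt = hash_table_count (downloaded_html_set);
  if (cnt == 0)
    return;

  file_array = alloca_array (char *, cnt);   /* stack array of cnt pointers */
  string_set_to_array (downloaded_html_set, file_array);

  for (i = 0; i < cnt; i++)
    process_file (file_array[i]);
}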
@@ -400,11 +404,9 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
      clobber .orig files sitting around from previous invocations.  */
 
   /* Construct the backup filename as the original name plus ".orig". */
-  size_t filename_len = strlen(file);
+  size_t filename_len = strlen (file);
   char* filename_plus_orig_suffix;
-  int already_wrote_backup_file = 0;
-  slist* converted_file_ptr;
-  static slist* converted_files = NULL;
+  static struct hash_table *converted_files;
 
   if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
     {
@@ -416,36 +418,29 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
          ".html", so we need to compare vs. the original URL plus
          ".orig", not the original URL plus ".html.orig". */
       filename_plus_orig_suffix = alloca (filename_len + 1);
-      strcpy(filename_plus_orig_suffix, file);
-      strcpy((filename_plus_orig_suffix + filename_len) - 4, "orig");
+      strcpy (filename_plus_orig_suffix, file);
+      strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig");
     }
   else /* downloaded_file_return == FILE_DOWNLOADED_NORMALLY */
     {
       /* Append ".orig" to the name. */
-      filename_plus_orig_suffix = alloca (filename_len + sizeof(".orig"));
-      strcpy(filename_plus_orig_suffix, file);
-      strcpy(filename_plus_orig_suffix + filename_len, ".orig");
+      filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
+      strcpy (filename_plus_orig_suffix, file);
+      strcpy (filename_plus_orig_suffix + filename_len, ".orig");
     }
 
+  if (!converted_files)
+    converted_files = make_string_hash_table (0);
+
   /* We can get called twice on the same URL thanks to the
      convert_all_links() call in main().  If we write the .orig file
      each time in such a case, it'll end up containing the first-pass
      conversion, not the original file.  So, see if we've already been
      called on this file. */
-  converted_file_ptr = converted_files;
-  while (converted_file_ptr != NULL)
-    if (strcmp(converted_file_ptr->string, file) == 0)
-      {
-        already_wrote_backup_file = 1;
-        break;
-      }
-    else
-      converted_file_ptr = converted_file_ptr->next;
-
-  if (!already_wrote_backup_file)
+  if (!string_set_contains (converted_files, file))
     {
       /* Rename <file> to <file>.orig before former gets written over. */
-      if (rename(file, filename_plus_orig_suffix) != 0)
+      if (rename (file, filename_plus_orig_suffix) != 0)
         logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),
                    file, filename_plus_orig_suffix, strerror (errno));
@@ -466,10 +461,7 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
          list.
 
          -- Hrvoje Niksic <hniksic@xemacs.org>
      */
-      converted_file_ptr = xmalloc (sizeof (*converted_file_ptr));
-      converted_file_ptr->string = xstrdup (file);
-      converted_file_ptr->next = converted_files;
-      converted_files = converted_file_ptr;
+      string_set_add (converted_files, file);
     }
 }
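
Stripped of the renaming details, the rewritten bookkeeping amounts to a "do this at most once per file name" guard built on a static string set. A minimal sketch of that guard, reusing the helpers shown in this diff; backup_once and do_backup are illustrative names, not wget functions.

/* Perform DO_BACKUP for FILE at most once per program run. */
static void
backup_once (const char *file, void (*do_backup) (const char *))
{
  static struct hash_table *done;     /* created on first use, kept for the run */

  if (!done)
    done = make_string_hash_table (0);

  if (string_set_contains (done, file))
    return;                           /* already handled earlier in this run */

  do_backup (file);
  string_set_add (done, file);        /* the set stores its own copy of FILE */
}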
@@ -839,14 +831,7 @@ register_html (const char *url, const char *file)
 {
   if (!downloaded_html_set)
     downloaded_html_set = make_string_hash_table (0);
-  else if (hash_table_contains (downloaded_html_set, file))
-    return;
-
-  /* The set and the list should use the same copy of FILE, but the
-     slist interface insists on strduping the string it gets.  Oh
-     well. */
   string_set_add (downloaded_html_set, file);
-  downloaded_html_list = slist_prepend (downloaded_html_list, file);
 }
 
 /* Cleanup the data structures associated with recursive retrieving
@@ -868,8 +853,6 @@ convert_cleanup (void)
     }
   if (downloaded_html_set)
     string_set_free (downloaded_html_set);
-  slist_free (downloaded_html_list);
-  downloaded_html_list = NULL;
 }
 
 /* Book-keeping code for downloaded files that enables extension

convert.h

@@ -30,6 +30,9 @@ so, delete this exception statement from your version. */
 #ifndef CONVERT_H
 #define CONVERT_H
 
+struct hash_table;              /* forward decl */
+
+extern struct hash_table *downloaded_html_set;
 
 enum convert_options {
   CO_NOCONVERT = 0,             /* don't convert this URL */
   CO_CONVERT_TO_RELATIVE,       /* convert to relative, e.g. to
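
The bare struct hash_table; line is enough here because convert.h only names the type through a pointer; translation units that actually create or query the set still include hash.h and utils.h themselves. A tiny illustration of the same idiom, with file and symbol names made up for the example.

/* example.h -- a header can declare pointers to a type it never defines */
struct opaque_counter;                        /* forward declaration only */
extern struct opaque_counter *global_counter;
void counter_bump (struct opaque_counter *);  /* pointers in prototypes are fine too */

/* example.c -- only the implementation needs the full definition */
#include "example.h"

struct opaque_counter { long n; };            /* definition lives here */
struct opaque_counter *global_counter;

void
counter_bump (struct opaque_counter *c)
{
  c->n++;
}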

utils.c

@@ -1080,91 +1080,6 @@ merge_vecs (char **v1, char **v2)
   xfree (v2);
   return v1;
 }
 
-/* A set of simple-minded routines to store strings in a linked list.
-   This used to also be used for searching, but now we have hash
-   tables for that.  */
-
-/* It's a shame that these simple things like linked lists and hash
-   tables (see hash.c) need to be implemented over and over again.  It
-   would be nice to be able to use the routines from glib -- see
-   www.gtk.org for details.  However, that would make Wget depend on
-   glib, and I want to avoid dependencies to external libraries for
-   reasons of convenience and portability (I suspect Wget is more
-   portable than anything ever written for Gnome).  */
-
-/* Append an element to the list.  If the list has a huge number of
-   elements, this can get slow because it has to find the list's
-   ending.  If you think you have to call slist_append in a loop,
-   think about calling slist_prepend() followed by slist_nreverse().  */
-
-slist *
-slist_append (slist *l, const char *s)
-{
-  slist *newel = xnew (slist);
-  slist *beg = l;
-
-  newel->string = xstrdup (s);
-  newel->next = NULL;
-
-  if (!l)
-    return newel;
-
-  /* Find the last element.  */
-  while (l->next)
-    l = l->next;
-  l->next = newel;
-
-  return beg;
-}
-
-/* Prepend S to the list.  Unlike slist_append(), this is O(1).  */
-
-slist *
-slist_prepend (slist *l, const char *s)
-{
-  slist *newel = xnew (slist);
-  newel->string = xstrdup (s);
-  newel->next = l;
-  return newel;
-}
-
-/* Destructively reverse L. */
-
-slist *
-slist_nreverse (slist *l)
-{
-  slist *prev = NULL;
-  while (l)
-    {
-      slist *next = l->next;
-      l->next = prev;
-      prev = l;
-      l = next;
-    }
-  return prev;
-}
-
-/* Is there a specific entry in the list? */
-
-int
-slist_contains (slist *l, const char *s)
-{
-  for (; l; l = l->next)
-    if (!strcmp (l->string, s))
-      return 1;
-  return 0;
-}
-
-/* Free the whole slist.  */
-
-void
-slist_free (slist *l)
-{
-  while (l)
-    {
-      slist *n = l->next;
-      xfree (l->string);
-      xfree (l);
-      l = n;
-    }
-}
-
 /* Sometimes it's useful to create "sets" of strings, i.e. special
    hash tables where you want to store strings as keys and merely
@@ -1195,6 +1110,22 @@ string_set_contains (struct hash_table *ht, const char *s)
   return hash_table_contains (ht, s);
 }
 
+static int
+string_set_to_array_mapper (void *key, void *value_ignored, void *arg)
+{
+  char ***arrayptr = (char ***) arg;
+  *(*arrayptr)++ = (char *) key;
+  return 0;
+}
+
+/* Convert the specified string set to array.  ARRAY should be large
+   enough to hold hash_table_count(ht) char pointers.  */
+void
+string_set_to_array (struct hash_table *ht, char **array)
+{
+  hash_table_map (ht, string_set_to_array_mapper, &array);
+}
+
 static int
 string_set_free_mapper (void *key, void *value_ignored, void *arg_ignored)
 {
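
string_set_to_array doubles as a small demonstration of the hash_table_map idiom used throughout utils.c: the mapper receives each key/value pair plus one caller-supplied pointer, and returns 0 to keep the traversal going, exactly as the new mapper above does. A sketch of another mapper built the same way; the function names are illustrative and not part of utils.c.

#include <stddef.h>
#include <string.h>

/* Mapper: add the length of each key to the accumulator passed via ARG. */
static int
total_key_length_mapper (void *key, void *value_ignored, void *arg)
{
  *(size_t *) arg += strlen ((const char *) key);
  return 0;                 /* 0 = keep visiting the remaining entries */
}

/* Sum the lengths of all strings in the set HT. */
static size_t
string_set_total_length (struct hash_table *ht)
{
  size_t total = 0;
  hash_table_map (ht, total_key_length_mapper, &total);
  return total;
}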

utils.h

@@ -1,5 +1,5 @@
 /* Declarations for utils.c.
-   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.
+   Copyright (C) 2005 Free Software Foundation, Inc.
 
 This file is part of GNU Wget.
@@ -34,13 +34,6 @@ enum accd {
   ALLABS = 1
 };
 
-/* A linked list of strings.  The list is ordered alphabetically.  */
-typedef struct _slist
-{
-  char *string;
-  struct _slist *next;
-} slist;
-
 struct hash_table;
 
 struct file_memory {
@@ -101,14 +94,10 @@ void read_file_free PARAMS ((struct file_memory *));
 void free_vec PARAMS ((char **));
 char **merge_vecs PARAMS ((char **, char **));
 
-slist *slist_append PARAMS ((slist *, const char *));
-slist *slist_prepend PARAMS ((slist *, const char *));
-slist *slist_nreverse PARAMS ((slist *));
-int slist_contains PARAMS ((slist *, const char *));
-void slist_free PARAMS ((slist *));
-
 void string_set_add PARAMS ((struct hash_table *, const char *));
 int string_set_contains PARAMS ((struct hash_table *, const char *));
+void string_set_to_array PARAMS ((struct hash_table *, char **));
 void string_set_free PARAMS ((struct hash_table *));
 void free_keys_and_values PARAMS ((struct hash_table *));