1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Make the first argument to hash_table_new a minimal count of items before

regrow, not raw size, which is more useful.
This commit is contained in:
hniksic 2003-10-09 19:46:09 -07:00
parent 1f87d90f07
commit ffc2d0f653
3 changed files with 109 additions and 65 deletions

View File

@ -1,3 +1,19 @@
2003-10-10 Hrvoje Niksic <hniksic@xemacs.org>
* hash.c (find_mapping): Return the next available mapping when
the key is not found, not NULL.
(hash_table_put): Use find_mapping to find the storage for the new
data.
(hash_table_put): Grow the table before exceeding maximum
fullness, not afterwards.
2003-10-10 Hrvoje Niksic <hniksic@xemacs.org>
* hash.c (hash_table_new): Slightly change the meaning of the
first parameter. Instead of being the minimum initial size, it is
now the minimum number of items that the hash table can take
without needing to resize.
2003-10-09 Hrvoje Niksic <hniksic@xemacs.org>
* html-url.c (init_interesting): Initialize interesting_tags and

View File

@ -145,6 +145,14 @@ so, delete this exception statement from your version. */
hash_table_remove is careful to rehash the mappings that follow the
deleted one. */
/* When hash table's fullness exceeds this threshold, the hash table
is resized. */
#define HASH_FULLNESS_THRESHOLD 0.75
/* The hash table size is multiplied by this factor with each resize.
This guarantees infrequent resizes. */
#define HASH_RESIZE_FACTOR 2
struct mapping {
void *key;
void *value;
@ -166,12 +174,18 @@ struct hash_table {
struct mapping *mappings; /* the array of mapping pairs. */
};
#define EMPTY_MAPPING_P(mp) ((mp)->key == NULL)
#define NEXT_MAPPING(mp, mappings, size) (mp == mappings + (size - 1) \
? mappings : mp + 1)
/* We use NULL key to mark a mapping as empty. It is consequently
illegal to store NULL keys. */
#define NON_EMPTY(mp) (mp->key != NULL)
/* "Next" mapping is the mapping after MP, but wrapping back to
MAPPINGS when MP would reach MAPPINGS+SIZE. */
#define NEXT_MAPPING(mp, mappings, size) (mp != mappings + (size - 1) \
? mp + 1 : mappings)
/* Loop over non-empty mappings starting at MP. */
#define LOOP_NON_EMPTY(mp, mappings, size) \
for (; !EMPTY_MAPPING_P (mp); mp = NEXT_MAPPING (mp, mappings, size))
for (; NON_EMPTY (mp); mp = NEXT_MAPPING (mp, mappings, size))
/* #### We might want to multiply with the "golden ratio" here to get
better randomness for keys that do not result from a good hash
@ -185,7 +199,7 @@ struct hash_table {
table does not contain all primes in range, just a selection useful
for this purpose.
PRIME_OFFSET is a micro-optimization: if specified, it starts the
PRIME_OFFSET is a minor optimization: if specified, it starts the
search for the prime number beginning with the specific offset in
the prime number table. The final offset is stored in the same
variable. */
@ -223,23 +237,32 @@ prime_size (int size, int *prime_offset)
return 0;
}
/* Create a hash table of INITIAL_SIZE with hash function
HASH_FUNCTION and test function TEST_FUNCTION. INITIAL_SIZE will
be rounded to the next prime, so you don't have to worry about it
being a prime number.
/* Create a hash table with hash function HASH_FUNCTION and test
function TEST_FUNCTION. The table is empty (its count is 0), but
pre-allocated to store at least ITEMS items.
Consequently, if you wish to start out with a "small" table which
will be regrown as needed, specify INITIAL_SIZE 0.
ITEMS is the number of items that the table can accept without
needing to resize. It is useful when creating a table that is to
be immediately filled with a known number of items. In that case,
the regrows are a waste of time, and specifying ITEMS correctly
will avoid them altogether.
Note that hash tables grow dynamically regardless of ITEMS. The
only use of ITEMS is to preallocate the table and avoid unnecessary
dynamic regrows. Don't bother making ITEMS prime because it's not
used as size unchanged. To start with a small table that grows as
needed, simply specify zero ITEMS.
If HASH_FUNCTION is not provided, identity table is assumed,
i.e. key pointers are compared as keys. If you want strings with
equal contents to hash the same, use make_string_hash_table. */
struct hash_table *
hash_table_new (int initial_size,
hash_table_new (int items,
unsigned long (*hash_function) (const void *),
int (*test_function) (const void *, const void *))
{
int size;
struct hash_table *ht
= (struct hash_table *)xmalloc (sizeof (struct hash_table));
@ -247,14 +270,20 @@ hash_table_new (int initial_size,
ht->test_function = test_function ? test_function : ptrcmp;
ht->prime_offset = 0;
ht->size = prime_size (initial_size, &ht->prime_offset);
ht->resize_threshold = ht->size * 3 / 4;
ht->count = 0;
/* Calculate the size that ensures that the table will store at
least ITEMS keys without the need to resize. */
size = 1 + items / HASH_FULLNESS_THRESHOLD;
size = prime_size (size, &ht->prime_offset);
ht->size = size;
ht->resize_threshold = size * HASH_FULLNESS_THRESHOLD;
/*assert (ht->resize_threshold >= items);*/
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
ht->count = 0;
return ht;
}
@ -267,9 +296,9 @@ hash_table_destroy (struct hash_table *ht)
xfree (ht);
}
/* The heart of almost all functions in this file -- find the mapping
whose KEY is equal to key, using linear probing. Returns the
mapping that matches KEY, or NULL if none matches. */
/* The heart of most functions in this file -- find the mapping whose
KEY is equal to key, using linear probing. Returns the mapping
that matches KEY, or the first empty mapping if none matches. */
static inline struct mapping *
find_mapping (const struct hash_table *ht, const void *key)
@ -281,8 +310,8 @@ find_mapping (const struct hash_table *ht, const void *key)
LOOP_NON_EMPTY (mp, mappings, size)
if (equals (key, mp->key))
break;
return mp;
return NULL;
}
/* Get the value that corresponds to the key KEY in the hash table HT.
@ -296,7 +325,7 @@ void *
hash_table_get (const struct hash_table *ht, const void *key)
{
struct mapping *mp = find_mapping (ht, key);
if (mp)
if (NON_EMPTY (mp))
return mp->value;
else
return NULL;
@ -310,8 +339,7 @@ hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
void *orig_key, void *value)
{
struct mapping *mp = find_mapping (ht, lookup_key);
if (mp)
if (NON_EMPTY (mp))
{
if (orig_key)
*(void **)orig_key = mp->key;
@ -328,7 +356,8 @@ hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
int
hash_table_contains (const struct hash_table *ht, const void *key)
{
return find_mapping (ht, key) != NULL;
struct mapping *mp = find_mapping (ht, key);
return NON_EMPTY (mp);
}
/* Grow hash table HT as necessary, and rehash all the key-value
@ -342,28 +371,28 @@ grow_hash_table (struct hash_table *ht)
struct mapping *mp, *mappings;
int newsize;
newsize = prime_size (ht->size * 2, &ht->prime_offset);
newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
#if 0
printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
ht->size, newsize,
(double)100 * ht->count / ht->size,
(double)100 * ht->count / newsize);
100.0 * ht->count / ht->size,
100.0 * ht->count / newsize);
#endif
ht->size = newsize;
ht->resize_threshold = newsize * 3 / 4;
ht->resize_threshold = newsize * HASH_FULLNESS_THRESHOLD;
mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (mappings, '\0', ht->size * sizeof (struct mapping));
ht->mappings = mappings;
for (mp = old_mappings; mp < old_end; mp++)
if (!EMPTY_MAPPING_P (mp))
if (NON_EMPTY (mp))
{
struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
/* We don't need to call test function and worry about
collisions because all the keys come from the hash table
and are therefore guaranteed to be unique. */
/* We don't need to test for uniqueness of keys because all
the keys come from the hash table and are therefore known
to be unique. */
LOOP_NON_EMPTY (new_mp, mappings, newsize)
;
*new_mp = *mp;
@ -378,27 +407,27 @@ grow_hash_table (struct hash_table *ht)
void
hash_table_put (struct hash_table *ht, const void *key, void *value)
{
struct mapping *mappings = ht->mappings;
int size = ht->size;
int (*equals) PARAMS ((const void *, const void *)) = ht->test_function;
struct mapping *mp = mappings + HASH_POSITION (ht, key);
LOOP_NON_EMPTY (mp, mappings, size)
if (equals (key, mp->key))
struct mapping *mp = find_mapping (ht, key);
if (NON_EMPTY (mp))
{
/* update existing item */
mp->key = (void *)key; /* const? */
mp->value = value;
return;
}
/* If adding the item would make the table exceed max. fullness,
grow the table first. */
if (ht->count >= ht->resize_threshold)
{
grow_hash_table (ht);
mp = find_mapping (ht, key);
}
/* add new item */
++ht->count;
mp->key = (void *)key; /* const? */
mp->value = value;
if (ht->count > ht->resize_threshold)
/* When table is 75% full, regrow it. */
grow_hash_table (ht);
}
/* Remove a mapping that matches KEY from HT. Return 0 if there was
@ -408,7 +437,7 @@ int
hash_table_remove (struct hash_table *ht, const void *key)
{
struct mapping *mp = find_mapping (ht, key);
if (!mp)
if (!NON_EMPTY (mp))
return 0;
else
{
@ -459,7 +488,7 @@ hash_table_clear (struct hash_table *ht)
}
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
called with three arguments: the key, the value, and the CLOSURE.
called with three arguments: the key, the value, and MAPARG.
It is undefined what happens if you add or remove entries in the
hash table while hash_table_map is running. The exception is the
@ -469,22 +498,22 @@ hash_table_clear (struct hash_table *ht)
void
hash_table_map (struct hash_table *ht,
int (*mapfun) (void *, void *, void *),
void *closure)
void *maparg)
{
struct mapping *mp = ht->mappings;
struct mapping *end = ht->mappings + ht->size;
for (; mp < end; mp++)
if (!EMPTY_MAPPING_P (mp))
if (NON_EMPTY (mp))
{
void *key;
repeat:
key = mp->key;
if (mapfun (key, mp->value, closure))
if (mapfun (key, mp->value, maparg))
return;
/* hash_table_remove might have moved the adjacent
mappings. */
if (mp->key != key && !EMPTY_MAPPING_P (mp))
if (mp->key != key && NON_EMPTY (mp))
goto repeat;
}
}
@ -535,13 +564,13 @@ string_cmp (const void *s1, const void *s2)
return !strcmp ((const char *)s1, (const char *)s2);
}
/* Return a hash table of initial size INITIAL_SIZE suitable to use
strings as keys. */
/* Return a hash table of preallocated to store at least ITEMS items
suitable to use strings as keys. */
struct hash_table *
make_string_hash_table (int initial_size)
make_string_hash_table (int items)
{
return hash_table_new (initial_size, string_hash, string_cmp);
return hash_table_new (items, string_hash, string_cmp);
}
/*
@ -577,9 +606,9 @@ string_cmp_nocase (const void *s1, const void *s2)
string_cmp_nocase. */
struct hash_table *
make_nocase_string_hash_table (int initial_size)
make_nocase_string_hash_table (int items)
{
return hash_table_new (initial_size, string_hash_nocase, string_cmp_nocase);
return hash_table_new (items, string_hash_nocase, string_cmp_nocase);
}
/* Hashing of pointers. Used for hash tables that are keyed by

View File

@ -201,16 +201,15 @@ init_interesting (void)
/* If --follow-tags is specified, use only those tags. */
if (opt.follow_tags)
{
/* Create a new hash table with the intersection of tags in
--follow-tags and known_tags, and use that as
interesting_tags. */
/* Create a new table intersecting --follow-tags and known_tags,
and use it as interesting_tags. */
struct hash_table *intersect = make_nocase_string_hash_table (0);
char **followed;
for (followed = opt.follow_tags; *followed; followed++)
{
struct known_tag *t = hash_table_get (interesting_tags, *followed);
if (!t)
continue; /* ignore unknown tags in --follow-tags. */
continue; /* ignore unknown --follow-tags entries. */
hash_table_put (intersect, *followed, t);
}
hash_table_destroy (interesting_tags);
@ -218,7 +217,7 @@ init_interesting (void)
}
/* Add the attributes we care about. */
interesting_attributes = make_nocase_string_hash_table (17);
interesting_attributes = make_nocase_string_hash_table (10);
for (i = 0; i < countof (additional_attributes); i++)
string_set_add (interesting_attributes, additional_attributes[i]);
for (i = 0; i < countof (tag_url_attributes); i++)