mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Make the first argument to hash_table_new a minimal count of items before
regrow, not raw size, which is more useful.
This commit is contained in:
parent
1f87d90f07
commit
ffc2d0f653
@ -1,3 +1,19 @@
|
|||||||
|
2003-10-10 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* hash.c (find_mapping): Return the next available mapping when
|
||||||
|
the key is not found, not NULL.
|
||||||
|
(hash_table_put): Use find_mapping to find the storage for the new
|
||||||
|
data.
|
||||||
|
(hash_table_put): Grow the table before exceeding maximum
|
||||||
|
fullness, not afterwards.
|
||||||
|
|
||||||
|
2003-10-10 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* hash.c (hash_table_new): Slightly change the meaning of the
|
||||||
|
first parameter. Instead of being the minimum initial size, it is
|
||||||
|
now the minimum number of items that the hash table can take
|
||||||
|
without needing to resize.
|
||||||
|
|
||||||
2003-10-09 Hrvoje Niksic <hniksic@xemacs.org>
|
2003-10-09 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* html-url.c (init_interesting): Initialize interesting_tags and
|
* html-url.c (init_interesting): Initialize interesting_tags and
|
||||||
|
149
src/hash.c
149
src/hash.c
@ -145,6 +145,14 @@ so, delete this exception statement from your version. */
|
|||||||
hash_table_remove is careful to rehash the mappings that follow the
|
hash_table_remove is careful to rehash the mappings that follow the
|
||||||
deleted one. */
|
deleted one. */
|
||||||
|
|
||||||
|
/* When hash table's fullness exceeds this threshold, the hash table
|
||||||
|
is resized. */
|
||||||
|
#define HASH_FULLNESS_THRESHOLD 0.75
|
||||||
|
|
||||||
|
/* The hash table size is multiplied by this factor with each resize.
|
||||||
|
This guarantees infrequent resizes. */
|
||||||
|
#define HASH_RESIZE_FACTOR 2
|
||||||
|
|
||||||
struct mapping {
|
struct mapping {
|
||||||
void *key;
|
void *key;
|
||||||
void *value;
|
void *value;
|
||||||
@ -166,12 +174,18 @@ struct hash_table {
|
|||||||
struct mapping *mappings; /* the array of mapping pairs. */
|
struct mapping *mappings; /* the array of mapping pairs. */
|
||||||
};
|
};
|
||||||
|
|
||||||
#define EMPTY_MAPPING_P(mp) ((mp)->key == NULL)
|
/* We use NULL key to mark a mapping as empty. It is consequently
|
||||||
#define NEXT_MAPPING(mp, mappings, size) (mp == mappings + (size - 1) \
|
illegal to store NULL keys. */
|
||||||
? mappings : mp + 1)
|
#define NON_EMPTY(mp) (mp->key != NULL)
|
||||||
|
|
||||||
|
/* "Next" mapping is the mapping after MP, but wrapping back to
|
||||||
|
MAPPINGS when MP would reach MAPPINGS+SIZE. */
|
||||||
|
#define NEXT_MAPPING(mp, mappings, size) (mp != mappings + (size - 1) \
|
||||||
|
? mp + 1 : mappings)
|
||||||
|
|
||||||
|
/* Loop over non-empty mappings starting at MP. */
|
||||||
#define LOOP_NON_EMPTY(mp, mappings, size) \
|
#define LOOP_NON_EMPTY(mp, mappings, size) \
|
||||||
for (; !EMPTY_MAPPING_P (mp); mp = NEXT_MAPPING (mp, mappings, size))
|
for (; NON_EMPTY (mp); mp = NEXT_MAPPING (mp, mappings, size))
|
||||||
|
|
||||||
/* #### We might want to multiply with the "golden ratio" here to get
|
/* #### We might want to multiply with the "golden ratio" here to get
|
||||||
better randomness for keys that do not result from a good hash
|
better randomness for keys that do not result from a good hash
|
||||||
@ -185,7 +199,7 @@ struct hash_table {
|
|||||||
table does not contain all primes in range, just a selection useful
|
table does not contain all primes in range, just a selection useful
|
||||||
for this purpose.
|
for this purpose.
|
||||||
|
|
||||||
PRIME_OFFSET is a micro-optimization: if specified, it starts the
|
PRIME_OFFSET is a minor optimization: if specified, it starts the
|
||||||
search for the prime number beginning with the specific offset in
|
search for the prime number beginning with the specific offset in
|
||||||
the prime number table. The final offset is stored in the same
|
the prime number table. The final offset is stored in the same
|
||||||
variable. */
|
variable. */
|
||||||
@ -223,23 +237,32 @@ prime_size (int size, int *prime_offset)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Create a hash table of INITIAL_SIZE with hash function
|
/* Create a hash table with hash function HASH_FUNCTION and test
|
||||||
HASH_FUNCTION and test function TEST_FUNCTION. INITIAL_SIZE will
|
function TEST_FUNCTION. The table is empty (its count is 0), but
|
||||||
be rounded to the next prime, so you don't have to worry about it
|
pre-allocated to store at least ITEMS items.
|
||||||
being a prime number.
|
|
||||||
|
|
||||||
Consequently, if you wish to start out with a "small" table which
|
ITEMS is the number of items that the table can accept without
|
||||||
will be regrown as needed, specify INITIAL_SIZE 0.
|
needing to resize. It is useful when creating a table that is to
|
||||||
|
be immediately filled with a known number of items. In that case,
|
||||||
|
the regrows are a waste of time, and specifying ITEMS correctly
|
||||||
|
will avoid them altogether.
|
||||||
|
|
||||||
|
Note that hash tables grow dynamically regardless of ITEMS. The
|
||||||
|
only use of ITEMS is to preallocate the table and avoid unnecessary
|
||||||
|
dynamic regrows. Don't bother making ITEMS prime because it's not
|
||||||
|
used directly as the table size.  To start with a small table that grows as
|
||||||
|
needed, simply specify zero ITEMS.
|
||||||
|
|
||||||
If HASH_FUNCTION is not provided, identity table is assumed,
|
If HASH_FUNCTION is not provided, identity table is assumed,
|
||||||
i.e. key pointers are compared as keys. If you want strings with
|
i.e. key pointers are compared as keys. If you want strings with
|
||||||
equal contents to hash the same, use make_string_hash_table. */
|
equal contents to hash the same, use make_string_hash_table. */
|
||||||
|
|
||||||
struct hash_table *
|
struct hash_table *
|
||||||
hash_table_new (int initial_size,
|
hash_table_new (int items,
|
||||||
unsigned long (*hash_function) (const void *),
|
unsigned long (*hash_function) (const void *),
|
||||||
int (*test_function) (const void *, const void *))
|
int (*test_function) (const void *, const void *))
|
||||||
{
|
{
|
||||||
|
int size;
|
||||||
struct hash_table *ht
|
struct hash_table *ht
|
||||||
= (struct hash_table *)xmalloc (sizeof (struct hash_table));
|
= (struct hash_table *)xmalloc (sizeof (struct hash_table));
|
||||||
|
|
||||||
@ -247,14 +270,20 @@ hash_table_new (int initial_size,
|
|||||||
ht->test_function = test_function ? test_function : ptrcmp;
|
ht->test_function = test_function ? test_function : ptrcmp;
|
||||||
|
|
||||||
ht->prime_offset = 0;
|
ht->prime_offset = 0;
|
||||||
ht->size = prime_size (initial_size, &ht->prime_offset);
|
|
||||||
ht->resize_threshold = ht->size * 3 / 4;
|
|
||||||
|
|
||||||
ht->count = 0;
|
/* Calculate the size that ensures that the table will store at
|
||||||
|
least ITEMS keys without the need to resize. */
|
||||||
|
size = 1 + items / HASH_FULLNESS_THRESHOLD;
|
||||||
|
size = prime_size (size, &ht->prime_offset);
|
||||||
|
ht->size = size;
|
||||||
|
ht->resize_threshold = size * HASH_FULLNESS_THRESHOLD;
|
||||||
|
/*assert (ht->resize_threshold >= items);*/
|
||||||
|
|
||||||
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
|
|
||||||
|
ht->count = 0;
|
||||||
|
|
||||||
return ht;
|
return ht;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -267,9 +296,9 @@ hash_table_destroy (struct hash_table *ht)
|
|||||||
xfree (ht);
|
xfree (ht);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The heart of almost all functions in this file -- find the mapping
|
/* The heart of most functions in this file -- find the mapping whose
|
||||||
whose KEY is equal to key, using linear probing. Returns the
|
KEY is equal to key, using linear probing. Returns the mapping
|
||||||
mapping that matches KEY, or NULL if none matches. */
|
that matches KEY, or the first empty mapping if none matches. */
|
||||||
|
|
||||||
static inline struct mapping *
|
static inline struct mapping *
|
||||||
find_mapping (const struct hash_table *ht, const void *key)
|
find_mapping (const struct hash_table *ht, const void *key)
|
||||||
@ -281,8 +310,8 @@ find_mapping (const struct hash_table *ht, const void *key)
|
|||||||
|
|
||||||
LOOP_NON_EMPTY (mp, mappings, size)
|
LOOP_NON_EMPTY (mp, mappings, size)
|
||||||
if (equals (key, mp->key))
|
if (equals (key, mp->key))
|
||||||
return mp;
|
break;
|
||||||
return NULL;
|
return mp;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Get the value that corresponds to the key KEY in the hash table HT.
|
/* Get the value that corresponds to the key KEY in the hash table HT.
|
||||||
@ -296,7 +325,7 @@ void *
|
|||||||
hash_table_get (const struct hash_table *ht, const void *key)
|
hash_table_get (const struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
struct mapping *mp = find_mapping (ht, key);
|
struct mapping *mp = find_mapping (ht, key);
|
||||||
if (mp)
|
if (NON_EMPTY (mp))
|
||||||
return mp->value;
|
return mp->value;
|
||||||
else
|
else
|
||||||
return NULL;
|
return NULL;
|
||||||
@ -310,8 +339,7 @@ hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
|
|||||||
void *orig_key, void *value)
|
void *orig_key, void *value)
|
||||||
{
|
{
|
||||||
struct mapping *mp = find_mapping (ht, lookup_key);
|
struct mapping *mp = find_mapping (ht, lookup_key);
|
||||||
|
if (NON_EMPTY (mp))
|
||||||
if (mp)
|
|
||||||
{
|
{
|
||||||
if (orig_key)
|
if (orig_key)
|
||||||
*(void **)orig_key = mp->key;
|
*(void **)orig_key = mp->key;
|
||||||
@ -328,7 +356,8 @@ hash_table_get_pair (const struct hash_table *ht, const void *lookup_key,
|
|||||||
int
|
int
|
||||||
hash_table_contains (const struct hash_table *ht, const void *key)
|
hash_table_contains (const struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
return find_mapping (ht, key) != NULL;
|
struct mapping *mp = find_mapping (ht, key);
|
||||||
|
return NON_EMPTY (mp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Grow hash table HT as necessary, and rehash all the key-value
|
/* Grow hash table HT as necessary, and rehash all the key-value
|
||||||
@ -342,28 +371,28 @@ grow_hash_table (struct hash_table *ht)
|
|||||||
struct mapping *mp, *mappings;
|
struct mapping *mp, *mappings;
|
||||||
int newsize;
|
int newsize;
|
||||||
|
|
||||||
newsize = prime_size (ht->size * 2, &ht->prime_offset);
|
newsize = prime_size (ht->size * HASH_RESIZE_FACTOR, &ht->prime_offset);
|
||||||
#if 0
|
#if 0
|
||||||
printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
|
printf ("growing from %d to %d; fullness %.2f%% to %.2f%%\n",
|
||||||
ht->size, newsize,
|
ht->size, newsize,
|
||||||
(double)100 * ht->count / ht->size,
|
100.0 * ht->count / ht->size,
|
||||||
(double)100 * ht->count / newsize);
|
100.0 * ht->count / newsize);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ht->size = newsize;
|
ht->size = newsize;
|
||||||
ht->resize_threshold = newsize * 3 / 4;
|
ht->resize_threshold = newsize * HASH_FULLNESS_THRESHOLD;
|
||||||
|
|
||||||
mappings = xmalloc (ht->size * sizeof (struct mapping));
|
mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
memset (mappings, '\0', ht->size * sizeof (struct mapping));
|
memset (mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
ht->mappings = mappings;
|
ht->mappings = mappings;
|
||||||
|
|
||||||
for (mp = old_mappings; mp < old_end; mp++)
|
for (mp = old_mappings; mp < old_end; mp++)
|
||||||
if (!EMPTY_MAPPING_P (mp))
|
if (NON_EMPTY (mp))
|
||||||
{
|
{
|
||||||
struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
|
struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
|
||||||
/* We don't need to call test function and worry about
|
/* We don't need to test for uniqueness of keys because all
|
||||||
collisions because all the keys come from the hash table
|
the keys come from the hash table and are therefore known
|
||||||
and are therefore guaranteed to be unique. */
|
to be unique. */
|
||||||
LOOP_NON_EMPTY (new_mp, mappings, newsize)
|
LOOP_NON_EMPTY (new_mp, mappings, newsize)
|
||||||
;
|
;
|
||||||
*new_mp = *mp;
|
*new_mp = *mp;
|
||||||
@ -378,27 +407,27 @@ grow_hash_table (struct hash_table *ht)
|
|||||||
void
|
void
|
||||||
hash_table_put (struct hash_table *ht, const void *key, void *value)
|
hash_table_put (struct hash_table *ht, const void *key, void *value)
|
||||||
{
|
{
|
||||||
struct mapping *mappings = ht->mappings;
|
struct mapping *mp = find_mapping (ht, key);
|
||||||
int size = ht->size;
|
if (NON_EMPTY (mp))
|
||||||
int (*equals) PARAMS ((const void *, const void *)) = ht->test_function;
|
{
|
||||||
|
/* update existing item */
|
||||||
|
mp->key = (void *)key; /* const? */
|
||||||
|
mp->value = value;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
struct mapping *mp = mappings + HASH_POSITION (ht, key);
|
/* If adding the item would make the table exceed max. fullness,
|
||||||
|
grow the table first. */
|
||||||
LOOP_NON_EMPTY (mp, mappings, size)
|
if (ht->count >= ht->resize_threshold)
|
||||||
if (equals (key, mp->key))
|
{
|
||||||
{
|
grow_hash_table (ht);
|
||||||
mp->key = (void *)key; /* const? */
|
mp = find_mapping (ht, key);
|
||||||
mp->value = value;
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/* add new item */
|
||||||
++ht->count;
|
++ht->count;
|
||||||
mp->key = (void *)key; /* const? */
|
mp->key = (void *)key; /* const? */
|
||||||
mp->value = value;
|
mp->value = value;
|
||||||
|
|
||||||
if (ht->count > ht->resize_threshold)
|
|
||||||
/* When table is 75% full, regrow it. */
|
|
||||||
grow_hash_table (ht);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove a mapping that matches KEY from HT. Return 0 if there was
|
/* Remove a mapping that matches KEY from HT. Return 0 if there was
|
||||||
@ -408,7 +437,7 @@ int
|
|||||||
hash_table_remove (struct hash_table *ht, const void *key)
|
hash_table_remove (struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
struct mapping *mp = find_mapping (ht, key);
|
struct mapping *mp = find_mapping (ht, key);
|
||||||
if (!mp)
|
if (!NON_EMPTY (mp))
|
||||||
return 0;
|
return 0;
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -459,7 +488,7 @@ hash_table_clear (struct hash_table *ht)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
|
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
|
||||||
called with three arguments: the key, the value, and the CLOSURE.
|
called with three arguments: the key, the value, and MAPARG.
|
||||||
|
|
||||||
It is undefined what happens if you add or remove entries in the
|
It is undefined what happens if you add or remove entries in the
|
||||||
hash table while hash_table_map is running. The exception is the
|
hash table while hash_table_map is running. The exception is the
|
||||||
@ -469,22 +498,22 @@ hash_table_clear (struct hash_table *ht)
|
|||||||
void
|
void
|
||||||
hash_table_map (struct hash_table *ht,
|
hash_table_map (struct hash_table *ht,
|
||||||
int (*mapfun) (void *, void *, void *),
|
int (*mapfun) (void *, void *, void *),
|
||||||
void *closure)
|
void *maparg)
|
||||||
{
|
{
|
||||||
struct mapping *mp = ht->mappings;
|
struct mapping *mp = ht->mappings;
|
||||||
struct mapping *end = ht->mappings + ht->size;
|
struct mapping *end = ht->mappings + ht->size;
|
||||||
|
|
||||||
for (; mp < end; mp++)
|
for (; mp < end; mp++)
|
||||||
if (!EMPTY_MAPPING_P (mp))
|
if (NON_EMPTY (mp))
|
||||||
{
|
{
|
||||||
void *key;
|
void *key;
|
||||||
repeat:
|
repeat:
|
||||||
key = mp->key;
|
key = mp->key;
|
||||||
if (mapfun (key, mp->value, closure))
|
if (mapfun (key, mp->value, maparg))
|
||||||
return;
|
return;
|
||||||
/* hash_table_remove might have moved the adjacent
|
/* hash_table_remove might have moved the adjacent
|
||||||
mappings. */
|
mappings. */
|
||||||
if (mp->key != key && !EMPTY_MAPPING_P (mp))
|
if (mp->key != key && NON_EMPTY (mp))
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -535,13 +564,13 @@ string_cmp (const void *s1, const void *s2)
|
|||||||
return !strcmp ((const char *)s1, (const char *)s2);
|
return !strcmp ((const char *)s1, (const char *)s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Return a hash table of initial size INITIAL_SIZE suitable to use
|
/* Return a hash table, preallocated to store at least ITEMS items,
|
||||||
strings as keys. */
|
suitable to use strings as keys. */
|
||||||
|
|
||||||
struct hash_table *
|
struct hash_table *
|
||||||
make_string_hash_table (int initial_size)
|
make_string_hash_table (int items)
|
||||||
{
|
{
|
||||||
return hash_table_new (initial_size, string_hash, string_cmp);
|
return hash_table_new (items, string_hash, string_cmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -577,9 +606,9 @@ string_cmp_nocase (const void *s1, const void *s2)
|
|||||||
string_cmp_nocase. */
|
string_cmp_nocase. */
|
||||||
|
|
||||||
struct hash_table *
|
struct hash_table *
|
||||||
make_nocase_string_hash_table (int initial_size)
|
make_nocase_string_hash_table (int items)
|
||||||
{
|
{
|
||||||
return hash_table_new (initial_size, string_hash_nocase, string_cmp_nocase);
|
return hash_table_new (items, string_hash_nocase, string_cmp_nocase);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Hashing of pointers. Used for hash tables that are keyed by
|
/* Hashing of pointers. Used for hash tables that are keyed by
|
||||||
|
@ -201,16 +201,15 @@ init_interesting (void)
|
|||||||
/* If --follow-tags is specified, use only those tags. */
|
/* If --follow-tags is specified, use only those tags. */
|
||||||
if (opt.follow_tags)
|
if (opt.follow_tags)
|
||||||
{
|
{
|
||||||
/* Create a new hash table with the intersection of tags in
|
/* Create a new table intersecting --follow-tags and known_tags,
|
||||||
--follow-tags and known_tags, and use that as
|
and use it as interesting_tags. */
|
||||||
interesting_tags. */
|
|
||||||
struct hash_table *intersect = make_nocase_string_hash_table (0);
|
struct hash_table *intersect = make_nocase_string_hash_table (0);
|
||||||
char **followed;
|
char **followed;
|
||||||
for (followed = opt.follow_tags; *followed; followed++)
|
for (followed = opt.follow_tags; *followed; followed++)
|
||||||
{
|
{
|
||||||
struct known_tag *t = hash_table_get (interesting_tags, *followed);
|
struct known_tag *t = hash_table_get (interesting_tags, *followed);
|
||||||
if (!t)
|
if (!t)
|
||||||
continue; /* ignore unknown tags in --follow-tags. */
|
continue; /* ignore unknown --follow-tags entries. */
|
||||||
hash_table_put (intersect, *followed, t);
|
hash_table_put (intersect, *followed, t);
|
||||||
}
|
}
|
||||||
hash_table_destroy (interesting_tags);
|
hash_table_destroy (interesting_tags);
|
||||||
@ -218,7 +217,7 @@ init_interesting (void)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Add the attributes we care about. */
|
/* Add the attributes we care about. */
|
||||||
interesting_attributes = make_nocase_string_hash_table (17);
|
interesting_attributes = make_nocase_string_hash_table (10);
|
||||||
for (i = 0; i < countof (additional_attributes); i++)
|
for (i = 0; i < countof (additional_attributes); i++)
|
||||||
string_set_add (interesting_attributes, additional_attributes[i]);
|
string_set_add (interesting_attributes, additional_attributes[i]);
|
||||||
for (i = 0; i < countof (tag_url_attributes); i++)
|
for (i = 0; i < countof (tag_url_attributes); i++)
|
||||||
|
Loading…
Reference in New Issue
Block a user