[svn] Commit various hash table changes:

* hash.c (hash_table_map): Allow deletion and change of the
element processed by MAPFUN.
(string_hash): Use the function from glib.
* hash.c (hash_table_remove): Rewrite to actually clear deleted
entries instead of just marking them as deleted.

Published in <sxsu23tvdur.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-04-12 17:34:24 -07:00
parent 452c0bb9f7
commit eae28f142d
6 changed files with 151 additions and 174 deletions

View File

@ -1,3 +1,7 @@
2001-04-12 Hrvoje Niksic <hniksic@arsdigita.com>
* configure.in: Check for inline.
2001-04-11 Hrvoje Niksic <hniksic@arsdigita.com> 2001-04-11 Hrvoje Niksic <hniksic@arsdigita.com>
* po/zh_TW.Big5.po: New file, submitted by Abel Cheung. * po/zh_TW.Big5.po: New file, submitted by Abel Cheung.

View File

@ -140,6 +140,7 @@ dnl
dnl Checks for typedefs, structures, and compiler characteristics. dnl Checks for typedefs, structures, and compiler characteristics.
dnl dnl
AC_C_CONST AC_C_CONST
AC_C_INLINE
AC_TYPE_SIZE_T AC_TYPE_SIZE_T
AC_TYPE_PID_T AC_TYPE_PID_T
dnl #### This generates a warning. What do I do to shut it up? dnl #### This generates a warning. What do I do to shut it up?

View File

@ -1,3 +1,19 @@
2001-04-13 Hrvoje Niksic <hniksic@arsdigita.com>
* cookies.c (unsigned_string_hash): Use the new code in
string_hash as reference.
* hash.c (hash_table_map): Allow deletion and change of the
element processed by MAPFUN.
(string_hash): Use the function from glib.
2001-04-12 Hrvoje Niksic <hniksic@arsdigita.com>
* config.h.in: Include #undef stub.
* hash.c (hash_table_remove): Rewrite to actually clear deleted
entries instead of just marking them as deleted.
2001-04-12 Hrvoje Niksic <hniksic@arsdigita.com> 2001-04-12 Hrvoje Niksic <hniksic@arsdigita.com>
* hash.h: Declare hash_table_get_pair and hash_table_count. * hash.h: Declare hash_table_get_pair and hash_table_count.

View File

@ -50,6 +50,9 @@ char *alloca ();
/* Define to empty if the keyword does not work. */ /* Define to empty if the keyword does not work. */
#undef const #undef const
/* Define to empty or __inline__ or __inline. */
#undef inline
/* Define to `unsigned' if <sys/types.h> doesn't define. */ /* Define to `unsigned' if <sys/types.h> doesn't define. */
#undef size_t #undef size_t

View File

@ -111,20 +111,14 @@ delete_cookie (struct cookie *cookie)
case. */ case. */
static unsigned long static unsigned long
unsigned_string_hash (const void *sv) unsigned_string_hash (const void *key)
{ {
unsigned int h = 0; const char *p = key;
unsigned const char *x = (unsigned const char *) sv; unsigned int h = TOLOWER (*p);
while (*x) if (h)
{ for (p += 1; *p != '\0'; p++)
unsigned int g; h = (h << 5) - h + TOLOWER (*p);
unsigned char c = TOLOWER (*x);
h = (h << 4) + c;
if ((g = h & 0xf0000000) != 0)
h = (h ^ (g >> 24)) ^ g;
++x;
}
return h; return h;
} }

View File

@ -1,5 +1,5 @@
/* Hash tables. /* Hash tables.
Copyright (C) 2000 Free Software Foundation, Inc. Copyright (C) 2000, 2001 Free Software Foundation, Inc.
This file is part of Wget. This file is part of Wget.
@ -86,13 +86,15 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
distinct value, only that non-distinct objects must produce the distinct value, only that non-distinct objects must produce the
same values! For instance, a hash function that returns 0 for same values! For instance, a hash function that returns 0 for
any given object is a perfectly valid (albeit extremely bad) hash any given object is a perfectly valid (albeit extremely bad) hash
function. A hash function that hashes a string by adding up all
its characters is another example of a valid (but quite bad) hash
function. function.
The above stated rule is quite easy to enforce. For example, if The above stated rule is quite easy to enforce. For example, if
your testing function compares strings case-insensitively, all your testing function compares strings case-insensitively, all
your function needs to do is lower-case the string characters your function needs to do is lower-case the string characters
before calculating a hash. That way you have easily guaranteed before calculating a hash. That way you have easily guaranteed
that changes in case will not result in a different hash. that case differences will not result in a different hash.
- (optional) Choose the hash function to get as good "spreading" as - (optional) Choose the hash function to get as good "spreading" as
possible. A good hash function will react to even a small change possible. A good hash function will react to even a small change
@ -125,8 +127,8 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
Collisions make deletion tricky because finding collisions again Collisions make deletion tricky because finding collisions again
relies on new empty spots not being created. That's why relies on new empty spots not being created. That's why
hash_table_remove only marks the spot as deleted rather than really hash_table_remove is careful to rehash the mappings that follow the
making it empty. */ deleted one. */
struct mapping { struct mapping {
void *key; void *key;
@ -138,18 +140,20 @@ struct hash_table {
int (*test_function) (const void *, const void *); int (*test_function) (const void *, const void *);
int size; /* size of the array */ int size; /* size of the array */
int fullness; /* number of non-empty fields */
int count; /* number of non-empty, non-deleted int count; /* number of non-empty, non-deleted
fields. */ fields. */
struct mapping *mappings; struct mapping *mappings;
}; };
#define ENTRY_DELETED ((void *)0xdeadbeef) #define EMPTY_MAPPING_P(mp) ((mp)->key == NULL)
#define ENTRY_EMPTY NULL #define NEXT_MAPPING(mp, mappings, size) (mp == mappings + (size - 1) \
? mappings : mp + 1)
#define DELETED_ENTRY_P(ptr) ((ptr) == ENTRY_DELETED) #define LOOP_NON_EMPTY(mp, mappings, size) \
#define EMPTY_ENTRY_P(ptr) ((ptr) == ENTRY_EMPTY) for (; !EMPTY_MAPPING_P (mp); mp = NEXT_MAPPING (mp, mappings, size))
#define HASH_POSITION(ht, key) (ht->hash_function (key) % ht->size)
/* Find a prime near, but greater than or equal to SIZE. */ /* Find a prime near, but greater than or equal to SIZE. */
@ -190,7 +194,6 @@ hash_table_new (int initial_size,
ht->hash_function = hash_function; ht->hash_function = hash_function;
ht->test_function = test_function; ht->test_function = test_function;
ht->size = prime_size (initial_size); ht->size = prime_size (initial_size);
ht->fullness = 0;
ht->count = 0; ht->count = 0;
ht->mappings = xmalloc (ht->size * sizeof (struct mapping)); ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
@ -208,31 +211,20 @@ hash_table_destroy (struct hash_table *ht)
/* The heart of almost all functions in this file -- find the mapping /* The heart of almost all functions in this file -- find the mapping
whose KEY is equal to key, using a linear probing loop. Returns whose KEY is equal to key, using a linear probing loop. Returns
the offset of the mapping in ht->mappings. This should probably be the offset of the mapping in ht->mappings. */
declared inline. */
static int static inline struct mapping *
find_mapping (struct hash_table *ht, const void *key) find_mapping (struct hash_table *ht, const void *key)
{ {
struct mapping *mappings = ht->mappings; struct mapping *mappings = ht->mappings;
int size = ht->size; int size = ht->size;
int location = ht->hash_function (key) % size; struct mapping *mp = mappings + HASH_POSITION (ht, key);
while (1) int (*equals) (const void *, const void *) = ht->test_function;
{
struct mapping *mp = mappings + location;
void *mp_key = mp->key;
if (EMPTY_ENTRY_P (mp_key)) LOOP_NON_EMPTY (mp, mappings, size)
return -1; if (equals (key, mp->key))
else if (DELETED_ENTRY_P (mp_key) return mp;
|| !ht->test_function (key, mp_key)) return NULL;
{
if (++location == size)
location = 0;
}
else
return location;
}
} }
/* Get the value that corresponds to the key KEY in the hash table HT. /* Get the value that corresponds to the key KEY in the hash table HT.
@ -245,11 +237,11 @@ find_mapping (struct hash_table *ht, const void *key)
void * void *
hash_table_get (struct hash_table *ht, const void *key) hash_table_get (struct hash_table *ht, const void *key)
{ {
int location = find_mapping (ht, key); struct mapping *mp = find_mapping (ht, key);
if (location < 0) if (mp)
return NULL; return mp->value;
else else
return ht->mappings[location].value; return NULL;
} }
/* Like hash_table_get, but writes out the pointers to both key and /* Like hash_table_get, but writes out the pointers to both key and
@ -259,18 +251,18 @@ int
hash_table_get_pair (struct hash_table *ht, const void *lookup_key, hash_table_get_pair (struct hash_table *ht, const void *lookup_key,
void *orig_key, void *value) void *orig_key, void *value)
{ {
int location = find_mapping (ht, lookup_key); struct mapping *mp = find_mapping (ht, lookup_key);
if (location < 0)
return 0; if (mp)
else
{ {
struct mapping *mp = ht->mappings + location;
if (orig_key) if (orig_key)
*(void **)orig_key = mp->key; *(void **)orig_key = mp->key;
if (value) if (value)
*(void **)value = mp->value; *(void **)value = mp->value;
return 1; return 1;
} }
else
return 0;
} }
/* Return 1 if KEY exists in HT, 0 otherwise. */ /* Return 1 if KEY exists in HT, 0 otherwise. */
@ -278,7 +270,7 @@ hash_table_get_pair (struct hash_table *ht, const void *lookup_key,
int int
hash_table_exists (struct hash_table *ht, const void *key) hash_table_exists (struct hash_table *ht, const void *key)
{ {
return find_mapping (ht, key) >= 0; return find_mapping (ht, key) != NULL;
} }
#define MAX(i, j) (((i) >= (j)) ? (i) : (j)) #define MAX(i, j) (((i) >= (j)) ? (i) : (j))
@ -289,46 +281,27 @@ hash_table_exists (struct hash_table *ht, const void *key)
static void static void
grow_hash_table (struct hash_table *ht) grow_hash_table (struct hash_table *ht)
{ {
int i;
struct mapping *old_mappings = ht->mappings; struct mapping *old_mappings = ht->mappings;
struct mapping *old_end = ht->mappings + ht->size;
struct mapping *mp;
int old_count = ht->count; /* for assert() below */ int old_count = ht->count; /* for assert() below */
int old_size = ht->size;
/* To minimize the number of regrowth, we'd like to resize the hash
table exponentially. Normally, this would be done by doubling
ht->size (and round it to next prime) on each regrow:
ht->size = prime_size (ht->size * 2);
But it is possible that the table has large fullness because of
the many deleted entries. If that is the case, we don't want to
blindly grow the table; we just want to rehash it. For that
reason, we use ht->count as the relevant parameter. MAX is used
only because we don't want to actually shrink the table. (But
maybe that's wrong.) */
int needed_size = prime_size (ht->count * 3);
ht->size = MAX (old_size, needed_size);
#if 0 #if 0
printf ("growing from %d to %d\n", old_size, ht->size); printf ("growing from %d to %d\n", ht->size, prime_size (ht->size * 2));
#endif #endif
ht->size = prime_size (ht->size * 2);
ht->mappings = xmalloc (ht->size * sizeof (struct mapping)); ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
/* Need to reset these two; hash_table_put will reinitialize them. */ /* Need to reset this; hash_table_put will reinitialize it. */
ht->fullness = 0;
ht->count = 0; ht->count = 0;
for (i = 0; i < old_size; i++)
{
struct mapping *mp = old_mappings + i;
void *mp_key = mp->key;
if (!EMPTY_ENTRY_P (mp_key) for (mp = old_mappings; mp < old_end; mp++)
&& !DELETED_ENTRY_P (mp_key)) if (!EMPTY_MAPPING_P (mp))
hash_table_put (ht, mp_key, mp->value); hash_table_put (ht, mp->key, mp->value);
}
assert (ht->count == old_count); assert (ht->count == old_count);
xfree (old_mappings); xfree (old_mappings);
} }
@ -339,86 +312,71 @@ grow_hash_table (struct hash_table *ht)
void void
hash_table_put (struct hash_table *ht, const void *key, void *value) hash_table_put (struct hash_table *ht, const void *key, void *value)
{ {
/* Cannot use find_mapping here because we're actually looking for
an *empty* entry. */
struct mapping *mappings = ht->mappings; struct mapping *mappings = ht->mappings;
int size = ht->size; int size = ht->size;
int location = ht->hash_function (key) % size; int (*equals) (const void *, const void *) = ht->test_function;
while (1)
{
struct mapping *mp = mappings + location;
void *mp_key = mp->key;
if (EMPTY_ENTRY_P (mp_key)) struct mapping *mp = mappings + HASH_POSITION (ht, key);
LOOP_NON_EMPTY (mp, mappings, size)
if (equals (key, mp->key))
{ {
++ht->fullness;
++ht->count;
just_insert:
mp->key = (void *)key; /* const? */ mp->key = (void *)key; /* const? */
mp->value = value; mp->value = value;
break; return;
} }
else if (DELETED_ENTRY_P (mp_key)
|| !ht->test_function (key, mp_key)) ++ht->count;
{ mp->key = (void *)key; /* const? */
if (++location == size) mp->value = value;
location = 0;
} if (ht->count > ht->size * 3 / 4)
else /* equal to key and not deleted */ /* When table is 75% full, regrow it. */
{
/* We're replacing an existing entry, so ht->count and
ht->fullness remain unchanged. */
goto just_insert;
}
}
if (ht->fullness * 4 > ht->size * 3)
/* When fullness exceeds 75% of size, regrow the table. */
grow_hash_table (ht); grow_hash_table (ht);
} }
/* Remove KEY from HT. */ /* Remove a mapping that matches KEY from HT. Return 0 if there was
no such entry; return 1 if an entry was removed. */
int int
hash_table_remove (struct hash_table *ht, const void *key) hash_table_remove (struct hash_table *ht, const void *key)
{ {
int location = find_mapping (ht, key); struct mapping *mp = find_mapping (ht, key);
if (location < 0) if (!mp)
return 0; return 0;
else else
{ {
int size = ht->size;
struct mapping *mappings = ht->mappings; struct mapping *mappings = ht->mappings;
struct mapping *mp = mappings + location;
/* We don't really remove an entry from the hash table: we just
mark it as deleted. This is because there may be other
entries located after this entry whose hash points to a
location before this entry. (Example: keys A, B and C have
the same hash. If you were to really *delete* B from the
table, C could no longer be found.) */
/* Optimization addendum: if the mapping that follows LOCATION
is already empty, that is a sure sign that nobody depends on
LOCATION being non-empty. (This is because we're using
linear probing. This would not be the case with double
hashing.) In that case, we may safely delete the mapping. */
/* This could be generalized so that all the non-empty
locations following LOCATION are simply shifted leftward. It
would make deletion a bit slower, but it would remove the
ugly DELETED_ENTRY_P checks from all the rest of the code,
making the whole thing faster. */
int location_after = (location + 1) == ht->size ? 0 : location + 1;
struct mapping *mp_after = mappings + location_after;
if (EMPTY_ENTRY_P (mp_after->key))
{
mp->key = ENTRY_EMPTY;
--ht->fullness;
}
else
mp->key = ENTRY_DELETED;
mp->key = NULL;
--ht->count; --ht->count;
/* Rehash all the entries following MP. The alternative
approach is to mark entry as deleted, but that leaves a lot
of garbage. More importantly, this method makes
hash_table_get and hash_table_put measurably faster. */
mp = NEXT_MAPPING (mp, mappings, size);
LOOP_NON_EMPTY (mp, mappings, size)
{
const void *key2 = mp->key;
struct mapping *mp_new = mappings + HASH_POSITION (ht, key2);
/* Find the new location for the key. */
LOOP_NON_EMPTY (mp_new, mappings, size)
if (key2 == mp_new->key)
/* The mapping MP (key2) is already where we want it (in
MP_NEW's "chain" of keys.) */
goto next_rehash;
*mp_new = *mp;
mp->key = NULL;
next_rehash:
;
}
return 1; return 1;
} }
} }
@ -431,31 +389,35 @@ void
hash_table_clear (struct hash_table *ht) hash_table_clear (struct hash_table *ht)
{ {
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
ht->fullness = 0;
ht->count = 0; ht->count = 0;
} }
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is /* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
called with three arguments: the key, the value, and the CLOSURE. called with three arguments: the key, the value, and the CLOSURE.
Don't add or remove entries from HT while hash_table_map is being
called, or strange things may happen. */ It is undefined what happens if you add or remove entries in the
hash table while hash_table_map is running. The exception is the
entry you're currently mapping over; you may remove or change that
entry. */
void void
hash_table_map (struct hash_table *ht, hash_table_map (struct hash_table *ht,
int (*mapfun) (void *, void *, void *), int (*mapfun) (void *, void *, void *),
void *closure) void *closure)
{ {
struct mapping *mappings = ht->mappings; struct mapping *mp = ht->mappings;
int i; struct mapping *end = ht->mappings + ht->size;
for (i = 0; i < ht->size; i++)
{
struct mapping *mp = mappings + i;
void *mp_key = mp->key;
if (!EMPTY_ENTRY_P (mp_key) for (; mp < end; mp++)
&& !DELETED_ENTRY_P (mp_key)) if (!EMPTY_MAPPING_P (mp))
if (mapfun (mp_key, mp->value, closure)) {
void *key;
repeat:
key = mp->key;
if (mapfun (key, mp->value, closure))
return; return;
if (mp->key != key && !EMPTY_MAPPING_P (mp))
goto repeat;
} }
} }
@ -470,20 +432,17 @@ hash_table_count (struct hash_table *ht)
/* Support for hash tables whose keys are strings. */ /* Support for hash tables whose keys are strings. */
/* supposedly from the Dragon Book P436. */ /* 31 bit hash function. Taken from Gnome's glib. This seems to
perform much better than the above. */
unsigned long unsigned long
string_hash (const void *sv) string_hash (const void *key)
{ {
unsigned int h = 0; const char *p = key;
unsigned const char *x = (unsigned const char *) sv; unsigned int h = *p;
while (*x) if (h)
{ for (p += 1; *p != '\0'; p++)
unsigned int g; h = (h << 5) - h + *p;
h = (h << 4) + *x++;
if ((g = h & 0xf0000000) != 0)
h = (h ^ (g >> 24)) ^ g;
}
return h; return h;
} }
@ -557,7 +516,7 @@ main (void)
if (!hash_table_exists (ht, line)) if (!hash_table_exists (ht, line))
hash_table_put (ht, strdup (line), "here I am!"); hash_table_put (ht, strdup (line), "here I am!");
#if 1 #if 1
if (len % 3) if (len % 5 == 0)
{ {
char *line_copy; char *line_copy;
if (hash_table_get_pair (ht, line, &line_copy, NULL)) if (hash_table_get_pair (ht, line, &line_copy, NULL))
@ -572,7 +531,7 @@ main (void)
print_hash (ht); print_hash (ht);
#endif #endif
#if 1 #if 1
printf ("%d %d %d\n", ht->count, ht->fullness, ht->size); printf ("%d %d\n", ht->count, ht->size);
#endif #endif
return 0; return 0;
} }