mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Committed a bunch of different tweaks of mine.
Published in <sxsr9463wrx.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
fb42069e51
commit
6e598c81e3
@ -1,3 +1,40 @@
|
|||||||
|
2000-11-21 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* hash.c (find_mapping): New function.
|
||||||
|
(hash_table_get): Use it.
|
||||||
|
(hash_table_get_pair): Ditto.
|
||||||
|
(hash_table_exists): Ditto.
|
||||||
|
(hash_table_remove): Ditto.
|
||||||
|
(hash_table_remove): Really delete the entry if the mapping
|
||||||
|
following LOCATION is empty.
|
||||||
|
|
||||||
|
* utils.c (string_set_add): Check whether the element has existed
|
||||||
|
before.
|
||||||
|
|
||||||
|
* hash.c (hash_table_get_pair): New function.
|
||||||
|
|
||||||
|
2000-11-20 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* http.c (http_process_type): Ignore trailing whitespace; use
|
||||||
|
strdupdelim().
|
||||||
|
|
||||||
|
* recur.c (recursive_retrieve): Use the new `convert' field.
|
||||||
|
(convert_all_links): Ditto.
|
||||||
|
(convert_all_links): Don't respect meta_disallow_follow.
|
||||||
|
|
||||||
|
* html-url.c (handle_link): Fill out link_relative_p and
|
||||||
|
link_complete_p.
|
||||||
|
|
||||||
|
* url.h (struct _urlpos): Make elements more readable.
|
||||||
|
|
||||||
|
* recur.c (recursive_retrieve): Call slist_prepend instead of
|
||||||
|
slist_append.
|
||||||
|
(convert_all_links): Call slist_nreverse before iterating through
|
||||||
|
urls_html.
|
||||||
|
|
||||||
|
* utils.c (slist_prepend): New function.
|
||||||
|
(slist_nreverse): Ditto.
|
||||||
|
|
||||||
2000-11-20 Hrvoje Niksic <hniksic@arsdigita.com>
|
2000-11-20 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* http.c (check_end): Constify.
|
* http.c (check_end): Constify.
|
||||||
|
373
src/hash.c
373
src/hash.c
@ -5,8 +5,8 @@ This file is part of Wget.
|
|||||||
|
|
||||||
This program is free software; you can redistribute it and/or modify
|
This program is free software; you can redistribute it and/or modify
|
||||||
it under the terms of the GNU General Public License as published by
|
it under the terms of the GNU General Public License as published by
|
||||||
the Free Software Foundation; either version 2 of the License, or
|
the Free Software Foundation; either version 2 of the License, or (at
|
||||||
(at your option) any later version.
|
your option) any later version.
|
||||||
|
|
||||||
This program is distributed in the hope that it will be useful,
|
This program is distributed in the hope that it will be useful,
|
||||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
@ -34,19 +34,91 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
# define xrealloc realloc
|
# define xrealloc realloc
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* This file implements simple hash tables based on linear probing.
|
/* INTERFACE:
|
||||||
The hash table stores key-value pairs in a contiguous array. Both
|
|
||||||
key and value are void pointers that the hash and test functions
|
|
||||||
know how to handle.
|
|
||||||
|
|
||||||
Although Knuth & co. recommend double hashing over linear probing,
|
Hash tables are an implementation technique used to implement
|
||||||
we use the latter because it accesses array elements sequentially
|
mapping between objects. Provided a good hashing function is used,
|
||||||
in case of a collision, yielding better cache behaviour and
|
they guarantee constant-time access and storing of information.
|
||||||
ultimately in better speed. To avoid collision problems with
|
Duplicate keys are not allowed.
|
||||||
linear probing, we make sure that the table grows as soon as the
|
|
||||||
fullness/size ratio exceeds 75%. */
|
|
||||||
|
|
||||||
struct ht_pair {
|
The basics are all covered. hash_table_new creates a hash table,
|
||||||
|
and hash_table_destroy deletes it. hash_table_put establishes a
|
||||||
|
mapping between a key and a value. hash_table_get retrieves the
|
||||||
|
value that corresponds to a key. hash_table_exists queries whether
|
||||||
|
a key is stored in a table at all. hash_table_remove removes a
|
||||||
|
mapping that corresponds to a key. hash_table_map allows you to
|
||||||
|
map through all the entries in a hash table. hash_table_clear
|
||||||
|
clears all the entries from the hash table.
|
||||||
|
|
||||||
|
The number of mappings in a table is not limited, except by the
|
||||||
|
amount of memory. As you add new elements to a table, it regrows
|
||||||
|
as necessary. If you have an idea about how many elements you will
|
||||||
|
store, you can provide a hint to hash_table_new().
|
||||||
|
|
||||||
|
The hashing and equality functions are normally provided by the
|
||||||
|
user. For the special (and frequent) case of hashing strings, you
|
||||||
|
can use the pre-canned make_string_hash_table(), which provides the
|
||||||
|
string hashing function from the Dragon Book, and a string equality
|
||||||
|
wrapper around strcmp().
|
||||||
|
|
||||||
|
When specifying your own hash and test functions, make sure the
|
||||||
|
following holds true:
|
||||||
|
|
||||||
|
- The test function returns non-zero for keys that are considered
|
||||||
|
"equal", zero otherwise.
|
||||||
|
|
||||||
|
- The hash function returns a number that represents the
|
||||||
|
"distinctness" of the object. In more precise terms, it means
|
||||||
|
that for any two objects that test "equal" under the test
|
||||||
|
function, the hash function MUST produce the same result.
|
||||||
|
|
||||||
|
This does not mean that each distinct object must produce a
|
||||||
|
distinct value, only that non-distinct objects must produce the
|
||||||
|
same values! For instance, a hash function that returns 0 for
|
||||||
|
any given object is a perfectly valid (albeit extremely bad) hash
|
||||||
|
function.
|
||||||
|
|
||||||
|
The above stated rule is quite easy to enforce. For example, if
|
||||||
|
your testing function compares strings case-insensitively, all
|
||||||
|
your function needs to do is lower-case the string characters
|
||||||
|
before calculating a hash. That way you have easily guaranteed
|
||||||
|
that changes in case will not result in a different hash.
|
||||||
|
|
||||||
|
- (optional) Choose the hash function to get as good "spreading" as
|
||||||
|
possible. A good hash function will react to even a small change
|
||||||
|
in its input with a completely different resulting hash.
|
||||||
|
Finally, don't make your hash function extremely slow, because
|
||||||
|
you're then defeating the purpose of hashing.
|
||||||
|
|
||||||
|
Note that neither keys nor values are copied when inserted into the
|
||||||
|
hash table, so they must exist for the lifetime of the table. This
|
||||||
|
means that e.g. the use of static strings is OK, but objects with a
|
||||||
|
shorter life-time need to be copied (with strdup() or the like in
|
||||||
|
the case of strings) before being inserted. */
|
||||||
|
|
||||||
|
/* IMPLEMENTATION:
|
||||||
|
|
||||||
|
All the hash mappings (key-value pairs of pointers) are stored in a
|
||||||
|
contiguous array. The position of each mapping is determined by
|
||||||
|
applying the hash function to the key: location = hash(key) % size.
|
||||||
|
If two different keys end up on the same position, the collision is
|
||||||
|
resolved by placing the second mapping at the next empty place in
|
||||||
|
the array following the occupied place. This method of collision
|
||||||
|
resolution is called "linear probing".
|
||||||
|
|
||||||
|
There are more advanced collision resolution mechanisms (quadratic
|
||||||
|
probing, double hashing), but we don't use them because they
|
||||||
|
involve more non-sequential access to the array, and therefore
|
||||||
|
worse cache behavior. Linear probing works well as long as the
|
||||||
|
fullness/size ratio is kept below 75%. We make sure to regrow or
|
||||||
|
rehash the hash table whenever this threshold is exceeded.
|
||||||
|
|
||||||
|
Collisions make deletion tricky because finding collisions again
|
||||||
|
relies on new empty spots not being created. That's why
|
||||||
|
hash_table_remove usually only marks the spot as deleted rather than
|
||||||
|
really making it empty (it empties it only if the next spot is empty). */
|
||||||
|
|
||||||
|
struct mapping {
|
||||||
void *key;
|
void *key;
|
||||||
void *value;
|
void *value;
|
||||||
};
|
};
|
||||||
@ -60,13 +132,14 @@ struct hash_table {
|
|||||||
int count; /* number of non-empty, non-deleted
|
int count; /* number of non-empty, non-deleted
|
||||||
fields. */
|
fields. */
|
||||||
|
|
||||||
struct ht_pair *pairs;
|
struct mapping *mappings;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ENTRY_DELETED ((void *)0xdeadbeef)
|
#define ENTRY_DELETED ((void *)0xdeadbeef)
|
||||||
|
#define ENTRY_EMPTY NULL
|
||||||
|
|
||||||
#define DELETED_ENTRY_P(ptr) ((ptr) == ENTRY_DELETED)
|
#define DELETED_ENTRY_P(ptr) ((ptr) == ENTRY_DELETED)
|
||||||
#define EMPTY_ENTRY_P(ptr) ((ptr) == NULL)
|
#define EMPTY_ENTRY_P(ptr) ((ptr) == ENTRY_EMPTY)
|
||||||
|
|
||||||
/* Find a prime near, but greater than or equal to SIZE. */
|
/* Find a prime near, but greater than or equal to SIZE. */
|
||||||
|
|
||||||
@ -109,8 +182,8 @@ hash_table_new (int initial_size,
|
|||||||
ht->size = prime_size (initial_size);
|
ht->size = prime_size (initial_size);
|
||||||
ht->fullness = 0;
|
ht->fullness = 0;
|
||||||
ht->count = 0;
|
ht->count = 0;
|
||||||
ht->pairs = xmalloc (ht->size * sizeof (struct ht_pair));
|
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
return ht;
|
return ht;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,34 +192,74 @@ hash_table_new (int initial_size,
|
|||||||
void
|
void
|
||||||
hash_table_destroy (struct hash_table *ht)
|
hash_table_destroy (struct hash_table *ht)
|
||||||
{
|
{
|
||||||
free (ht->pairs);
|
free (ht->mappings);
|
||||||
free (ht);
|
free (ht);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* The heart of almost all functions in this file -- find the mapping
|
||||||
|
whose KEY is equal to key, using a linear probing loop. Returns
|
||||||
|
the offset of the mapping in ht->mappings. This should probably be
|
||||||
|
declared inline. */
|
||||||
|
|
||||||
|
static int
|
||||||
|
find_mapping (struct hash_table *ht, const void *key)
|
||||||
|
{
|
||||||
|
struct mapping *mappings = ht->mappings;
|
||||||
|
int size = ht->size;
|
||||||
|
int location = ht->hash_function (key) % size;
|
||||||
|
while (1)
|
||||||
|
{
|
||||||
|
struct mapping *mp = mappings + location;
|
||||||
|
void *mp_key = mp->key;
|
||||||
|
|
||||||
|
if (EMPTY_ENTRY_P (mp_key))
|
||||||
|
return -1;
|
||||||
|
else if (DELETED_ENTRY_P (mp_key)
|
||||||
|
|| !ht->test_function (key, mp_key))
|
||||||
|
{
|
||||||
|
if (++location == size)
|
||||||
|
location = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return location;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Get the value that corresponds to the key KEY in the hash table HT.
|
/* Get the value that corresponds to the key KEY in the hash table HT.
|
||||||
If no value is found, return NULL. Note that NULL is a legal value
|
If no value is found, return NULL. Note that NULL is a legal value
|
||||||
for value; if you are storing NULLs in your hash table, you can use
|
for value; if you are storing NULLs in your hash table, you can use
|
||||||
hash_table_exists to be sure that a (possibly NULL) value exists in
|
hash_table_exists to be sure that a (possibly NULL) value exists in
|
||||||
the table. */
|
the table. Or, you can use hash_table_get_pair instead of this
|
||||||
|
function. */
|
||||||
|
|
||||||
void *
|
void *
|
||||||
hash_table_get (struct hash_table *ht, const void *key)
|
hash_table_get (struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
int location = ht->hash_function (key) % ht->size;
|
int location = find_mapping (ht, key);
|
||||||
while (1)
|
if (location < 0)
|
||||||
|
return NULL;
|
||||||
|
else
|
||||||
|
return ht->mappings[location].value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Like hash_table_get, but writes out the pointers to both key and
|
||||||
|
value. Returns non-zero on success. */
|
||||||
|
|
||||||
|
int
|
||||||
|
hash_table_get_pair (struct hash_table *ht, const void *lookup_key,
|
||||||
|
void *orig_key, void *value)
|
||||||
|
{
|
||||||
|
int location = find_mapping (ht, lookup_key);
|
||||||
|
if (location < 0)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
{
|
{
|
||||||
struct ht_pair *the_pair = ht->pairs + location;
|
struct mapping *mp = ht->mappings + location;
|
||||||
if (EMPTY_ENTRY_P (the_pair->key))
|
if (orig_key)
|
||||||
return NULL;
|
*(void **)orig_key = mp->key;
|
||||||
else if (DELETED_ENTRY_P (the_pair->key)
|
if (value)
|
||||||
|| !ht->test_function (key, the_pair->key))
|
*(void **)value = mp->value;
|
||||||
{
|
return 1;
|
||||||
++location;
|
|
||||||
if (location == ht->size)
|
|
||||||
location = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return the_pair->value;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -155,39 +268,25 @@ hash_table_get (struct hash_table *ht, const void *key)
|
|||||||
int
|
int
|
||||||
hash_table_exists (struct hash_table *ht, const void *key)
|
hash_table_exists (struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
int location = ht->hash_function (key) % ht->size;
|
return find_mapping (ht, key) >= 0;
|
||||||
while (1)
|
|
||||||
{
|
|
||||||
struct ht_pair *the_pair = ht->pairs + location;
|
|
||||||
if (EMPTY_ENTRY_P (the_pair->key))
|
|
||||||
return 0;
|
|
||||||
else if (DELETED_ENTRY_P (the_pair->key)
|
|
||||||
|| !ht->test_function (key, the_pair->key))
|
|
||||||
{
|
|
||||||
++location;
|
|
||||||
if (location == ht->size)
|
|
||||||
location = 0;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX(i, j) (((i) >= (j)) ? (i) : (j))
|
#define MAX(i, j) (((i) >= (j)) ? (i) : (j))
|
||||||
|
|
||||||
/* Grow hash table HT as necessary, and rehash all the key-value
|
/* Grow hash table HT as necessary, and rehash all the key-value
|
||||||
pairs. */
|
mappings. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
grow_hash_table (struct hash_table *ht)
|
grow_hash_table (struct hash_table *ht)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct ht_pair *old_pairs = ht->pairs;
|
struct mapping *old_mappings = ht->mappings;
|
||||||
int old_count = ht->count; /* for assert() below */
|
int old_count = ht->count; /* for assert() below */
|
||||||
int old_size = ht->size;
|
int old_size = ht->size;
|
||||||
|
|
||||||
/* Normally, the idea is to double ht->size (and round it to next
|
/* To minimize the number of regrowths, we'd like to resize the hash
|
||||||
prime) on each regrow:
|
table exponentially. Normally, this would be done by doubling
|
||||||
|
ht->size (and round it to next prime) on each regrow:
|
||||||
|
|
||||||
ht->size = prime_size (ht->size * 2);
|
ht->size = prime_size (ht->size * 2);
|
||||||
|
|
||||||
@ -198,24 +297,28 @@ grow_hash_table (struct hash_table *ht)
|
|||||||
only because we don't want to actually shrink the table. (But
|
only because we don't want to actually shrink the table. (But
|
||||||
maybe that's wrong.) */
|
maybe that's wrong.) */
|
||||||
|
|
||||||
int needed_size = prime_size (ht->count * 2);
|
int needed_size = prime_size (ht->count * 3);
|
||||||
ht->size = MAX (old_size, needed_size);
|
ht->size = MAX (old_size, needed_size);
|
||||||
|
|
||||||
ht->pairs = xmalloc (ht->size * sizeof (struct ht_pair));
|
printf ("growing from %d to %d\n", old_size, ht->size);
|
||||||
memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
|
|
||||||
|
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
|
|
||||||
/* Need to reset these two; hash_table_put will reinitialize them. */
|
/* Need to reset these two; hash_table_put will reinitialize them. */
|
||||||
ht->fullness = 0;
|
ht->fullness = 0;
|
||||||
ht->count = 0;
|
ht->count = 0;
|
||||||
for (i = 0; i < old_size; i++)
|
for (i = 0; i < old_size; i++)
|
||||||
{
|
{
|
||||||
struct ht_pair *the_pair = old_pairs + i;
|
struct mapping *mp = old_mappings + i;
|
||||||
if (!EMPTY_ENTRY_P (the_pair->key)
|
void *mp_key = mp->key;
|
||||||
&& !DELETED_ENTRY_P (the_pair->key))
|
|
||||||
hash_table_put (ht, the_pair->key, the_pair->value);
|
if (!EMPTY_ENTRY_P (mp_key)
|
||||||
|
&& !DELETED_ENTRY_P (mp_key))
|
||||||
|
hash_table_put (ht, mp_key, mp->value);
|
||||||
}
|
}
|
||||||
assert (ht->count == old_count);
|
assert (ht->count == old_count);
|
||||||
free (old_pairs);
|
free (old_mappings);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Put VALUE in the hash table HT under the key KEY. This regrows the
|
/* Put VALUE in the hash table HT under the key KEY. This regrows the
|
||||||
@ -224,27 +327,34 @@ grow_hash_table (struct hash_table *ht)
|
|||||||
void
|
void
|
||||||
hash_table_put (struct hash_table *ht, const void *key, void *value)
|
hash_table_put (struct hash_table *ht, const void *key, void *value)
|
||||||
{
|
{
|
||||||
int location = ht->hash_function (key) % ht->size;
|
/* Cannot use find_mapping here because we treat deleted entries
|
||||||
|
specially. */
|
||||||
|
|
||||||
|
struct mapping *mappings = ht->mappings;
|
||||||
|
int size = ht->size;
|
||||||
|
int location = ht->hash_function (key) % size;
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
struct ht_pair *the_pair = ht->pairs + location;
|
struct mapping *mp = mappings + location;
|
||||||
if (EMPTY_ENTRY_P (the_pair->key))
|
void *mp_key = mp->key;
|
||||||
|
|
||||||
|
if (EMPTY_ENTRY_P (mp_key))
|
||||||
{
|
{
|
||||||
++ht->fullness;
|
++ht->fullness;
|
||||||
++ht->count;
|
++ht->count;
|
||||||
just_insert:
|
just_insert:
|
||||||
the_pair->key = (void *)key; /* const? */
|
mp->key = (void *)key; /* const? */
|
||||||
the_pair->value = value;
|
mp->value = value;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
else if (DELETED_ENTRY_P (the_pair->key))
|
else if (DELETED_ENTRY_P (mp_key))
|
||||||
{
|
{
|
||||||
/* We're replacing a deleted entry, so ht->count gets
|
/* We're replacing a deleted entry, so ht->count gets
|
||||||
increased, but ht->fullness remains unchanged. */
|
increased, but ht->fullness remains unchanged. */
|
||||||
++ht->count;
|
++ht->count;
|
||||||
goto just_insert;
|
goto just_insert;
|
||||||
}
|
}
|
||||||
else if (ht->test_function (key, the_pair->key))
|
else if (ht->test_function (key, mp_key))
|
||||||
{
|
{
|
||||||
/* We're replacing an existing entry, so ht->count and
|
/* We're replacing an existing entry, so ht->count and
|
||||||
ht->fullness remain unchanged. */
|
ht->fullness remain unchanged. */
|
||||||
@ -252,8 +362,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
++location;
|
if (++location == size)
|
||||||
if (location == ht->size)
|
|
||||||
location = 0;
|
location = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -267,60 +376,79 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
|
|||||||
int
|
int
|
||||||
hash_table_remove (struct hash_table *ht, const void *key)
|
hash_table_remove (struct hash_table *ht, const void *key)
|
||||||
{
|
{
|
||||||
int location = ht->hash_function (key) % ht->size;
|
int location = find_mapping (ht, key);
|
||||||
while (1)
|
if (location < 0)
|
||||||
|
return 0;
|
||||||
|
else
|
||||||
{
|
{
|
||||||
struct ht_pair *the_pair = ht->pairs + location;
|
struct mapping *mappings = ht->mappings;
|
||||||
if (EMPTY_ENTRY_P (the_pair->key))
|
struct mapping *mp = mappings + location;
|
||||||
return 0;
|
/* We don't really remove an entry from the hash table: we just
|
||||||
else if (DELETED_ENTRY_P (the_pair->key)
|
mark it as deleted. This is because there may be other
|
||||||
|| !ht->test_function (key, the_pair->key))
|
entries located after this entry whose hash points to a
|
||||||
|
location before this entry. (Example: keys A, B and C have
|
||||||
|
the same hash. If you were to really *delete* B from the
|
||||||
|
table, C could no longer be found.) */
|
||||||
|
|
||||||
|
/* Optimization addendum: if the mapping that follows LOCATION
|
||||||
|
is already empty, that is a sure sign that nobody depends on
|
||||||
|
LOCATION being non-empty. (This is because we're using
|
||||||
|
linear probing. This would not be the case with double
|
||||||
|
hashing.) In that case, we may safely delete the mapping. */
|
||||||
|
|
||||||
|
/* This could be generalized so that all the non-empty
|
||||||
|
locations following LOCATION are simply shifted leftward. It
|
||||||
|
would make deletion a bit slower, but it would remove the
|
||||||
|
ugly DELETED_ENTRY_P checks from all the rest of the code,
|
||||||
|
making the whole thing faster. */
|
||||||
|
int location_after = (location + 1) == ht->size ? 0 : location + 1;
|
||||||
|
struct mapping *mp_after = mappings + location_after;
|
||||||
|
|
||||||
|
if (EMPTY_ENTRY_P (mp_after->key))
|
||||||
{
|
{
|
||||||
++location;
|
mp->key = ENTRY_EMPTY;
|
||||||
if (location == ht->size)
|
--ht->fullness;
|
||||||
location = 0;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
mp->key = ENTRY_DELETED;
|
||||||
/* We don't really remove an entry from the hash table: we
|
|
||||||
just mark it as deleted. This is because there may be
|
|
||||||
other entries located after this entry whose hash number
|
|
||||||
points to a location before this entry. (Example: keys
|
|
||||||
A, B and C have the same hash. If you were to really
|
|
||||||
*delete* B from the table, C could no longer be found.)
|
|
||||||
|
|
||||||
As an optimization, it might be worthwhile to check
|
--ht->count;
|
||||||
whether the immediately preceding entry is empty and, if
|
return 1;
|
||||||
so, really delete the pair (set it to empty and decrease
|
|
||||||
the fullness along with the count). I *think* it should
|
|
||||||
be safe. */
|
|
||||||
the_pair->key = ENTRY_DELETED;
|
|
||||||
--ht->count;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Clear HT of all entries. After calling this function, the count
|
||||||
|
and the fullness of the hash table will be zero. The size will
|
||||||
|
remain unchanged. */
|
||||||
|
|
||||||
void
|
void
|
||||||
hash_table_clear (struct hash_table *ht)
|
hash_table_clear (struct hash_table *ht)
|
||||||
{
|
{
|
||||||
memset (ht->pairs, '\0', ht->size * sizeof (struct ht_pair));
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
ht->fullness = 0;
|
ht->fullness = 0;
|
||||||
ht->count = 0;
|
ht->count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
|
||||||
|
called with three arguments: the key, the value, and the CLOSURE.
|
||||||
|
Don't add or remove entries from HT while hash_table_map is being
|
||||||
|
called, or strange things may happen. */
|
||||||
|
|
||||||
void
|
void
|
||||||
hash_table_map (struct hash_table *ht,
|
hash_table_map (struct hash_table *ht,
|
||||||
int (*mapfun) (void *, void *, void *),
|
int (*mapfun) (void *, void *, void *),
|
||||||
void *closure)
|
void *closure)
|
||||||
{
|
{
|
||||||
|
struct mapping *mappings = ht->mappings;
|
||||||
int i;
|
int i;
|
||||||
for (i = 0; i < ht->size; i++)
|
for (i = 0; i < ht->size; i++)
|
||||||
{
|
{
|
||||||
struct ht_pair *the_pair = ht->pairs + i;
|
struct mapping *mp = mappings + i;
|
||||||
if (!EMPTY_ENTRY_P (the_pair->key)
|
void *mp_key = mp->key;
|
||||||
&& !DELETED_ENTRY_P (the_pair->key))
|
|
||||||
if (mapfun (the_pair->key, the_pair->value, closure))
|
if (!EMPTY_ENTRY_P (mp_key)
|
||||||
|
&& !DELETED_ENTRY_P (mp_key))
|
||||||
|
if (mapfun (mp_key, mp->value, closure))
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -345,12 +473,33 @@ string_hash (const void *sv)
|
|||||||
return h;
|
return h;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* If I ever need it: hashing of integers. */
|
||||||
|
|
||||||
|
unsigned int
|
||||||
|
inthash (unsigned int key)
|
||||||
|
{
|
||||||
|
key += (key << 12);
|
||||||
|
key ^= (key >> 22);
|
||||||
|
key += (key << 4);
|
||||||
|
key ^= (key >> 9);
|
||||||
|
key += (key << 10);
|
||||||
|
key ^= (key >> 2);
|
||||||
|
key += (key << 7);
|
||||||
|
key ^= (key >> 12);
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
int
|
int
|
||||||
string_cmp (const void *s1, const void *s2)
|
string_cmp (const void *s1, const void *s2)
|
||||||
{
|
{
|
||||||
return !strcmp ((const char *)s1, (const char *)s2);
|
return !strcmp ((const char *)s1, (const char *)s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return a hash table of initial size INITIAL_SIZE suitable to use
|
||||||
|
strings as keys. */
|
||||||
|
|
||||||
struct hash_table *
|
struct hash_table *
|
||||||
make_string_hash_table (int initial_size)
|
make_string_hash_table (int initial_size)
|
||||||
{
|
{
|
||||||
@ -364,7 +513,7 @@ make_string_hash_table (int initial_size)
|
|||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
int
|
int
|
||||||
print_hash_table_mapper (const void *key, void *value, void *count)
|
print_hash_table_mapper (void *key, void *value, void *count)
|
||||||
{
|
{
|
||||||
++*(int *)count;
|
++*(int *)count;
|
||||||
printf ("%s: %s\n", (const char *)key, (char *)value);
|
printf ("%s: %s\n", (const char *)key, (char *)value);
|
||||||
@ -390,12 +539,24 @@ main (void)
|
|||||||
if (len <= 1)
|
if (len <= 1)
|
||||||
continue;
|
continue;
|
||||||
line[--len] = '\0';
|
line[--len] = '\0';
|
||||||
hash_table_put (ht, strdup (line), "here I am!");
|
if (!hash_table_exists (ht, line))
|
||||||
if (len % 2)
|
hash_table_put (ht, strdup (line), "here I am!");
|
||||||
hash_table_remove (ht, line);
|
#if 1
|
||||||
|
if (len % 3)
|
||||||
|
{
|
||||||
|
char *line_copy;
|
||||||
|
if (hash_table_get_pair (ht, line, &line_copy, NULL))
|
||||||
|
{
|
||||||
|
hash_table_remove (ht, line);
|
||||||
|
free (line_copy);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
print_hash (ht);
|
|
||||||
#if 0
|
#if 0
|
||||||
|
print_hash (ht);
|
||||||
|
#endif
|
||||||
|
#if 1
|
||||||
printf ("%d %d %d\n", ht->count, ht->fullness, ht->size);
|
printf ("%d %d %d\n", ht->count, ht->fullness, ht->size);
|
||||||
#endif
|
#endif
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -348,12 +348,11 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
|||||||
newel->size = tag->attrs[attrid].value_raw_size;
|
newel->size = tag->attrs[attrid].value_raw_size;
|
||||||
|
|
||||||
/* A URL is relative if the host and protocol are not named, and the
|
/* A URL is relative if the host and protocol are not named, and the
|
||||||
name does not start with `/'.
|
name does not start with `/'. */
|
||||||
#### This logic might need some rethinking. */
|
|
||||||
if (no_proto && *link_uri != '/')
|
if (no_proto && *link_uri != '/')
|
||||||
newel->flags |= (URELATIVE | UNOPROTO);
|
newel->link_relative_p = 1;
|
||||||
else if (no_proto)
|
else if (!no_proto)
|
||||||
newel->flags |= UNOPROTO;
|
newel->link_complete_p = 1;
|
||||||
|
|
||||||
if (closure->tail)
|
if (closure->tail)
|
||||||
{
|
{
|
||||||
|
19
src/http.c
19
src/http.c
@ -239,18 +239,13 @@ static int
|
|||||||
http_process_type (const char *hdr, void *arg)
|
http_process_type (const char *hdr, void *arg)
|
||||||
{
|
{
|
||||||
char **result = (char **)arg;
|
char **result = (char **)arg;
|
||||||
char *p;
|
/* Locate P on `;' or the terminating zero, whichever comes first. */
|
||||||
|
const char *p = strchr (hdr, ';');
|
||||||
p = strrchr (hdr, ';');
|
if (!p)
|
||||||
if (p)
|
p = hdr + strlen (hdr);
|
||||||
{
|
while (p > hdr && ISSPACE (*(p - 1)))
|
||||||
int len = p - hdr;
|
--p;
|
||||||
*result = (char *)xmalloc (len + 1);
|
*result = strdupdelim (hdr, p);
|
||||||
memcpy (*result, hdr, len);
|
|
||||||
(*result)[len] = '\0';
|
|
||||||
}
|
|
||||||
else
|
|
||||||
*result = xstrdup (hdr);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
71
src/recur.c
71
src/recur.c
@ -168,7 +168,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
string_set_add (undesirable_urls, u->url);
|
string_set_add (undesirable_urls, u->url);
|
||||||
hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (u->url));
|
hash_table_put (dl_file_url_map, xstrdup (file), xstrdup (u->url));
|
||||||
hash_table_put (dl_url_file_map, xstrdup (u->url), xstrdup (file));
|
hash_table_put (dl_url_file_map, xstrdup (u->url), xstrdup (file));
|
||||||
urls_html = slist_append (urls_html, file);
|
urls_html = slist_prepend (urls_html, file);
|
||||||
if (opt.no_parent)
|
if (opt.no_parent)
|
||||||
base_dir = xstrdup (u->dir); /* Set the base dir. */
|
base_dir = xstrdup (u->dir); /* Set the base dir. */
|
||||||
/* Set the canonical this_url to be sent as referer. This
|
/* Set the canonical this_url to be sent as referer. This
|
||||||
@ -289,7 +289,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
/* If it is absolute link and they are not followed, chuck it
|
/* If it is absolute link and they are not followed, chuck it
|
||||||
out. */
|
out. */
|
||||||
if (!inl && u->proto != URLFTP)
|
if (!inl && u->proto != URLFTP)
|
||||||
if (opt.relative_only && !(cur_url->flags & URELATIVE))
|
if (opt.relative_only && !cur_url->link_relative_p)
|
||||||
{
|
{
|
||||||
DEBUGP (("It doesn't really look like a relative link.\n"));
|
DEBUGP (("It doesn't really look like a relative link.\n"));
|
||||||
string_set_add (undesirable_urls, constr);
|
string_set_add (undesirable_urls, constr);
|
||||||
@ -479,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
xstrdup (constr), xstrdup (filename));
|
xstrdup (constr), xstrdup (filename));
|
||||||
/* If the URL is HTML, note it. */
|
/* If the URL is HTML, note it. */
|
||||||
if (dt & TEXTHTML)
|
if (dt & TEXTHTML)
|
||||||
urls_html = slist_append (urls_html, filename);
|
urls_html = slist_prepend (urls_html, filename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* If there was no error, and the type is text/html, parse
|
/* If there was no error, and the type is text/html, parse
|
||||||
@ -514,7 +514,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
store the local filename. */
|
store the local filename. */
|
||||||
if (opt.convert_links && (dt & RETROKF) && (filename != NULL))
|
if (opt.convert_links && (dt & RETROKF) && (filename != NULL))
|
||||||
{
|
{
|
||||||
cur_url->flags |= UABS2REL;
|
cur_url->convert = CO_CONVERT_TO_RELATIVE;
|
||||||
cur_url->local_name = xstrdup (filename);
|
cur_url->local_name = xstrdup (filename);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -544,12 +544,13 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Simple calls to convert_links will often fail because only the
|
/* convert_links() is called from recursive_retrieve() after we're
|
||||||
downloaded files are converted, and Wget cannot know which files
|
done with an HTML file. This call to convert_links is not complete
|
||||||
will be converted in the future. So, if we have file fileone.html
|
because it converts only the downloaded files, and Wget cannot know
|
||||||
with:
|
which files will be downloaded afterwards. So, if we have file
|
||||||
|
fileone.html with:
|
||||||
|
|
||||||
<a href=/c/something.gif>
|
<a href="/c/something.gif">
|
||||||
|
|
||||||
and /c/something.gif was not downloaded because it exceeded the
|
and /c/something.gif was not downloaded because it exceeded the
|
||||||
recursion depth, the reference will *not* be changed.
|
recursion depth, the reference will *not* be changed.
|
||||||
@ -572,14 +573,15 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
void
|
void
|
||||||
convert_all_links (void)
|
convert_all_links (void)
|
||||||
{
|
{
|
||||||
uerr_t res;
|
|
||||||
urlpos *l1, *urls;
|
|
||||||
struct urlinfo *u;
|
|
||||||
slist *html;
|
slist *html;
|
||||||
|
|
||||||
|
/* Destructively reverse urls_html to get it in the right order.
|
||||||
|
recursive_retrieve() used slist_prepend() consistently. */
|
||||||
|
urls_html = slist_nreverse (urls_html);
|
||||||
|
|
||||||
for (html = urls_html; html; html = html->next)
|
for (html = urls_html; html; html = html->next)
|
||||||
{
|
{
|
||||||
int meta_disallow_follow;
|
urlpos *urls, *cur_url;
|
||||||
char *url;
|
char *url;
|
||||||
|
|
||||||
DEBUGP (("Rescanning %s\n", html->string));
|
DEBUGP (("Rescanning %s\n", html->string));
|
||||||
@ -591,22 +593,17 @@ convert_all_links (void)
|
|||||||
else
|
else
|
||||||
DEBUGP (("I cannot find the corresponding URL.\n"));
|
DEBUGP (("I cannot find the corresponding URL.\n"));
|
||||||
/* Parse the HTML file... */
|
/* Parse the HTML file... */
|
||||||
urls = get_urls_html (html->string, url, FALSE, &meta_disallow_follow);
|
urls = get_urls_html (html->string, url, FALSE, NULL);
|
||||||
if (opt.use_robots && meta_disallow_follow)
|
/* We don't respect meta_disallow_follow here because, even if
|
||||||
{
|
the file is not followed, we might still want to convert the
|
||||||
/* The META tag says we are not to follow this file.
|
links that have been followed from other files. */
|
||||||
Respect that. */
|
for (cur_url = urls; cur_url; cur_url = cur_url->next)
|
||||||
free_urlpos (urls);
|
|
||||||
urls = NULL;
|
|
||||||
}
|
|
||||||
if (!urls)
|
|
||||||
continue;
|
|
||||||
for (l1 = urls; l1; l1 = l1->next)
|
|
||||||
{
|
{
|
||||||
char *local_name;
|
char *local_name;
|
||||||
|
|
||||||
/* The URL must be in canonical form to be compared. */
|
/* The URL must be in canonical form to be compared. */
|
||||||
u = newurl ();
|
struct urlinfo *u = newurl ();
|
||||||
res = parseurl (l1->url, u, 0);
|
uerr_t res = parseurl (cur_url->url, u, 0);
|
||||||
if (res != URLOK)
|
if (res != URLOK)
|
||||||
{
|
{
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
@ -617,20 +614,28 @@ convert_all_links (void)
|
|||||||
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
|
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
|
||||||
local_name = hash_table_get (dl_url_file_map, u->url);
|
local_name = hash_table_get (dl_url_file_map, u->url);
|
||||||
if (local_name)
|
if (local_name)
|
||||||
DEBUGP (("%s flagged for conversion, local %s\n",
|
DEBUGP (("%s marked for conversion, local %s\n",
|
||||||
u->url, local_name));
|
u->url, local_name));
|
||||||
/* Clear the flags. */
|
|
||||||
l1->flags &= ~ (UABS2REL | UREL2ABS);
|
|
||||||
/* Decide on the conversion direction. */
|
/* Decide on the conversion direction. */
|
||||||
if (local_name)
|
if (local_name)
|
||||||
{
|
{
|
||||||
l1->flags |= UABS2REL;
|
/* We've downloaded this URL. Convert it to relative
|
||||||
l1->local_name = xstrdup (local_name);
|
form. We do this even if the URL already is in
|
||||||
|
relative form, because our directory structure may
|
||||||
|
not be identical to that on the server (think `-nd',
|
||||||
|
`--cut-dirs', etc.) */
|
||||||
|
cur_url->convert = CO_CONVERT_TO_RELATIVE;
|
||||||
|
cur_url->local_name = xstrdup (local_name);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
l1->flags |= UREL2ABS;
|
/* We haven't downloaded this URL. If it's not already
|
||||||
l1->local_name = NULL;
|
complete (including a full host name), convert it to
|
||||||
|
that form, so it can be reached while browsing this
|
||||||
|
HTML locally. */
|
||||||
|
if (!cur_url->link_complete_p)
|
||||||
|
cur_url->convert = CO_CONVERT_TO_COMPLETE;
|
||||||
|
cur_url->local_name = NULL;
|
||||||
}
|
}
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
}
|
}
|
||||||
|
22
src/url.c
22
src/url.c
@ -1313,6 +1313,8 @@ convert_links (const char *file, urlpos *l)
|
|||||||
char *p;
|
char *p;
|
||||||
downloaded_file_t downloaded_file_return;
|
downloaded_file_t downloaded_file_return;
|
||||||
|
|
||||||
|
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
|
||||||
|
|
||||||
{
|
{
|
||||||
/* First we do a "dry run": go through the list L and see whether
|
/* First we do a "dry run": go through the list L and see whether
|
||||||
any URL needs to be converted in the first place. If not, just
|
any URL needs to be converted in the first place. If not, just
|
||||||
@ -1320,18 +1322,15 @@ convert_links (const char *file, urlpos *l)
|
|||||||
int count = 0;
|
int count = 0;
|
||||||
urlpos *dry = l;
|
urlpos *dry = l;
|
||||||
for (dry = l; dry; dry = dry->next)
|
for (dry = l; dry; dry = dry->next)
|
||||||
if (dry->flags & (UABS2REL | UREL2ABS))
|
if (dry->convert != CO_NOCONVERT)
|
||||||
++count;
|
++count;
|
||||||
if (!count)
|
if (!count)
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("Nothing to do while converting %s.\n"),
|
logputs (LOG_VERBOSE, _("nothing to do.\n"));
|
||||||
file);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
logprintf (LOG_VERBOSE, _("Converting %s... "), file);
|
|
||||||
|
|
||||||
fm = read_file (file);
|
fm = read_file (file);
|
||||||
if (!fm)
|
if (!fm)
|
||||||
{
|
{
|
||||||
@ -1376,10 +1375,9 @@ convert_links (const char *file, urlpos *l)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
/* If the URL is not to be converted, skip it. */
|
/* If the URL is not to be converted, skip it. */
|
||||||
if (!(l->flags & (UABS2REL | UREL2ABS)))
|
if (l->convert == CO_NOCONVERT)
|
||||||
{
|
{
|
||||||
DEBUGP (("Skipping %s at position %d (flags %d).\n", l->url,
|
DEBUGP (("Skipping %s at position %d.\n", l->url, l->pos));
|
||||||
l->pos, l->flags));
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1387,7 +1385,7 @@ convert_links (const char *file, urlpos *l)
|
|||||||
quote, to the outfile. */
|
quote, to the outfile. */
|
||||||
fwrite (p, 1, url_start - p, fp);
|
fwrite (p, 1, url_start - p, fp);
|
||||||
p = url_start;
|
p = url_start;
|
||||||
if (l->flags & UABS2REL)
|
if (l->convert == CO_CONVERT_TO_RELATIVE)
|
||||||
{
|
{
|
||||||
/* Convert absolute URL to relative. */
|
/* Convert absolute URL to relative. */
|
||||||
char *newname = construct_relative (file, l->local_name);
|
char *newname = construct_relative (file, l->local_name);
|
||||||
@ -1396,11 +1394,11 @@ convert_links (const char *file, urlpos *l)
|
|||||||
p += l->size - 1;
|
p += l->size - 1;
|
||||||
putc (*p, fp); /* close quote */
|
putc (*p, fp); /* close quote */
|
||||||
++p;
|
++p;
|
||||||
DEBUGP (("ABS2REL: %s to %s at position %d in %s.\n",
|
DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
|
||||||
l->url, newname, l->pos, file));
|
l->url, newname, l->pos, file));
|
||||||
free (newname);
|
free (newname);
|
||||||
}
|
}
|
||||||
else if (l->flags & UREL2ABS)
|
else if (l->convert == CO_CONVERT_TO_COMPLETE)
|
||||||
{
|
{
|
||||||
/* Convert the link to absolute URL. */
|
/* Convert the link to absolute URL. */
|
||||||
char *newlink = l->url;
|
char *newlink = l->url;
|
||||||
@ -1409,7 +1407,7 @@ convert_links (const char *file, urlpos *l)
|
|||||||
p += l->size - 1;
|
p += l->size - 1;
|
||||||
putc (*p, fp); /* close quote */
|
putc (*p, fp); /* close quote */
|
||||||
++p;
|
++p;
|
||||||
DEBUGP (("REL2ABS: <something> to %s at position %d in %s.\n",
|
DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
|
||||||
newlink, l->pos, file));
|
newlink, l->pos, file));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
34
src/url.h
34
src/url.h
@ -44,23 +44,35 @@ struct urlinfo
|
|||||||
document */
|
document */
|
||||||
};
|
};
|
||||||
|
|
||||||
enum uflags
|
enum convert_options {
|
||||||
{
|
CO_NOCONVERT = 0, /* don't convert this URL */
|
||||||
URELATIVE = 0x0001, /* Is URL relative? */
|
CO_CONVERT_TO_RELATIVE, /* convert to relative, e.g. to
|
||||||
UNOPROTO = 0x0002, /* Is URL without a protocol? */
|
"../../otherdir/foo.gif" */
|
||||||
UABS2REL = 0x0004, /* Convert absolute to relative? */
|
CO_CONVERT_TO_COMPLETE /* convert to absolute, e.g. to
|
||||||
UREL2ABS = 0x0008 /* Convert relative to absolute? */
|
"http://orighost/somedir/bar.jpg". */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* A structure that defines the whereabouts of a URL, i.e. its
|
/* A structure that defines the whereabouts of a URL, i.e. its
|
||||||
position in an HTML document, etc. */
|
position in an HTML document, etc. */
|
||||||
|
|
||||||
typedef struct _urlpos
|
typedef struct _urlpos
|
||||||
{
|
{
|
||||||
char *url; /* URL */
|
char *url; /* linked URL, after it has been
|
||||||
char *local_name; /* Local file to which it was saved */
|
merged with the base */
|
||||||
enum uflags flags; /* Various flags */
|
char *local_name; /* Local file to which it was saved */
|
||||||
int pos, size; /* Relative position in the buffer */
|
|
||||||
struct _urlpos *next; /* Next struct in list */
|
/* Information about the original link: */
|
||||||
|
int link_relative_p; /* was the link relative? */
|
||||||
|
int link_complete_p; /* was the link complete (with the
|
||||||
|
host name, etc.) */
|
||||||
|
|
||||||
|
/* Conversion requirements: */
|
||||||
|
enum convert_options convert; /* is conversion required? */
|
||||||
|
|
||||||
|
/* URL's position in the buffer. */
|
||||||
|
int pos, size;
|
||||||
|
|
||||||
|
struct _urlpos *next; /* Next struct in list */
|
||||||
} urlpos;
|
} urlpos;
|
||||||
|
|
||||||
/* downloaded_file() takes a parameter of this type and returns this type. */
|
/* downloaded_file() takes a parameter of this type and returns this type. */
|
||||||
|
57
src/utils.c
57
src/utils.c
@ -931,7 +931,19 @@ merge_vecs (char **v1, char **v2)
|
|||||||
This used to also be used for searching, but now we have hash
|
This used to also be used for searching, but now we have hash
|
||||||
tables for that. */
|
tables for that. */
|
||||||
|
|
||||||
/* Append an element to the list. */
|
/* It's a shame that these simple things like linked lists and hash
|
||||||
|
tables (see hash.c) need to be implemented over and over again. It
|
||||||
|
would be nice to be able to use the routines from glib -- see
|
||||||
|
www.gtk.org for details. However, that would make Wget depend on
|
||||||
|
glib, and I want to avoid dependencies to external libraries for
|
||||||
|
reasons of convenience and portability (I suspect Wget is more
|
||||||
|
portable than anything ever written for Gnome). */
|
||||||
|
|
||||||
|
/* Append an element to the list. If the list has a huge number of
|
||||||
|
elements, this can get slow because it has to find the list's
|
||||||
|
ending. If you think you have to call slist_append in a loop,
|
||||||
|
think about calling slist_prepend() followed by slist_nreverse(). */
|
||||||
|
|
||||||
slist *
|
slist *
|
||||||
slist_append (slist *l, const char *s)
|
slist_append (slist *l, const char *s)
|
||||||
{
|
{
|
||||||
@ -950,6 +962,33 @@ slist_append (slist *l, const char *s)
|
|||||||
return beg;
|
return beg;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Prepend S to the list. Unlike slist_append(), this is O(1). */
|
||||||
|
|
||||||
|
slist *
slist_prepend (slist *l, const char *s)
{
  /* Allocate the cell that becomes the new first element.  The old
     list L (which may be NULL) is hooked in behind it, and the cell
     stores the result of xstrdup (S) rather than S itself.  */
  slist *head = (slist *) xmalloc (sizeof (slist));

  head->next = l;
  head->string = xstrdup (s);

  /* The caller must use the returned pointer as the list from now
     on; L no longer points at the first element.  */
  return head;
}
|
||||||
|
|
||||||
|
/* Destructively reverse L. */
|
||||||
|
|
||||||
|
slist *
slist_nreverse (slist *l)
{
  /* REVERSED accumulates the cells already processed, newest first.
     Each pass detaches the current cell from the input and pushes it
     onto the front of REVERSED, so no cells are allocated or freed;
     only the `next' links are rewritten.  */
  slist *reversed = NULL;
  slist *cell, *rest;

  for (cell = l; cell != NULL; cell = rest)
    {
      /* Remember the remainder before the link is overwritten.  */
      rest = cell->next;
      cell->next = reversed;
      reversed = cell;
    }

  /* For an empty list this is NULL; otherwise it is what used to be
     the last cell of L.  */
  return reversed;
}
|
||||||
|
|
||||||
/* Is there a specific entry in the list? */
|
/* Is there a specific entry in the list? */
|
||||||
int
|
int
|
||||||
slist_contains (slist *l, const char *s)
|
slist_contains (slist *l, const char *s)
|
||||||
@ -964,11 +1003,9 @@ slist_contains (slist *l, const char *s)
|
|||||||
void
|
void
|
||||||
slist_free (slist *l)
|
slist_free (slist *l)
|
||||||
{
|
{
|
||||||
slist *n;
|
|
||||||
|
|
||||||
while (l)
|
while (l)
|
||||||
{
|
{
|
||||||
n = l->next;
|
slist *n = l->next;
|
||||||
free (l->string);
|
free (l->string);
|
||||||
free (l);
|
free (l);
|
||||||
l = n;
|
l = n;
|
||||||
@ -983,13 +1020,21 @@ slist_free (slist *l)
|
|||||||
void
string_set_add (struct hash_table *ht, const char *s)
{
  /* Insert S only when it is not a member yet.  Skipping the insert
     for an existing element means we never have to free() the old
     key and strdup() a replacement.  */
  if (!hash_table_exists (ht, s))
    {
      /* The key is a fresh copy made with xstrdup().  The value is
         the string literal "1": a clear arbitrary value that consumes
         no extra memory, because the pointer to the same "1" is
         shared by all the key-value pairs in all `set' hash
         tables.  */
      hash_table_put (ht, xstrdup (s), "1");
    }
}
|
||||||
|
|
||||||
|
/* Synonym for hash_table_exists... */
|
||||||
|
|
||||||
int
|
int
|
||||||
string_set_exists (struct hash_table *ht, const char *s)
|
string_set_exists (struct hash_table *ht, const char *s)
|
||||||
{
|
{
|
||||||
|
@ -67,6 +67,8 @@ void read_file_free PARAMS ((struct file_memory *));
|
|||||||
void free_vec PARAMS ((char **));
|
void free_vec PARAMS ((char **));
|
||||||
char **merge_vecs PARAMS ((char **, char **));
|
char **merge_vecs PARAMS ((char **, char **));
|
||||||
slist *slist_append PARAMS ((slist *, const char *));
|
slist *slist_append PARAMS ((slist *, const char *));
|
||||||
|
slist *slist_prepend PARAMS ((slist *, const char *));
|
||||||
|
slist *slist_nreverse PARAMS ((slist *));
|
||||||
int slist_contains PARAMS ((slist *, const char *));
|
int slist_contains PARAMS ((slist *, const char *));
|
||||||
void slist_free PARAMS ((slist *));
|
void slist_free PARAMS ((slist *));
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user