diff --git a/src/ChangeLog b/src/ChangeLog index a2411375..5160c558 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,38 @@ +2005-03-20 Hrvoje Niksic + + * url.c (unescape_single_char): New function. + (url_escape_dir): Use it to unescape slashes in directory + components. + (url_string): Escape unsafe chars in host name, except for the ':' + charaters, which can appear in IPv6 addresses. + + * main.c (main): Don't access the cookie jar directly. + + * log.c (escnonprint_internal): Correctly calculate the needed + string size. Don't forget the buffer's new size after having + reallocated it. + (log_cleanup): New function. Free the escnonprint ring data. + + * init.c (cleanup): Don't free the cookie jar explicitly, it is + now done by http_cleanup. + (cleanup): opt.user_headers is now a vector, free it with + free_vec. + + * http.c (gethttp): Make sure to free the request data, the status + message, and the response data before returning from the function. + (save_cookies): New function. + (http_cleanup): Free the cookie jar here. + + * hash.c: Renamed string_hash to hash_string and ptrhash to + hash_pointer. Exported hash_pointer. + + * xmalloc.c: Organized malloc_table (previously malloc_debug) as a + simple EQ hash table. register_ptr and unregister_ptr are now of + O(1) complexity. + + * xmalloc.c: Renamed "*_debug" to debugging_* and "*_real" to + checking_*. + 2005-03-12 Hrvoje Niksic * utils.c (debug_test_md5): Moved to gen-md5.c. diff --git a/src/convert.h b/src/convert.h index fff8410f..e9ca0401 100644 --- a/src/convert.h +++ b/src/convert.h @@ -96,5 +96,6 @@ void register_redirection PARAMS ((const char *, const char *)); void register_html PARAMS ((const char *, const char *)); void register_delete_file PARAMS ((const char *)); void convert_all_links PARAMS ((void)); +void convert_cleanup PARAMS ((void)); #endif /* CONVERT_H */ diff --git a/src/hash.c b/src/hash.c index 195e23ea..c9415cdf 100644 --- a/src/hash.c +++ b/src/hash.c @@ -244,8 +244,7 @@ prime_size (int size, int *prime_offset) return 0; } -static unsigned long ptrhash PARAMS ((const void *)); -static int ptrcmp PARAMS ((const void *, const void *)); +static int cmp_pointer PARAMS ((const void *, const void *)); /* Create a hash table with hash function HASH_FUNCTION and test function TEST_FUNCTION. The table is empty (its count is 0), but @@ -279,8 +278,8 @@ hash_table_new (int items, int size; struct hash_table *ht = xnew (struct hash_table); - ht->hash_function = hash_function ? hash_function : ptrhash; - ht->test_function = test_function ? test_function : ptrcmp; + ht->hash_function = hash_function ? hash_function : hash_pointer; + ht->test_function = test_function ? test_function : cmp_pointer; /* If the size of struct hash_table ever becomes a concern, this field can go. (Wget doesn't create many hashes.) */ @@ -596,8 +595,8 @@ hash_table_count (const struct hash_table *ht) We used to use the popular hash function from the Dragon Book, but this one seems to perform much better. */ -unsigned long -string_hash (const void *key) +static unsigned long +hash_string (const void *key) { const char *p = key; unsigned int h = *p; @@ -611,8 +610,8 @@ string_hash (const void *key) /* Frontend for strcmp usable for hash tables. */ -int -string_cmp (const void *s1, const void *s2) +static int +cmp_string (const void *s1, const void *s2) { return !strcmp ((const char *)s1, (const char *)s2); } @@ -623,7 +622,7 @@ string_cmp (const void *s1, const void *s2) struct hash_table * make_string_hash_table (int items) { - return hash_table_new (items, string_hash, string_cmp); + return hash_table_new (items, hash_string, cmp_string); } /* @@ -632,10 +631,10 @@ make_string_hash_table (int items) * */ -/* Like string_hash, but produce the same hash regardless of the case. */ +/* Like hash_string, but produce the same hash regardless of the case. */ static unsigned long -string_hash_nocase (const void *key) +hash_string_nocase (const void *key) { const char *p = key; unsigned int h = TOLOWER (*p); @@ -661,7 +660,7 @@ string_cmp_nocase (const void *s1, const void *s2) struct hash_table * make_nocase_string_hash_table (int items) { - return hash_table_new (items, string_hash_nocase, string_cmp_nocase); + return hash_table_new (items, hash_string_nocase, string_cmp_nocase); } /* Hashing of numeric values, such as pointers and integers. @@ -671,8 +670,8 @@ make_nocase_string_hash_table (int items) spreading of values and doesn't need to know the hash table size to work (unlike the very popular Knuth's multiplication hash). */ -static unsigned long -ptrhash (const void *ptr) +unsigned long +hash_pointer (const void *ptr) { unsigned long key = (unsigned long)ptr; key += (key << 12); @@ -697,7 +696,7 @@ ptrhash (const void *ptr) } static int -ptrcmp (const void *ptr1, const void *ptr2) +cmp_pointer (const void *ptr1, const void *ptr2) { return ptr1 == ptr2; } diff --git a/src/hash.h b/src/hash.h index d900d88d..2717c8e7 100644 --- a/src/hash.h +++ b/src/hash.h @@ -30,18 +30,6 @@ so, delete this exception statement from your version. */ #ifndef HASH_H #define HASH_H -/* From XEmacs, and hence from Dragon book. */ - -#define GOOD_HASH 65599 /* prime number just over 2^16; Dragon book, p. 435 */ -#define HASH2(a,b) (GOOD_HASH * (a) + (b)) -#define HASH3(a,b,c) (GOOD_HASH * HASH2 (a,b) + (c)) -#define HASH4(a,b,c,d) (GOOD_HASH * HASH3 (a,b,c) + (d)) -#define HASH5(a,b,c,d,e) (GOOD_HASH * HASH4 (a,b,c,d) + (e)) -#define HASH6(a,b,c,d,e,f) (GOOD_HASH * HASH5 (a,b,c,d,e) + (f)) -#define HASH7(a,b,c,d,e,f,g) (GOOD_HASH * HASH6 (a,b,c,d,e,f) + (g)) -#define HASH8(a,b,c,d,e,f,g,h) (GOOD_HASH * HASH7 (a,b,c,d,e,f,g) + (h)) -#define HASH9(a,b,c,d,e,f,g,h,i) (GOOD_HASH * HASH8 (a,b,c,d,e,f,g,h) + (i)) - struct hash_table; struct hash_table *hash_table_new PARAMS ((int, @@ -64,9 +52,21 @@ void hash_table_map PARAMS ((struct hash_table *, void *)); int hash_table_count PARAMS ((const struct hash_table *)); -unsigned long string_hash PARAMS ((const void *)); -int string_cmp PARAMS ((const void *, const void *)); struct hash_table *make_string_hash_table PARAMS ((int)); struct hash_table *make_nocase_string_hash_table PARAMS ((int)); +unsigned long hash_pointer PARAMS ((const void *)); + +/* From XEmacs, and hence from Dragon book. */ + +#define GOOD_HASH 65599 /* prime number just over 2^16; Dragon book, p. 435 */ +#define HASH2(a,b) (GOOD_HASH * (a) + (b)) +#define HASH3(a,b,c) (GOOD_HASH * HASH2 (a,b) + (c)) +#define HASH4(a,b,c,d) (GOOD_HASH * HASH3 (a,b,c) + (d)) +#define HASH5(a,b,c,d,e) (GOOD_HASH * HASH4 (a,b,c,d) + (e)) +#define HASH6(a,b,c,d,e,f) (GOOD_HASH * HASH5 (a,b,c,d,e) + (f)) +#define HASH7(a,b,c,d,e,f,g) (GOOD_HASH * HASH6 (a,b,c,d,e,f) + (g)) +#define HASH8(a,b,c,d,e,f,g,h) (GOOD_HASH * HASH7 (a,b,c,d,e,f,g) + (h)) +#define HASH9(a,b,c,d,e,f,g,h,i) (GOOD_HASH * HASH8 (a,b,c,d,e,f,g,h) + (i)) + #endif /* HASH_H */ diff --git a/src/http.c b/src/http.c index c77a93af..d66b28ff 100644 --- a/src/http.c +++ b/src/http.c @@ -84,7 +84,7 @@ extern int output_stream_regular; static int cookies_loaded_p; -struct cookie_jar *wget_cookie_jar; +static struct cookie_jar *wget_cookie_jar; #define TEXTHTML_S "text/html" #define TEXTXHTML_S "application/xhtml+xml" @@ -1349,14 +1349,23 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) look up conn->host in some cases. If that lookup failed, we don't need to bother with connect_to_host. */ if (host_lookup_failed) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } sock = connect_to_host (conn->host, conn->port); if (sock == E_HOST) - return HOSTERR; + { + request_free (req); + return HOSTERR; + } else if (sock < 0) - return (retryable_socket_connect_error (errno) - ? CONERROR : CONIMPOSSIBLE); + { + request_free (req); + return (retryable_socket_connect_error (errno) + ? CONERROR : CONIMPOSSIBLE); + } #ifdef HAVE_SSL if (proxy && u->scheme == SCHEME_HTTPS) @@ -1405,6 +1414,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) resp = resp_new (head); statcode = resp_status (resp, &message); resp_free (resp); + xfree (head); if (statcode != 200) { failed_tunnel: @@ -1582,6 +1592,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) hs->error = xstrdup (_("(no description)")); else hs->error = xstrdup (message); + xfree (message); type = resp_header_strdup (resp, "Content-Type"); if (type) @@ -1623,6 +1634,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) contrange = first_byte_pos; } resp_free (resp); + xfree (head); /* 20x responses are counted among successful by default. */ if (H_20X (statcode)) @@ -2832,7 +2844,17 @@ create_authorization_line (const char *au, const char *user, return NULL; } +void +save_cookies (void) +{ + if (wget_cookie_jar) + cookie_jar_save (wget_cookie_jar, opt.cookies_output); +} + void http_cleanup (void) { + xfree_null (pconn.host); + if (wget_cookie_jar) + cookie_jar_delete (wget_cookie_jar); } diff --git a/src/init.c b/src/init.c index 14abdec0..142ff81d 100644 --- a/src/init.c +++ b/src/init.c @@ -1,6 +1,5 @@ /* Reading/parsing the initialization file. - Copyright (C) 1995, 1996, 1997, 1998, 2000, 2001, 2003, 2004 - Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -53,16 +52,14 @@ so, delete this exception statement from your version. */ #include "init.h" #include "host.h" #include "netrc.h" -#include "cookies.h" /* for cookie_jar_delete */ #include "progress.h" #include "recur.h" /* for INFINITE_RECURSION */ +#include "convert.h" /* for convert_cleanup */ #ifndef errno extern int errno; #endif -extern struct cookie_jar *wget_cookie_jar; - /* We want tilde expansion enabled only when reading `.wgetrc' lines; otherwise, it will be performed by the shell. This variable will be set by the wgetrc-reading function. */ @@ -1337,8 +1334,7 @@ cleanup (void) cleanup_html_url (); downloaded_files_free (); host_cleanup (); - if (wget_cookie_jar) - cookie_jar_delete (wget_cookie_jar); + log_cleanup (); { extern acc_t *netrc_list; @@ -1366,7 +1362,7 @@ cleanup (void) xfree_null (opt.referer); xfree_null (opt.http_user); xfree_null (opt.http_passwd); - xfree_null (opt.user_header); + free_vec (opt.user_headers); #ifdef HAVE_SSL xfree_null (opt.sslcertkey); xfree_null (opt.sslcertfile); diff --git a/src/log.c b/src/log.c index 814d2d45..27d2cd70 100644 --- a/src/log.c +++ b/src/log.c @@ -1,5 +1,5 @@ /* Messages logging. - Copyright (C) 1998, 2000, 2001 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -684,11 +684,11 @@ struct ringel { char *buffer; int size; }; +static struct ringel ring[RING_SIZE]; /* ring data */ static const char * escnonprint_internal (const char *str, int for_uri) { - static struct ringel ring[RING_SIZE]; /* ring data */ static int ringpos; /* current ring position */ int nprcnt = count_nonprint (str); @@ -706,12 +706,15 @@ escnonprint_internal (const char *str, int for_uri) i.e. with three *additional* chars (two in URI-mode). Size must also include the length of the original string and an additional char for the terminating \0. */ - int needed_size = strlen (str) + 1 + for_uri ? (2 * nprcnt) : (3 * nprcnt); + int needed_size = strlen (str) + 1 + (for_uri ? 2 * nprcnt : 3 * nprcnt); /* If the current buffer is uninitialized or too small, (re)allocate it. */ if (r->buffer == NULL || r->size < needed_size) - r->buffer = xrealloc (r->buffer, needed_size); + { + r->buffer = xrealloc (r->buffer, needed_size); + r->size = needed_size; + } copy_and_escape (str, r->buffer, for_uri); ringpos = (ringpos + 1) % RING_SIZE; @@ -748,6 +751,14 @@ escnonprint_uri (const char *str) { return escnonprint_internal (str, 1); } + +void +log_cleanup (void) +{ + int i; + for (i = 0; i < countof (ring); i++) + xfree_null (ring[i].buffer); +} /* When SIGHUP or SIGUSR1 are received, the output is redirected elsewhere. Such redirection is only allowed once. */ diff --git a/src/log.h b/src/log.h index d2961994..490d85fc 100644 --- a/src/log.h +++ b/src/log.h @@ -50,6 +50,7 @@ int log_set_save_context PARAMS ((int)); void log_init PARAMS ((const char *, int)); void log_close PARAMS ((void)); +void log_cleanup PARAMS ((void)); void log_request_redirect_output PARAMS ((const char *)); const char *escnonprint PARAMS ((const char *)); diff --git a/src/main.c b/src/main.c index eb044783..432c7d48 100644 --- a/src/main.c +++ b/src/main.c @@ -61,7 +61,6 @@ extern int errno; #include "retr.h" #include "recur.h" #include "host.h" -#include "cookies.h" #include "url.h" #include "progress.h" /* for progress_handle_sigwinch */ #include "convert.h" @@ -953,8 +952,8 @@ Can't timestamp and not clobber old files at the same time.\n")); legible (opt.quota)); } - if (opt.cookies_output && wget_cookie_jar) - cookie_jar_save (wget_cookie_jar, opt.cookies_output); + if (opt.cookies_output) + save_cookies (); if (opt.convert_links && !opt.delete_after) convert_all_links (); diff --git a/src/retr.c b/src/retr.c index ad075f2f..a19262a0 100644 --- a/src/retr.c +++ b/src/retr.c @@ -214,8 +214,7 @@ fd_read_body (int fd, FILE *out, wgint toread, wgint startpos, data arrives slowly. */ int progress_interactive = 0; - /*int exact = flags & rb_read_exactly;*/ - int exact = 1; + int exact = flags & rb_read_exactly; wgint skip = 0; /* How much data we've read/written. */ diff --git a/src/retr.h b/src/retr.h index 6f896d97..9b62bb69 100644 --- a/src/retr.h +++ b/src/retr.h @@ -62,6 +62,7 @@ struct url; uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *, int *, struct url *)); +void save_cookies PARAMS ((void)); #endif /* RETR_H */ diff --git a/src/url.c b/src/url.c index 56d5d9f9..041001ad 100644 --- a/src/url.c +++ b/src/url.c @@ -1,6 +1,5 @@ /* URL handling. - Copyright (C) 1995, 1996, 1997, 2000, 2001, 2003, 2003 - Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -1017,6 +1016,30 @@ url_full_path (const struct url *url) return full_path; } +/* Unescape CHR in an otherwise escaped STR. Used to selectively + escaping of certain characters, such as "/" and ":". Returns a + count of unescaped chars. */ + +static void +unescape_single_char (char *str, char chr) +{ + const char c1 = XNUM_TO_DIGIT (chr >> 4); + const char c2 = XNUM_TO_DIGIT (chr & 0xf); + char *h = str; /* hare */ + char *t = str; /* tortoise */ + for (; *h; h++, t++) + { + if (h[0] == '%' && h[1] == c1 && h[2] == c2) + { + *t = chr; + h += 2; + } + else + *t = *h; + } + *t = '\0'; +} + /* Escape unsafe and reserved characters, except for the slash characters. */ @@ -1024,28 +1047,10 @@ static char * url_escape_dir (const char *dir) { char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1); - char *h, *t; if (newdir == dir) return (char *)dir; - /* Unescape slashes in NEWDIR. */ - - h = newdir; /* hare */ - t = newdir; /* tortoise */ - - for (; *h; h++, t++) - { - /* url_escape_1 having converted '/' to "%2F" exactly. */ - if (*h == '%' && h[1] == '2' && h[2] == 'F') - { - *t = '/'; - h += 2; - } - else - *t = *h; - } - *t = '\0'; - + unescape_single_char (newdir, '/'); return newdir; } @@ -1845,7 +1850,7 @@ url_string (const struct url *url, int hide_password) { int size; char *result, *p; - char *quoted_user = NULL, *quoted_passwd = NULL; + char *quoted_host, *quoted_user = NULL, *quoted_passwd = NULL; int scheme_port = supported_schemes[url->scheme].default_port; const char *scheme_str = supported_schemes[url->scheme].leading_string; @@ -1868,12 +1873,19 @@ url_string (const struct url *url, int hide_password) } } - /* Numeric IPv6 addresses can contain ':' and need to be quoted with - brackets. */ - brackets_around_host = strchr (url->host, ':') != NULL; + /* In the unlikely event that the host name contains non-printable + characters, quote it for displaying to the user. */ + quoted_host = url_escape_allow_passthrough (url->host); + + /* Undo the quoting of colons that URL escaping performs. IPv6 + addresses may legally contain colons, and in that case must be + placed in square brackets. */ + if (quoted_host != url->host) + unescape_single_char (quoted_host, ':'); + brackets_around_host = strchr (quoted_host, ':') != NULL; size = (strlen (scheme_str) - + strlen (url->host) + + strlen (quoted_host) + (brackets_around_host ? 2 : 0) + fplen + 1); @@ -1902,7 +1914,7 @@ url_string (const struct url *url, int hide_password) if (brackets_around_host) *p++ = '['; - APPEND (p, url->host); + APPEND (p, quoted_host); if (brackets_around_host) *p++ = ']'; if (url->port != scheme_port) @@ -1919,9 +1931,10 @@ url_string (const struct url *url, int hide_password) if (quoted_user && quoted_user != url->user) xfree (quoted_user); - if (quoted_passwd && !hide_password - && quoted_passwd != url->passwd) + if (quoted_passwd && !hide_password && quoted_passwd != url->passwd) xfree (quoted_passwd); + if (quoted_host != url->host) + xfree (quoted_host); return result; } diff --git a/src/utils.c b/src/utils.c index 2319947a..eace57c3 100644 --- a/src/utils.c +++ b/src/utils.c @@ -558,6 +558,14 @@ fopen_excl (const char *fname, int binary) return NULL; return fdopen (fd, binary ? "wb" : "w"); #else /* not O_EXCL */ + /* Manually check whether the file exists. This is prone to race + conditions, but systems without O_EXCL haven't deserved + better. */ + if (file_exists_p (fname)) + { + errno = EEXIST; + return NULL; + } return fopen (fname, binary ? "wb" : "w"); #endif /* not O_EXCL */ } diff --git a/src/xmalloc.c b/src/xmalloc.c index 311506ec..ab5ac812 100644 --- a/src/xmalloc.c +++ b/src/xmalloc.c @@ -1,5 +1,5 @@ /* Wrappers around malloc and memory debugging support. - Copyright (C) 2003 Free Software Foundation, Inc. + Copyright (C) 2005 Free Software Foundation, Inc. This file is part of GNU Wget. @@ -42,6 +42,7 @@ so, delete this exception statement from your version. */ #include "wget.h" #include "xmalloc.h" +#include "hash.h" /* for hash_pointer */ #ifndef errno extern int errno; @@ -79,11 +80,11 @@ memfatal (const char *context, long attempted_size) If memory debugging is not turned on, xmalloc.h defines these: - #define xmalloc xmalloc_real - #define xmalloc0 xmalloc0_real - #define xrealloc xrealloc_real - #define xstrdup xstrdup_real - #define xfree xfree_real + #define xmalloc checking_malloc + #define xmalloc0 checking_malloc0 + #define xrealloc checking_realloc + #define xstrdup checking_strdup + #define xfree checking_free In case of memory debugging, the definitions are a bit more complex, because we want to provide more information, *and* we want @@ -91,13 +92,14 @@ memfatal (const char *context, long attempted_size) and friends need to be macros in the first place.) Then it looks like this: - #define xmalloc(a) xmalloc_debug (a, __FILE__, __LINE__) - #define xmalloc0(a) xmalloc0_debug (a, __FILE__, __LINE__) - #define xrealloc(a, b) xrealloc_debug (a, b, __FILE__, __LINE__) - #define xstrdup(a) xstrdup_debug (a, __FILE__, __LINE__) - #define xfree(a) xfree_debug (a, __FILE__, __LINE__) + #define xmalloc(a) debugging_malloc (a, __FILE__, __LINE__) + #define xmalloc0(a) debugging_malloc0 (a, __FILE__, __LINE__) + #define xrealloc(a, b) debugging_realloc (a, b, __FILE__, __LINE__) + #define xstrdup(a) debugging_strdup (a, __FILE__, __LINE__) + #define xfree(a) debugging_free (a, __FILE__, __LINE__) - Each of the *_debug function does its magic and calls the real one. */ + Each of the debugging_* functions does its magic and calls the + corresponding checking_* one. */ #ifdef DEBUG_MALLOC # define STATIC_IF_DEBUG static @@ -106,7 +108,7 @@ memfatal (const char *context, long attempted_size) #endif STATIC_IF_DEBUG void * -xmalloc_real (size_t size) +checking_malloc (size_t size) { void *ptr = malloc (size); if (!ptr) @@ -115,7 +117,7 @@ xmalloc_real (size_t size) } STATIC_IF_DEBUG void * -xmalloc0_real (size_t size) +checking_malloc0 (size_t size) { /* Using calloc can be faster than malloc+memset because some calloc implementations know when they're dealing with zeroed-out memory @@ -127,7 +129,7 @@ xmalloc0_real (size_t size) } STATIC_IF_DEBUG void * -xrealloc_real (void *ptr, size_t newsize) +checking_realloc (void *ptr, size_t newsize) { void *newptr; @@ -144,7 +146,7 @@ xrealloc_real (void *ptr, size_t newsize) } STATIC_IF_DEBUG char * -xstrdup_real (const char *s) +checking_strdup (const char *s) { char *copy; @@ -164,31 +166,31 @@ xstrdup_real (const char *s) } STATIC_IF_DEBUG void -xfree_real (void *ptr) +checking_free (void *ptr) { /* Wget's xfree() must not be passed a NULL pointer. This is for - historical reasons: many pre-C89 systems were known to bomb at - free(NULL), and Wget was careful to use xfree_null when there is + historical reasons: pre-C89 systems were reported to bomb at + free(NULL), and Wget was careful to not call xfree when there was a possibility of PTR being NULL. (It might have been better to simply have xfree() do nothing if ptr==NULL.) Since the code is already written that way, this assert simply - enforces that constraint. Code that thinks it doesn't deal with - NULL, and it in fact does, aborts immediately. If you trip on - this, either the code has a pointer handling bug or should have - called xfree_null instead of xfree. Correctly written code - should never trigger this assertion. + enforces the existing constraint. The benefit is double-checking + the logic: code that thinks it can't be passed a NULL pointer, + while it in fact can, aborts here. If you trip on this, either + the code has a pointer handling bug or should have called + xfree_null instead of xfree. Correctly written code should never + trigger this assertion. - If the assertion proves to be too much of a hassle, it can be - removed and a check that makes NULL a no-op placed in its stead. - If that is done, xfree_null is no longer needed and should be - removed. */ + The downside is that the uninitiated might not expect xfree(NULL) + to abort. If the assertion proves to be too much of a hassle, it + can be removed and a check that makes NULL a no-op placed in its + stead. If that is done, xfree_null is no longer needed and + should be removed. */ assert (ptr != NULL); + free (ptr); } - -/* xfree_real is unnecessary because free doesn't require any special - functionality. */ #ifdef DEBUG_MALLOC @@ -202,8 +204,10 @@ xfree_real (void *ptr) * Making malloc store its entry into a simple array and free remove stuff from that array. At the end, print the pointers which have not been freed, along with the source file and the line number. - This also has the side-effect of detecting freeing memory that - was never allocated. + + * Checking for "invalid frees", where free is called on a pointer + not obtained with malloc, or where the same pointer is freed + twice. Note that this kind of memory leak checking strongly depends on every malloc() being followed by a free(), even if the program is @@ -212,58 +216,82 @@ xfree_real (void *ptr) static int malloc_count, free_count; +/* Home-grown hash table of mallocs: */ + +#define SZ 100003 /* Prime number a little over 100,000. + Increase the table size if you need + to debug larger Wget runs. */ + static struct { - char *ptr; + const void *ptr; const char *file; int line; -} malloc_debug[100000]; +} malloc_table[SZ]; -/* Both register_ptr and unregister_ptr take O(n) operations to run, - which can be a real problem. It would be nice to use a hash table - for malloc_debug, but the functions in hash.c are not suitable - because they can call malloc() themselves. Maybe it would work if - the hash table were preallocated to a huge size, and if we set the - rehash threshold to 1.0. */ +/* Find PTR's position in malloc_table. If PTR is not found, return + the next available position. */ -/* Register PTR in malloc_debug. Abort if this is not possible - (presumably due to the number of current allocations exceeding the - size of malloc_debug.) */ - -static void -register_ptr (void *ptr, const char *file, int line) +static inline int +ptr_position (const void *ptr) { - int i; - for (i = 0; i < countof (malloc_debug); i++) - if (malloc_debug[i].ptr == NULL) - { - malloc_debug[i].ptr = ptr; - malloc_debug[i].file = file; - malloc_debug[i].line = line; - return; - } - abort (); + int i = hash_pointer (ptr) % SZ; + for (; malloc_table[i].ptr != NULL; i = (i + 1) % SZ) + if (malloc_table[i].ptr == ptr) + return i; + return i; } -/* Unregister PTR from malloc_debug. Abort if PTR is not present in - malloc_debug. (This catches calling free() with a bogus pointer.) */ +/* Register PTR in malloc_table. Abort if this is not possible + (presumably due to the number of current allocations exceeding the + size of malloc_table.) */ static void +register_ptr (const void *ptr, const char *file, int line) +{ + int i; + if (malloc_count - free_count > SZ) + abort (); + + i = ptr_position (ptr); + malloc_table[i].ptr = ptr; + malloc_table[i].file = file; + malloc_table[i].line = line; +} + +/* Unregister PTR from malloc_table. Return 0 if PTR is not present + in malloc_table. */ + +static int unregister_ptr (void *ptr) { - int i; - for (i = 0; i < countof (malloc_debug); i++) - if (malloc_debug[i].ptr == ptr) - { - malloc_debug[i].ptr = NULL; - return; - } - abort (); + int i = ptr_position (ptr); + if (malloc_table[i].ptr == NULL) + return 0; + malloc_table[i].ptr = NULL; + + /* Relocate malloc_table entries immediately following PTR. */ + for (i = (i + 1) % SZ; malloc_table[i].ptr != NULL; i = (i + 1) % SZ) + { + const void *ptr2 = malloc_table[i].ptr; + /* Find the new location for the key. */ + int j = hash_pointer (ptr2) % SZ; + for (; malloc_table[j].ptr != NULL; j = (j + 1) % SZ) + if (ptr2 == malloc_table[j].ptr) + /* No need to relocate entry at [i]; it's already at or near + its hash position. */ + goto cont_outer; + malloc_table[j] = malloc_table[i]; + malloc_table[i].ptr = NULL; + cont_outer: + ; + } + return 1; } -/* Print the malloc debug stats that can be gathered from the above - information. Currently this is the count of mallocs, frees, the - difference between the two, and the dump of the contents of - malloc_debug. The last part are the memory leaks. */ +/* Print the malloc debug stats gathered from the above information. + Currently this is the count of mallocs, frees, the difference + between the two, and the dump of the contents of malloc_table. The + last part are the memory leaks. */ void print_malloc_debug_stats (void) @@ -271,34 +299,35 @@ print_malloc_debug_stats (void) int i; printf ("\nMalloc: %d\nFree: %d\nBalance: %d\n\n", malloc_count, free_count, malloc_count - free_count); - for (i = 0; i < countof (malloc_debug); i++) - if (malloc_debug[i].ptr != NULL) - printf ("0x%08ld: %s:%d\n", (long)malloc_debug[i].ptr, - malloc_debug[i].file, malloc_debug[i].line); + for (i = 0; i < SZ; i++) + if (malloc_table[i].ptr != NULL) + printf ("0x%0*lx: %s:%d\n", 2 * sizeof (void *), + (long) malloc_table[i].ptr, + malloc_table[i].file, malloc_table[i].line); } void * -xmalloc_debug (size_t size, const char *source_file, int source_line) +debugging_malloc (size_t size, const char *source_file, int source_line) { - void *ptr = xmalloc_real (size); + void *ptr = checking_malloc (size); ++malloc_count; register_ptr (ptr, source_file, source_line); return ptr; } void * -xmalloc0_debug (size_t size, const char *source_file, int source_line) +debugging_malloc0 (size_t size, const char *source_file, int source_line) { - void *ptr = xmalloc0_real (size); + void *ptr = checking_malloc0 (size); ++malloc_count; register_ptr (ptr, source_file, source_line); return ptr; } void * -xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_line) +debugging_realloc (void *ptr, size_t newsize, const char *source_file, int source_line) { - void *newptr = xrealloc_real (ptr, newsize); + void *newptr = checking_realloc (ptr, newsize); if (!ptr) { ++malloc_count; @@ -313,29 +342,36 @@ xrealloc_debug (void *ptr, size_t newsize, const char *source_file, int source_l } char * -xstrdup_debug (const char *s, const char *source_file, int source_line) +debugging_strdup (const char *s, const char *source_file, int source_line) { - char *copy = xstrdup_real (s); + char *copy = checking_strdup (s); ++malloc_count; register_ptr (copy, source_file, source_line); return copy; } void -xfree_debug (void *ptr, const char *source_file, int source_line) +debugging_free (void *ptr, const char *source_file, int source_line) { - /* See xfree_real for rationale of this abort. We repeat it here + /* See checking_free for rationale of this abort. We repeat it here because we can print the file and the line where the offending free occurred. */ if (ptr == NULL) { - fprintf ("%s: xfree(NULL) at %s:%d\n", + fprintf (stderr, "%s: xfree(NULL) at %s:%d\n", exec_name, source_file, source_line); abort (); } + if (!unregister_ptr (ptr)) + { + fprintf (stderr, "%s: bad xfree(%0*lx) at %s:%d\n", + exec_name, 2 * sizeof (void *), (long) ptr, + source_file, source_line); + abort (); + } ++free_count; - unregister_ptr (ptr); - xfree_real (ptr); + + checking_free (ptr); } #endif /* DEBUG_MALLOC */ diff --git a/src/xmalloc.h b/src/xmalloc.h index 1ac2365d..ed2a188d 100644 --- a/src/xmalloc.h +++ b/src/xmalloc.h @@ -30,46 +30,50 @@ so, delete this exception statement from your version. */ #ifndef XMALLOC_H #define XMALLOC_H -/* Define this if you want primitive but extensive malloc debugging. - It will make Wget extremely slow, so only do it in development - builds. */ +/* Define this to get Wget's builtin malloc debugging, which is + primitive but fairly extensive. It will make Wget somewhat slower + and larger, so it should be used by developers only. */ #undef DEBUG_MALLOC /* When DEBUG_MALLOC is not defined (which is normally the case), the - allocation functions directly map to *_real wrappers. In the - DEBUG_MALLOC mode, they also record the file and line where the - offending malloc/free/... was invoked. + allocator identifiers are mapped to checking_* wrappers, which exit + Wget if malloc/realloc/strdup return NULL - *Note*: xfree(NULL) aborts. If the pointer you're freeing can be - NULL, use xfree_null instead. */ + In DEBUG_MALLOC mode, the allocators are mapped to debugging_* + wrappers, which also record the file and line from which the + allocation was attempted. At the end of the program, a detailed + summary of unfreed allocations is displayed. + + *Note*: xfree(NULL) aborts in both modes. If the pointer you're + freeing can be NULL, use xfree_null instead. */ #ifndef DEBUG_MALLOC -#define xmalloc xmalloc_real -#define xmalloc0 xmalloc0_real -#define xrealloc xrealloc_real -#define xstrdup xstrdup_real -#define xfree xfree_real +#define xmalloc checking_malloc +#define xmalloc0 checking_malloc0 +#define xrealloc checking_realloc +#define xstrdup checking_strdup +#define xfree checking_free -void *xmalloc_real PARAMS ((size_t)); -void *xmalloc0_real PARAMS ((size_t)); -void *xrealloc_real PARAMS ((void *, size_t)); -char *xstrdup_real PARAMS ((const char *)); -void xfree_real PARAMS ((void *)); +void *checking_malloc PARAMS ((size_t)); +void *checking_malloc0 PARAMS ((size_t)); +void *checking_realloc PARAMS ((void *, size_t)); +char *checking_strdup PARAMS ((const char *)); +void checking_free PARAMS ((void *)); #else /* DEBUG_MALLOC */ -#define xmalloc(s) xmalloc_debug (s, __FILE__, __LINE__) -#define xmalloc0(s) xmalloc0_debug (s, __FILE__, __LINE__) -#define xrealloc(p, s) xrealloc_debug (p, s, __FILE__, __LINE__) -#define xstrdup(p) xstrdup_debug (p, __FILE__, __LINE__) -#define xfree(p) xfree_debug (p, __FILE__, __LINE__) +#define xmalloc(s) debugging_malloc (s, __FILE__, __LINE__) +#define xmalloc0(s) debugging_malloc0 (s, __FILE__, __LINE__) +#define xrealloc(p, s) debugging_realloc (p, s, __FILE__, __LINE__) +#define xstrdup(p) debugging_strdup (p, __FILE__, __LINE__) +#define xfree(p) debugging_free (p, __FILE__, __LINE__) -void *xmalloc_debug PARAMS ((size_t, const char *, int)); -void *xmalloc0_debug PARAMS ((size_t, const char *, int)); -void *xrealloc_debug PARAMS ((void *, size_t, const char *, int)); -char *xstrdup_debug PARAMS ((const char *, const char *, int)); -void xfree_debug PARAMS ((void *, const char *, int)); +void *debugging_malloc PARAMS ((size_t, const char *, int)); +void *debugging_malloc0 PARAMS ((size_t, const char *, int)); +void *debugging_realloc PARAMS ((void *, size_t, const char *, int)); +char *debugging_strdup PARAMS ((const char *, const char *, int)); +void debugging_free PARAMS ((void *, const char *, int)); #endif /* DEBUG_MALLOC */