/* Dealing with host names. Copyright (C) 1995, 1996, 1997, 2000 Free Software Foundation, Inc. This file is part of Wget. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #ifdef HAVE_STRING_H # include #else # include #endif #include #include #ifdef WINDOWS # include #else # include # include # include # include #endif /* WINDOWS */ #ifdef HAVE_SYS_UTSNAME_H # include #endif #include #include "wget.h" #include "utils.h" #include "host.h" #include "url.h" #include "hash.h" #ifndef errno extern int errno; #endif /* Mapping between all known hosts to their addresses (n.n.n.n). */ struct hash_table *host_name_address_map; /* Mapping between all known addresses (n.n.n.n) to their hosts. This is the inverse of host_name_address_map. These two tables share the strdup'ed strings. */ struct hash_table *host_address_name_map; /* Mapping between auxilliary (slave) and master host names. */ struct hash_table *host_slave_master_map; /* Utility function: like xstrdup(), but also lowercases S. */ static char * xstrdup_lower (const char *s) { char *copy = xstrdup (s); char *p = copy; for (; *p; p++) *p = TOLOWER (*p); return copy; } /* The same as gethostbyname, but supports internet addresses of the form `N.N.N.N'. On some systems gethostbyname() knows how to do this automatically. */ struct hostent * ngethostbyname (const char *name) { struct hostent *hp; unsigned long addr; addr = (unsigned long)inet_addr (name); if ((int)addr != -1) hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET); else hp = gethostbyname (name); return hp; } /* Add host name HOST with the address ADDR_TEXT to the cache. Normally this means that the (HOST, ADDR_TEXT) pair will be to host_name_address_map and to host_address_name_map. (It is the caller's responsibility to make sure that HOST is not already in host_name_address_map.) If the ADDR_TEXT has already been seen and belongs to another host, HOST will be added to host_slave_master_map instead. */ static void add_host_to_cache (const char *host, const char *addr_text) { char *canonical_name = hash_table_get (host_address_name_map, addr_text); if (canonical_name) { DEBUGP (("Mapping %s to %s in host_slave_master_map.\n", host, canonical_name)); /* We've already dealt with that host under another name. */ hash_table_put (host_slave_master_map, xstrdup_lower (host), xstrdup_lower (canonical_name)); } else { /* This is really the first time we're dealing with that host. */ char *h_copy = xstrdup_lower (host); char *a_copy = xstrdup (addr_text); DEBUGP (("Caching %s <-> %s\n", h_copy, a_copy)); hash_table_put (host_name_address_map, h_copy, a_copy); hash_table_put (host_address_name_map, a_copy, h_copy); } } /* Store the address of HOSTNAME, internet-style (four octets in network order), to WHERE. First try to get the address from the cache; if it is not available, call the DNS functions and update the cache. Return 1 on successful finding of the hostname, 0 otherwise. */ int store_hostaddress (unsigned char *where, const char *hostname) { unsigned long addr; char *addr_text; char *canonical_name; struct hostent *hptr; struct in_addr in; char *inet_s; /* If the address is of the form d.d.d.d, there will be no trouble with it. */ addr = (unsigned long)inet_addr (hostname); /* If we have the numeric address, just store it. */ if ((int)addr != -1) { /* ADDR is defined to be in network byte order, meaning the code works on little and big endian 32-bit architectures without change. On big endian 64-bit architectures we need to be careful to copy the correct four bytes. */ int offset; have_addr: #ifdef WORDS_BIGENDIAN offset = sizeof (unsigned long) - 4; #else offset = 0; #endif memcpy (where, (char *)&addr + offset, 4); return 1; } /* By now we know that the address is not of the form d.d.d.d. Try to find it in our cache of host addresses. */ addr_text = hash_table_get (host_name_address_map, hostname); if (addr_text) { DEBUGP (("Found %s in host_name_address_map: %s\n", hostname, addr_text)); addr = (unsigned long)inet_addr (addr_text); goto have_addr; } /* Maybe this host is known to us under another name. If so, we'll find it in host_slave_master_map, and use the master name to find its address in host_name_address_map. */ canonical_name = hash_table_get (host_slave_master_map, hostname); if (canonical_name) { addr_text = hash_table_get (host_name_address_map, canonical_name); assert (addr_text != NULL); DEBUGP (("Found %s as slave of %s -> %s\n", hostname, canonical_name, addr_text)); addr = (unsigned long)inet_addr (addr_text); goto have_addr; } /* Since all else has failed, let's try gethostbyname(). Note that we use gethostbyname() rather than ngethostbyname(), because we already know that the address is not numerical. */ hptr = gethostbyname (hostname); if (!hptr) return 0; /* Copy the address of the host to socket description. */ memcpy (where, hptr->h_addr_list[0], hptr->h_length); assert (hptr->h_length == 4); /* Now that we've gone through the truoble of calling gethostbyname(), we can store this valuable information to the cache. First, we have to look for it by address to know if it's already in the cache by another name. */ /* Originally, we copied to in.s_addr, but it appears to be missing on some systems. */ memcpy (&in, *hptr->h_addr_list, sizeof (in)); inet_s = inet_ntoa (in); add_host_to_cache (hostname, inet_s); return 1; } /* Determine the "real" name of HOST, as perceived by Wget. If HOST is referenced by more than one name, "real" name is considered to be the first one encountered in the past. */ char * realhost (const char *host) { struct in_addr in; struct hostent *hptr; char *master_name; DEBUGP (("Checking for %s in host_name_address_map.\n", host)); if (hash_table_exists (host_name_address_map, host)) { DEBUGP (("Found; %s was already used, by that name.\n", host)); return xstrdup_lower (host); } DEBUGP (("Checking for %s in host_slave_master_map.\n", host)); master_name = hash_table_get (host_slave_master_map, host); if (master_name) { has_master: DEBUGP (("Found; %s was already used, by the name %s.\n", host, master_name)); return xstrdup (master_name); } DEBUGP (("First time I hear about %s by that name; looking it up.\n", host)); hptr = ngethostbyname (host); if (hptr) { char *inet_s; /* Originally, we copied to in.s_addr, but it appears to be missing on some systems. */ memcpy (&in, *hptr->h_addr_list, sizeof (in)); inet_s = inet_ntoa (in); add_host_to_cache (host, inet_s); /* add_host_to_cache() can establish a slave-master mapping. */ DEBUGP (("Checking again for %s in host_slave_master_map.\n", host)); master_name = hash_table_get (host_slave_master_map, host); if (master_name) goto has_master; } return xstrdup_lower (host); } /* Compare two hostnames (out of URL-s if the arguments are URL-s), taking care of aliases. It uses realhost() to determine a unique hostname for each of two hosts. If simple_check is non-zero, only strcmp() is used for comparison. */ int same_host (const char *u1, const char *u2) { const char *s; char *p1, *p2; char *real1, *real2; /* Skip protocol, if present. */ u1 += skip_url (u1); u2 += skip_url (u2); u1 += skip_proto (u1); u2 += skip_proto (u2); /* Skip username ans password, if present. */ u1 += skip_uname (u1); u2 += skip_uname (u2); for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++); p1 = strdupdelim (s, u1); for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++); p2 = strdupdelim (s, u2); DEBUGP (("Comparing hosts %s and %s...\n", p1, p2)); if (strcasecmp (p1, p2) == 0) { xfree (p1); xfree (p2); DEBUGP (("They are quite alike.\n")); return 1; } else if (opt.simple_check) { xfree (p1); xfree (p2); DEBUGP (("Since checking is simple, I'd say they are not the same.\n")); return 0; } real1 = realhost (p1); real2 = realhost (p2); xfree (p1); xfree (p2); if (strcasecmp (real1, real2) == 0) { DEBUGP (("They are alike, after realhost()->%s.\n", real1)); xfree (real1); xfree (real2); return 1; } else { DEBUGP (("They are not the same (%s, %s).\n", real1, real2)); xfree (real1); xfree (real2); return 0; } } /* Determine whether a URL is acceptable to be followed, according to a list of domains to accept. */ int accept_domain (struct urlinfo *u) { assert (u->host != NULL); if (opt.domains) { if (!sufmatch ((const char **)opt.domains, u->host)) return 0; } if (opt.exclude_domains) { if (sufmatch ((const char **)opt.exclude_domains, u->host)) return 0; } return 1; } /* Check whether WHAT is matched in LIST, each element of LIST being a pattern to match WHAT against, using backward matching (see match_backwards() in utils.c). If an element of LIST matched, 1 is returned, 0 otherwise. */ int sufmatch (const char **list, const char *what) { int i, j, k, lw; lw = strlen (what); for (i = 0; list[i]; i++) { for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--) if (TOLOWER (list[i][j]) != TOLOWER (what[k])) break; /* The domain must be first to reach to beginning. */ if (j == -1) return 1; } return 0; } /* Return email address of the form username@FQDN suitable for anonymous FTP passwords. This process is error-prone, and the escape hatch is the MY_HOST preprocessor constant, which can be used to hard-code either your hostname or FQDN at compile-time. If the FQDN cannot be determined, a warning is printed, and the function returns a short `username@' form, accepted by most anonymous servers. The returned string is generated by malloc() and should be freed using free(). If not even the username cannot be divined, it means things are seriously fucked up, and Wget exits. */ char * ftp_getaddress (void) { static char *address; /* Do the drill only the first time, as it won't change. */ if (!address) { char userid[32]; /* 9 should be enough for Unix, but I'd rather be on the safe side. */ char *host, *fqdn; if (!pwd_cuserid (userid)) { logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"), exec_name); exit (1); } #ifdef MY_HOST STRDUP_ALLOCA (host, MY_HOST); #else /* not MY_HOST */ #ifdef HAVE_UNAME { struct utsname ubuf; if (uname (&ubuf) < 0) { logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"), exec_name, strerror (errno)); fqdn = ""; goto giveup; } STRDUP_ALLOCA (host, ubuf.nodename); } #else /* not HAVE_UNAME */ #ifdef HAVE_GETHOSTNAME host = alloca (256); if (gethostname (host, 256) < 0) { logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"), exec_name); fqdn = ""; goto giveup; } #else /* not HAVE_GETHOSTNAME */ #error Cannot determine host name. #endif /* not HAVE_GETHOSTNAME */ #endif /* not HAVE_UNAME */ #endif /* not MY_HOST */ /* If the address we got so far contains a period, don't bother anymore. */ if (strchr (host, '.')) fqdn = host; else { /* #### I've seen the following scheme fail on at least one system! Do we care? */ char *tmpstore; /* According to Richard Stevens, the correct way to find the FQDN is to (1) find the host name, (2) find its IP address using gethostbyname(), and (3) get the FQDN using gethostbyaddr(). So that's what we'll do. Step one has been done above. */ /* (2) */ struct hostent *hp = gethostbyname (host); if (!hp || !hp->h_addr_list) { logprintf (LOG_ALWAYS, _("\ %s: Warning: cannot determine local IP address.\n"), exec_name); fqdn = ""; goto giveup; } /* Copy the argument, so the call to gethostbyaddr doesn't clobber it -- just in case. */ tmpstore = (char *)alloca (hp->h_length); memcpy (tmpstore, *hp->h_addr_list, hp->h_length); /* (3) */ hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype); if (!hp || !hp->h_name) { logprintf (LOG_ALWAYS, _("\ %s: Warning: cannot reverse-lookup local IP address.\n"), exec_name); fqdn = ""; goto giveup; } if (!strchr (hp->h_name, '.')) { #if 0 /* This gets ticked pretty often. Karl Berry reports that there can be valid reasons for the local host name not to be an FQDN, so I've decided to remove the annoying warning. */ logprintf (LOG_ALWAYS, _("\ %s: Warning: reverse-lookup of local address did not yield FQDN!\n"), exec_name); #endif fqdn = ""; goto giveup; } /* Once we're here, hp->h_name contains the correct FQDN. */ STRDUP_ALLOCA (fqdn, hp->h_name); } giveup: address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1); sprintf (address, "%s@%s", userid, fqdn); } return address; } /* Print error messages for host errors. */ char * herrmsg (int error) { /* Can't use switch since some constants are equal (at least on my system), and the compiler signals "duplicate case value". */ if (error == HOST_NOT_FOUND || error == NO_RECOVERY || error == NO_DATA || error == NO_ADDRESS || error == TRY_AGAIN) return _("Host not found"); else return _("Unknown error"); } void clean_hosts (void) { /* host_name_address_map and host_address_name_map share the strings. Because of that, calling free_keys_and_values once suffices for both. */ free_keys_and_values (host_name_address_map); hash_table_destroy (host_name_address_map); hash_table_destroy (host_address_name_map); free_keys_and_values (host_slave_master_map); hash_table_destroy (host_slave_master_map); } void host_init (void) { host_name_address_map = make_string_hash_table (0); host_address_name_map = make_string_hash_table (0); host_slave_master_map = make_string_hash_table (0); }