/* Dealing with host names. Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc. This file is part of Wget. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #ifdef HAVE_STRING_H # include #else # include #endif #include #include #ifdef WINDOWS # include #else # include # include # include # include #endif /* WINDOWS */ #ifdef HAVE_SYS_UTSNAME_H # include #endif #include #include "wget.h" #include "utils.h" #include "host.h" #include "url.h" #ifndef errno extern int errno; #endif /* Host list entry */ struct host { /* Host's symbolical name, as encountered at the time of first inclusion, e.g. "fly.cc.fer.hr". */ char *hostname; /* Host's "real" name, i.e. its IP address, written out in ASCII form of N.N.N.N, e.g. "161.53.70.130". */ char *realname; /* More than one HOSTNAME can correspond to the same REALNAME. For our purposes, the canonical name of the host is its HOSTNAME when it was first encountered. This entry is said to have QUALITY. */ int quality; /* Next entry in the list. */ struct host *next; }; static struct host *hlist; static struct host *add_hlist PARAMS ((struct host *, const char *, const char *, int)); /* The same as gethostbyname, but supports internet addresses of the form `N.N.N.N'. */ struct hostent * ngethostbyname (const char *name) { struct hostent *hp; unsigned long addr; addr = (unsigned long)inet_addr (name); if ((int)addr != -1) hp = gethostbyaddr ((char *)&addr, sizeof (addr), AF_INET); else hp = gethostbyname (name); return hp; } /* Search for HOST in the linked list L, by hostname. Return the entry, if found, or NULL. The search is case-insensitive. */ static struct host * search_host (struct host *l, const char *host) { for (; l; l = l->next) if (strcasecmp (l->hostname, host) == 0) return l; return NULL; } /* Like search_host, but searches by address. */ static struct host * search_address (struct host *l, const char *address) { for (; l; l = l->next) { int cmp = strcmp (l->realname, address); if (cmp == 0) return l; else if (cmp > 0) return NULL; } return NULL; } /* Store the address of HOSTNAME, internet-style, to WHERE. First check for it in the host list, and (if not found), use ngethostbyname to get it. Return 1 on successful finding of the hostname, 0 otherwise. */ int store_hostaddress (unsigned char *where, const char *hostname) { struct host *t; unsigned long addr; struct hostent *hptr; struct in_addr in; char *inet_s; /* If the address is of the form d.d.d.d, there will be no trouble with it. */ addr = (unsigned long)inet_addr (hostname); if ((int)addr == -1) { /* If it is not of that form, try to find it in the cache. */ t = search_host (hlist, hostname); if (t) addr = (unsigned long)inet_addr (t->realname); } /* If we have the numeric address, just store it. */ if ((int)addr != -1) { /* ADDR is in network byte order, meaning the code works on little and big endian 32-bit architectures without change. On big endian 64-bit architectures we need to be careful to copy the correct four bytes. */ int offset = 0; #ifdef WORDS_BIGENDIAN offset = sizeof (unsigned long) - 4; #endif memcpy (where, (char *)&addr + offset, 4); return 1; } /* Since all else has failed, let's try gethostbyname(). Note that we use gethostbyname() rather than ngethostbyname(), because we *know* the address is not numerical. */ hptr = gethostbyname (hostname); if (!hptr) return 0; /* Copy the address of the host to socket description. */ memcpy (where, hptr->h_addr_list[0], hptr->h_length); /* Now that we're here, we could as well cache the hostname for future use, as in realhost(). First, we have to look for it by address to know if it's already in the cache by another name. */ /* Originally, we copied to in.s_addr, but it appears to be missing on some systems. */ memcpy (&in, *hptr->h_addr_list, sizeof (in)); STRDUP_ALLOCA (inet_s, inet_ntoa (in)); t = search_address (hlist, inet_s); if (t) /* Found in the list, as realname. */ { /* Set the default, 0 quality. */ hlist = add_hlist (hlist, hostname, inet_s, 0); return 1; } /* Since this is really the first time this host is encountered, set quality to 1. */ hlist = add_hlist (hlist, hostname, inet_s, 1); return 1; } /* Add a host to the host list. The list is sorted by addresses. For equal addresses, the entries with quality should bubble towards the beginning of the list. */ static struct host * add_hlist (struct host *l, const char *nhost, const char *nreal, int quality) { struct host *t, *old, *beg; /* The entry goes to the beginning of the list if the list is empty or the order requires it. */ if (!l || (strcmp (nreal, l->realname) < 0)) { t = (struct host *)xmalloc (sizeof (struct host)); t->hostname = xstrdup (nhost); t->realname = xstrdup (nreal); t->quality = quality; t->next = l; return t; } beg = l; /* Second two one-before-the-last element. */ while (l->next) { int cmp; old = l; l = l->next; cmp = strcmp (nreal, l->realname); if (cmp >= 0) continue; /* If the next list element is greater than s, put s between the current and the next list element. */ t = (struct host *)xmalloc (sizeof (struct host)); old->next = t; t->next = l; t->hostname = xstrdup (nhost); t->realname = xstrdup (nreal); t->quality = quality; return beg; } t = (struct host *)xmalloc (sizeof (struct host)); t->hostname = xstrdup (nhost); t->realname = xstrdup (nreal); t->quality = quality; /* Insert the new element after the last element. */ l->next = t; t->next = NULL; return beg; } /* Determine the "real" name of HOST, as perceived by Wget. If HOST is referenced by more than one name, "real" name is considered to be the first one encountered in the past. If the host cannot be found in the list of already dealt-with hosts, try with its INET address. If this fails too, add it to the list. The routine does not call gethostbyname twice for the same host if it can possibly avoid it. */ char * realhost (const char *host) { struct host *l; struct in_addr in; struct hostent *hptr; char *inet_s; DEBUGP (("Checking for %s.\n", host)); /* Look for the host, looking by the host name. */ l = search_host (hlist, host); if (l && l->quality) /* Found it with quality */ { DEBUGP (("%s was already used, by that name.\n", host)); /* Here we return l->hostname, not host, because of the possible case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are the same, but we want the one that was first. */ return xstrdup (l->hostname); } else if (!l) /* Not found, with or without quality */ { /* The fact that gethostbyname will get called makes it necessary to store it to the list, to ensure that gethostbyname will not be called twice for the same string. However, the quality argument must be set appropriately. Note that add_hlist must be called *after* the realname search, or the quality would be always set to 0 */ DEBUGP (("This is the first time I hear about host %s by that name.\n", host)); hptr = ngethostbyname (host); if (!hptr) return xstrdup (host); /* Originally, we copied to in.s_addr, but it appears to be missing on some systems. */ memcpy (&in, *hptr->h_addr_list, sizeof (in)); STRDUP_ALLOCA (inet_s, inet_ntoa (in)); } else /* Found, without quality */ { /* This case happens when host is on the list, but not as first entry (the one with quality). Then we just get its INET address and pick up the first entry with quality. */ DEBUGP (("We've dealt with host %s, but under the name %s.\n", host, l->realname)); STRDUP_ALLOCA (inet_s, l->realname); } /* Now we certainly have the INET address. The following loop is guaranteed to pick either an entry with quality (because it is the first one), or none at all. */ l = search_address (hlist, inet_s); if (l) /* Found in the list, as realname. */ { /* Set the default, 0 quality. */ hlist = add_hlist (hlist, host, inet_s, 0); return xstrdup (l->hostname); } /* Since this is really the first time this host is encountered, set quality to 1. */ hlist = add_hlist (hlist, host, inet_s, 1); return xstrdup (host); } /* Compare two hostnames (out of URL-s if the arguments are URL-s), taking care of aliases. It uses realhost() to determine a unique hostname for each of two hosts. If simple_check is non-zero, only strcmp() is used for comparison. */ int same_host (const char *u1, const char *u2) { const char *s; char *p1, *p2; char *real1, *real2; /* Skip protocol, if present. */ u1 += skip_url (u1); u2 += skip_url (u2); u1 += skip_proto (u1); u2 += skip_proto (u2); /* Skip username ans password, if present. */ u1 += skip_uname (u1); u2 += skip_uname (u2); for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++); p1 = strdupdelim (s, u1); for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++); p2 = strdupdelim (s, u2); DEBUGP (("Comparing hosts %s and %s...\n", p1, p2)); if (strcasecmp (p1, p2) == 0) { free (p1); free (p2); DEBUGP (("They are quite alike.\n")); return 1; } else if (opt.simple_check) { free (p1); free (p2); DEBUGP (("Since checking is simple, I'd say they are not the same.\n")); return 0; } real1 = realhost (p1); real2 = realhost (p2); free (p1); free (p2); if (strcasecmp (real1, real2) == 0) { DEBUGP (("They are alike, after realhost()->%s.\n", real1)); free (real1); free (real2); return 1; } else { DEBUGP (("They are not the same (%s, %s).\n", real1, real2)); free (real1); free (real2); return 0; } } /* Determine whether a URL is acceptable to be followed, according to a list of domains to accept. */ int accept_domain (struct urlinfo *u) { assert (u->host != NULL); if (opt.domains) { if (!sufmatch ((const char **)opt.domains, u->host)) return 0; } if (opt.exclude_domains) { if (sufmatch ((const char **)opt.exclude_domains, u->host)) return 0; } return 1; } /* Check whether WHAT is matched in LIST, each element of LIST being a pattern to match WHAT against, using backward matching (see match_backwards() in utils.c). If an element of LIST matched, 1 is returned, 0 otherwise. */ int sufmatch (const char **list, const char *what) { int i, j, k, lw; lw = strlen (what); for (i = 0; list[i]; i++) { for (j = strlen (list[i]), k = lw; j >= 0 && k >= 0; j--, k--) if (TOLOWER (list[i][j]) != TOLOWER (what[k])) break; /* The domain must be first to reach to beginning. */ if (j == -1) return 1; } return 0; } /* Return email address of the form username@FQDN suitable for anonymous FTP passwords. This process is error-prone, and the escape hatch is the MY_HOST preprocessor constant, which can be used to hard-code either your hostname or FQDN at compile-time. If the FQDN cannot be determined, a warning is printed, and the function returns a short `username@' form, accepted by most anonymous servers. If not even the username cannot be divined, it means things are seriously fucked up, and Wget exits. */ char * ftp_getaddress (void) { static char *address; /* Do the drill only the first time, as it won't change. */ if (!address) { char userid[32]; /* 9 should be enough for Unix, but I'd rather be on the safe side. */ char *host, *fqdn; if (!pwd_cuserid (userid)) { logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"), exec_name); exit (1); } #ifdef MY_HOST STRDUP_ALLOCA (host, MY_HOST); #else /* not MY_HOST */ #ifdef HAVE_UNAME { struct utsname ubuf; if (uname (&ubuf) < 0) { logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"), exec_name, strerror (errno)); fqdn = ""; goto giveup; } STRDUP_ALLOCA (host, ubuf.nodename); } #else /* not HAVE_UNAME */ #ifdef HAVE_GETHOSTNAME host = alloca (256); if (gethostname (host, 256) < 0) { logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"), exec_name); fqdn = ""; goto giveup; } #else /* not HAVE_GETHOSTNAME */ #error Cannot determine host name. #endif /* not HAVE_GETHOSTNAME */ #endif /* not HAVE_UNAME */ #endif /* not MY_HOST */ /* If the address we got so far contains a period, don't bother anymore. */ if (strchr (host, '.')) fqdn = host; else { /* #### I've seen the following scheme fail on at least one system! Do we care? */ char *tmpstore; /* According to Richard Stevens, the correct way to find the FQDN is to (1) find the host name, (2) find its IP address using gethostbyname(), and (3) get the FQDN using gethostbyaddr(). So that's what we'll do. Step one has been done above. */ /* (2) */ struct hostent *hp = gethostbyname (host); if (!hp || !hp->h_addr_list) { logprintf (LOG_ALWAYS, _("\ %s: Warning: cannot determine local IP address.\n"), exec_name); fqdn = ""; goto giveup; } /* Copy the argument, so the call to gethostbyaddr doesn't clobber it -- just in case. */ tmpstore = (char *)alloca (hp->h_length); memcpy (tmpstore, *hp->h_addr_list, hp->h_length); /* (3) */ hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype); if (!hp || !hp->h_name) { logprintf (LOG_ALWAYS, _("\ %s: Warning: cannot reverse-lookup local IP address.\n"), exec_name); fqdn = ""; goto giveup; } if (!strchr (hp->h_name, '.')) { #if 0 /* This gets ticked pretty often. Karl Berry reports that there can be valid reasons for the local host name not to be an FQDN, so I've decided to remove the annoying warning. */ logprintf (LOG_ALWAYS, _("\ %s: Warning: reverse-lookup of local address did not yield FQDN!\n"), exec_name); #endif fqdn = ""; goto giveup; } /* Once we're here, hp->h_name contains the correct FQDN. */ STRDUP_ALLOCA (fqdn, hp->h_name); } giveup: address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1); sprintf (address, "%s@%s", userid, fqdn); } return address; } /* Print error messages for host errors. */ char * herrmsg (int error) { /* Can't use switch since some constants are equal (at least on my system), and the compiler signals "duplicate case value". */ if (error == HOST_NOT_FOUND || error == NO_RECOVERY || error == NO_DATA || error == NO_ADDRESS || error == TRY_AGAIN) return _("Host not found"); else return _("Unknown error"); } /* Clean the host list. This is a separate function, so we needn't export HLIST and its implementation. Ha! */ void clean_hosts (void) { struct host *l = hlist; while (l) { struct host *p = l->next; free (l->hostname); free (l->realname); free (l); l = p; } hlist = NULL; }