mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
564 lines
16 KiB
C
564 lines
16 KiB
C
/* Dealing with host names.
|
|
Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.
|
|
|
|
This file is part of Wget.
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <ctype.h>
|
|
#ifdef HAVE_STRING_H
|
|
# include <string.h>
|
|
#else
|
|
# include <strings.h>
|
|
#endif
|
|
#include <assert.h>
|
|
#include <sys/types.h>
|
|
|
|
#ifdef WINDOWS
|
|
# include <winsock.h>
|
|
#else
|
|
# include <sys/socket.h>
|
|
# include <netinet/in.h>
|
|
# include <arpa/inet.h>
|
|
# include <netdb.h>
|
|
#endif /* WINDOWS */
|
|
|
|
#ifdef HAVE_SYS_UTSNAME_H
|
|
# include <sys/utsname.h>
|
|
#endif
|
|
#include <errno.h>
|
|
|
|
#include "wget.h"
|
|
#include "utils.h"
|
|
#include "host.h"
|
|
#include "url.h"
|
|
|
|
#ifndef errno
|
|
extern int errno;
|
|
#endif
|
|
|
|
/* One entry of the host cache, which is a singly-linked list.  */
struct host
{
  /* Symbolic name of the host, spelled the way it was given when the
     entry was created, e.g. "fly.cc.fer.hr".  */
  char *hostname;
  /* The host's "real" name: its IP address written out as a
     dotted-quad ASCII string (N.N.N.N), e.g. "161.53.70.130".  */
  char *realname;
  /* Several HOSTNAMEs may share a single REALNAME.  The name under
     which an address was first encountered is treated as canonical,
     and that entry carries a non-zero QUALITY; aliases recorded later
     carry QUALITY 0.  */
  int quality;
  /* Following entry in the list, or NULL for the last one.  */
  struct host *next;
};
|
|
|
|
static struct host *hlist;
|
|
|
|
static struct host *add_hlist PARAMS ((struct host *, const char *,
|
|
const char *, int));
|
|
|
|
/* Resolve NAME like gethostbyname does, but also accept a numeric
   internet address of the form `N.N.N.N'; such an address is looked
   up with gethostbyaddr instead.

   The return value is whatever the resolver call returned, i.e. NULL
   when the lookup failed.  */
struct hostent *
ngethostbyname (const char *name)
{
  unsigned long addr;
  char ipv4[4];

  addr = (unsigned long)inet_addr (name);
  /* An all-ones result means NAME is not a dotted quad, so treat it
     as an ordinary host name.  */
  if ((int)addr == -1)
    return gethostbyname (name);

  /* An IPv4 address is exactly four bytes, while `unsigned long' may
     well be eight.  Handing &addr and sizeof (addr) to gethostbyaddr
     would then pass a bogus length and, on big-endian machines, the
     zero half of the word.  Extract the four address bytes first, the
     same way store_hostaddress does.  */
#ifdef WORDS_BIGENDIAN
  if (sizeof (addr) == 8)
    addr <<= 32;
#endif
  memcpy (ipv4, &addr, sizeof (ipv4));
  return gethostbyaddr (ipv4, sizeof (ipv4), AF_INET);
}
|
|
|
|
/* Search for HOST in the linked list L, by hostname. Return the
|
|
entry, if found, or NULL. The search is case-insensitive. */
|
|
static struct host *
|
|
search_host (struct host *l, const char *host)
|
|
{
|
|
for (; l; l = l->next)
|
|
if (strcasecmp (l->hostname, host) == 0)
|
|
return l;
|
|
return NULL;
|
|
}
|
|
|
|
/* Like search_host, but searches by address. */
|
|
static struct host *
|
|
search_address (struct host *l, const char *address)
|
|
{
|
|
for (; l; l = l->next)
|
|
{
|
|
int cmp = strcmp (l->realname, address);
|
|
if (cmp == 0)
|
|
return l;
|
|
else if (cmp > 0)
|
|
return NULL;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* Store the address of HOSTNAME, internet-style, to WHERE. First
|
|
check for it in the host list, and (if not found), use
|
|
ngethostbyname to get it.
|
|
|
|
Return 1 on successful finding of the hostname, 0 otherwise. */
|
|
int
|
|
store_hostaddress (unsigned char *where, const char *hostname)
|
|
{
|
|
struct host *t;
|
|
unsigned long addr;
|
|
struct hostent *hptr;
|
|
struct in_addr in;
|
|
char *inet_s;
|
|
|
|
/* If the address is of the form d.d.d.d, there will be no trouble
|
|
with it. */
|
|
addr = (unsigned long)inet_addr (hostname);
|
|
if ((int)addr == -1)
|
|
{
|
|
/* If it is not of that form, try to find it in the cache. */
|
|
t = search_host (hlist, hostname);
|
|
if (t)
|
|
addr = (unsigned long)inet_addr (t->realname);
|
|
}
|
|
/* If we have the numeric address, just store it. */
|
|
if ((int)addr != -1)
|
|
{
|
|
/* This works on both little and big endian architecture, as
|
|
inet_addr returns the address in the proper order. */
|
|
#ifdef WORDS_BIGENDIAN
|
|
if (sizeof (addr) == 8)
|
|
addr <<= 32;
|
|
#endif
|
|
memcpy (where, &addr, 4);
|
|
return 1;
|
|
}
|
|
/* Since all else has failed, let's try gethostbyname(). Note that
|
|
we use gethostbyname() rather than ngethostbyname(), because we
|
|
*know* the address is not numerical. */
|
|
hptr = gethostbyname (hostname);
|
|
if (!hptr)
|
|
return 0;
|
|
/* Copy the address of the host to socket description. */
|
|
memcpy (where, hptr->h_addr_list[0], hptr->h_length);
|
|
/* Now that we're here, we could as well cache the hostname for
|
|
future use, as in realhost(). First, we have to look for it by
|
|
address to know if it's already in the cache by another name. */
|
|
|
|
/* Originally, we copied to in.s_addr, but it appears to be missing
|
|
on some systems. */
|
|
memcpy (&in, *hptr->h_addr_list, sizeof (in));
|
|
STRDUP_ALLOCA (inet_s, inet_ntoa (in));
|
|
t = search_address (hlist, inet_s);
|
|
if (t) /* Found in the list, as realname. */
|
|
{
|
|
/* Set the default, 0 quality. */
|
|
hlist = add_hlist (hlist, hostname, inet_s, 0);
|
|
return 1;
|
|
}
|
|
/* Since this is really the first time this host is encountered,
|
|
set quality to 1. */
|
|
hlist = add_hlist (hlist, hostname, inet_s, 1);
|
|
return 1;
|
|
}
|
|
|
|
/* Add a host to the host list. The list is sorted by addresses. For
|
|
equal addresses, the entries with quality should bubble towards the
|
|
beginning of the list. */
|
|
static struct host *
|
|
add_hlist (struct host *l, const char *nhost, const char *nreal, int quality)
|
|
{
|
|
struct host *t, *old, *beg;
|
|
|
|
/* The entry goes to the beginning of the list if the list is empty
|
|
or the order requires it. */
|
|
if (!l || (strcmp (nreal, l->realname) < 0))
|
|
{
|
|
t = (struct host *)xmalloc (sizeof (struct host));
|
|
t->hostname = xstrdup (nhost);
|
|
t->realname = xstrdup (nreal);
|
|
t->quality = quality;
|
|
t->next = l;
|
|
return t;
|
|
}
|
|
|
|
beg = l;
|
|
/* Second two one-before-the-last element. */
|
|
while (l->next)
|
|
{
|
|
int cmp;
|
|
old = l;
|
|
l = l->next;
|
|
cmp = strcmp (nreal, l->realname);
|
|
if (cmp >= 0)
|
|
continue;
|
|
/* If the next list element is greater than s, put s between the
|
|
current and the next list element. */
|
|
t = (struct host *)xmalloc (sizeof (struct host));
|
|
old->next = t;
|
|
t->next = l;
|
|
t->hostname = xstrdup (nhost);
|
|
t->realname = xstrdup (nreal);
|
|
t->quality = quality;
|
|
return beg;
|
|
}
|
|
t = (struct host *)xmalloc (sizeof (struct host));
|
|
t->hostname = xstrdup (nhost);
|
|
t->realname = xstrdup (nreal);
|
|
t->quality = quality;
|
|
/* Insert the new element after the last element. */
|
|
l->next = t;
|
|
t->next = NULL;
|
|
return beg;
|
|
}
|
|
|
|
/* Determine the "real" name of HOST, as perceived by Wget. If HOST
|
|
is referenced by more than one name, "real" name is considered to
|
|
be the first one encountered in the past.
|
|
|
|
If the host cannot be found in the list of already dealt-with
|
|
hosts, try with its INET address. If this fails too, add it to the
|
|
list. The routine does not call gethostbyname twice for the same
|
|
host if it can possibly avoid it. */
|
|
char *
|
|
realhost (const char *host)
|
|
{
|
|
struct host *l;
|
|
struct in_addr in;
|
|
struct hostent *hptr;
|
|
char *inet_s;
|
|
|
|
DEBUGP (("Checking for %s.\n", host));
|
|
/* Look for the host, looking by the host name. */
|
|
l = search_host (hlist, host);
|
|
if (l && l->quality) /* Found it with quality */
|
|
{
|
|
DEBUGP (("%s was already used, by that name.\n", host));
|
|
/* Here we return l->hostname, not host, because of the possible
|
|
case differences (e.g. jaGOR.srce.hr and jagor.srce.hr are
|
|
the same, but we want the one that was first. */
|
|
return xstrdup (l->hostname);
|
|
}
|
|
else if (!l) /* Not found, with or without quality */
|
|
{
|
|
/* The fact that gethostbyname will get called makes it
|
|
necessary to store it to the list, to ensure that
|
|
gethostbyname will not be called twice for the same string.
|
|
However, the quality argument must be set appropriately.
|
|
|
|
Note that add_hlist must be called *after* the realname
|
|
search, or the quality would be always set to 0 */
|
|
DEBUGP (("This is the first time I hear about host %s by that name.\n",
|
|
host));
|
|
hptr = ngethostbyname (host);
|
|
if (!hptr)
|
|
return xstrdup (host);
|
|
/* Originally, we copied to in.s_addr, but it appears to be
|
|
missing on some systems. */
|
|
memcpy (&in, *hptr->h_addr_list, sizeof (in));
|
|
STRDUP_ALLOCA (inet_s, inet_ntoa (in));
|
|
}
|
|
else /* Found, without quality */
|
|
{
|
|
/* This case happens when host is on the list,
|
|
but not as first entry (the one with quality).
|
|
Then we just get its INET address and pick
|
|
up the first entry with quality. */
|
|
DEBUGP (("We've dealt with host %s, but under the name %s.\n",
|
|
host, l->realname));
|
|
STRDUP_ALLOCA (inet_s, l->realname);
|
|
}
|
|
|
|
/* Now we certainly have the INET address. The following loop is
|
|
guaranteed to pick either an entry with quality (because it is
|
|
the first one), or none at all. */
|
|
l = search_address (hlist, inet_s);
|
|
if (l) /* Found in the list, as realname. */
|
|
{
|
|
/* Set the default, 0 quality. */
|
|
hlist = add_hlist (hlist, host, inet_s, 0);
|
|
return xstrdup (l->hostname);
|
|
}
|
|
/* Since this is really the first time this host is encountered,
|
|
set quality to 1. */
|
|
hlist = add_hlist (hlist, host, inet_s, 1);
|
|
return xstrdup (host);
|
|
}
|
|
|
|
/* Compare two hostnames (out of URL-s if the arguments are URL-s),
|
|
taking care of aliases. It uses realhost() to determine a unique
|
|
hostname for each of two hosts. If simple_check is non-zero, only
|
|
strcmp() is used for comparison. */
|
|
int
|
|
same_host (const char *u1, const char *u2)
|
|
{
|
|
const char *s;
|
|
char *p1, *p2;
|
|
char *real1, *real2;
|
|
|
|
/* Skip protocol, if present. */
|
|
u1 += skip_url (u1);
|
|
u2 += skip_url (u2);
|
|
u1 += skip_proto (u1);
|
|
u2 += skip_proto (u2);
|
|
|
|
/* Skip username ans password, if present. */
|
|
u1 += skip_uname (u1);
|
|
u2 += skip_uname (u2);
|
|
|
|
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
|
|
p1 = strdupdelim (s, u1);
|
|
for (s = u2; *u2 && *u2 != '/' && *u2 != ':'; u2++);
|
|
p2 = strdupdelim (s, u2);
|
|
DEBUGP (("Comparing hosts %s and %s...\n", p1, p2));
|
|
if (strcasecmp (p1, p2) == 0)
|
|
{
|
|
free (p1);
|
|
free (p2);
|
|
DEBUGP (("They are quite alike.\n"));
|
|
return 1;
|
|
}
|
|
else if (opt.simple_check)
|
|
{
|
|
free (p1);
|
|
free (p2);
|
|
DEBUGP (("Since checking is simple, I'd say they are not the same.\n"));
|
|
return 0;
|
|
}
|
|
real1 = realhost (p1);
|
|
real2 = realhost (p2);
|
|
free (p1);
|
|
free (p2);
|
|
if (strcasecmp (real1, real2) == 0)
|
|
{
|
|
DEBUGP (("They are alike, after realhost()->%s.\n", real1));
|
|
free (real1);
|
|
free (real2);
|
|
return 1;
|
|
}
|
|
else
|
|
{
|
|
DEBUGP (("They are not the same (%s, %s).\n", real1, real2));
|
|
free (real1);
|
|
free (real2);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/* Determine whether a URL is acceptable to be followed, according to
|
|
a list of domains to accept. */
|
|
int
|
|
accept_domain (struct urlinfo *u)
|
|
{
|
|
assert (u->host != NULL);
|
|
if (opt.domains)
|
|
{
|
|
if (!sufmatch ((const char **)opt.domains, u->host))
|
|
return 0;
|
|
}
|
|
if (opt.exclude_domains)
|
|
{
|
|
if (sufmatch ((const char **)opt.exclude_domains, u->host))
|
|
return 0;
|
|
}
|
|
return 1;
|
|
}
|
|
|
|
/* Check whether WHAT ends with one of the patterns in the
   NULL-terminated array LIST.  Each pattern is compared with WHAT
   backwards, starting at the terminating null characters, without
   regard to letter case (see also match_backwards() in utils.c).

   Returns 1 if some element of LIST matched, 0 otherwise.  */
int
sufmatch (const char **list, const char *what)
{
  int what_len = strlen (what);
  const char **pattern;

  for (pattern = list; *pattern; pattern++)
    {
      int pi = strlen (*pattern);
      int wi = what_len;

      /* Step towards the front of both strings for as long as the
         characters agree.  */
      while (pi >= 0 && wi >= 0
             && TOLOWER ((*pattern)[pi]) == TOLOWER (what[wi]))
        {
          pi--;
          wi--;
        }
      /* It is a match only if the whole pattern was consumed.  */
      if (pi == -1)
        return 1;
    }
  return 0;
}
|
|
|
|
/* Return email address of the form username@FQDN suitable for
|
|
anonymous FTP passwords. This process is error-prone, and the
|
|
escape hatch is the MY_HOST preprocessor constant, which can be
|
|
used to hard-code either your hostname or FQDN at compile-time.
|
|
|
|
If the FQDN cannot be determined, a warning is printed, and the
|
|
function returns a short `username@' form, accepted by most
|
|
anonymous servers.
|
|
|
|
If not even the username cannot be divined, it means things are
|
|
seriously fucked up, and Wget exits. */
|
|
char *
|
|
ftp_getaddress (void)
|
|
{
|
|
static char *address;
|
|
|
|
/* Do the drill only the first time, as it won't change. */
|
|
if (!address)
|
|
{
|
|
char userid[32]; /* 9 should be enough for Unix, but
|
|
I'd rather be on the safe side. */
|
|
char *host, *fqdn;
|
|
|
|
if (!pwd_cuserid (userid))
|
|
{
|
|
logprintf (LOG_ALWAYS, _("%s: Cannot determine user-id.\n"),
|
|
exec_name);
|
|
exit (1);
|
|
}
|
|
#ifdef MY_HOST
|
|
STRDUP_ALLOCA (host, MY_HOST);
|
|
#else /* not MY_HOST */
|
|
#ifdef HAVE_UNAME
|
|
{
|
|
struct utsname ubuf;
|
|
if (uname (&ubuf) < 0)
|
|
{
|
|
logprintf (LOG_ALWAYS, _("%s: Warning: uname failed: %s\n"),
|
|
exec_name, strerror (errno));
|
|
fqdn = "";
|
|
goto giveup;
|
|
}
|
|
STRDUP_ALLOCA (host, ubuf.nodename);
|
|
}
|
|
#else /* not HAVE_UNAME */
|
|
#ifdef HAVE_GETHOSTNAME
|
|
host = alloca (256);
|
|
if (gethostname (host, 256) < 0)
|
|
{
|
|
logprintf (LOG_ALWAYS, _("%s: Warning: gethostname failed\n"),
|
|
exec_name);
|
|
fqdn = "";
|
|
goto giveup;
|
|
}
|
|
#else /* not HAVE_GETHOSTNAME */
|
|
#error Cannot determine host name.
|
|
#endif /* not HAVE_GETHOSTNAME */
|
|
#endif /* not HAVE_UNAME */
|
|
#endif /* not MY_HOST */
|
|
/* If the address we got so far contains a period, don't bother
|
|
anymore. */
|
|
if (strchr (host, '.'))
|
|
fqdn = host;
|
|
else
|
|
{
|
|
/* #### I've seen the following scheme fail on at least one
|
|
system! Do we care? */
|
|
char *tmpstore;
|
|
/* According to Richard Stevens, the correct way to find the
|
|
FQDN is to (1) find the host name, (2) find its IP
|
|
address using gethostbyname(), and (3) get the FQDN using
|
|
gethostbyaddr(). So that's what we'll do. Step one has
|
|
been done above. */
|
|
/* (2) */
|
|
struct hostent *hp = gethostbyname (host);
|
|
if (!hp || !hp->h_addr_list)
|
|
{
|
|
logprintf (LOG_ALWAYS, _("\
|
|
%s: Warning: cannot determine local IP address.\n"),
|
|
exec_name);
|
|
fqdn = "";
|
|
goto giveup;
|
|
}
|
|
/* Copy the argument, so the call to gethostbyaddr doesn't
|
|
clobber it -- just in case. */
|
|
tmpstore = (char *)alloca (hp->h_length);
|
|
memcpy (tmpstore, *hp->h_addr_list, hp->h_length);
|
|
/* (3) */
|
|
hp = gethostbyaddr (tmpstore, hp->h_length, hp->h_addrtype);
|
|
if (!hp || !hp->h_name)
|
|
{
|
|
logprintf (LOG_ALWAYS, _("\
|
|
%s: Warning: cannot reverse-lookup local IP address.\n"),
|
|
exec_name);
|
|
fqdn = "";
|
|
goto giveup;
|
|
}
|
|
if (!strchr (hp->h_name, '.'))
|
|
{
|
|
#if 0
|
|
/* This gets ticked pretty often. Karl Berry reports
|
|
that there can be valid reasons for the local host
|
|
name not to be an FQDN, so I've decided to remove the
|
|
annoying warning. */
|
|
logprintf (LOG_ALWAYS, _("\
|
|
%s: Warning: reverse-lookup of local address did not yield FQDN!\n"),
|
|
exec_name);
|
|
#endif
|
|
fqdn = "";
|
|
goto giveup;
|
|
}
|
|
/* Once we're here, hp->h_name contains the correct FQDN. */
|
|
STRDUP_ALLOCA (fqdn, hp->h_name);
|
|
}
|
|
giveup:
|
|
address = (char *)xmalloc (strlen (userid) + 1 + strlen (fqdn) + 1);
|
|
sprintf (address, "%s@%s", userid, fqdn);
|
|
}
|
|
return address;
|
|
}
|
|
|
|
/* Print error messages for host errors. */
|
|
char *
|
|
herrmsg (int error)
|
|
{
|
|
/* Can't use switch since some constants are equal (at least on my
|
|
system), and the compiler signals "duplicate case value". */
|
|
if (error == HOST_NOT_FOUND
|
|
|| error == NO_RECOVERY
|
|
|| error == NO_DATA
|
|
|| error == NO_ADDRESS
|
|
|| error == TRY_AGAIN)
|
|
return _("Host not found");
|
|
else
|
|
return _("Unknown error");
|
|
}
|
|
|
|
/* Clean the host list. This is a separate function, so we needn't
|
|
export HLIST and its implementation. Ha! */
|
|
void
|
|
clean_hosts (void)
|
|
{
|
|
struct host *l = hlist;
|
|
|
|
while (l)
|
|
{
|
|
struct host *p = l->next;
|
|
free (l->hostname);
|
|
free (l->realname);
|
|
free (l);
|
|
l = p;
|
|
}
|
|
hlist = NULL;
|
|
}
|