
[svn] Rewrite parsing and handling of URLs.

Published in <sxs4rnnlklo.fsf@florida.arsdigita.de>.
hniksic 2001-11-21 16:24:28 -08:00
parent f4dcb55851
commit d5be8ecca4
22 changed files with 1279 additions and 922 deletions

ChangeLog

@ -1,3 +1,7 @@
2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
* configure.in: Check for strpbrk().
2001-05-14 Herold Heiko <Heiko.Herold@previnet.it>
* windows/Makefile.src:

configure.in

@ -172,7 +172,7 @@ dnl Checks for library functions.
dnl
AC_FUNC_ALLOCA
AC_FUNC_MMAP
AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp)
AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp strpbrk)
AC_CHECK_FUNCS(gettimeofday mktime strptime)
AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
AC_CHECK_FUNCS(uname gethostname)

src/ChangeLog

@ -1,3 +1,53 @@
2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
* utils.c (path_simplify): Don't remove trailing slashes.
* ftp.c (ftp_get_listing): Use it.
* utils.c (file_merge): New function.
* url.c (opt_url): Removed.
* recur.c (recursive_retrieve): Inline "opt_url" logic.
* main.c (main): Use xfree(), not free().
* url.c (rewrite_url_maybe): Renamed to rewrite_shorthand_url.
* ftp.c (ccon): Move `ccon' typedef here, since it's only used
internally.
* config.h.in: Include a stub for HAVE_STRPBRK.
* cmpt.c (strpbrk): Include a replacement for systems without
strpbrk().
* ftp.c: Use url_set_dir and url_set_file when modifying the URL.
* url.c (url_set_dir): New function.
(url_set_file): Ditto.
* ftp-basic.c (ftp_process_type): Process FTP type here; the URL
parser makes the URL "params" available, so we can do that in this
function.
* retr.c: Ditto.
* ftp.c: Ditto; pass the local file information in `ccon'.
* http.c: Get rid of the ugly kludge that had URL being replaced
with the proxy URL when proxy retrieval was requested. Use a
separate parameter to http_loop and gethttp for the proxy URL.
* http.c: Changed to reflect the fact that local file, proxy, and
referer information are no longer stored in struct url. The local
file information is passed in `struct hstat' now.
* url.c: Reworked URL parsing to be more regular. Reencode the
URL using reencode_string.
Removed non-URL-related information from struct url. This
includes fields `proxy', `local', and `referer'.
2001-11-22 Jochen Hein <jochen@jochen.org>
* main.c (main): Split the copyright notice for easier

src/cmpt.c

@ -205,6 +205,24 @@ ret0:
}
#endif /* not HAVE_STRSTR */
#ifndef HAVE_STRPBRK
/* Find the first occurrence in S of any character in ACCEPT. */
char *
strpbrk (const char *s, const char *accept)
{
while (*s != '\0')
{
const char *a = accept;
while (*a != '\0')
if (*a++ == *s)
return (char *) s;
++s;
}
return 0;
}
#endif /* HAVE_STRPBRK */
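For reference, strpbrk's contract (return a pointer to the first character of S that also appears in the ACCEPT set, or NULL if there is none) can be exercised with a minimal standalone test; the driver main() below is illustrative only, not part of this commit:

#include <stdio.h>
#include <string.h>

int
main (void)
{
  const char *url = "ftp://host/file;type=a";
  /* First character of URL that is also in ";?#", if any. */
  char *p = strpbrk (url, ";?#");
  if (p)
    printf ("separator '%c' at offset %ld\n", *p, (long) (p - url));
  else
    puts ("no separator");
  return 0;
}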
#ifndef HAVE_MKTIME
/* From GNU libc 2.0. */

src/config.h.in

@ -141,6 +141,9 @@ char *alloca ();
/* Define if you have the strncasecmp function. */
#undef HAVE_STRNCASECMP
/* Define if you have the strpbrk function. */
#undef HAVE_STRPBRK
/* Define if you have the strptime function. */
#undef HAVE_STRPTIME

src/cookies.c

@ -780,7 +780,7 @@ check_path_match (const char *cookie_path, const char *path)
int
set_cookie_header_cb (const char *hdr, void *closure)
{
struct urlinfo *u = (struct urlinfo *)closure;
struct url *u = (struct url *)closure;
struct cookie *cookie;
cookies_now = time (NULL);

src/ftp-basic.c

@ -633,6 +633,7 @@ ftp_pwd (struct rbuf *rbuf, char **pwd)
/* All OK. */
return FTPOK;
}
/* Sends the SIZE command to the server, and returns the value in 'size'.
* If an error occurs, size is set to zero. */
uerr_t
@ -690,3 +691,16 @@ ftp_size (struct rbuf *rbuf, const char *file, long int *size)
/* All OK. */
return FTPOK;
}
/* If URL's params are of the form "type=X", return character X.
Otherwise, return 'I' (the default type). */
char
ftp_process_type (const char *params)
{
if (params
&& 0 == strncasecmp (params, "type=", 5)
&& params[5] != '\0')
return TOUPPER (params[5]);
else
return 'I';
}
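With the parser now exposing the URL's ";type=X" parameter (RFC 1738 FTP syntax), the helper reduces to a small pure function. A standalone rendition for illustration, using toupper in place of wget's TOUPPER macro:

#include <stdio.h>
#include <ctype.h>
#include <strings.h>

static char
process_type (const char *params)   /* mirrors ftp_process_type above */
{
  if (params
      && 0 == strncasecmp (params, "type=", 5)
      && params[5] != '\0')
    return toupper ((unsigned char) params[5]);
  return 'I';
}

int
main (void)
{
  printf ("%c %c %c\n",
          process_type ("type=a"),   /* A */
          process_type ("type="),    /* I -- empty value */
          process_type (NULL));      /* I -- no params at all */
  return 0;
}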

src/ftp-ls.c

@ -796,7 +796,7 @@ Unsupported listing type, trying Unix listing parser.\n"));
directories and files on the appropriate host. The references are
FTP. */
uerr_t
ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
ftp_index (const char *file, struct url *u, struct fileinfo *f)
{
FILE *fp;
char *upwd;

src/ftp.c

@ -62,6 +62,18 @@ extern int h_errno;
extern char ftp_last_respline[];
typedef struct
{
int st; /* connection status */
int cmd; /* command code */
struct rbuf rbuf; /* control connection buffer */
long dltime; /* time of the download */
enum stype rs; /* remote system reported by ftp server */
char *id; /* initial directory */
char *target; /* target file name */
} ccon;
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
the string S, and return the number converted to long, if found, 0
otherwise. */
@ -108,7 +120,7 @@ ftp_expected_bytes (const char *s)
connection to the server. It always closes the data connection,
and closes the control connection in case of error. */
static uerr_t
getftp (struct urlinfo *u, long *len, long restval, ccon *con)
getftp (struct url *u, long *len, long restval, ccon *con)
{
int csock, dtsock, res;
uerr_t err;
@ -122,7 +134,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
long expected_bytes = 0L;
assert (con != NULL);
assert (u->local != NULL);
assert (con->target != NULL);
/* Debug-check of the sanity of the request by making sure that LIST
and RETR are never both requested (since we can handle only one
at a time). */
@ -144,6 +157,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
csock = RBUF_FD (&con->rbuf);
else /* cmd & DO_LOGIN */
{
char type_char;
/* Login to the server: */
/* First: Establish the control connection. */
@ -325,9 +340,10 @@ Error in server response, closing control connection.\n"));
logputs (LOG_VERBOSE, _("done.\n"));
/* Fifth: Set the FTP type. */
type_char = ftp_process_type (u->params);
if (!opt.server_response)
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", TOUPPER (u->ftp_type));
err = ftp_type (&con->rbuf, TOUPPER (u->ftp_type));
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", type_char);
err = ftp_type (&con->rbuf, type_char);
/* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
switch (err)
{
@ -351,7 +367,7 @@ Error in server response, closing control connection.\n"));
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET,
_("Unknown type `%c', closing control connection.\n"),
TOUPPER (u->ftp_type));
type_char);
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
return err;
@ -701,7 +717,7 @@ Error in server response, closing control connection.\n"));
{
logprintf (LOG_NOTQUIET,
_("\nREST failed; will not truncate `%s'.\n"),
u->local);
con->target);
CLOSE (csock);
closeport (dtsock);
rbuf_uninitialize (&con->rbuf);
@ -850,16 +866,16 @@ Error in server response, closing control connection.\n"));
/* Open the file -- if opt.dfp is set, use it instead. */
if (!opt.dfp || con->cmd & DO_LIST)
{
mkalldirs (u->local);
mkalldirs (con->target);
if (opt.backups)
rotate_backups (u->local);
rotate_backups (con->target);
/* #### Is this correct? */
chmod (u->local, 0600);
chmod (con->target, 0600);
fp = fopen (u->local, restval ? "ab" : "wb");
fp = fopen (con->target, restval ? "ab" : "wb");
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, strerror (errno));
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
closeport (dtsock);
@ -928,7 +944,7 @@ Error in server response, closing control connection.\n"));
if (res == -2)
{
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
u->local, strerror (errno));
con->target, strerror (errno));
CLOSE (csock);
rbuf_uninitialize (&con->rbuf);
return FWRITEERR;
@ -993,10 +1009,10 @@ Error in server response, closing control connection.\n"));
print it out. */
if (opt.server_response && (con->cmd & DO_LIST))
{
mkalldirs (u->local);
fp = fopen (u->local, "r");
mkalldirs (con->target);
fp = fopen (con->target, "r");
if (!fp)
logprintf (LOG_ALWAYS, "%s: %s\n", u->local, strerror (errno));
logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno));
else
{
char *line;
@ -1020,7 +1036,7 @@ Error in server response, closing control connection.\n"));
This loop either gets commands from con, or (if ON_YOUR_OWN is
set), makes them up to retrieve the file given by the URL. */
static uerr_t
ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
{
int count, orig_lp;
long restval, len;
@ -1028,21 +1044,21 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
uerr_t err;
struct stat st;
if (!u->local)
u->local = url_filename (u);
if (!con->target)
con->target = url_filename (u);
if (opt.noclobber && file_exists_p (u->local))
if (opt.noclobber && file_exists_p (con->target))
{
logprintf (LOG_VERBOSE,
_("File `%s' already there, not retrieving.\n"), u->local);
_("File `%s' already there, not retrieving.\n"), con->target);
/* If the file is there, we suppose it's retrieved OK. */
return RETROK;
}
/* Remove it if it's a link. */
remove_link (u->local);
remove_link (con->target);
if (!opt.output_document)
locf = u->local;
locf = con->target;
else
locf = opt.output_document;
@ -1100,7 +1116,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
char *hurl = url_string (u, 1);
char tmp[15];
strcpy (tmp, " ");
if (count > 1)
@ -1175,7 +1191,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Need to hide the password from the URL. The `if' is here
so that we don't do the needless allocation every
time. */
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
tms, hurl, len, locf, count);
xfree (hurl);
@ -1235,43 +1251,48 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Return the directory listing in a reusable format. The directory
is specified in u->dir. */
uerr_t
ftp_get_listing (struct urlinfo *u, ccon *con, struct fileinfo **f)
ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
{
uerr_t err;
char *olocal = u->local;
char *list_filename, *ofile;
char *uf; /* url file name */
char *lf; /* list file name */
char *old_target = con->target;
con->st &= ~ON_YOUR_OWN;
con->cmd |= (DO_LIST | LEAVE_PENDING);
con->cmd &= ~DO_RETR;
/* Get the listing filename. */
ofile = u->file;
u->file = LIST_FILENAME;
list_filename = url_filename (u);
u->file = ofile;
u->local = list_filename;
DEBUGP ((_("Using `%s' as listing tmp file.\n"), list_filename));
/* Find the listing file name. We do it by taking the file name of
the URL and replacing the last component with the listing file
name. */
uf = url_filename (u);
lf = file_merge (uf, LIST_FILENAME);
xfree (uf);
DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
con->target = lf;
err = ftp_loop_internal (u, NULL, con);
u->local = olocal;
con->target = old_target;
if (err == RETROK)
*f = ftp_parse_ls (list_filename, con->rs);
*f = ftp_parse_ls (lf, con->rs);
else
*f = NULL;
if (opt.remove_listing)
{
if (unlink (list_filename))
if (unlink (lf))
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
else
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), list_filename);
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
}
xfree (list_filename);
xfree (lf);
con->cmd &= ~DO_LIST;
return err;
}
static uerr_t ftp_retrieve_dirs PARAMS ((struct urlinfo *, struct fileinfo *,
static uerr_t ftp_retrieve_dirs PARAMS ((struct url *, struct fileinfo *,
ccon *));
static uerr_t ftp_retrieve_glob PARAMS ((struct urlinfo *, ccon *, int));
static uerr_t ftp_retrieve_glob PARAMS ((struct url *, ccon *, int));
static struct fileinfo *delelement PARAMS ((struct fileinfo *,
struct fileinfo **));
static void freefileinfo PARAMS ((struct fileinfo *f));
@ -1284,11 +1305,10 @@ static void freefileinfo PARAMS ((struct fileinfo *f));
If opt.recursive is set, after all files have been retrieved,
ftp_retrieve_dirs will be called to retrieve the directories. */
static uerr_t
ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
{
static int depth = 0;
uerr_t err;
char *olocal, *ofile;
struct fileinfo *orig;
long local_size;
time_t tml;
@ -1323,15 +1343,19 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
while (f)
{
char *old_target, *ofile;
if (downloaded_exceeds_quota ())
{
--depth;
return QUOTEXC;
}
olocal = u->local;
ofile = u->file;
u->file = f->name;
u->local = url_filename (u);
old_target = con->target;
ofile = xstrdup (u->file);
url_set_file (u, f->name);
con->target = url_filename (u);
err = RETROK;
dlthis = 1;
@ -1343,7 +1367,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
I'm not implementing it now since files on an FTP server are much
more likely than files on an HTTP server to legitimately have a
.orig suffix. */
if (!stat (u->local, &st))
if (!stat (con->target, &st))
{
int eq_size;
int cor_val;
@ -1360,7 +1384,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Remote file is older, file sizes can be compared and
are both equal. */
logprintf (LOG_VERBOSE, _("\
Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
dlthis = 0;
}
else if (eq_size)
@ -1368,7 +1392,7 @@ Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
/* Remote file is newer or sizes cannot be matched */
logprintf (LOG_VERBOSE, _("\
Remote file is newer than local file `%s' -- retrieving.\n\n"),
u->local);
con->target);
}
else
{
@ -1396,30 +1420,30 @@ The sizes do not match (local %ld) -- retrieving.\n\n"), local_size);
struct stat st;
/* Check whether we already have the correct
symbolic link. */
int rc = lstat (u->local, &st);
int rc = lstat (con->target, &st);
if (rc == 0)
{
size_t len = strlen (f->linkto) + 1;
if (S_ISLNK (st.st_mode))
{
char *link_target = (char *)alloca (len);
size_t n = readlink (u->local, link_target, len);
size_t n = readlink (con->target, link_target, len);
if ((n == len - 1)
&& (memcmp (link_target, f->linkto, n) == 0))
{
logprintf (LOG_VERBOSE, _("\
Already have correct symlink %s -> %s\n\n"),
u->local, f->linkto);
con->target, f->linkto);
dlthis = 0;
break;
}
}
}
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
u->local, f->linkto);
con->target, f->linkto);
/* Unlink before creating symlink! */
unlink (u->local);
if (symlink (f->linkto, u->local) == -1)
unlink (con->target);
if (symlink (f->linkto, con->target) == -1)
logprintf (LOG_NOTQUIET, "symlink: %s\n",
strerror (errno));
logputs (LOG_VERBOSE, "\n");
@ -1427,7 +1451,7 @@ Already have correct symlink %s -> %s\n\n"),
#else /* not HAVE_SYMLINK */
logprintf (LOG_NOTQUIET,
_("Symlinks not supported, skipping symlink `%s'.\n"),
u->local);
con->target);
#endif /* not HAVE_SYMLINK */
}
else /* opt.retr_symlinks */
@ -1458,7 +1482,7 @@ Already have correct symlink %s -> %s\n\n"),
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
&& f->tstamp != -1
&& dlthis
&& file_exists_p (u->local))
&& file_exists_p (con->target))
{
/* #### This code repeats in http.c and ftp.c. Move it to a
function! */
@ -1469,27 +1493,31 @@ Already have correct symlink %s -> %s\n\n"),
fl = opt.output_document;
}
else
fl = u->local;
fl = con->target;
if (fl)
touch (fl, f->tstamp);
}
else if (f->tstamp == -1)
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), u->local);
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
if (f->perms && f->type == FT_PLAINFILE && dlthis)
chmod (u->local, f->perms);
chmod (con->target, f->perms);
else
DEBUGP (("Unrecognized permissions for %s.\n", u->local));
DEBUGP (("Unrecognized permissions for %s.\n", con->target));
xfree (con->target);
con->target = old_target;
url_set_file (u, ofile);
xfree (ofile);
xfree (u->local);
u->local = olocal;
u->file = ofile;
/* Break on fatals. */
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
break;
con->cmd &= ~ (DO_CWD | DO_LOGIN);
f = f->next;
} /* while */
}
/* We do not want to call ftp_retrieve_dirs here */
if (opt.recursive &&
!(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
@ -1506,51 +1534,62 @@ Already have correct symlink %s -> %s\n\n"),
ftp_retrieve_glob on each directory entry. The function knows
about excluded directories. */
static uerr_t
ftp_retrieve_dirs (struct urlinfo *u, struct fileinfo *f, ccon *con)
ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con)
{
char *odir;
char *current_container = NULL;
int current_length = 0;
char *container = NULL;
int container_size = 0;
for (; f; f = f->next)
{
int len;
int size;
char *odir, *newdir;
if (downloaded_exceeds_quota ())
break;
if (f->type != FT_DIRECTORY)
continue;
odir = u->dir;
len = strlen (u->dir) + 1 + strlen (f->name) + 1;
/* Allocate u->dir off stack, but reallocate only if a larger
string is needed. */
if (len > current_length)
current_container = (char *)alloca (len);
u->dir = current_container;
string is needed. It's a pity there's no "realloca" for an
item on the bottom of the stack. */
size = strlen (u->dir) + 1 + strlen (f->name) + 1;
if (size > container_size)
container = (char *)alloca (size);
newdir = container;
odir = u->dir;
if (*odir == '\0'
|| (*odir == '/' && *(odir + 1) == '\0'))
/* If ODIR is empty or just "/", simply append f->name to
ODIR. (In the former case, to preserve u->dir being
relative; in the latter case, to avoid double slash.) */
sprintf (u->dir, "%s%s", odir, f->name);
sprintf (newdir, "%s%s", odir, f->name);
else
/* Else, use a separator. */
sprintf (u->dir, "%s/%s", odir, f->name);
sprintf (newdir, "%s/%s", odir, f->name);
DEBUGP (("Composing new CWD relative to the initial directory.\n"));
DEBUGP ((" odir = '%s'\n f->name = '%s'\n u->dir = '%s'\n\n",
odir, f->name, u->dir));
if (!accdir (u->dir, ALLABS))
DEBUGP ((" odir = '%s'\n f->name = '%s'\n newdir = '%s'\n\n",
odir, f->name, newdir));
if (!accdir (newdir, ALLABS))
{
logprintf (LOG_VERBOSE, _("\
Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
u->dir = odir;
Not descending to `%s' as it is excluded/not-included.\n"), newdir);
continue;
}
con->st &= ~DONE_CWD;
odir = xstrdup (u->dir); /* because url_set_dir will free
u->dir. */
url_set_dir (u, newdir);
ftp_retrieve_glob (u, con, GETALL);
url_set_dir (u, odir);
xfree (odir);
/* Set the time-stamp? */
u->dir = odir;
}
if (opt.quota && opt.downloaded > opt.quota)
return QUOTEXC;
else
@ -1567,7 +1606,7 @@ Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
get the listing, so that the time-stamp is heeded); if it's GLOBALL,
use globbing; if it's GETALL, download the whole directory. */
static uerr_t
ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
ftp_retrieve_glob (struct url *u, ccon *con, int action)
{
struct fileinfo *orig, *start;
uerr_t res;
@ -1607,7 +1646,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
matchres = fnmatch (u->file, f->name, 0);
if (matchres == -1)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local,
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target,
strerror (errno));
break;
}
@ -1657,7 +1696,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
of URL. Inherently, its capabilities are limited on what can be
encoded into a URL. */
uerr_t
ftp_loop (struct urlinfo *u, int *dt)
ftp_loop (struct url *u, int *dt)
{
ccon con; /* FTP connection */
uerr_t res;
@ -1686,7 +1725,7 @@ ftp_loop (struct urlinfo *u, int *dt)
{
char *filename = (opt.output_document
? xstrdup (opt.output_document)
: (u->local ? xstrdup (u->local)
: (con.target ? xstrdup (con.target)
: url_filename (u)));
res = ftp_index (filename, u, f);
if (res == FTPOK && opt.verbose)
@ -1736,6 +1775,8 @@ ftp_loop (struct urlinfo *u, int *dt)
CLOSE (RBUF_FD (&con.rbuf));
FREE_MAYBE (con.id);
con.id = NULL;
FREE_MAYBE (con.target);
con.target = NULL;
return res;
}

src/ftp.h

@ -46,7 +46,7 @@ uerr_t ftp_syst PARAMS ((struct rbuf *, enum stype *));
uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
struct urlinfo;
struct url;
/* File types. */
enum ftype
@ -98,19 +98,12 @@ enum wget_ftp_fstatus
correct. */
};
typedef struct
{
int st; /* connection status */
int cmd; /* command code */
struct rbuf rbuf; /* control connection buffer */
long dltime; /* time of the download */
enum stype rs; /* remote system reported by ftp server */
char *id; /* initial directory */
} ccon;
struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
uerr_t ftp_loop PARAMS ((struct urlinfo *, int *));
uerr_t ftp_loop PARAMS ((struct url *, int *));
uerr_t ftp_index (const char *, struct url *, struct fileinfo *);
char ftp_process_type PARAMS ((const char *));
uerr_t ftp_index (const char *, struct urlinfo *, struct fileinfo *);
#endif /* FTP_H */

src/host.c

@ -327,7 +327,7 @@ same_host (const char *u1, const char *u2)
/* Determine whether a URL is acceptable to be followed, according to
a list of domains to accept. */
int
accept_domain (struct urlinfo *u)
accept_domain (struct url *u)
{
assert (u->host != NULL);
if (opt.domains)

src/host.h

@ -20,7 +20,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifndef HOST_H
#define HOST_H
struct urlinfo;
struct url;
/* Function declarations */
@ -31,7 +31,7 @@ void clean_hosts PARAMS ((void));
char *realhost PARAMS ((const char *));
int same_host PARAMS ((const char *, const char *));
int accept_domain PARAMS ((struct urlinfo *));
int accept_domain PARAMS ((struct url *));
int sufmatch PARAMS ((const char **, const char *));
char *ftp_getaddress PARAMS ((void));

src/http.c

@ -464,16 +464,22 @@ struct http_stat
long dltime; /* time of the download */
int no_truncate; /* whether truncating the file is
forbidden. */
const char *referer; /* value of the referer header. */
char **local_file; /* local file. */
};
/* Free the elements of hstat X. */
#define FREEHSTAT(x) do \
{ \
FREE_MAYBE ((x).newloc); \
FREE_MAYBE ((x).remote_time); \
FREE_MAYBE ((x).error); \
(x).newloc = (x).remote_time = (x).error = NULL; \
} while (0)
static void
free_hstat (struct http_stat *hs)
{
FREE_MAYBE (hs->newloc);
FREE_MAYBE (hs->remote_time);
FREE_MAYBE (hs->error);
/* Guard against being called twice. */
hs->newloc = NULL;
hs->remote_time = NULL;
hs->error = NULL;
}
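The function form, unlike the old FREEHSTAT macro, nulls each pointer after freeing it, so a repeated call on the same struct is harmless. A short sketch of the idiom, assuming http.c's struct http_stat and xstrdup are in scope:

struct http_stat hs = { 0 };
hs.newloc = xstrdup ("http://elsewhere/");
free_hstat (&hs);   /* frees hs.newloc and resets it to NULL */
free_hstat (&hs);   /* second call is now a safe no-op */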
static char *create_authorization_line PARAMS ((const char *, const char *,
const char *, const char *,
@ -499,23 +505,22 @@ time_t http_atotm PARAMS ((char *));
response code correctly, it is not used in a sane way. The caller
can do that, though.
If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
and u->proxy->url will be given to the proxy server (bad naming,
I'm afraid). */
If PROXY is non-NULL, the connection will be made to the proxy
server, and u->url will be requested. */
static uerr_t
gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
{
char *request, *type, *command, *path;
char *request, *type, *command, *full_path;
char *user, *passwd;
char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
char *pragma_h, *referer, *useragent, *range, *wwwauth;
char *authenticate_h;
char *proxyauth;
char *all_headers;
char *port_maybe;
char *request_keep_alive;
int sock, hcount, num_written, all_length, remport, statcode;
int sock, hcount, num_written, all_length, statcode;
long contlen, contrange;
struct urlinfo *ou;
struct url *conn;
uerr_t err;
FILE *fp;
int auth_tried_already;
@ -542,7 +547,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
/* initialize ssl_ctx on first run */
if (!ssl_ctx)
{
err=init_ssl (&ssl_ctx);
err = init_ssl (&ssl_ctx);
if (err != 0)
{
switch (err)
@ -579,12 +584,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
know the local filename so we can save to it. */
assert (u->local != NULL);
assert (*hs->local_file != NULL);
authenticate_h = 0;
auth_tried_already = 0;
inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
again:
/* We need to come back here when the initial attempt to retrieve
@ -602,29 +607,29 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
hs->remote_time = NULL;
hs->error = NULL;
/* Which structure to use to retrieve the original URL data. */
if (u->proxy)
ou = u->proxy;
else
ou = u;
/* If we're using a proxy, we will be connecting to the proxy
server. */
conn = proxy ? proxy : u;
/* First: establish the connection. */
if (inhibit_keep_alive
||
#ifndef HAVE_SSL
!persistent_available_p (u->host, u->port)
!persistent_available_p (conn->host, conn->port)
#else
!persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
!persistent_available_p (conn->host, conn->port,
u->scheme == SCHEME_HTTPS)
#endif /* HAVE_SSL */
)
{
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
err = make_connection (&sock, u->host, u->port);
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
conn->host, conn->port);
err = make_connection (&sock, conn->host, conn->port);
switch (err)
{
case HOSTERR:
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
logprintf (LOG_NOTQUIET, "%s: %s.\n", conn->host, herrmsg (h_errno));
return HOSTERR;
break;
case CONSOCKERR:
@ -635,7 +640,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
case CONREFUSED:
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET,
_("Connection to %s:%hu refused.\n"), u->host, u->port);
_("Connection to %s:%hu refused.\n"), conn->host,
conn->port);
CLOSE (sock);
return CONREFUSED;
case CONERROR:
@ -653,7 +659,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
break;
}
#ifdef HAVE_SSL
if (u->scheme == SCHEME_HTTPS)
if (conn->scheme == SCHEME_HTTPS)
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
{
logputs (LOG_VERBOSE, "\n");
@ -666,7 +672,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
}
else
{
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
conn->host, conn->port);
/* #### pc_last_fd should be accessed through an accessor
function. */
sock = pc_last_fd;
@ -676,22 +683,20 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
DEBUGP (("Reusing fd %d.\n", sock));
}
if (u->proxy)
path = u->proxy->url;
else
path = u->path;
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
referer = NULL;
if (ou->referer)
if (hs->referer)
{
referer = (char *)alloca (9 + strlen (ou->referer) + 3);
sprintf (referer, "Referer: %s\r\n", ou->referer);
referer = (char *)alloca (9 + strlen (hs->referer) + 3);
sprintf (referer, "Referer: %s\r\n", hs->referer);
}
if (*dt & SEND_NOCACHE)
pragma_h = "Pragma: no-cache\r\n";
else
pragma_h = "";
if (hs->restval)
{
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
@ -714,9 +719,9 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
sprintf (useragent, "Wget/%s", version_string);
}
/* Construct the authentication, if userid is present. */
user = ou->user;
passwd = ou->passwd;
search_netrc (ou->host, (const char **)&user, (const char **)&passwd, 0);
user = u->user;
passwd = u->passwd;
search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
user = user ? user : opt.http_user;
passwd = passwd ? passwd : opt.http_passwd;
@ -750,12 +755,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
else
{
wwwauth = create_authorization_line (authenticate_h, user, passwd,
command, ou->path);
command, u->path);
}
}
proxyauth = NULL;
if (u->proxy)
if (proxy)
{
char *proxy_user, *proxy_passwd;
/* For normal username and password, URL components override
@ -770,31 +775,22 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
}
else
{
proxy_user = u->user;
proxy_passwd = u->passwd;
proxy_user = proxy->user;
proxy_passwd = proxy->passwd;
}
/* #### This is junky. Can't the proxy request, say, `Digest'
authentication? */
/* #### This does not appear right. Can't the proxy request,
say, `Digest' authentication? */
if (proxy_user && proxy_passwd)
proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
"Proxy-Authorization");
}
remhost = ou->host;
remport = ou->port;
/* String of the form :PORT. Used only for non-standard ports. */
port_maybe = NULL;
if (1
#ifdef HAVE_SSL
&& remport != (u->scheme == SCHEME_HTTPS
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
#else
&& remport != DEFAULT_HTTP_PORT
#endif
)
if (u->port != scheme_default_port (u->scheme))
{
port_maybe = (char *)alloca (numdigit (remport) + 2);
sprintf (port_maybe, ":%d", remport);
port_maybe = (char *)alloca (numdigit (u->port) + 2);
sprintf (port_maybe, ":%d", u->port);
}
if (!inhibit_keep_alive)
@ -803,18 +799,24 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
request_keep_alive = NULL;
if (opt.cookies)
cookies = build_cookies_request (ou->host, ou->port, ou->path,
cookies = build_cookies_request (u->host, u->port, u->path,
#ifdef HAVE_SSL
ou->scheme == SCHEME_HTTPS
u->scheme == SCHEME_HTTPS
#else
0
#endif
);
if (proxy)
full_path = xstrdup (u->url);
else
full_path = url_full_path (u);
/* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path)
request = (char *)alloca (strlen (command)
+ strlen (full_path)
+ strlen (useragent)
+ strlen (remhost)
+ strlen (u->host)
+ (port_maybe ? strlen (port_maybe) : 0)
+ strlen (HTTP_ACCEPT)
+ (request_keep_alive
@ -834,7 +836,8 @@ User-Agent: %s\r\n\
Host: %s%s\r\n\
Accept: %s\r\n\
%s%s%s%s%s%s%s%s\r\n",
command, path, useragent, remhost,
command, full_path,
useragent, u->host,
port_maybe ? port_maybe : "",
HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
@ -846,10 +849,12 @@ Accept: %s\r\n\
pragma_h,
opt.user_header ? opt.user_header : "");
DEBUGP (("---request begin---\n%s---request end---\n", request));
/* Free the temporary memory. */
/* Free the temporary memory. */
FREE_MAYBE (wwwauth);
FREE_MAYBE (proxyauth);
FREE_MAYBE (cookies);
xfree (full_path);
/* Send the request to server. */
#ifdef HAVE_SSL
@ -867,7 +872,7 @@ Accept: %s\r\n\
return WRITEFAILED;
}
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
u->proxy ? "Proxy" : "HTTP");
proxy ? "Proxy" : "HTTP");
contlen = contrange = -1;
type = NULL;
statcode = -1;
@ -1075,9 +1080,9 @@ Accept: %s\r\n\
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
#ifndef HAVE_SSL
register_persistent (u->host, u->port, sock);
register_persistent (conn->host, conn->port, sock);
#else
register_persistent (u->host, u->port, sock, ssl);
register_persistent (conn->host, conn->port, sock, ssl);
#endif /* HAVE_SSL */
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
@ -1086,7 +1091,7 @@ Accept: %s\r\n\
/* Authorization is required. */
FREE_MAYBE (type);
type = NULL;
FREEHSTAT (*hs);
free_hstat (hs);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
if (auth_tried_already)
@ -1163,16 +1168,17 @@ Accept: %s\r\n\
text/html file. If some case-insensitive variation on ".htm[l]" isn't
already the file's suffix, tack on ".html". */
{
char* last_period_in_local_filename = strrchr(u->local, '.');
char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
if (last_period_in_local_filename == NULL ||
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
strcasecmp(last_period_in_local_filename, ".html") == EQ))
{
size_t local_filename_len = strlen(u->local);
size_t local_filename_len = strlen(*hs->local_file);
u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
strcpy(u->local + local_filename_len, ".html");
*hs->local_file = xrealloc(*hs->local_file,
local_filename_len + sizeof(".html"));
strcpy(*hs->local_file + local_filename_len, ".html");
*dt |= ADDED_HTML_EXTENSION;
}
@ -1224,7 +1230,7 @@ Accept: %s\r\n\
_("\
\n\
Continued download failed on this file, which conflicts with `-c'.\n\
Refusing to truncate existing file `%s'.\n\n"), u->local);
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
FREE_MAYBE (type);
FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock);
@ -1300,13 +1306,13 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
/* Open the local file. */
if (!opt.dfp)
{
mkalldirs (u->local);
mkalldirs (*hs->local_file);
if (opt.backups)
rotate_backups (u->local);
fp = fopen (u->local, hs->restval ? "ab" : "wb");
rotate_backups (*hs->local_file);
fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
if (!fp)
{
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */
FREE_MAYBE (all_headers);
@ -1375,7 +1381,8 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
/* The genuine HTTP loop! This is the part where the retrieval is
retried, and retried, and retried, and... */
uerr_t
http_loop (struct urlinfo *u, char **newloc, int *dt)
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
int *dt, struct url *proxy)
{
int count;
int use_ts, got_head = 0; /* time-stamping info */
@ -1388,6 +1395,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
size_t filename_len;
struct http_stat hstat; /* HTTP status */
struct stat st;
char *dummy = NULL;
/* This used to be done in main(), but it's a better idea to do it
here so that we don't go through the hoops if we're just using
@ -1407,34 +1415,46 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
/* Determine the local filename. */
if (!u->local)
u->local = url_filename (u->proxy ? u->proxy : u);
if (local_file && *local_file)
hstat.local_file = local_file;
else if (local_file)
{
*local_file = url_filename (u);
hstat.local_file = local_file;
}
else
{
dummy = url_filename (u);
hstat.local_file = &dummy;
}
if (!opt.output_document)
locf = u->local;
locf = *hstat.local_file;
else
locf = opt.output_document;
filename_len = strlen (u->local);
hstat.referer = referer;
filename_len = strlen (*hstat.local_file);
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
if (opt.noclobber && file_exists_p (u->local))
if (opt.noclobber && file_exists_p (*hstat.local_file))
{
/* If opt.noclobber is turned on and file already exists, do not
retrieve the file */
logprintf (LOG_VERBOSE, _("\
File `%s' already there, will not retrieve.\n"), u->local);
File `%s' already there, will not retrieve.\n"), *hstat.local_file);
/* If the file is there, we suppose it's retrieved OK. */
*dt |= RETROKF;
/* #### Bogusness alert. */
/* If its suffix is "html" or (yuck!) "htm", we suppose it's
text/html, a harmless lie. */
if (((suf = suffix (u->local)) != NULL)
/* If its suffix is "html" or "htm", assume text/html. */
if (((suf = suffix (*hstat.local_file)) != NULL)
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
*dt |= TEXTHTML;
xfree (suf);
/* Another harmless lie: */
FREE_MAYBE (dummy);
return RETROK;
}
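The local_file juggling above is a three-way out-parameter convention: the caller may dictate the file name, ask to be told the chosen name, or opt out entirely (in which case the internal `dummy` receives it and is freed before returning). A self-contained sketch of the same idiom, with hypothetical names:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Returns the slot that will hold the file name, mirroring how
   http_loop points hstat.local_file at *local_file or at dummy. */
static char **
choose_slot (char **local_file, char **dummy)
{
  if (local_file && *local_file)
    return local_file;                      /* caller fixed the name */
  if (local_file)
    {
      *local_file = strdup ("index.html");  /* choose, report back */
      return local_file;
    }
  *dummy = strdup ("index.html");           /* caller doesn't care */
  return dummy;
}

int
main (void)
{
  char *name = NULL, *dummy = NULL;
  char **slot = choose_slot (&name, &dummy);
  printf ("using %s\n", *slot);
  free (name);
  free (dummy);
  return 0;
}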
@ -1461,7 +1481,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
in url.c. Replacing sprintf with inline calls to
strcpy() and long_to_string() made a difference.
--hniksic */
memcpy (filename_plus_orig_suffix, u->local, filename_len);
memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
memcpy (filename_plus_orig_suffix + filename_len,
".orig", sizeof (".orig"));
@ -1475,8 +1495,8 @@ File `%s' already there, will not retrieve.\n"), u->local);
if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
if (stat (u->local, &st) == 0)
local_filename = u->local;
if (stat (*hstat.local_file, &st) == 0)
local_filename = *hstat.local_file;
if (local_filename != NULL)
/* There was a local file, so we'll check later to see if the version
@ -1503,7 +1523,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
/* Print fetch message, if opt.verbose. */
if (opt.verbose)
{
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
char *hurl = url_string (u, 1);
char tmp[15];
strcpy (tmp, " ");
if (count > 1)
@ -1545,22 +1565,22 @@ File `%s' already there, will not retrieve.\n"), u->local);
Some proxies are notorious for caching incomplete data, so
we require a fresh get.
b) caching is explicitly inhibited. */
if ((u->proxy && count > 1) /* a */
|| !opt.allow_cache /* b */
if ((proxy && count > 1) /* a */
|| !opt.allow_cache /* b */
)
*dt |= SEND_NOCACHE;
else
*dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. :-) */
err = gethttp (u, &hstat, dt);
/* Try fetching the document, or at least its head. */
err = gethttp (u, &hstat, dt, proxy);
/* It's unfortunate that wget determines the local filename before finding
out the Content-Type of the file. Barring a major restructuring of the
code, we need to re-set locf here, since gethttp() may have xrealloc()d
u->local to tack on ".html". */
*hstat.local_file to tack on ".html". */
if (!opt.output_document)
locf = u->local;
locf = *hstat.local_file;
else
locf = opt.output_document;
@ -1577,29 +1597,32 @@ File `%s' already there, will not retrieve.\n"), u->local);
/* Non-fatal errors continue executing the loop, which will
bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */
FREEHSTAT (hstat);
free_hstat (&hstat);
printwhat (count, opt.ntry);
continue;
break;
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return err;
break;
case FWRITEERR: case FOPENERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno));
FREEHSTAT (hstat);
*hstat.local_file, strerror (errno));
free_hstat (&hstat);
FREE_MAYBE (dummy);
return err;
break;
case CONSSLERR:
/* Another fatal error. */
logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return err;
break;
case NEWLOCATION:
@ -1609,14 +1632,18 @@ File `%s' already there, will not retrieve.\n"), u->local);
logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"),
hstat.statcode);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return WRONGCODE;
}
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return NEWLOCATION;
break;
case RETRUNNEEDED:
/* The file was already fully retrieved. */
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK;
break;
case RETRFINISHED:
@ -1631,14 +1658,15 @@ File `%s' already there, will not retrieve.\n"), u->local);
if (!opt.verbose)
{
/* #### Ugly ugly ugly! */
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
xfree (hurl);
}
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return WRONGCODE;
}
@ -1681,7 +1709,8 @@ Last-modified header invalid -- time-stamp ignored.\n"));
logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"),
local_filename);
FREEHSTAT (hstat);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK;
}
else if (tml >= tmr)
@ -1691,7 +1720,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n"));
}
FREEHSTAT (hstat);
free_hstat (&hstat);
continue;
}
if ((tmr != (time_t) (-1))
@ -1710,7 +1739,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
fl = opt.output_document;
}
else
fl = u->local;
fl = *hstat.local_file;
if (fl)
touch (fl, tmr);
}
@ -1719,13 +1748,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
if (opt.spider)
{
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
FREE_MAYBE (dummy);
return RETROK;
}
/* It is now safe to free the remainder of hstat, since the
strings within it will no longer be used. */
FREEHSTAT (hstat);
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
if (hstat.len == hstat.contlen)
@ -1748,6 +1774,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK;
}
else if (hstat.res == 0) /* No read error */
@ -1773,6 +1801,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK;
}
else if (hstat.len < hstat.contlen) /* meaning we lost the
@ -1782,6 +1812,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Connection closed at byte %ld. "),
tms, tmrate, hstat.len);
printwhat (count, opt.ntry);
free_hstat (&hstat);
continue;
}
else if (!opt.kill_longer) /* meaning we got more than expected */
@ -1801,6 +1832,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK;
}
else /* the same, but not accepted */
@ -1809,6 +1842,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Connection closed at byte %ld/%ld. "),
tms, tmrate, hstat.len, hstat.contlen);
printwhat (count, opt.ntry);
free_hstat (&hstat);
continue;
}
}
@ -1820,6 +1854,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Read error at byte %ld (%s)."),
tms, tmrate, hstat.len, strerror (errno));
printwhat (count, opt.ntry);
free_hstat (&hstat);
continue;
}
else /* hstat.res == -1 and contlen is given */
@ -1829,6 +1864,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
tms, tmrate, hstat.len, hstat.contlen,
strerror (errno));
printwhat (count, opt.ntry);
free_hstat (&hstat);
continue;
}
}

src/main.c

@ -727,7 +727,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
/* Fill in the arguments. */
for (i = 0; i < nurl; i++, optind++)
{
char *rewritten = rewrite_url_maybe (argv[optind]);
char *rewritten = rewrite_shorthand_url (argv[optind]);
if (rewritten)
{
printf ("Converted %s to %s\n", argv[optind], rewritten);
@ -845,10 +845,12 @@ Can't timestamp and not clobber old files at the same time.\n"));
{
convert_all_links ();
}
log_close ();
for (i = 0; i < nurl; i++)
free (url[i]);
xfree (url[i]);
cleanup ();
#ifdef DEBUG_MALLOC
print_malloc_debug_stats ();
#endif
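The calling convention visible in the hunk above: rewrite_shorthand_url returns freshly allocated storage when it rewrites the argument, and NULL when the argument is already a full URL. A runnable sketch of that contract; the stub's prepend-http:// rule is only an assumption for demonstration, not the function's actual logic:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Stand-in with the same contract as rewrite_shorthand_url:
   NULL means "no rewriting needed", otherwise a malloc'd result. */
static char *
rewrite_stub (const char *arg)
{
  if (strstr (arg, "://"))
    return NULL;                            /* already has a scheme */
  char *r = malloc (strlen (arg) + sizeof "http://");
  sprintf (r, "http://%s", arg);            /* assumed rewrite rule */
  return r;
}

int
main (int argc, char **argv)
{
  int i;
  for (i = 1; i < argc; i++)
    {
      char *rewritten = rewrite_stub (argv[i]);
      if (rewritten)
        printf ("Converted %s to %s\n", argv[i], rewritten);
      free (rewritten);
    }
  return 0;
}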

src/recur.c

@ -120,9 +120,8 @@ recursive_retrieve (const char *file, const char *this_url)
int dt, inl, dash_p_leaf_HTML = FALSE;
int meta_disallow_follow;
int this_url_ftp; /* See below the explanation */
uerr_t err;
urlpos *url_list, *cur_url;
struct urlinfo *u;
struct url *u;
assert (this_url != NULL);
assert (file != NULL);
@ -140,9 +139,8 @@ recursive_retrieve (const char *file, const char *this_url)
hash_table_clear (undesirable_urls);
string_set_add (undesirable_urls, this_url);
/* Enter this_url to the hash table, in original and "enhanced" form. */
u = newurl ();
err = parseurl (this_url, u, 0);
if (err == URLOK)
u = url_parse (this_url, NULL);
if (u)
{
string_set_add (undesirable_urls, u->url);
if (opt.no_parent)
@ -156,7 +154,7 @@ recursive_retrieve (const char *file, const char *this_url)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
base_dir = NULL;
}
freeurl (u, 1);
url_free (u);
depth = 1;
first_time = 0;
}
@ -210,11 +208,10 @@ recursive_retrieve (const char *file, const char *this_url)
break;
/* Parse the URL for convenient use in other functions, as well
as to get the optimized form. It also checks URL integrity. */
u = newurl ();
if (parseurl (cur_url->url, u, 0) != URLOK)
u = url_parse (cur_url->url, NULL);
if (!u)
{
DEBUGP (("Yuck! A bad URL.\n"));
freeurl (u, 1);
continue;
}
assert (u->url != NULL);
@ -281,8 +278,8 @@ recursive_retrieve (const char *file, const char *this_url)
if (!(base_dir && frontcmp (base_dir, u->dir)))
{
/* Failing that, check for parent dir. */
struct urlinfo *ut = newurl ();
if (parseurl (this_url, ut, 0) != URLOK)
struct url *ut = url_parse (this_url, NULL);
if (!ut)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
else if (!frontcmp (ut->dir, u->dir))
{
@ -291,7 +288,7 @@ recursive_retrieve (const char *file, const char *this_url)
string_set_add (undesirable_urls, constr);
inl = 1;
}
freeurl (ut, 1);
url_free (ut);
}
}
/* If the file does not match the acceptance list, or is on the
@ -343,7 +340,16 @@ recursive_retrieve (const char *file, const char *this_url)
if (!inl)
{
if (!opt.simple_check)
opt_url (u);
{
/* Find the "true" host. */
char *host = realhost (u->host);
xfree (u->host);
u->host = host;
/* Refresh the printed representation of the URL. */
xfree (u->url);
u->url = url_string (u, 0);
}
else
{
char *p;
@ -351,7 +357,7 @@ recursive_retrieve (const char *file, const char *this_url)
for (p = u->host; *p; p++)
*p = TOLOWER (*p);
xfree (u->url);
u->url = str_url (u, 0);
u->url = url_string (u, 0);
}
xfree (constr);
constr = xstrdup (u->url);
@ -473,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* Free filename and constr. */
FREE_MAYBE (filename);
FREE_MAYBE (constr);
freeurl (u, 1);
url_free (u);
/* Increment the pbuf for the appropriate size. */
}
if (opt.convert_links && !opt.delete_after)
@ -573,13 +579,9 @@ convert_all_links (void)
char *local_name;
/* The URL must be in canonical form to be compared. */
struct urlinfo *u = newurl ();
uerr_t res = parseurl (cur_url->url, u, 0);
if (res != URLOK)
{
freeurl (u, 1);
continue;
}
struct url *u = url_parse (cur_url->url, NULL);
if (!u)
continue;
/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
@ -608,7 +610,7 @@ convert_all_links (void)
cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL;
}
freeurl (u, 1);
url_free (u);
}
/* Convert the links in the file. */
convert_links (html->string, urls);

src/retr.c

@ -51,9 +51,6 @@ extern int errno;
int global_download_count;
void logflush PARAMS ((void));
/* From http.c. */
uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
/* Flags for show_progress(). */
enum spflags { SP_NONE, SP_INIT, SP_FINISH };
@ -314,9 +311,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
uerr_t result;
char *url;
int location_changed, dummy;
int local_use_proxy;
int use_proxy;
char *mynewloc, *proxy;
struct urlinfo *u;
struct url *u;
int up_error_code; /* url parse error code */
char *local_file;
struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */
@ -328,80 +327,74 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (file)
*file = NULL;
u = newurl ();
/* Parse the URL. */
result = parseurl (url, u, 0);
if (result != URLOK)
u = url_parse (url, &up_error_code);
if (!u)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
if (redirections)
string_set_free (redirections);
xfree (url);
return result;
return URLERROR;
}
if (!refurl)
refurl = opt.referer;
redirected:
/* Set the referer. */
if (refurl)
u->referer = xstrdup (refurl);
else
{
if (opt.referer)
u->referer = xstrdup (opt.referer);
else
u->referer = NULL;
}
result = NOCONERROR;
mynewloc = NULL;
local_file = NULL;
local_use_proxy = USE_PROXY_P (u);
if (local_use_proxy)
use_proxy = USE_PROXY_P (u);
if (use_proxy)
{
struct urlinfo *pu = newurl ();
struct url *proxy_url;
/* Copy the original URL to new location. */
memcpy (pu, u, sizeof (*u));
pu->proxy = NULL; /* A minor correction :) */
/* Initialize u to nil. */
memset (u, 0, sizeof (*u));
u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the
current scheme. */
proxy = getproxy (pu->scheme);
/* Get the proxy server for the current scheme. */
proxy = getproxy (u->scheme);
if (!proxy)
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
/* Parse the proxy URL. */
result = parseurl (proxy, u, 0);
if (result != URLOK || u->scheme != SCHEME_HTTP)
proxy_url = url_parse (proxy, &up_error_code);
if (!proxy_url)
{
if (u->scheme == SCHEME_HTTP)
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "Error parsing proxy URL %s: %s.\n",
proxy, url_error (up_error_code));
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
u->scheme = SCHEME_HTTP;
if (proxy_url->scheme != SCHEME_HTTP)
{
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
url_free (proxy_url);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
url_free (proxy_url);
}
mynewloc = NULL;
if (u->scheme == SCHEME_HTTP
else if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
|| u->scheme == SCHEME_HTTPS
#endif
)
result = http_loop (u, &mynewloc, dt);
{
result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
}
else if (u->scheme == SCHEME_FTP)
{
/* If this is a redirection, we must not allow recursive FTP
@ -412,13 +405,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
opt.recursive = 0;
result = ftp_loop (u, dt);
opt.recursive = oldrec;
#if 0
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
according to the suffix. The HTML suffixes are `.html' and
`.htm', case-insensitive.
#### All of this is, of course, crap. These types should be
determined through mailcap. */
`.htm', case-insensitive. */
if (redirections && u->local && (u->scheme == SCHEME_FTP))
{
char *suf = suffix (u->local);
@ -426,16 +417,19 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*dt |= TEXTHTML;
FREE_MAYBE (suf);
}
#endif
}
location_changed = (result == NEWLOCATION);
if (location_changed)
{
char *construced_newloc;
uerr_t newloc_result;
struct urlinfo *newloc_struct;
struct url *newloc_struct;
assert (mynewloc != NULL);
if (local_file)
xfree (local_file);
/* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs out
there break the rules and use relative URLs, and popular
@ -445,13 +439,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */
newloc_struct = newurl ();
newloc_result = parseurl (mynewloc, newloc_struct, 1);
if (newloc_result != URLOK)
newloc_struct = url_parse (mynewloc, NULL);
if (!newloc_struct)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
freeurl (newloc_struct, 1);
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, "UNKNOWN");
url_free (newloc_struct);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
@ -473,14 +466,14 @@ retrieve_url (const char *origurl, char **file, char **newloc,
string_set_add (redirections, u->url);
}
/* The new location is OK. Let's check for redirection cycle by
/* The new location is OK. Check for redirection cycle by
peeking through the history of redirections. */
if (string_set_contains (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
url_free (newloc_struct);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
@ -491,29 +484,27 @@ retrieve_url (const char *origurl, char **file, char **newloc,
xfree (url);
url = mynewloc;
freeurl (u, 1);
url_free (u);
u = newloc_struct;
goto redirected;
}
if (u->local)
if (local_file)
{
if (*dt & RETROKF)
{
register_download (url, u->local);
register_download (url, local_file);
if (*dt & TEXTHTML)
register_html (url, u->local);
register_html (url, local_file);
}
}
if (file)
{
if (u->local)
*file = xstrdup (u->local);
else
*file = NULL;
}
freeurl (u, 1);
*file = local_file ? local_file : NULL;
else
FREE_MAYBE (local_file);
url_free (u);
if (redirections)
string_set_free (redirections);

src/retr.h

@ -36,4 +36,12 @@ int downloaded_exceeds_quota PARAMS ((void));
void sleep_between_retrievals PARAMS ((int));
/* Because there's no http.h. */
struct url;
uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
int *, struct url *));
#endif /* RETR_H */

src/url.c

File diff suppressed because it is too large (1246 lines changed).

src/url.h

@ -25,6 +25,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
/* Note: the ordering here is related to the order of elements in
`supported_schemes' in url.c. */
enum url_scheme {
SCHEME_HTTP,
#ifdef HAVE_SSL
@ -35,24 +38,27 @@ enum url_scheme {
};
/* Structure containing info on a URL. */
struct urlinfo
struct url
{
char *url; /* Unchanged URL */
char *url; /* Original URL */
enum url_scheme scheme; /* URL scheme */
char *host; /* Extracted hostname */
unsigned short port;
char ftp_type;
char *path, *dir, *file, *qstring;
/* Path, dir, file, and query string
(properly decoded) */
char *user, *passwd; /* Username and password */
struct urlinfo *proxy; /* The exact string to pass to proxy
server */
char *referer; /* The source from which the request
URI was obtained */
char *local; /* The local filename of the URL
document */
int port; /* Port number */
/* URL components (URL-quoted). */
char *path;
char *params;
char *query;
char *fragment;
/* Extracted path info (unquoted). */
char *dir;
char *file;
/* Username and password (unquoted). */
char *user;
char *passwd;
};
enum convert_options {
@ -104,19 +110,21 @@ typedef enum
char *encode_string PARAMS ((const char *));
struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int));
enum url_scheme url_detect_scheme PARAMS ((const char *));
struct url *url_parse PARAMS ((const char *, int *));
const char *url_error PARAMS ((int));
char *url_full_path PARAMS ((const struct url *));
void url_set_dir PARAMS ((struct url *, const char *));
void url_set_file PARAMS ((struct url *, const char *));
void url_free PARAMS ((struct url *));
enum url_scheme url_scheme PARAMS ((const char *));
int url_skip_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *));
int scheme_default_port PARAMS ((enum url_scheme));
int url_skip_uname PARAMS ((const char *));
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int));
/* url_equal is not currently used. */
#if 0
int url_equal PARAMS ((const char *, const char *));
#endif /* 0 */
char *url_string PARAMS ((const struct url *, int));
urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
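Putting the new API together, a caller might look like the sketch below. The field values noted in the comment are an assumption about how url_parse decomposes the example URL, not something this header guarantees:

#include <stdio.h>
#include <stdlib.h>
#include "url.h"   /* the declarations above */

static void
demo (void)
{
  int err;
  struct url *u = url_parse ("http://host:8080/a/b/file;type=i?q=1", &err);
  if (!u)
    {
      fprintf (stderr, "%s\n", url_error (err));
      return;
    }
  /* Presumed decomposition: host "host", port 8080, path
     "/a/b/file", params "type=i", query "q=1", with dir and file
     holding the decoded "a/b" and "file" halves of the path. */
  char *printed = url_string (u, 1);   /* 1 = hide any password */
  puts (printed);
  free (printed);
  url_free (u);
}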
@ -126,8 +134,7 @@ char *uri_merge PARAMS ((const char *, const char *));
void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *));
void opt_url PARAMS ((struct urlinfo *));
char *url_filename PARAMS ((const struct url *));
char *getproxy PARAMS ((uerr_t));
int no_proxy_match PARAMS ((const char *, const char **));
@ -137,6 +144,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
char *rewrite_url_maybe PARAMS ((const char *));
char *rewrite_shorthand_url PARAMS ((const char *));
#endif /* URL_H */

src/utils.c

@ -404,30 +404,6 @@ datetime_str (time_t *tm)
ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
return output;
}
/* Returns an error message for ERRNUM. #### This requires more work.
This function, as well as the whole error system, is very
ill-conceived. */
const char *
uerrmsg (uerr_t errnum)
{
switch (errnum)
{
case URLUNKNOWN:
return _("Unknown/unsupported protocol");
break;
case URLBADPORT:
return _("Invalid port specification");
break;
case URLBADHOST:
return _("Invalid host name");
break;
default:
abort ();
/* $@#@#$ compiler. */
return NULL;
}
}
/* The Windows versions of the following two functions are defined in
mswindows.c. */
@ -464,6 +440,14 @@ fork_to_background (void)
}
#endif /* not WINDOWS */
char *
ps (char *orig)
{
char *r = xstrdup (orig);
path_simplify (r);
return r;
}
/* Canonicalize PATH, and return a new path. The new path differs from PATH
in that:
Multiple `/'s are collapsed to a single `/'.
@ -479,7 +463,8 @@ fork_to_background (void)
Always use '/' as stub_char.
Don't check for local things using canon_stat.
Change the original string instead of strdup-ing.
React correctly when beginning with `./' and `../'. */
React correctly when beginning with `./' and `../'.
Don't zip out trailing slashes. */
void
path_simplify (char *path)
{
@ -545,20 +530,15 @@ path_simplify (char *path)
i = start + 1;
}
/* Check for trailing `/'. */
if (start && !path[i])
{
zero_last:
path[--i] = '\0';
break;
}
/* Check for `../', `./' or trailing `.' by itself. */
if (path[i] == '.')
{
/* Handle trailing `.' by itself. */
if (!path[i + 1])
goto zero_last;
{
path[--i] = '\0';
break;
}
/* Handle `./'. */
if (path[i + 1] == '/')
@ -579,12 +559,6 @@ path_simplify (char *path)
}
} /* path == '.' */
} /* while */
if (!*path)
{
*path = stub_char;
path[1] = '\0';
}
}
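The practical effect of the change (per the updated comment, trailing slashes are no longer zipped out) on a typical path; results inferred from the rules described above rather than from a test run:

char path[] = "/a/./b//../c/";
path_simplify (path);
/* Old behavior: "/a/c"  (trailing '/' removed)
   New behavior: "/a/c/" (trailing '/' preserved) */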
/* "Touch" FILE, i.e. make its atime and mtime equal to the time
@ -728,6 +702,30 @@ make_directory (const char *directory)
}
return 0;
}
/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
should be a file name. For example, file_merge("/foo/bar", "baz")
will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
"foo/bar/baz".
In other words, it's a simpler and gentler version of uri_merge_1. */
char *
file_merge (const char *base, const char *file)
{
char *result;
const char *cut = (const char *)strrchr (base, '/');
if (!cut)
cut = base + strlen (base);
result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
memcpy (result, base, cut - base);
result[cut - base] = '/';
strcpy (result + (cut - base) + 1, file);
return result;
}
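Its behavior on the comment's own examples, for quick reference (the returned strings are heap-allocated and must be freed by the caller):

char *a = file_merge ("/foo/bar", "baz");    /* yields "/foo/baz"     */
char *b = file_merge ("/foo/bar/", "baz");   /* yields "/foo/bar/baz" */
xfree (a);
xfree (b);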
static int in_acclist PARAMS ((const char *const *, const char *, int));

src/utils.h

@ -44,8 +44,6 @@ struct wget_timer;
char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *));
const char *uerrmsg PARAMS ((uerr_t));
#ifdef DEBUG_MALLOC
void print_malloc_debug_stats ();
#endif
@ -63,6 +61,7 @@ int file_exists_p PARAMS ((const char *));
int file_non_directory_p PARAMS ((const char *));
int make_directory PARAMS ((const char *));
char *unique_name PARAMS ((const char *));
char *file_merge PARAMS ((const char *, const char *));
int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd));

src/wget.h

@ -285,9 +285,8 @@ typedef enum
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLHTTPS,
URLOK, URLHTTP, URLFTP, URLFILE, URLUNKNOWN, URLBADPORT,
URLBADHOST, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
FTPINVPASV, FTPNOPASV,
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,