
[svn] Rewrite parsing and handling of URLs.

Published in <sxs4rnnlklo.fsf@florida.arsdigita.de>.
hniksic 2001-11-21 16:24:28 -08:00
parent f4dcb55851
commit d5be8ecca4
22 changed files with 1279 additions and 922 deletions

ChangeLog

@@ -1,3 +1,7 @@
+2001-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+	* configure.in: Check for strpbrk().
+
 2001-05-14  Herold Heiko  <Heiko.Herold@previnet.it>
 
 	* windows/Makefile.src:

configure.in

@@ -172,7 +172,7 @@ dnl Checks for library functions.
 dnl
 AC_FUNC_ALLOCA
 AC_FUNC_MMAP
-AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp)
+AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp strpbrk)
 AC_CHECK_FUNCS(gettimeofday mktime strptime)
 AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
 AC_CHECK_FUNCS(uname gethostname)

src/ChangeLog

@@ -1,3 +1,53 @@
+2001-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+	* utils.c (path_simplify): Don't remove trailing slashes.
+
+	* ftp.c (ftp_get_listing): Use it.
+
+	* utils.c (file_merge): New function.
+
+	* url.c (opt_url): Removed.
+
+	* recur.c (recursive_retrieve): Inline "opt_url" logic.
+
+	* main.c (main): Use xfree(), not free().
+
+	* url.c (rewrite_url_maybe): Renamed to rewrite_shorthand_url.
+
+	* ftp.c (ccon): Move `ccon' typedef here, since it's only used
+	internally.
+
+	* config.h.in: Include a stub for HAVE_STRPBRK.
+
+	* cmpt.c (strpbrk): Include a replacement for systems without
+	strpbrk().
+
+	* ftp.c: Use url_set_dir and url_set_file when modifying the URL.
+
+	* url.c (url_set_dir): New function.
+	(url_set_file): Ditto.
+
+	* ftp-basic.c (ftp_process_type): Process FTP type here; the URL
+	parser makes the URL "params" available, so we can do that in this
+	function.
+
+	* retr.c: Ditto.
+
+	* ftp.c: Ditto; pass the local file information in `ccon'.
+
+	* http.c: Get rid of the ugly kludge that had URL being replaced
+	with the proxy URL when proxy retrieval was requested.  Use a
+	separate parameter to http_loop and gethttp for the proxy URL.
+
+	* http.c: Changed to reflect the fact that local file, proxy, and
+	referer information are no longer stored in struct url.  The local
+	file information is passed in `struct hstat' now.
+
+	* url.c: Reworked URL parsing to be more regular.  Reencode the
+	URL using reencode_string.
+	Removed non-URL-related information from struct url.  This
+	includes fields `proxy', `local', and `referer'.
+
 2001-11-22  Jochen Hein  <jochen@jochen.org>
 
 	* main.c (main): Split the copyright notice for easier

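The entry above introduces url_set_file() and url_set_dir() as the sanctioned way of modifying a parsed URL. A rough sketch of the usage pattern, based only on the call sites visible later in this diff (that the setters free the old value and keep the rest of struct url consistent is an assumption, not something shown here):

    /* Hypothetical helper -- not part of the commit. */
    static void
    retrieve_sibling (struct url *u, const char *new_file)
    {
      char *old_file = xstrdup (u->file);  /* remember the current component */

      url_set_file (u, new_file);          /* assumed to update u->file and any
                                              derived fields consistently */
      /* ... retrieve U as usual ... */

      url_set_file (u, old_file);          /* restore the original state */
      xfree (old_file);
    }

This mirrors what ftp_retrieve_list() does below when it walks a directory listing.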
src/cmpt.c

@@ -205,6 +205,24 @@ ret0:
 }
 #endif /* not HAVE_STRSTR */
 
+#ifndef HAVE_STRPBRK
+/* Find the first ocurrence in S of any character in ACCEPT.  */
+char *
+strpbrk (const char *s, const char *accept)
+{
+  while (*s != '\0')
+    {
+      const char *a = accept;
+      while (*a != '\0')
+        if (*a++ == *s)
+          return (char *) s;
+      ++s;
+    }
+  return 0;
+}
+#endif /* HAVE_STRPBRK */
+
 #ifndef HAVE_MKTIME
 /* From GNU libc 2.0.  */

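For readers who have not met strpbrk() before, a small standalone example of the behaviour the fallback above provides (illustrative only, not part of wget):

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      const char *url = "host.example/path?query#frag";
      /* Locate the first character that terminates the path component.  */
      char *p = strpbrk (url, "?#");
      if (p)
        printf ("path part is %d characters long\n", (int) (p - url));
      else
        printf ("no query or fragment present\n");
      return 0;
    }

On systems whose libc already provides strpbrk(), HAVE_STRPBRK is defined by the configure check added above and this replacement is compiled out.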
src/config.h.in

@@ -141,6 +141,9 @@ char *alloca ();
 /* Define if you have the strncasecmp function.  */
 #undef HAVE_STRNCASECMP
 
+/* Define if you have the strpbrk function.  */
+#undef HAVE_STRPBRK
+
 /* Define if you have the strptime function.  */
 #undef HAVE_STRPTIME

src/cookies.c

@@ -780,7 +780,7 @@ check_path_match (const char *cookie_path, const char *path)
 int
 set_cookie_header_cb (const char *hdr, void *closure)
 {
-  struct urlinfo *u = (struct urlinfo *)closure;
+  struct url *u = (struct url *)closure;
   struct cookie *cookie;
   cookies_now = time (NULL);

src/ftp-basic.c

@@ -633,6 +633,7 @@ ftp_pwd (struct rbuf *rbuf, char **pwd)
   /* All OK.  */
   return FTPOK;
 }
+
 /* Sends the SIZE command to the server, and returns the value in 'size'.
  * If an error occurs, size is set to zero. */
 uerr_t
@@ -690,3 +691,16 @@ ftp_size (struct rbuf *rbuf, const char *file, long int *size)
   /* All OK.  */
   return FTPOK;
 }
+
+/* If URL's params are of the form "type=X", return character X.
+   Otherwise, return 'I' (the default type).  */
+char
+ftp_process_type (const char *params)
+{
+  if (params
+      && 0 == strncasecmp (params, "type=", 5)
+      && params[5] != '\0')
+    return TOUPPER (params[5]);
+  else
+    return 'I';
+}

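The mapping performed by the new ftp_process_type() can be summarized with a few calls (illustrative only; assumes ftp.h and <stdio.h> are included):

    static void
    show_type_examples (void)
    {
      char a = ftp_process_type ("type=a");   /* 'A' -- ASCII transfer            */
      char i = ftp_process_type ("Type=i");   /* 'I' -- match is case-insensitive */
      char d = ftp_process_type ("lang=en");  /* 'I' -- unrecognized params       */
      char n = ftp_process_type (NULL);       /* 'I' -- no params at all          */
      printf ("%c %c %c %c\n", a, i, d, n);   /* prints "A I I I" */
    }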
src/ftp-ls.c

@@ -796,7 +796,7 @@ Unsupported listing type, trying Unix listing parser.\n"));
    directories and files on the appropriate host.  The references are
    FTP.  */
 uerr_t
-ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
+ftp_index (const char *file, struct url *u, struct fileinfo *f)
 {
   FILE *fp;
   char *upwd;

src/ftp.c

@@ -62,6 +62,18 @@ extern int h_errno;
 extern char ftp_last_respline[];
 
+typedef struct
+{
+  int st;               /* connection status */
+  int cmd;              /* command code */
+  struct rbuf rbuf;     /* control connection buffer */
+  long dltime;          /* time of the download */
+  enum stype rs;        /* remote system reported by ftp server */
+  char *id;             /* initial directory */
+  char *target;         /* target file name */
+} ccon;
+
 /* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
    the string S, and return the number converted to long, if found, 0
    otherwise.  */
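With `target' now carried in the connection state, a caller has to start from a zeroed struct and release the string once the transfer is done. A minimal sketch of that pattern (the exact initialization performed by ftp_loop() is not visible in this excerpt, so the details are an assumption):

    ccon con;

    memset (&con, 0, sizeof (con));   /* id, target, ... start out NULL */
    rbuf_uninitialize (&con.rbuf);    /* control connection not yet open */

    /* ... ftp_loop_internal() fills con.target when it is NULL ... */

    FREE_MAYBE (con.id);
    FREE_MAYBE (con.target);          /* mirrors the cleanup added at the
                                         end of ftp_loop() in this commit */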
@@ -108,7 +120,7 @@ ftp_expected_bytes (const char *s)
    connection to the server.  It always closes the data connection,
    and closes the control connection in case of error.  */
 static uerr_t
-getftp (struct urlinfo *u, long *len, long restval, ccon *con)
+getftp (struct url *u, long *len, long restval, ccon *con)
 {
   int csock, dtsock, res;
   uerr_t err;
@@ -122,7 +134,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
   long expected_bytes = 0L;
 
   assert (con != NULL);
-  assert (u->local != NULL);
+  assert (con->target != NULL);
+
   /* Debug-check of the sanity of the request by making sure that LIST
      and RETR are never both requested (since we can handle only one
      at a time.  */
@@ -144,6 +157,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
     csock = RBUF_FD (&con->rbuf);
   else                          /* cmd & DO_LOGIN */
     {
+      char type_char;
+
       /* Login to the server: */
 
       /* First: Establish the control connection.  */
@@ -325,9 +340,10 @@ Error in server response, closing control connection.\n"));
   logputs (LOG_VERBOSE, _("done.\n"));
 
   /* Fifth: Set the FTP type.  */
+  type_char = ftp_process_type (u->params);
   if (!opt.server_response)
-    logprintf (LOG_VERBOSE, "==> TYPE %c ... ", TOUPPER (u->ftp_type));
-  err = ftp_type (&con->rbuf, TOUPPER (u->ftp_type));
+    logprintf (LOG_VERBOSE, "==> TYPE %c ... ", type_char);
+  err = ftp_type (&con->rbuf, type_char);
   /* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
   switch (err)
     {
@@ -351,7 +367,7 @@ Error in server response, closing control connection.\n"));
       logputs (LOG_VERBOSE, "\n");
       logprintf (LOG_NOTQUIET,
                  _("Unknown type `%c', closing control connection.\n"),
-                 TOUPPER (u->ftp_type));
+                 type_char);
       CLOSE (csock);
       rbuf_uninitialize (&con->rbuf);
       return err;
@ -701,7 +717,7 @@ Error in server response, closing control connection.\n"));
{ {
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
_("\nREST failed; will not truncate `%s'.\n"), _("\nREST failed; will not truncate `%s'.\n"),
u->local); con->target);
CLOSE (csock); CLOSE (csock);
closeport (dtsock); closeport (dtsock);
rbuf_uninitialize (&con->rbuf); rbuf_uninitialize (&con->rbuf);
@ -850,16 +866,16 @@ Error in server response, closing control connection.\n"));
/* Open the file -- if opt.dfp is set, use it instead. */ /* Open the file -- if opt.dfp is set, use it instead. */
if (!opt.dfp || con->cmd & DO_LIST) if (!opt.dfp || con->cmd & DO_LIST)
{ {
mkalldirs (u->local); mkalldirs (con->target);
if (opt.backups) if (opt.backups)
rotate_backups (u->local); rotate_backups (con->target);
/* #### Is this correct? */ /* #### Is this correct? */
chmod (u->local, 0600); chmod (con->target, 0600);
fp = fopen (u->local, restval ? "ab" : "wb"); fp = fopen (con->target, restval ? "ab" : "wb");
if (!fp) if (!fp)
{ {
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno)); logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, strerror (errno));
CLOSE (csock); CLOSE (csock);
rbuf_uninitialize (&con->rbuf); rbuf_uninitialize (&con->rbuf);
closeport (dtsock); closeport (dtsock);
@ -928,7 +944,7 @@ Error in server response, closing control connection.\n"));
if (res == -2) if (res == -2)
{ {
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"), logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
u->local, strerror (errno)); con->target, strerror (errno));
CLOSE (csock); CLOSE (csock);
rbuf_uninitialize (&con->rbuf); rbuf_uninitialize (&con->rbuf);
return FWRITEERR; return FWRITEERR;
@ -993,10 +1009,10 @@ Error in server response, closing control connection.\n"));
print it out. */ print it out. */
if (opt.server_response && (con->cmd & DO_LIST)) if (opt.server_response && (con->cmd & DO_LIST))
{ {
mkalldirs (u->local); mkalldirs (con->target);
fp = fopen (u->local, "r"); fp = fopen (con->target, "r");
if (!fp) if (!fp)
logprintf (LOG_ALWAYS, "%s: %s\n", u->local, strerror (errno)); logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno));
else else
{ {
char *line; char *line;
@ -1020,7 +1036,7 @@ Error in server response, closing control connection.\n"));
This loop either gets commands from con, or (if ON_YOUR_OWN is This loop either gets commands from con, or (if ON_YOUR_OWN is
set), makes them up to retrieve the file given by the URL. */ set), makes them up to retrieve the file given by the URL. */
static uerr_t static uerr_t
ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con) ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
{ {
int count, orig_lp; int count, orig_lp;
long restval, len; long restval, len;
@ -1028,21 +1044,21 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
uerr_t err; uerr_t err;
struct stat st; struct stat st;
if (!u->local) if (!con->target)
u->local = url_filename (u); con->target = url_filename (u);
if (opt.noclobber && file_exists_p (u->local)) if (opt.noclobber && file_exists_p (con->target))
{ {
logprintf (LOG_VERBOSE, logprintf (LOG_VERBOSE,
_("File `%s' already there, not retrieving.\n"), u->local); _("File `%s' already there, not retrieving.\n"), con->target);
/* If the file is there, we suppose it's retrieved OK. */ /* If the file is there, we suppose it's retrieved OK. */
return RETROK; return RETROK;
} }
/* Remove it if it's a link. */ /* Remove it if it's a link. */
remove_link (u->local); remove_link (con->target);
if (!opt.output_document) if (!opt.output_document)
locf = u->local; locf = con->target;
else else
locf = opt.output_document; locf = opt.output_document;
@ -1100,7 +1116,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Print fetch message, if opt.verbose. */ /* Print fetch message, if opt.verbose. */
if (opt.verbose) if (opt.verbose)
{ {
char *hurl = str_url (u->proxy ? u->proxy : u, 1); char *hurl = url_string (u, 1);
char tmp[15]; char tmp[15];
strcpy (tmp, " "); strcpy (tmp, " ");
if (count > 1) if (count > 1)
@ -1175,7 +1191,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Need to hide the password from the URL. The `if' is here /* Need to hide the password from the URL. The `if' is here
so that we don't do the needless allocation every so that we don't do the needless allocation every
time. */ time. */
char *hurl = str_url (u->proxy ? u->proxy : u, 1); char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n", logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
tms, hurl, len, locf, count); tms, hurl, len, locf, count);
xfree (hurl); xfree (hurl);
@@ -1235,43 +1251,48 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
 /* Return the directory listing in a reusable format.  The directory
    is specifed in u->dir.  */
 uerr_t
-ftp_get_listing (struct urlinfo *u, ccon *con, struct fileinfo **f)
+ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
 {
   uerr_t err;
-  char *olocal = u->local;
-  char *list_filename, *ofile;
+  char *uf;                     /* url file name */
+  char *lf;                     /* list file name */
+  char *old_target = con->target;
 
   con->st &= ~ON_YOUR_OWN;
   con->cmd |= (DO_LIST | LEAVE_PENDING);
   con->cmd &= ~DO_RETR;
-  /* Get the listing filename. */
-  ofile = u->file;
-  u->file = LIST_FILENAME;
-  list_filename = url_filename (u);
-  u->file = ofile;
-  u->local = list_filename;
-  DEBUGP ((_("Using `%s' as listing tmp file.\n"), list_filename));
+
+  /* Find the listing file name.  We do it by taking the file name of
+     the URL and replacing the last component with the listing file
+     name.  */
+  uf = url_filename (u);
+  lf = file_merge (uf, LIST_FILENAME);
+  xfree (uf);
+  DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
+
+  con->target = lf;
   err = ftp_loop_internal (u, NULL, con);
-  u->local = olocal;
+  con->target = old_target;
+
   if (err == RETROK)
-    *f = ftp_parse_ls (list_filename, con->rs);
+    *f = ftp_parse_ls (lf, con->rs);
   else
     *f = NULL;
   if (opt.remove_listing)
     {
-      if (unlink (list_filename))
+      if (unlink (lf))
        logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
       else
-       logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), list_filename);
+       logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
     }
-  xfree (list_filename);
+  xfree (lf);
   con->cmd &= ~DO_LIST;
   return err;
 }
 
-static uerr_t ftp_retrieve_dirs PARAMS ((struct urlinfo *, struct fileinfo *,
+static uerr_t ftp_retrieve_dirs PARAMS ((struct url *, struct fileinfo *,
                                          ccon *));
-static uerr_t ftp_retrieve_glob PARAMS ((struct urlinfo *, ccon *, int));
+static uerr_t ftp_retrieve_glob PARAMS ((struct url *, ccon *, int));
 static struct fileinfo *delelement PARAMS ((struct fileinfo *,
                                             struct fileinfo **));
 static void freefileinfo PARAMS ((struct fileinfo *f));
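file_merge() itself is added in utils.c, which is not part of this excerpt. Judging from the comment and the call above, it replaces the last component of its first argument with the second, roughly like this hypothetical sketch:

    /* Sketch of the assumed semantics, not the actual utils.c code.  */
    char *
    file_merge_sketch (const char *base, const char *file)
    {
      const char *slash = strrchr (base, '/');
      if (!slash)
        return xstrdup (file);                    /* no directory part: just FILE */
      else
        {
          int dirlen = slash - base + 1;          /* keep "dir/" ...   */
          char *res = xmalloc (dirlen + strlen (file) + 1);
          memcpy (res, base, dirlen);
          strcpy (res + dirlen, file);            /* ... append FILE   */
          return res;
        }
    }

    /* e.g. file_merge_sketch ("pub/gnu/ls-lR", ".listing") => "pub/gnu/.listing" */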
@@ -1284,11 +1305,10 @@ static void freefileinfo PARAMS ((struct fileinfo *f));
    If opt.recursive is set, after all files have been retrieved,
    ftp_retrieve_dirs will be called to retrieve the directories.  */
 static uerr_t
-ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
+ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
 {
   static int depth = 0;
   uerr_t err;
-  char *olocal, *ofile;
   struct fileinfo *orig;
   long local_size;
   time_t tml;
@@ -1323,15 +1343,19 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
   while (f)
     {
+      char *old_target, *ofile;
+
       if (downloaded_exceeds_quota ())
        {
          --depth;
          return QUOTEXC;
        }
-      olocal = u->local;
-      ofile = u->file;
-      u->file = f->name;
-      u->local = url_filename (u);
+      old_target = con->target;
+
+      ofile = xstrdup (u->file);
+      url_set_file (u, f->name);
+
+      con->target = url_filename (u);
       err = RETROK;
 
       dlthis = 1;
@ -1343,7 +1367,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
I'm not implementing it now since files on an FTP server are much I'm not implementing it now since files on an FTP server are much
more likely than files on an HTTP server to legitimately have a more likely than files on an HTTP server to legitimately have a
.orig suffix. */ .orig suffix. */
if (!stat (u->local, &st)) if (!stat (con->target, &st))
{ {
int eq_size; int eq_size;
int cor_val; int cor_val;
@ -1360,7 +1384,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
/* Remote file is older, file sizes can be compared and /* Remote file is older, file sizes can be compared and
are both equal. */ are both equal. */
logprintf (LOG_VERBOSE, _("\ logprintf (LOG_VERBOSE, _("\
Remote file no newer than local file `%s' -- not retrieving.\n"), u->local); Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
dlthis = 0; dlthis = 0;
} }
else if (eq_size) else if (eq_size)
@ -1368,7 +1392,7 @@ Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
/* Remote file is newer or sizes cannot be matched */ /* Remote file is newer or sizes cannot be matched */
logprintf (LOG_VERBOSE, _("\ logprintf (LOG_VERBOSE, _("\
Remote file is newer than local file `%s' -- retrieving.\n\n"), Remote file is newer than local file `%s' -- retrieving.\n\n"),
u->local); con->target);
} }
else else
{ {
@ -1396,30 +1420,30 @@ The sizes do not match (local %ld) -- retrieving.\n\n"), local_size);
struct stat st; struct stat st;
/* Check whether we already have the correct /* Check whether we already have the correct
symbolic link. */ symbolic link. */
int rc = lstat (u->local, &st); int rc = lstat (con->target, &st);
if (rc == 0) if (rc == 0)
{ {
size_t len = strlen (f->linkto) + 1; size_t len = strlen (f->linkto) + 1;
if (S_ISLNK (st.st_mode)) if (S_ISLNK (st.st_mode))
{ {
char *link_target = (char *)alloca (len); char *link_target = (char *)alloca (len);
size_t n = readlink (u->local, link_target, len); size_t n = readlink (con->target, link_target, len);
if ((n == len - 1) if ((n == len - 1)
&& (memcmp (link_target, f->linkto, n) == 0)) && (memcmp (link_target, f->linkto, n) == 0))
{ {
logprintf (LOG_VERBOSE, _("\ logprintf (LOG_VERBOSE, _("\
Already have correct symlink %s -> %s\n\n"), Already have correct symlink %s -> %s\n\n"),
u->local, f->linkto); con->target, f->linkto);
dlthis = 0; dlthis = 0;
break; break;
} }
} }
} }
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"), logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
u->local, f->linkto); con->target, f->linkto);
/* Unlink before creating symlink! */ /* Unlink before creating symlink! */
unlink (u->local); unlink (con->target);
if (symlink (f->linkto, u->local) == -1) if (symlink (f->linkto, con->target) == -1)
logprintf (LOG_NOTQUIET, "symlink: %s\n", logprintf (LOG_NOTQUIET, "symlink: %s\n",
strerror (errno)); strerror (errno));
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
@ -1427,7 +1451,7 @@ Already have correct symlink %s -> %s\n\n"),
#else /* not HAVE_SYMLINK */ #else /* not HAVE_SYMLINK */
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
_("Symlinks not supported, skipping symlink `%s'.\n"), _("Symlinks not supported, skipping symlink `%s'.\n"),
u->local); con->target);
#endif /* not HAVE_SYMLINK */ #endif /* not HAVE_SYMLINK */
} }
else /* opt.retr_symlinks */ else /* opt.retr_symlinks */
@ -1458,7 +1482,7 @@ Already have correct symlink %s -> %s\n\n"),
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks) if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
&& f->tstamp != -1 && f->tstamp != -1
&& dlthis && dlthis
&& file_exists_p (u->local)) && file_exists_p (con->target))
{ {
/* #### This code repeats in http.c and ftp.c. Move it to a /* #### This code repeats in http.c and ftp.c. Move it to a
function! */ function! */
@@ -1469,27 +1493,31 @@ Already have correct symlink %s -> %s\n\n"),
              fl = opt.output_document;
            }
          else
-           fl = u->local;
+           fl = con->target;
          if (fl)
            touch (fl, f->tstamp);
        }
      else if (f->tstamp == -1)
-       logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), u->local);
+       logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
 
      if (f->perms && f->type == FT_PLAINFILE && dlthis)
-       chmod (u->local, f->perms);
+       chmod (con->target, f->perms);
      else
-       DEBUGP (("Unrecognized permissions for %s.\n", u->local));
+       DEBUGP (("Unrecognized permissions for %s.\n", con->target));
 
-      xfree (u->local);
-      u->local = olocal;
-      u->file = ofile;
+      xfree (con->target);
+      con->target = old_target;
+
+      url_set_file (u, ofile);
+      xfree (ofile);
+
      /* Break on fatals.  */
      if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
        break;
      con->cmd &= ~ (DO_CWD | DO_LOGIN);
      f = f->next;
-    } /* while */
+    }
  /* We do not want to call ftp_retrieve_dirs here */
  if (opt.recursive &&
      !(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
@@ -1506,51 +1534,62 @@ Already have correct symlink %s -> %s\n\n"),
    ftp_retrieve_glob on each directory entry.  The function knows
    about excluded directories.  */
 static uerr_t
-ftp_retrieve_dirs (struct urlinfo *u, struct fileinfo *f, ccon *con)
+ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con)
 {
-  char *odir;
-  char *current_container = NULL;
-  int current_length = 0;
+  char *container = NULL;
+  int container_size = 0;
 
   for (; f; f = f->next)
     {
-      int len;
+      int size;
+      char *odir, *newdir;
 
      if (downloaded_exceeds_quota ())
        break;
      if (f->type != FT_DIRECTORY)
        continue;
-      odir = u->dir;
-      len = strlen (u->dir) + 1 + strlen (f->name) + 1;
+
      /* Allocate u->dir off stack, but reallocate only if a larger
-        string is needed. */
-      if (len > current_length)
-       current_container = (char *)alloca (len);
-      u->dir = current_container;
+        string is needed.  It's a pity there's no "realloca" for an
+        item on the bottom of the stack.  */
+      size = strlen (u->dir) + 1 + strlen (f->name) + 1;
+      if (size > container_size)
+       container = (char *)alloca (size);
+      newdir = container;
+
+      odir = u->dir;
      if (*odir == '\0'
          || (*odir == '/' && *(odir + 1) == '\0'))
        /* If ODIR is empty or just "/", simply append f->name to
           ODIR.  (In the former case, to preserve u->dir being
           relative; in the latter case, to avoid double slash.)  */
-       sprintf (u->dir, "%s%s", odir, f->name);
+       sprintf (newdir, "%s%s", odir, f->name);
      else
        /* Else, use a separator. */
-       sprintf (u->dir, "%s/%s", odir, f->name);
      DEBUGP (("Composing new CWD relative to the initial directory.\n"));
-      DEBUGP (("  odir = '%s'\n  f->name = '%s'\n  u->dir = '%s'\n\n",
-              odir, f->name, u->dir));
-      if (!accdir (u->dir, ALLABS))
+       sprintf (newdir, "%s/%s", odir, f->name);
+      DEBUGP (("  odir = '%s'\n  f->name = '%s'\n  newdir = '%s'\n\n",
+              odir, f->name, newdir));
+      if (!accdir (newdir, ALLABS))
        {
          logprintf (LOG_VERBOSE, _("\
-Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
-         u->dir = odir;
+Not descending to `%s' as it is excluded/not-included.\n"), newdir);
          continue;
        }
      con->st &= ~DONE_CWD;
+
+      odir = xstrdup (u->dir);  /* because url_set_dir will free
+                                   u->dir. */
+      url_set_dir (u, newdir);
      ftp_retrieve_glob (u, con, GETALL);
+      url_set_dir (u, odir);
+      xfree (odir);
+
      /* Set the time-stamp? */
-      u->dir = odir;
    }
  if (opt.quota && opt.downloaded > opt.quota)
    return QUOTEXC;
  else
@ -1567,7 +1606,7 @@ Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
get the listing, so that the time-stamp is heeded); if it's GLOBALL, get the listing, so that the time-stamp is heeded); if it's GLOBALL,
use globbing; if it's GETALL, download the whole directory. */ use globbing; if it's GETALL, download the whole directory. */
static uerr_t static uerr_t
ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action) ftp_retrieve_glob (struct url *u, ccon *con, int action)
{ {
struct fileinfo *orig, *start; struct fileinfo *orig, *start;
uerr_t res; uerr_t res;
@ -1607,7 +1646,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
matchres = fnmatch (u->file, f->name, 0); matchres = fnmatch (u->file, f->name, 0);
if (matchres == -1) if (matchres == -1)
{ {
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, logprintf (LOG_NOTQUIET, "%s: %s\n", con->target,
strerror (errno)); strerror (errno));
break; break;
} }
@ -1657,7 +1696,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
of URL. Inherently, its capabilities are limited on what can be of URL. Inherently, its capabilities are limited on what can be
encoded into a URL. */ encoded into a URL. */
uerr_t uerr_t
ftp_loop (struct urlinfo *u, int *dt) ftp_loop (struct url *u, int *dt)
{ {
ccon con; /* FTP connection */ ccon con; /* FTP connection */
uerr_t res; uerr_t res;
@@ -1686,7 +1725,7 @@ ftp_loop (struct urlinfo *u, int *dt)
        {
          char *filename = (opt.output_document
                            ? xstrdup (opt.output_document)
-                           : (u->local ? xstrdup (u->local)
+                           : (con.target ? xstrdup (con.target)
                               : url_filename (u)));
          res = ftp_index (filename, u, f);
          if (res == FTPOK && opt.verbose)
@@ -1736,6 +1775,8 @@ ftp_loop (struct urlinfo *u, int *dt)
    CLOSE (RBUF_FD (&con.rbuf));
  FREE_MAYBE (con.id);
  con.id = NULL;
+  FREE_MAYBE (con.target);
+  con.target = NULL;
  return res;
 }

src/ftp.h

@@ -46,7 +46,7 @@ uerr_t ftp_syst PARAMS ((struct rbuf *, enum stype *));
 uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
 uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
 
-struct urlinfo;
+struct url;
 
 /* File types.  */
 enum ftype
@@ -98,19 +98,12 @@ enum wget_ftp_fstatus
                                   correct. */
 };
 
-typedef struct
-{
-  int st;               /* connection status */
-  int cmd;              /* command code */
-  struct rbuf rbuf;     /* control connection buffer */
-  long dltime;          /* time of the download */
-  enum stype rs;        /* remote system reported by ftp server */
-  char *id;             /* initial directory */
-} ccon;
-
 struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
-uerr_t ftp_loop PARAMS ((struct urlinfo *, int *));
+uerr_t ftp_loop PARAMS ((struct url *, int *));
 
-uerr_t ftp_index (const char *, struct urlinfo *, struct fileinfo *);
+uerr_t ftp_index (const char *, struct url *, struct fileinfo *);
+
+char ftp_process_type PARAMS ((const char *));
 
 #endif /* FTP_H */

src/host.c

@@ -327,7 +327,7 @@ same_host (const char *u1, const char *u2)
 /* Determine whether a URL is acceptable to be followed, according to
    a list of domains to accept.  */
 int
-accept_domain (struct urlinfo *u)
+accept_domain (struct url *u)
 {
   assert (u->host != NULL);
   if (opt.domains)

src/host.h

@@ -20,7 +20,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
 #ifndef HOST_H
 #define HOST_H
 
-struct urlinfo;
+struct url;
 
 /* Function declarations */
@@ -31,7 +31,7 @@ void clean_hosts PARAMS ((void));
 char *realhost PARAMS ((const char *));
 int same_host PARAMS ((const char *, const char *));
-int accept_domain PARAMS ((struct urlinfo *));
+int accept_domain PARAMS ((struct url *));
 int sufmatch PARAMS ((const char **, const char *));
 char *ftp_getaddress PARAMS ((void));

src/http.c

@@ -464,16 +464,22 @@ struct http_stat
   long dltime;                  /* time of the download */
   int no_truncate;              /* whether truncating the file is
                                    forbidden. */
+  const char *referer;          /* value of the referer header. */
+  char **local_file;            /* local file. */
 };
 
-/* Free the elements of hstat X.  */
-#define FREEHSTAT(x) do                 \
-{                                       \
-  FREE_MAYBE ((x).newloc);              \
-  FREE_MAYBE ((x).remote_time);         \
-  FREE_MAYBE ((x).error);               \
-  (x).newloc = (x).remote_time = (x).error = NULL;      \
-} while (0)
+static void
+free_hstat (struct http_stat *hs)
+{
+  FREE_MAYBE (hs->newloc);
+  FREE_MAYBE (hs->remote_time);
+  FREE_MAYBE (hs->error);
+
+  /* Guard against being called twice. */
+  hs->newloc = NULL;
+  hs->remote_time = NULL;
+  hs->error = NULL;
+}
 
 static char *create_authorization_line PARAMS ((const char *, const char *,
                                                 const char *, const char *,
@@ -499,23 +505,22 @@ time_t http_atotm PARAMS ((char *));
    response code correctly, it is not used in a sane way.  The caller
    can do that, though.
 
-   If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
-   and u->proxy->url will be given to the proxy server (bad naming,
-   I'm afraid).  */
+   If PROXY is non-NULL, the connection will be made to the proxy
+   server, and u->url will be requested.  */
 static uerr_t
-gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
+gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
 {
-  char *request, *type, *command, *path;
+  char *request, *type, *command, *full_path;
   char *user, *passwd;
-  char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
+  char *pragma_h, *referer, *useragent, *range, *wwwauth;
   char *authenticate_h;
   char *proxyauth;
   char *all_headers;
   char *port_maybe;
   char *request_keep_alive;
-  int sock, hcount, num_written, all_length, remport, statcode;
+  int sock, hcount, num_written, all_length, statcode;
   long contlen, contrange;
-  struct urlinfo *ou;
+  struct url *conn;
   uerr_t err;
   FILE *fp;
   int auth_tried_already;
@ -542,7 +547,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
/* initialize ssl_ctx on first run */ /* initialize ssl_ctx on first run */
if (!ssl_ctx) if (!ssl_ctx)
{ {
err=init_ssl (&ssl_ctx); err = init_ssl (&ssl_ctx);
if (err != 0) if (err != 0)
{ {
switch (err) switch (err)
@ -579,12 +584,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
if (!(*dt & HEAD_ONLY)) if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
know the local filename so we can save to it. */ know the local filename so we can save to it. */
assert (u->local != NULL); assert (*hs->local_file != NULL);
authenticate_h = 0; authenticate_h = 0;
auth_tried_already = 0; auth_tried_already = 0;
inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL); inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
again: again:
/* We need to come back here when the initial attempt to retrieve /* We need to come back here when the initial attempt to retrieve
@@ -602,29 +607,29 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
   hs->remote_time = NULL;
   hs->error = NULL;
 
-  /* Which structure to use to retrieve the original URL data.  */
-  if (u->proxy)
-    ou = u->proxy;
-  else
-    ou = u;
+  /* If we're using a proxy, we will be connecting to the proxy
+     server.  */
+  conn = proxy ? proxy : u;
 
   /* First: establish the connection.  */
   if (inhibit_keep_alive
       ||
 #ifndef HAVE_SSL
-      !persistent_available_p (u->host, u->port)
+      !persistent_available_p (conn->host, conn->port)
 #else
-      !persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
+      !persistent_available_p (conn->host, conn->port,
+                               u->scheme == SCHEME_HTTPS)
 #endif /* HAVE_SSL */
       )
     {
-      logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
-      err = make_connection (&sock, u->host, u->port);
+      logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
+                 conn->host, conn->port);
+      err = make_connection (&sock, conn->host, conn->port);
       switch (err)
        {
        case HOSTERR:
          logputs (LOG_VERBOSE, "\n");
-         logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
+         logprintf (LOG_NOTQUIET, "%s: %s.\n", conn->host, herrmsg (h_errno));
          return HOSTERR;
          break;
        case CONSOCKERR:
@ -635,7 +640,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
case CONREFUSED: case CONREFUSED:
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
_("Connection to %s:%hu refused.\n"), u->host, u->port); _("Connection to %s:%hu refused.\n"), conn->host,
conn->port);
CLOSE (sock); CLOSE (sock);
return CONREFUSED; return CONREFUSED;
case CONERROR: case CONERROR:
@ -653,7 +659,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
break; break;
} }
#ifdef HAVE_SSL #ifdef HAVE_SSL
if (u->scheme == SCHEME_HTTPS) if (conn->scheme == SCHEME_HTTPS)
if (connect_ssl (&ssl, ssl_ctx,sock) != 0) if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
{ {
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
@ -666,7 +672,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
} }
else else
{ {
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port); logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
conn->host, conn->port);
/* #### pc_last_fd should be accessed through an accessor /* #### pc_last_fd should be accessed through an accessor
function. */ function. */
sock = pc_last_fd; sock = pc_last_fd;
@@ -676,22 +683,20 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
       DEBUGP (("Reusing fd %d.\n", sock));
     }
 
-  if (u->proxy)
-    path = u->proxy->url;
-  else
-    path = u->path;
-
   command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
+
   referer = NULL;
-  if (ou->referer)
+  if (hs->referer)
     {
-      referer = (char *)alloca (9 + strlen (ou->referer) + 3);
-      sprintf (referer, "Referer: %s\r\n", ou->referer);
+      referer = (char *)alloca (9 + strlen (hs->referer) + 3);
+      sprintf (referer, "Referer: %s\r\n", hs->referer);
     }
+
   if (*dt & SEND_NOCACHE)
     pragma_h = "Pragma: no-cache\r\n";
   else
     pragma_h = "";
+
   if (hs->restval)
     {
       range = (char *)alloca (13 + numdigit (hs->restval) + 4);
@ -714,9 +719,9 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
sprintf (useragent, "Wget/%s", version_string); sprintf (useragent, "Wget/%s", version_string);
} }
/* Construct the authentication, if userid is present. */ /* Construct the authentication, if userid is present. */
user = ou->user; user = u->user;
passwd = ou->passwd; passwd = u->passwd;
search_netrc (ou->host, (const char **)&user, (const char **)&passwd, 0); search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
user = user ? user : opt.http_user; user = user ? user : opt.http_user;
passwd = passwd ? passwd : opt.http_passwd; passwd = passwd ? passwd : opt.http_passwd;
@ -750,12 +755,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
else else
{ {
wwwauth = create_authorization_line (authenticate_h, user, passwd, wwwauth = create_authorization_line (authenticate_h, user, passwd,
command, ou->path); command, u->path);
} }
} }
proxyauth = NULL; proxyauth = NULL;
if (u->proxy) if (proxy)
{ {
char *proxy_user, *proxy_passwd; char *proxy_user, *proxy_passwd;
/* For normal username and password, URL components override /* For normal username and password, URL components override
@@ -770,31 +775,22 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
        }
       else
        {
-         proxy_user = u->user;
-         proxy_passwd = u->passwd;
+         proxy_user = proxy->user;
+         proxy_passwd = proxy->passwd;
        }
-      /* #### This is junky.  Can't the proxy request, say, `Digest'
-         authentication?  */
+      /* #### This does not appear right.  Can't the proxy request,
+         say, `Digest' authentication?  */
       if (proxy_user && proxy_passwd)
        proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
                                                 "Proxy-Authorization");
     }
-  remhost = ou->host;
-  remport = ou->port;
 
   /* String of the form :PORT.  Used only for non-standard ports. */
   port_maybe = NULL;
-  if (1
-#ifdef HAVE_SSL
-      && remport != (u->scheme == SCHEME_HTTPS
-                     ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
-#else
-      && remport != DEFAULT_HTTP_PORT
-#endif
-      )
+  if (u->port != scheme_default_port (u->scheme))
     {
-      port_maybe = (char *)alloca (numdigit (remport) + 2);
-      sprintf (port_maybe, ":%d", remport);
+      port_maybe = (char *)alloca (numdigit (u->port) + 2);
+      sprintf (port_maybe, ":%d", u->port);
     }
 
   if (!inhibit_keep_alive)
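The hard-wired DEFAULT_HTTP_PORT/DEFAULT_HTTPS_PORT test above is replaced by scheme_default_port(), which presumably lives in the reworked url.c (not included in this excerpt). Conceptually it is just a scheme-to-port lookup; the shape below is an assumption for illustration, including the exact enumerator names:

    /* Assumed shape of the helper -- the real table lives in url.c. */
    static int
    scheme_default_port_sketch (int scheme)
    {
      switch (scheme)
        {
        case SCHEME_HTTP:  return 80;
    #ifdef HAVE_SSL
        case SCHEME_HTTPS: return 443;
    #endif
        case SCHEME_FTP:   return 21;
        default:           return -1;
        }
    }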
@@ -803,18 +799,24 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
     request_keep_alive = NULL;
 
   if (opt.cookies)
-    cookies = build_cookies_request (ou->host, ou->port, ou->path,
+    cookies = build_cookies_request (u->host, u->port, u->path,
 #ifdef HAVE_SSL
-                                     ou->scheme == SCHEME_HTTPS
+                                     u->scheme == SCHEME_HTTPS
 #else
                                      0
 #endif
                                      );
 
+  if (proxy)
+    full_path = xstrdup (u->url);
+  else
+    full_path = url_full_path (u);
+
   /* Allocate the memory for the request.  */
-  request = (char *)alloca (strlen (command) + strlen (path)
+  request = (char *)alloca (strlen (command)
+                            + strlen (full_path)
                             + strlen (useragent)
-                            + strlen (remhost)
+                            + strlen (u->host)
                             + (port_maybe ? strlen (port_maybe) : 0)
                             + strlen (HTTP_ACCEPT)
                             + (request_keep_alive
@@ -834,7 +836,8 @@ User-Agent: %s\r\n\
 Host: %s%s\r\n\
 Accept: %s\r\n\
 %s%s%s%s%s%s%s%s\r\n",
-           command, path, useragent, remhost,
+           command, full_path,
+           useragent, u->host,
            port_maybe ? port_maybe : "",
            HTTP_ACCEPT,
            request_keep_alive ? request_keep_alive : "",
@@ -846,10 +849,12 @@ Accept: %s\r\n\
            pragma_h,
            opt.user_header ? opt.user_header : "");
   DEBUGP (("---request begin---\n%s---request end---\n", request));
+
   /* Free the temporary memory.  */
   FREE_MAYBE (wwwauth);
   FREE_MAYBE (proxyauth);
   FREE_MAYBE (cookies);
+  xfree (full_path);
 
   /* Send the request to server.  */
 #ifdef HAVE_SSL
@ -867,7 +872,7 @@ Accept: %s\r\n\
return WRITEFAILED; return WRITEFAILED;
} }
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "), logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
u->proxy ? "Proxy" : "HTTP"); proxy ? "Proxy" : "HTTP");
contlen = contrange = -1; contlen = contrange = -1;
type = NULL; type = NULL;
statcode = -1; statcode = -1;
@ -1075,9 +1080,9 @@ Accept: %s\r\n\
/* The server has promised that it will not close the connection /* The server has promised that it will not close the connection
when we're done. This means that we can register it. */ when we're done. This means that we can register it. */
#ifndef HAVE_SSL #ifndef HAVE_SSL
register_persistent (u->host, u->port, sock); register_persistent (conn->host, conn->port, sock);
#else #else
register_persistent (u->host, u->port, sock, ssl); register_persistent (conn->host, conn->port, sock, ssl);
#endif /* HAVE_SSL */ #endif /* HAVE_SSL */
if ((statcode == HTTP_STATUS_UNAUTHORIZED) if ((statcode == HTTP_STATUS_UNAUTHORIZED)
@ -1086,7 +1091,7 @@ Accept: %s\r\n\
/* Authorization is required. */ /* Authorization is required. */
FREE_MAYBE (type); FREE_MAYBE (type);
type = NULL; type = NULL;
FREEHSTAT (*hs); free_hstat (hs);
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */ might be more bytes in the body. */
if (auth_tried_already) if (auth_tried_already)
@ -1163,16 +1168,17 @@ Accept: %s\r\n\
text/html file. If some case-insensitive variation on ".htm[l]" isn't text/html file. If some case-insensitive variation on ".htm[l]" isn't
already the file's suffix, tack on ".html". */ already the file's suffix, tack on ".html". */
{ {
char* last_period_in_local_filename = strrchr(u->local, '.'); char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
if (last_period_in_local_filename == NULL || if (last_period_in_local_filename == NULL ||
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ || !(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
strcasecmp(last_period_in_local_filename, ".html") == EQ)) strcasecmp(last_period_in_local_filename, ".html") == EQ))
{ {
size_t local_filename_len = strlen(u->local); size_t local_filename_len = strlen(*hs->local_file);
u->local = xrealloc(u->local, local_filename_len + sizeof(".html")); *hs->local_file = xrealloc(*hs->local_file,
strcpy(u->local + local_filename_len, ".html"); local_filename_len + sizeof(".html"));
strcpy(*hs->local_file + local_filename_len, ".html");
*dt |= ADDED_HTML_EXTENSION; *dt |= ADDED_HTML_EXTENSION;
} }
@ -1224,7 +1230,7 @@ Accept: %s\r\n\
_("\ _("\
\n\ \n\
Continued download failed on this file, which conflicts with `-c'.\n\ Continued download failed on this file, which conflicts with `-c'.\n\
Refusing to truncate existing file `%s'.\n\n"), u->local); Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
FREE_MAYBE (type); FREE_MAYBE (type);
FREE_MAYBE (all_headers); FREE_MAYBE (all_headers);
CLOSE_INVALIDATE (sock); CLOSE_INVALIDATE (sock);
@ -1300,13 +1306,13 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
/* Open the local file. */ /* Open the local file. */
if (!opt.dfp) if (!opt.dfp)
{ {
mkalldirs (u->local); mkalldirs (*hs->local_file);
if (opt.backups) if (opt.backups)
rotate_backups (u->local); rotate_backups (*hs->local_file);
fp = fopen (u->local, hs->restval ? "ab" : "wb"); fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
if (!fp) if (!fp)
{ {
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno)); logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
might be more bytes in the body. */ might be more bytes in the body. */
FREE_MAYBE (all_headers); FREE_MAYBE (all_headers);
@@ -1375,7 +1381,8 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
 /* The genuine HTTP loop!  This is the part where the retrieval is
    retried, and retried, and retried, and...  */
 uerr_t
-http_loop (struct urlinfo *u, char **newloc, int *dt)
+http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
+           int *dt, struct url *proxy)
 {
   int count;
   int use_ts, got_head = 0;     /* time-stamping info */
@@ -1388,6 +1395,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
   size_t filename_len;
   struct http_stat hstat;       /* HTTP status */
   struct stat st;
+  char *dummy = NULL;
 
   /* This used to be done in main(), but it's a better idea to do it
      here so that we don't go through the hoops if we're just using
@@ -1407,34 +1415,46 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
     logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
 
   /* Determine the local filename.  */
-  if (!u->local)
-    u->local = url_filename (u->proxy ? u->proxy : u);
+  if (local_file && *local_file)
+    hstat.local_file = local_file;
+  else if (local_file)
+    {
+      *local_file = url_filename (u);
+      hstat.local_file = local_file;
+    }
+  else
+    {
+      dummy = url_filename (u);
+      hstat.local_file = &dummy;
+    }
 
   if (!opt.output_document)
-    locf = u->local;
+    locf = *hstat.local_file;
   else
     locf = opt.output_document;
 
-  filename_len = strlen (u->local);
+  hstat.referer = referer;
+
+  filename_len = strlen (*hstat.local_file);
   filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
 
-  if (opt.noclobber && file_exists_p (u->local))
+  if (opt.noclobber && file_exists_p (*hstat.local_file))
     {
       /* If opt.noclobber is turned on and file already exists, do not
          retrieve the file */
       logprintf (LOG_VERBOSE, _("\
-File `%s' already there, will not retrieve.\n"), u->local);
+File `%s' already there, will not retrieve.\n"), *hstat.local_file);
       /* If the file is there, we suppose it's retrieved OK.  */
       *dt |= RETROKF;
 
       /* #### Bogusness alert.  */
-      /* If its suffix is "html" or (yuck!) "htm", we suppose it's
-         text/html, a harmless lie. */
-      if (((suf = suffix (u->local)) != NULL)
+      /* If its suffix is "html" or "htm", assume text/html.  */
+      if (((suf = suffix (*hstat.local_file)) != NULL)
          && (!strcmp (suf, "html") || !strcmp (suf, "htm")))
        *dt |= TEXTHTML;
       xfree (suf);
-      /* Another harmless lie: */
+      FREE_MAYBE (dummy);
       return RETROK;
     }
@ -1461,7 +1481,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
in url.c. Replacing sprintf with inline calls to in url.c. Replacing sprintf with inline calls to
strcpy() and long_to_string() made a difference. strcpy() and long_to_string() made a difference.
--hniksic */ --hniksic */
memcpy (filename_plus_orig_suffix, u->local, filename_len); memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
memcpy (filename_plus_orig_suffix + filename_len, memcpy (filename_plus_orig_suffix + filename_len,
".orig", sizeof (".orig")); ".orig", sizeof (".orig"));
@ -1475,8 +1495,8 @@ File `%s' already there, will not retrieve.\n"), u->local);
if (!local_dot_orig_file_exists) if (!local_dot_orig_file_exists)
/* Couldn't stat() <file>.orig, so try to stat() <file>. */ /* Couldn't stat() <file>.orig, so try to stat() <file>. */
if (stat (u->local, &st) == 0) if (stat (*hstat.local_file, &st) == 0)
local_filename = u->local; local_filename = *hstat.local_file;
if (local_filename != NULL) if (local_filename != NULL)
/* There was a local file, so we'll check later to see if the version /* There was a local file, so we'll check later to see if the version
@ -1503,7 +1523,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
/* Print fetch message, if opt.verbose. */ /* Print fetch message, if opt.verbose. */
if (opt.verbose) if (opt.verbose)
{ {
char *hurl = str_url (u->proxy ? u->proxy : u, 1); char *hurl = url_string (u, 1);
char tmp[15]; char tmp[15];
strcpy (tmp, " "); strcpy (tmp, " ");
if (count > 1) if (count > 1)
@ -1545,22 +1565,22 @@ File `%s' already there, will not retrieve.\n"), u->local);
Some proxies are notorious for caching incomplete data, so Some proxies are notorious for caching incomplete data, so
we require a fresh get. we require a fresh get.
b) caching is explicitly inhibited. */ b) caching is explicitly inhibited. */
if ((u->proxy && count > 1) /* a */ if ((proxy && count > 1) /* a */
|| !opt.allow_cache /* b */ || !opt.allow_cache /* b */
) )
*dt |= SEND_NOCACHE; *dt |= SEND_NOCACHE;
else else
*dt &= ~SEND_NOCACHE; *dt &= ~SEND_NOCACHE;
/* Try fetching the document, or at least its head. :-) */ /* Try fetching the document, or at least its head. */
err = gethttp (u, &hstat, dt); err = gethttp (u, &hstat, dt, proxy);
/* It's unfortunate that wget determines the local filename before finding /* It's unfortunate that wget determines the local filename before finding
out the Content-Type of the file. Barring a major restructuring of the out the Content-Type of the file. Barring a major restructuring of the
code, we need to re-set locf here, since gethttp() may have xrealloc()d code, we need to re-set locf here, since gethttp() may have xrealloc()d
u->local to tack on ".html". */ *hstat.local_file to tack on ".html". */
if (!opt.output_document) if (!opt.output_document)
locf = u->local; locf = *hstat.local_file;
else else
locf = opt.output_document; locf = opt.output_document;
@ -1577,29 +1597,32 @@ File `%s' already there, will not retrieve.\n"), u->local);
/* Non-fatal errors continue executing the loop, which will /* Non-fatal errors continue executing the loop, which will
bring them to "while" statement at the end, to judge bring them to "while" statement at the end, to judge
whether the number of tries was exceeded. */ whether the number of tries was exceeded. */
FREEHSTAT (hstat); free_hstat (&hstat);
printwhat (count, opt.ntry); printwhat (count, opt.ntry);
continue; continue;
break; break;
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED: case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
case SSLERRCTXCREATE: case CONTNOTSUPPORTED: case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
/* Fatal errors just return from the function. */ /* Fatal errors just return from the function. */
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return err; return err;
break; break;
case FWRITEERR: case FOPENERR: case FWRITEERR: case FOPENERR:
/* Another fatal error. */ /* Another fatal error. */
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"), logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
u->local, strerror (errno)); *hstat.local_file, strerror (errno));
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return err; return err;
break; break;
case CONSSLERR: case CONSSLERR:
/* Another fatal error. */ /* Another fatal error. */
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n")); logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return err; return err;
break; break;
case NEWLOCATION: case NEWLOCATION:
@ -1609,14 +1632,18 @@ File `%s' already there, will not retrieve.\n"), u->local);
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
_("ERROR: Redirection (%d) without location.\n"), _("ERROR: Redirection (%d) without location.\n"),
hstat.statcode); hstat.statcode);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return WRONGCODE; return WRONGCODE;
} }
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return NEWLOCATION; return NEWLOCATION;
break; break;
case RETRUNNEEDED: case RETRUNNEEDED:
/* The file was already fully retrieved. */ /* The file was already fully retrieved. */
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
break; break;
case RETRFINISHED: case RETRFINISHED:
@ -1631,14 +1658,15 @@ File `%s' already there, will not retrieve.\n"), u->local);
if (!opt.verbose) if (!opt.verbose)
{ {
/* #### Ugly ugly ugly! */ /* #### Ugly ugly ugly! */
char *hurl = str_url (u->proxy ? u->proxy : u, 1); char *hurl = url_string (u, 1);
logprintf (LOG_NONVERBOSE, "%s:\n", hurl); logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
xfree (hurl); xfree (hurl);
} }
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
tms, hstat.statcode, hstat.error); tms, hstat.statcode, hstat.error);
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return WRONGCODE; return WRONGCODE;
} }
@ -1681,7 +1709,8 @@ Last-modified header invalid -- time-stamp ignored.\n"));
logprintf (LOG_VERBOSE, _("\ logprintf (LOG_VERBOSE, _("\
Server file no newer than local file `%s' -- not retrieving.\n\n"), Server file no newer than local file `%s' -- not retrieving.\n\n"),
local_filename); local_filename);
FREEHSTAT (hstat); free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
} }
else if (tml >= tmr) else if (tml >= tmr)
@ -1691,7 +1720,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
logputs (LOG_VERBOSE, logputs (LOG_VERBOSE,
_("Remote file is newer, retrieving.\n")); _("Remote file is newer, retrieving.\n"));
} }
FREEHSTAT (hstat); free_hstat (&hstat);
continue; continue;
} }
if ((tmr != (time_t) (-1)) if ((tmr != (time_t) (-1))
@ -1710,7 +1739,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
fl = opt.output_document; fl = opt.output_document;
} }
else else
fl = u->local; fl = *hstat.local_file;
if (fl) if (fl)
touch (fl, tmr); touch (fl, tmr);
} }
@ -1719,13 +1748,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
if (opt.spider) if (opt.spider)
{ {
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
} }
/* It is now safe to free the remainder of hstat, since the
strings within it will no longer be used. */
FREEHSTAT (hstat);
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0); tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
if (hstat.len == hstat.contlen) if (hstat.len == hstat.contlen)
@ -1748,6 +1774,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
} }
else if (hstat.res == 0) /* No read error */ else if (hstat.res == 0) /* No read error */
@ -1773,6 +1801,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
} }
else if (hstat.len < hstat.contlen) /* meaning we lost the else if (hstat.len < hstat.contlen) /* meaning we lost the
@ -1782,6 +1812,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Connection closed at byte %ld. "), _("%s (%s) - Connection closed at byte %ld. "),
tms, tmrate, hstat.len); tms, tmrate, hstat.len);
printwhat (count, opt.ntry); printwhat (count, opt.ntry);
free_hstat (&hstat);
continue; continue;
} }
else if (!opt.kill_longer) /* meaning we got more than expected */ else if (!opt.kill_longer) /* meaning we got more than expected */
@ -1801,6 +1832,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
else else
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf); downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
free_hstat (&hstat);
FREE_MAYBE (dummy);
return RETROK; return RETROK;
} }
else /* the same, but not accepted */ else /* the same, but not accepted */
@ -1809,6 +1842,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Connection closed at byte %ld/%ld. "), _("%s (%s) - Connection closed at byte %ld/%ld. "),
tms, tmrate, hstat.len, hstat.contlen); tms, tmrate, hstat.len, hstat.contlen);
printwhat (count, opt.ntry); printwhat (count, opt.ntry);
free_hstat (&hstat);
continue; continue;
} }
} }
@ -1820,6 +1854,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
_("%s (%s) - Read error at byte %ld (%s)."), _("%s (%s) - Read error at byte %ld (%s)."),
tms, tmrate, hstat.len, strerror (errno)); tms, tmrate, hstat.len, strerror (errno));
printwhat (count, opt.ntry); printwhat (count, opt.ntry);
free_hstat (&hstat);
continue; continue;
} }
else /* hstat.res == -1 and contlen is given */ else /* hstat.res == -1 and contlen is given */
@ -1829,6 +1864,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
tms, tmrate, hstat.len, hstat.contlen, tms, tmrate, hstat.len, hstat.contlen,
strerror (errno)); strerror (errno));
printwhat (count, opt.ntry); printwhat (count, opt.ntry);
free_hstat (&hstat);
continue; continue;
} }
} }
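The pattern repeated through the http.c hunks above is mechanical: the FREEHSTAT macro becomes a call to free_hstat(), and, since the local file name is now reported through hstat.local_file (apparently backed by the separately allocated `dummy' buffer when the caller does not ask for the name), every return path also releases `dummy'. A compressed sketch of that exit discipline follows; the helper and the `struct http_stat' tag are assumptions made for illustration, not part of the patch.

/* Illustrative helper (not in the patch): compresses the cleanup now
   duplicated at every return site in http_loop().  The `continue'
   sites in the retry loop call free_hstat() alone, because `dummy' is
   reused on the next attempt. */
static uerr_t
return_with_cleanup (struct http_stat *hs, char *dummy, uerr_t code)
{
  free_hstat (hs);              /* release strings owned by the hstat */
  FREE_MAYBE (dummy);           /* fallback store for the local file name */
  return code;
}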

src/main.c

@ -727,7 +727,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
/* Fill in the arguments. */ /* Fill in the arguments. */
for (i = 0; i < nurl; i++, optind++) for (i = 0; i < nurl; i++, optind++)
{ {
char *rewritten = rewrite_url_maybe (argv[optind]); char *rewritten = rewrite_shorthand_url (argv[optind]);
if (rewritten) if (rewritten)
{ {
printf ("Converted %s to %s\n", argv[optind], rewritten); printf ("Converted %s to %s\n", argv[optind], rewritten);
@ -845,10 +845,12 @@ Can't timestamp and not clobber old files at the same time.\n"));
{ {
convert_all_links (); convert_all_links ();
} }
log_close (); log_close ();
for (i = 0; i < nurl; i++) for (i = 0; i < nurl; i++)
free (url[i]); xfree (url[i]);
cleanup (); cleanup ();
#ifdef DEBUG_MALLOC #ifdef DEBUG_MALLOC
print_malloc_debug_stats (); print_malloc_debug_stats ();
#endif #endif
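As an aside, the calling convention of the renamed rewrite_shorthand_url() can be read off the argument loop above: it returns a freshly allocated full URL when the argument was recognized as a shorthand, and NULL when the argument should be used as given. A usage sketch under that assumption, with wget's internal headers and <stdio.h> taken to be in scope; the input string is hypothetical.

/* Returns a heap-allocated URL the caller must xfree(); only the
   NULL-or-allocated contract is taken from the main.c hunk above. */
static char *
effective_url (const char *arg)       /* e.g. "www.example.com/index.html" */
{
  char *rewritten = rewrite_shorthand_url (arg);
  if (rewritten)
    {
      printf ("Converted %s to %s\n", arg, rewritten);
      return rewritten;
    }
  return xstrdup (arg);               /* already a full URL */
}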

src/recur.c

@ -120,9 +120,8 @@ recursive_retrieve (const char *file, const char *this_url)
int dt, inl, dash_p_leaf_HTML = FALSE; int dt, inl, dash_p_leaf_HTML = FALSE;
int meta_disallow_follow; int meta_disallow_follow;
int this_url_ftp; /* See below the explanation */ int this_url_ftp; /* See below the explanation */
uerr_t err;
urlpos *url_list, *cur_url; urlpos *url_list, *cur_url;
struct urlinfo *u; struct url *u;
assert (this_url != NULL); assert (this_url != NULL);
assert (file != NULL); assert (file != NULL);
@ -140,9 +139,8 @@ recursive_retrieve (const char *file, const char *this_url)
hash_table_clear (undesirable_urls); hash_table_clear (undesirable_urls);
string_set_add (undesirable_urls, this_url); string_set_add (undesirable_urls, this_url);
/* Enter this_url to the hash table, in original and "enhanced" form. */ /* Enter this_url to the hash table, in original and "enhanced" form. */
u = newurl (); u = url_parse (this_url, NULL);
err = parseurl (this_url, u, 0); if (u)
if (err == URLOK)
{ {
string_set_add (undesirable_urls, u->url); string_set_add (undesirable_urls, u->url);
if (opt.no_parent) if (opt.no_parent)
@ -156,7 +154,7 @@ recursive_retrieve (const char *file, const char *this_url)
DEBUGP (("Double yuck! The *base* URL is broken.\n")); DEBUGP (("Double yuck! The *base* URL is broken.\n"));
base_dir = NULL; base_dir = NULL;
} }
freeurl (u, 1); url_free (u);
depth = 1; depth = 1;
first_time = 0; first_time = 0;
} }
@ -210,11 +208,10 @@ recursive_retrieve (const char *file, const char *this_url)
break; break;
/* Parse the URL for convenient use in other functions, as well /* Parse the URL for convenient use in other functions, as well
as to get the optimized form. It also checks URL integrity. */ as to get the optimized form. It also checks URL integrity. */
u = newurl (); u = url_parse (cur_url->url, NULL);
if (parseurl (cur_url->url, u, 0) != URLOK) if (!u)
{ {
DEBUGP (("Yuck! A bad URL.\n")); DEBUGP (("Yuck! A bad URL.\n"));
freeurl (u, 1);
continue; continue;
} }
assert (u->url != NULL); assert (u->url != NULL);
@ -281,8 +278,8 @@ recursive_retrieve (const char *file, const char *this_url)
if (!(base_dir && frontcmp (base_dir, u->dir))) if (!(base_dir && frontcmp (base_dir, u->dir)))
{ {
/* Failing that, check for parent dir. */ /* Failing that, check for parent dir. */
struct urlinfo *ut = newurl (); struct url *ut = url_parse (this_url, NULL);
if (parseurl (this_url, ut, 0) != URLOK) if (!ut)
DEBUGP (("Double yuck! The *base* URL is broken.\n")); DEBUGP (("Double yuck! The *base* URL is broken.\n"));
else if (!frontcmp (ut->dir, u->dir)) else if (!frontcmp (ut->dir, u->dir))
{ {
@ -291,7 +288,7 @@ recursive_retrieve (const char *file, const char *this_url)
string_set_add (undesirable_urls, constr); string_set_add (undesirable_urls, constr);
inl = 1; inl = 1;
} }
freeurl (ut, 1); url_free (ut);
} }
} }
/* If the file does not match the acceptance list, or is on the /* If the file does not match the acceptance list, or is on the
@ -343,7 +340,16 @@ recursive_retrieve (const char *file, const char *this_url)
if (!inl) if (!inl)
{ {
if (!opt.simple_check) if (!opt.simple_check)
opt_url (u); {
/* Find the "true" host. */
char *host = realhost (u->host);
xfree (u->host);
u->host = host;
/* Refresh the printed representation of the URL. */
xfree (u->url);
u->url = url_string (u, 0);
}
else else
{ {
char *p; char *p;
@ -351,7 +357,7 @@ recursive_retrieve (const char *file, const char *this_url)
for (p = u->host; *p; p++) for (p = u->host; *p; p++)
*p = TOLOWER (*p); *p = TOLOWER (*p);
xfree (u->url); xfree (u->url);
u->url = str_url (u, 0); u->url = url_string (u, 0);
} }
xfree (constr); xfree (constr);
constr = xstrdup (u->url); constr = xstrdup (u->url);
@ -473,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* Free filename and constr. */ /* Free filename and constr. */
FREE_MAYBE (filename); FREE_MAYBE (filename);
FREE_MAYBE (constr); FREE_MAYBE (constr);
freeurl (u, 1); url_free (u);
/* Increment the pbuf for the appropriate size. */ /* Increment the pbuf for the appropriate size. */
} }
if (opt.convert_links && !opt.delete_after) if (opt.convert_links && !opt.delete_after)
@ -573,13 +579,9 @@ convert_all_links (void)
char *local_name; char *local_name;
/* The URL must be in canonical form to be compared. */ /* The URL must be in canonical form to be compared. */
struct urlinfo *u = newurl (); struct url *u = url_parse (cur_url->url, NULL);
uerr_t res = parseurl (cur_url->url, u, 0); if (!u)
if (res != URLOK) continue;
{
freeurl (u, 1);
continue;
}
/* We decide the direction of conversion according to whether /* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */ ABS2REL, whereas non-downloaded will be converted REL2ABS. */
@ -608,7 +610,7 @@ convert_all_links (void)
cur_url->convert = CO_CONVERT_TO_COMPLETE; cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL; cur_url->local_name = NULL;
} }
freeurl (u, 1); url_free (u);
} }
/* Convert the links in the file. */ /* Convert the links in the file. */
convert_links (html->string, urls); convert_links (html->string, urls);
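The recursive_retrieve() hunks above inline what opt_url() used to do. An illustrative wrapper over the two normalization branches, assuming wget's internal declarations (realhost(), url_string(), TOLOWER, xfree()) are in scope; the wrapper function itself is not part of the patch.

static void
normalize_parsed_url (struct url *u, int simple_check)
{
  if (!simple_check)
    {
      /* Full check: substitute the canonical ("true") host name. */
      char *host = realhost (u->host);
      xfree (u->host);
      u->host = host;
    }
  else
    {
      /* Cheap check: merely lowercase the host. */
      char *p;
      for (p = u->host; *p; p++)
        *p = TOLOWER (*p);
    }
  /* Either way, refresh the printed form so later string comparisons
     against the `undesirable_urls' table stay consistent. */
  xfree (u->url);
  u->url = url_string (u, 0);
}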

src/retr.c

@ -51,9 +51,6 @@ extern int errno;
int global_download_count; int global_download_count;
void logflush PARAMS ((void)); void logflush PARAMS ((void));
/* From http.c. */
uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
/* Flags for show_progress(). */ /* Flags for show_progress(). */
enum spflags { SP_NONE, SP_INIT, SP_FINISH }; enum spflags { SP_NONE, SP_INIT, SP_FINISH };
@ -314,9 +311,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
uerr_t result; uerr_t result;
char *url; char *url;
int location_changed, dummy; int location_changed, dummy;
int local_use_proxy; int use_proxy;
char *mynewloc, *proxy; char *mynewloc, *proxy;
struct urlinfo *u; struct url *u;
int up_error_code; /* url parse error code */
char *local_file;
struct hash_table *redirections = NULL; struct hash_table *redirections = NULL;
/* If dt is NULL, just ignore it. */ /* If dt is NULL, just ignore it. */
@ -328,80 +327,74 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (file) if (file)
*file = NULL; *file = NULL;
u = newurl (); u = url_parse (url, &up_error_code);
/* Parse the URL. */ if (!u)
result = parseurl (url, u, 0);
if (result != URLOK)
{ {
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result)); logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
freeurl (u, 1);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
xfree (url); xfree (url);
return result; return URLERROR;
} }
if (!refurl)
refurl = opt.referer;
redirected: redirected:
/* Set the referer. */ result = NOCONERROR;
if (refurl) mynewloc = NULL;
u->referer = xstrdup (refurl); local_file = NULL;
else
{
if (opt.referer)
u->referer = xstrdup (opt.referer);
else
u->referer = NULL;
}
local_use_proxy = USE_PROXY_P (u); use_proxy = USE_PROXY_P (u);
if (local_use_proxy) if (use_proxy)
{ {
struct urlinfo *pu = newurl (); struct url *proxy_url;
/* Copy the original URL to new location. */ /* Get the proxy server for the current scheme. */
memcpy (pu, u, sizeof (*u)); proxy = getproxy (u->scheme);
pu->proxy = NULL; /* A minor correction :) */
/* Initialize u to nil. */
memset (u, 0, sizeof (*u));
u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the
current scheme. */
proxy = getproxy (pu->scheme);
if (!proxy) if (!proxy)
{ {
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n")); logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1); url_free (u);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
xfree (url); xfree (url);
return PROXERR; return PROXERR;
} }
/* Parse the proxy URL. */ /* Parse the proxy URL. */
result = parseurl (proxy, u, 0); proxy_url = url_parse (proxy, &up_error_code);
if (result != URLOK || u->scheme != SCHEME_HTTP) if (!proxy_url)
{ {
if (u->scheme == SCHEME_HTTP) logprintf (LOG_NOTQUIET, "Error parsing proxy URL %s: %s.\n",
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result)); proxy, url_error (up_error_code));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
xfree (url); xfree (url);
return PROXERR; return PROXERR;
} }
u->scheme = SCHEME_HTTP; if (proxy_url->scheme != SCHEME_HTTP)
{
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
url_free (proxy_url);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
url_free (proxy_url);
} }
else if (u->scheme == SCHEME_HTTP
mynewloc = NULL;
if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL #ifdef HAVE_SSL
|| u->scheme == SCHEME_HTTPS || u->scheme == SCHEME_HTTPS
#endif #endif
) )
result = http_loop (u, &mynewloc, dt); {
result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
}
else if (u->scheme == SCHEME_FTP) else if (u->scheme == SCHEME_FTP)
{ {
/* If this is a redirection, we must not allow recursive FTP /* If this is a redirection, we must not allow recursive FTP
@ -412,13 +405,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
opt.recursive = 0; opt.recursive = 0;
result = ftp_loop (u, dt); result = ftp_loop (u, dt);
opt.recursive = oldrec; opt.recursive = oldrec;
#if 0
/* There is a possibility of having HTTP being redirected to /* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML FTP. In these cases we must decide whether the text is HTML
according to the suffix. The HTML suffixes are `.html' and according to the suffix. The HTML suffixes are `.html' and
`.htm', case-insensitive. `.htm', case-insensitive. */
#### All of this is, of course, crap. These types should be
determined through mailcap. */
if (redirections && u->local && (u->scheme == SCHEME_FTP)) if (redirections && u->local && (u->scheme == SCHEME_FTP))
{ {
char *suf = suffix (u->local); char *suf = suffix (u->local);
@ -426,16 +417,19 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*dt |= TEXTHTML; *dt |= TEXTHTML;
FREE_MAYBE (suf); FREE_MAYBE (suf);
} }
#endif
} }
location_changed = (result == NEWLOCATION); location_changed = (result == NEWLOCATION);
if (location_changed) if (location_changed)
{ {
char *construced_newloc; char *construced_newloc;
uerr_t newloc_result; struct url *newloc_struct;
struct urlinfo *newloc_struct;
assert (mynewloc != NULL); assert (mynewloc != NULL);
if (local_file)
xfree (local_file);
/* The HTTP specs only allow absolute URLs to appear in /* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs out redirects, but a ton of boneheaded webservers and CGIs out
there break the rules and use relative URLs, and popular there break the rules and use relative URLs, and popular
@ -445,13 +439,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
mynewloc = construced_newloc; mynewloc = construced_newloc;
/* Now, see if this new location makes sense. */ /* Now, see if this new location makes sense. */
newloc_struct = newurl (); newloc_struct = url_parse (mynewloc, NULL);
newloc_result = parseurl (mynewloc, newloc_struct, 1); if (!newloc_struct)
if (newloc_result != URLOK)
{ {
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result)); logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, "UNKNOWN");
freeurl (newloc_struct, 1); url_free (newloc_struct);
freeurl (u, 1); url_free (u);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
xfree (url); xfree (url);
@ -473,14 +466,14 @@ retrieve_url (const char *origurl, char **file, char **newloc,
string_set_add (redirections, u->url); string_set_add (redirections, u->url);
} }
/* The new location is OK. Let's check for redirection cycle by /* The new location is OK. Check for redirection cycle by
peeking through the history of redirections. */ peeking through the history of redirections. */
if (string_set_contains (redirections, newloc_struct->url)) if (string_set_contains (redirections, newloc_struct->url))
{ {
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"), logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc); mynewloc);
freeurl (newloc_struct, 1); url_free (newloc_struct);
freeurl (u, 1); url_free (u);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
xfree (url); xfree (url);
@ -491,29 +484,27 @@ retrieve_url (const char *origurl, char **file, char **newloc,
xfree (url); xfree (url);
url = mynewloc; url = mynewloc;
freeurl (u, 1); url_free (u);
u = newloc_struct; u = newloc_struct;
goto redirected; goto redirected;
} }
if (u->local) if (local_file)
{ {
if (*dt & RETROKF) if (*dt & RETROKF)
{ {
register_download (url, u->local); register_download (url, local_file);
if (*dt & TEXTHTML) if (*dt & TEXTHTML)
register_html (url, u->local); register_html (url, local_file);
} }
} }
if (file) if (file)
{ *file = local_file ? local_file : NULL;
if (u->local) else
*file = xstrdup (u->local); FREE_MAYBE (local_file);
else
*file = NULL; url_free (u);
}
freeurl (u, 1);
if (redirections) if (redirections)
string_set_free (redirections); string_set_free (redirections);
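Condensed, and for the HTTP(S) case only, the new control flow above reads roughly as follows: the proxy gets its own struct url and travels to http_loop() as a separate argument instead of being spliced into the target URL the way the old code did. Error handling is stripped out, and the wrapper function is illustrative, not part of the patch.

static uerr_t
retrieve_http_sketch (struct url *u, char **newloc, char **local_file,
                      const char *refurl, int *dt)
{
  uerr_t result;
  int up_error_code;
  struct url *proxy_url = NULL;

  if (USE_PROXY_P (u))
    {
      char *proxy = getproxy (u->scheme);
      proxy_url = url_parse (proxy, &up_error_code);
      /* the rewritten code additionally insists on SCHEME_HTTP here */
    }
  result = http_loop (u, newloc, local_file, refurl, dt, proxy_url);
  if (proxy_url)
    url_free (proxy_url);
  return result;
}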

src/retr.h

@ -36,4 +36,12 @@ int downloaded_exceeds_quota PARAMS ((void));
void sleep_between_retrievals PARAMS ((int)); void sleep_between_retrievals PARAMS ((int));
/* Because there's no http.h. */
struct url;
uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
int *, struct url *));
#endif /* RETR_H */ #endif /* RETR_H */

src/url.c: 1246 changed lines (diff suppressed because it is too large)

src/url.h

@ -25,6 +25,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define DEFAULT_FTP_PORT 21 #define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443 #define DEFAULT_HTTPS_PORT 443
/* Note: the ordering here is related to the order of elements in
`supported_schemes' in url.c. */
enum url_scheme { enum url_scheme {
SCHEME_HTTP, SCHEME_HTTP,
#ifdef HAVE_SSL #ifdef HAVE_SSL
@ -35,24 +38,27 @@ enum url_scheme {
}; };
/* Structure containing info on a URL. */ /* Structure containing info on a URL. */
struct urlinfo struct url
{ {
char *url; /* Unchanged URL */ char *url; /* Original URL */
enum url_scheme scheme; /* URL scheme */ enum url_scheme scheme; /* URL scheme */
char *host; /* Extracted hostname */ char *host; /* Extracted hostname */
unsigned short port; int port; /* Port number */
char ftp_type;
char *path, *dir, *file, *qstring; /* URL components (URL-quoted). */
/* Path, dir, file, and query string char *path;
(properly decoded) */ char *params;
char *user, *passwd; /* Username and password */ char *query;
struct urlinfo *proxy; /* The exact string to pass to proxy char *fragment;
server */
char *referer; /* The source from which the request /* Extracted path info (unquoted). */
URI was obtained */ char *dir;
char *local; /* The local filename of the URL char *file;
document */
/* Username and password (unquoted). */
char *user;
char *passwd;
}; };
enum convert_options { enum convert_options {
@ -104,19 +110,21 @@ typedef enum
char *encode_string PARAMS ((const char *)); char *encode_string PARAMS ((const char *));
struct urlinfo *newurl PARAMS ((void)); struct url *url_parse PARAMS ((const char *, int *));
void freeurl PARAMS ((struct urlinfo *, int)); const char *url_error PARAMS ((int));
enum url_scheme url_detect_scheme PARAMS ((const char *)); char *url_full_path PARAMS ((const struct url *));
void url_set_dir PARAMS ((struct url *, const char *));
void url_set_file PARAMS ((struct url *, const char *));
void url_free PARAMS ((struct url *));
enum url_scheme url_scheme PARAMS ((const char *));
int url_skip_scheme PARAMS ((const char *)); int url_skip_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *)); int url_has_scheme PARAMS ((const char *));
int scheme_default_port PARAMS ((enum url_scheme));
int url_skip_uname PARAMS ((const char *)); int url_skip_uname PARAMS ((const char *));
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int)); char *url_string PARAMS ((const struct url *, int));
char *str_url PARAMS ((const struct urlinfo *, int));
/* url_equal is not currently used. */
#if 0
int url_equal PARAMS ((const char *, const char *));
#endif /* 0 */
urlpos *get_urls_file PARAMS ((const char *)); urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *)); urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
@ -126,8 +134,7 @@ char *uri_merge PARAMS ((const char *, const char *));
void rotate_backups PARAMS ((const char *)); void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *)); int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *)); char *url_filename PARAMS ((const struct url *));
void opt_url PARAMS ((struct urlinfo *));
char *getproxy PARAMS ((uerr_t)); char *getproxy PARAMS ((uerr_t));
int no_proxy_match PARAMS ((const char *, const char **)); int no_proxy_match PARAMS ((const char *, const char **));
@ -137,6 +144,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *)); downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
char *rewrite_url_maybe PARAMS ((const char *)); char *rewrite_shorthand_url PARAMS ((const char *));
#endif /* URL_H */ #endif /* URL_H */
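A minimal usage sketch of the reworked API declared above. The error-reporting convention (an integer code filled in through the second argument and turned into text with url_error()) and the field names follow the prototypes and struct above; the surrounding program is illustrative and assumes url.h plus <stdio.h>.

static int
show_components (const char *spec)
{
  int up_error_code;
  struct url *u = url_parse (spec, &up_error_code);
  if (!u)
    {
      fprintf (stderr, "%s: %s.\n", spec, url_error (up_error_code));
      return -1;
    }
  printf ("host=%s port=%d dir=%s file=%s\n",
          u->host, u->port, u->dir, u->file);
  {
    /* Same second argument the http.c hunk uses when logging. */
    char *printable = url_string (u, 1);
    printf ("%s\n", printable);
    xfree (printable);
  }
  url_free (u);
  return 0;
}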

src/utils.c

@ -404,30 +404,6 @@ datetime_str (time_t *tm)
ptm->tm_hour, ptm->tm_min, ptm->tm_sec); ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
return output; return output;
} }
/* Returns an error message for ERRNUM. #### This requires more work.
This function, as well as the whole error system, is very
ill-conceived. */
const char *
uerrmsg (uerr_t errnum)
{
switch (errnum)
{
case URLUNKNOWN:
return _("Unknown/unsupported protocol");
break;
case URLBADPORT:
return _("Invalid port specification");
break;
case URLBADHOST:
return _("Invalid host name");
break;
default:
abort ();
/* $@#@#$ compiler. */
return NULL;
}
}
/* The Windows versions of the following two functions are defined in /* The Windows versions of the following two functions are defined in
mswindows.c. */ mswindows.c. */
@ -464,6 +440,14 @@ fork_to_background (void)
} }
#endif /* not WINDOWS */ #endif /* not WINDOWS */
char *
ps (char *orig)
{
char *r = xstrdup (orig);
path_simplify (r);
return r;
}
/* Canonicalize PATH, and return a new path. The new path differs from PATH /* Canonicalize PATH, and return a new path. The new path differs from PATH
in that: in that:
Multiple `/'s are collapsed to a single `/'. Multiple `/'s are collapsed to a single `/'.
@ -479,7 +463,8 @@ fork_to_background (void)
Always use '/' as stub_char. Always use '/' as stub_char.
Don't check for local things using canon_stat. Don't check for local things using canon_stat.
Change the original string instead of strdup-ing. Change the original string instead of strdup-ing.
React correctly when beginning with `./' and `../'. */ React correctly when beginning with `./' and `../'.
Don't zip out trailing slashes. */
void void
path_simplify (char *path) path_simplify (char *path)
{ {
@ -545,20 +530,15 @@ path_simplify (char *path)
i = start + 1; i = start + 1;
} }
/* Check for trailing `/'. */
if (start && !path[i])
{
zero_last:
path[--i] = '\0';
break;
}
/* Check for `../', `./' or trailing `.' by itself. */ /* Check for `../', `./' or trailing `.' by itself. */
if (path[i] == '.') if (path[i] == '.')
{ {
/* Handle trailing `.' by itself. */ /* Handle trailing `.' by itself. */
if (!path[i + 1]) if (!path[i + 1])
goto zero_last; {
path[--i] = '\0';
break;
}
/* Handle `./'. */ /* Handle `./'. */
if (path[i + 1] == '/') if (path[i + 1] == '/')
@ -579,12 +559,6 @@ path_simplify (char *path)
} }
} /* path == '.' */ } /* path == '.' */
} /* while */ } /* while */
if (!*path)
{
*path = stub_char;
path[1] = '\0';
}
} }
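The practical effect of the path_simplify() change, inferred from the code and comments above rather than from test output: dot segments still collapse in place, but a trailing slash now survives, and an empty result is no longer rewritten to "/". A minimal demonstration, assuming utils.h is in scope.

static void
demo_path_simplify (void)
{
  char p[] = "foo/./bar/";
  path_simplify (p);      /* expected "foo/bar/" -- the trailing '/' stays */
}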
/* "Touch" FILE, i.e. make its atime and mtime equal to the time /* "Touch" FILE, i.e. make its atime and mtime equal to the time
@ -728,6 +702,30 @@ make_directory (const char *directory)
} }
return 0; return 0;
} }
/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
should be a file name. For example, file_merge("/foo/bar", "baz")
will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
"/foo/bar/baz".
In other words, it's a simpler and gentler version of uri_merge_1. */
char *
file_merge (const char *base, const char *file)
{
char *result;
const char *cut = (const char *)strrchr (base, '/');
if (!cut)
cut = base + strlen (base);
result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
memcpy (result, base, cut - base);
result[cut - base] = '/';
strcpy (result + (cut - base) + 1, file);
return result;
}
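A quick check of file_merge() as defined above: both results are heap-allocated, and xfree() is assumed to come from wget's own headers.

static void
demo_file_merge (void)
{
  char *a = file_merge ("/foo/bar", "baz");    /* "/foo/baz" */
  char *b = file_merge ("/foo/bar/", "baz");   /* "/foo/bar/baz" */
  xfree (a);
  xfree (b);
}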
static int in_acclist PARAMS ((const char *const *, const char *, int)); static int in_acclist PARAMS ((const char *const *, const char *, int));

src/utils.h

@ -44,8 +44,6 @@ struct wget_timer;
char *time_str PARAMS ((time_t *)); char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *)); char *datetime_str PARAMS ((time_t *));
const char *uerrmsg PARAMS ((uerr_t));
#ifdef DEBUG_MALLOC #ifdef DEBUG_MALLOC
void print_malloc_debug_stats (); void print_malloc_debug_stats ();
#endif #endif
@ -63,6 +61,7 @@ int file_exists_p PARAMS ((const char *));
int file_non_directory_p PARAMS ((const char *)); int file_non_directory_p PARAMS ((const char *));
int make_directory PARAMS ((const char *)); int make_directory PARAMS ((const char *));
char *unique_name PARAMS ((const char *)); char *unique_name PARAMS ((const char *));
char *file_merge PARAMS ((const char *, const char *));
int acceptable PARAMS ((const char *)); int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd)); int accdir PARAMS ((const char *s, enum accd));

src/wget.h

@ -285,9 +285,8 @@ typedef enum
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK, BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR, CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR, FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLHTTPS, FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
URLOK, URLHTTP, URLFTP, URLFILE, URLUNKNOWN, URLBADPORT, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
URLBADHOST, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE, HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
FTPINVPASV, FTPNOPASV, FTPINVPASV, FTPNOPASV,
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC, CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,