[svn] Rewrite parsing and handling of URLs.
Published in <sxs4rnnlklo.fsf@florida.arsdigita.de>.
Parent: f4dcb55851
Commit: d5be8ecca4
ChangeLog
@@ -1,3 +1,7 @@
+2001-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+        * configure.in: Check for strpbrk().
+
 2001-05-14  Herold Heiko  <Heiko.Herold@previnet.it>
 
         * windows/Makefile.src:
configure.in
@@ -172,7 +172,7 @@ dnl Checks for library functions.
 dnl
 AC_FUNC_ALLOCA
 AC_FUNC_MMAP
-AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp)
+AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp strpbrk)
 AC_CHECK_FUNCS(gettimeofday mktime strptime)
 AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
 AC_CHECK_FUNCS(uname gethostname)
src/ChangeLog
@@ -1,3 +1,53 @@
+2001-11-22  Hrvoje Niksic  <hniksic@arsdigita.com>
+
+        * utils.c (path_simplify): Don't remove trailing slashes.
+
+        * ftp.c (ftp_get_listing): Use it.
+
+        * utils.c (file_merge): New function.
+
+        * url.c (opt_url): Removed.
+
+        * recur.c (recursive_retrieve): Inline "opt_url" logic.
+
+        * main.c (main): Use xfree(), not free().
+
+        * url.c (rewrite_url_maybe): Renamed to rewrite_shorthand_url.
+
+        * ftp.c (ccon): Move `ccon' typedef here, since it's only used
+        internally.
+
+        * config.h.in: Include a stub for HAVE_STRPBRK.
+
+        * cmpt.c (strpbrk): Include a replacement for systems without
+        strpbrk().
+
+        * ftp.c: Use url_set_dir and url_set_file when modifying the URL.
+
+        * url.c (url_set_dir): New function.
+        (url_set_file): Ditto.
+
+        * ftp-basic.c (ftp_process_type): Process FTP type here; the URL
+        parser makes the URL "params" available, so we can do that in this
+        function.
+
+        * retr.c: Ditto.
+
+        * ftp.c: Ditto; pass the local file information in `ccon'.
+
+        * http.c: Get rid of the ugly kludge that had URL being replaced
+        with the proxy URL when proxy retrieval was requested. Use a
+        separate parameter to http_loop and gethttp for the proxy URL.
+
+        * http.c: Changed to reflect the fact that local file, proxy, and
+        referer information are no longer stored in struct url. The local
+        file information is passed in `struct hstat' now.
+
+        * url.c: Reworked URL parsing to be more regular. Reencode the
+        URL using reencode_string.
+        Removed non-URL-related information from struct url. This
+        includes fields `proxy', `local', and `referer'.
+
 2001-11-22  Jochen Hein  <jochen@jochen.org>
 
         * main.c (main): Split the copyright notice for easier
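Several of the entries above boil down to one recurring pattern in the hunks below: instead of assigning to u->file or u->dir directly, callers now go through url_set_file()/url_set_dir() so that the composed u->url stays consistent. A minimal sketch of that calling convention, modeled on the ftp.c hunks further down (the variables u, f and con stand in for the ones used there; this is an illustration, not code from the commit):

    char *ofile = xstrdup (u->file);   /* remember the original file component */
    url_set_file (u, f->name);         /* swap in the new file; keeps u->url in sync */
    con->target = url_filename (u);    /* derive the local target from the updated URL */

    /* ... retrieve the file ... */

    url_set_file (u, ofile);           /* restore the original component */
    xfree (ofile);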
src/cmpt.c | 18
@@ -205,6 +205,24 @@ ret0:
 }
 #endif /* not HAVE_STRSTR */
 
+#ifndef HAVE_STRPBRK
+/* Find the first ocurrence in S of any character in ACCEPT. */
+char *
+strpbrk (const char *s, const char *accept)
+{
+  while (*s != '\0')
+    {
+      const char *a = accept;
+      while (*a != '\0')
+        if (*a++ == *s)
+          return (char *) s;
+      ++s;
+    }
+
+  return 0;
+}
+#endif /* HAVE_STRPBRK */
+
 #ifndef HAVE_MKTIME
 /* From GNU libc 2.0. */
 
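The new configure check and this fallback exist presumably because the reworked parser in url.c wants to find the first of several delimiter characters in a single call. A small stand-alone usage sketch -- the separator set ";?" here is illustrative, not necessarily the exact set url.c uses:

    #include <stdio.h>
    #include <string.h>

    int
    main (void)
    {
      const char *path = "/pub/file;type=a?query";
      /* Return a pointer to whichever of ';' or '?' occurs first. */
      char *sep = strpbrk (path, ";?");
      if (sep)
        printf ("first separator '%c' at offset %ld\n", *sep, (long) (sep - path));
      return 0;
    }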
src/config.h.in
@@ -141,6 +141,9 @@ char *alloca ();
 /* Define if you have the strncasecmp function. */
 #undef HAVE_STRNCASECMP
 
+/* Define if you have the strpbrk function. */
+#undef HAVE_STRPBRK
+
 /* Define if you have the strptime function. */
 #undef HAVE_STRPTIME
 
src/cookies.c
@@ -780,7 +780,7 @@ check_path_match (const char *cookie_path, const char *path)
 int
 set_cookie_header_cb (const char *hdr, void *closure)
 {
-  struct urlinfo *u = (struct urlinfo *)closure;
+  struct url *u = (struct url *)closure;
   struct cookie *cookie;
 
   cookies_now = time (NULL);
src/ftp-basic.c
@@ -633,6 +633,7 @@ ftp_pwd (struct rbuf *rbuf, char **pwd)
   /* All OK. */
   return FTPOK;
 }
 
 /* Sends the SIZE command to the server, and returns the value in 'size'.
  * If an error occurs, size is set to zero. */
 uerr_t
@@ -690,3 +691,16 @@ ftp_size (struct rbuf *rbuf, const char *file, long int *size)
   /* All OK. */
   return FTPOK;
 }
+
+/* If URL's params are of the form "type=X", return character X.
+   Otherwise, return 'I' (the default type). */
+char
+ftp_process_type (const char *params)
+{
+  if (params
+      && 0 == strncasecmp (params, "type=", 5)
+      && params[5] != '\0')
+    return TOUPPER (params[5]);
+  else
+    return 'I';
+}
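A quick illustration of the behaviour the comment above describes (a stand-alone rewrite for demonstration only; wget's TOUPPER macro is replaced by plain toupper(), and the inputs are made up):

    #include <ctype.h>
    #include <stdio.h>
    #include <strings.h>          /* strncasecmp */

    /* Stand-alone copy of the logic above, for illustration only. */
    static char
    process_type (const char *params)
    {
      if (params
          && 0 == strncasecmp (params, "type=", 5)
          && params[5] != '\0')
        return toupper ((unsigned char) params[5]);
      return 'I';
    }

    int
    main (void)
    {
      printf ("%c\n", process_type ("type=a"));   /* prints A */
      printf ("%c\n", process_type ("type=I"));   /* prints I */
      printf ("%c\n", process_type (NULL));       /* prints I, the default */
      return 0;
    }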
src/ftp-ls.c
@@ -796,7 +796,7 @@ Unsupported listing type, trying Unix listing parser.\n"));
    directories and files on the appropriate host. The references are
    FTP. */
 uerr_t
-ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
+ftp_index (const char *file, struct url *u, struct fileinfo *f)
 {
   FILE *fp;
   char *upwd;
src/ftp.c | 219
@ -62,6 +62,18 @@ extern int h_errno;
|
||||
|
||||
extern char ftp_last_respline[];
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int st; /* connection status */
|
||||
int cmd; /* command code */
|
||||
struct rbuf rbuf; /* control connection buffer */
|
||||
long dltime; /* time of the download */
|
||||
enum stype rs; /* remote system reported by ftp server */
|
||||
char *id; /* initial directory */
|
||||
char *target; /* target file name */
|
||||
} ccon;
|
||||
|
||||
|
||||
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
|
||||
the string S, and return the number converted to long, if found, 0
|
||||
otherwise. */
|
||||
@ -108,7 +120,7 @@ ftp_expected_bytes (const char *s)
|
||||
connection to the server. It always closes the data connection,
|
||||
and closes the control connection in case of error. */
|
||||
static uerr_t
|
||||
getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
||||
getftp (struct url *u, long *len, long restval, ccon *con)
|
||||
{
|
||||
int csock, dtsock, res;
|
||||
uerr_t err;
|
||||
@ -122,7 +134,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
||||
long expected_bytes = 0L;
|
||||
|
||||
assert (con != NULL);
|
||||
assert (u->local != NULL);
|
||||
assert (con->target != NULL);
|
||||
|
||||
/* Debug-check of the sanity of the request by making sure that LIST
|
||||
and RETR are never both requested (since we can handle only one
|
||||
at a time. */
|
||||
@ -144,6 +157,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
||||
csock = RBUF_FD (&con->rbuf);
|
||||
else /* cmd & DO_LOGIN */
|
||||
{
|
||||
char type_char;
|
||||
|
||||
/* Login to the server: */
|
||||
|
||||
/* First: Establish the control connection. */
|
||||
@ -325,9 +340,10 @@ Error in server response, closing control connection.\n"));
|
||||
logputs (LOG_VERBOSE, _("done.\n"));
|
||||
|
||||
/* Fifth: Set the FTP type. */
|
||||
type_char = ftp_process_type (u->params);
|
||||
if (!opt.server_response)
|
||||
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", TOUPPER (u->ftp_type));
|
||||
err = ftp_type (&con->rbuf, TOUPPER (u->ftp_type));
|
||||
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", type_char);
|
||||
err = ftp_type (&con->rbuf, type_char);
|
||||
/* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
|
||||
switch (err)
|
||||
{
|
||||
@ -351,7 +367,7 @@ Error in server response, closing control connection.\n"));
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("Unknown type `%c', closing control connection.\n"),
|
||||
TOUPPER (u->ftp_type));
|
||||
type_char);
|
||||
CLOSE (csock);
|
||||
rbuf_uninitialize (&con->rbuf);
|
||||
return err;
|
||||
@ -701,7 +717,7 @@ Error in server response, closing control connection.\n"));
|
||||
{
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("\nREST failed; will not truncate `%s'.\n"),
|
||||
u->local);
|
||||
con->target);
|
||||
CLOSE (csock);
|
||||
closeport (dtsock);
|
||||
rbuf_uninitialize (&con->rbuf);
|
||||
@ -850,16 +866,16 @@ Error in server response, closing control connection.\n"));
|
||||
/* Open the file -- if opt.dfp is set, use it instead. */
|
||||
if (!opt.dfp || con->cmd & DO_LIST)
|
||||
{
|
||||
mkalldirs (u->local);
|
||||
mkalldirs (con->target);
|
||||
if (opt.backups)
|
||||
rotate_backups (u->local);
|
||||
rotate_backups (con->target);
|
||||
/* #### Is this correct? */
|
||||
chmod (u->local, 0600);
|
||||
chmod (con->target, 0600);
|
||||
|
||||
fp = fopen (u->local, restval ? "ab" : "wb");
|
||||
fp = fopen (con->target, restval ? "ab" : "wb");
|
||||
if (!fp)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, strerror (errno));
|
||||
CLOSE (csock);
|
||||
rbuf_uninitialize (&con->rbuf);
|
||||
closeport (dtsock);
|
||||
@ -928,7 +944,7 @@ Error in server response, closing control connection.\n"));
|
||||
if (res == -2)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
|
||||
u->local, strerror (errno));
|
||||
con->target, strerror (errno));
|
||||
CLOSE (csock);
|
||||
rbuf_uninitialize (&con->rbuf);
|
||||
return FWRITEERR;
|
||||
@ -993,10 +1009,10 @@ Error in server response, closing control connection.\n"));
|
||||
print it out. */
|
||||
if (opt.server_response && (con->cmd & DO_LIST))
|
||||
{
|
||||
mkalldirs (u->local);
|
||||
fp = fopen (u->local, "r");
|
||||
mkalldirs (con->target);
|
||||
fp = fopen (con->target, "r");
|
||||
if (!fp)
|
||||
logprintf (LOG_ALWAYS, "%s: %s\n", u->local, strerror (errno));
|
||||
logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno));
|
||||
else
|
||||
{
|
||||
char *line;
|
||||
@ -1020,7 +1036,7 @@ Error in server response, closing control connection.\n"));
|
||||
This loop either gets commands from con, or (if ON_YOUR_OWN is
|
||||
set), makes them up to retrieve the file given by the URL. */
|
||||
static uerr_t
|
||||
ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
|
||||
{
|
||||
int count, orig_lp;
|
||||
long restval, len;
|
||||
@ -1028,21 +1044,21 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
uerr_t err;
|
||||
struct stat st;
|
||||
|
||||
if (!u->local)
|
||||
u->local = url_filename (u);
|
||||
if (!con->target)
|
||||
con->target = url_filename (u);
|
||||
|
||||
if (opt.noclobber && file_exists_p (u->local))
|
||||
if (opt.noclobber && file_exists_p (con->target))
|
||||
{
|
||||
logprintf (LOG_VERBOSE,
|
||||
_("File `%s' already there, not retrieving.\n"), u->local);
|
||||
_("File `%s' already there, not retrieving.\n"), con->target);
|
||||
/* If the file is there, we suppose it's retrieved OK. */
|
||||
return RETROK;
|
||||
}
|
||||
|
||||
/* Remove it if it's a link. */
|
||||
remove_link (u->local);
|
||||
remove_link (con->target);
|
||||
if (!opt.output_document)
|
||||
locf = u->local;
|
||||
locf = con->target;
|
||||
else
|
||||
locf = opt.output_document;
|
||||
|
||||
@ -1100,7 +1116,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
/* Print fetch message, if opt.verbose. */
|
||||
if (opt.verbose)
|
||||
{
|
||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
||||
char *hurl = url_string (u, 1);
|
||||
char tmp[15];
|
||||
strcpy (tmp, " ");
|
||||
if (count > 1)
|
||||
@ -1175,7 +1191,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
/* Need to hide the password from the URL. The `if' is here
|
||||
so that we don't do the needless allocation every
|
||||
time. */
|
||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
||||
char *hurl = url_string (u, 1);
|
||||
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
|
||||
tms, hurl, len, locf, count);
|
||||
xfree (hurl);
|
||||
@ -1235,43 +1251,48 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
/* Return the directory listing in a reusable format. The directory
|
||||
is specifed in u->dir. */
|
||||
uerr_t
|
||||
ftp_get_listing (struct urlinfo *u, ccon *con, struct fileinfo **f)
|
||||
ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
|
||||
{
|
||||
uerr_t err;
|
||||
char *olocal = u->local;
|
||||
char *list_filename, *ofile;
|
||||
char *uf; /* url file name */
|
||||
char *lf; /* list file name */
|
||||
char *old_target = con->target;
|
||||
|
||||
con->st &= ~ON_YOUR_OWN;
|
||||
con->cmd |= (DO_LIST | LEAVE_PENDING);
|
||||
con->cmd &= ~DO_RETR;
|
||||
/* Get the listing filename. */
|
||||
ofile = u->file;
|
||||
u->file = LIST_FILENAME;
|
||||
list_filename = url_filename (u);
|
||||
u->file = ofile;
|
||||
u->local = list_filename;
|
||||
DEBUGP ((_("Using `%s' as listing tmp file.\n"), list_filename));
|
||||
|
||||
/* Find the listing file name. We do it by taking the file name of
|
||||
the URL and replacing the last component with the listing file
|
||||
name. */
|
||||
uf = url_filename (u);
|
||||
lf = file_merge (uf, LIST_FILENAME);
|
||||
xfree (uf);
|
||||
DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
|
||||
|
||||
con->target = lf;
|
||||
err = ftp_loop_internal (u, NULL, con);
|
||||
u->local = olocal;
|
||||
con->target = old_target;
|
||||
|
||||
if (err == RETROK)
|
||||
*f = ftp_parse_ls (list_filename, con->rs);
|
||||
*f = ftp_parse_ls (lf, con->rs);
|
||||
else
|
||||
*f = NULL;
|
||||
if (opt.remove_listing)
|
||||
{
|
||||
if (unlink (list_filename))
|
||||
if (unlink (lf))
|
||||
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
|
||||
else
|
||||
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), list_filename);
|
||||
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
|
||||
}
|
||||
xfree (list_filename);
|
||||
xfree (lf);
|
||||
con->cmd &= ~DO_LIST;
|
||||
return err;
|
||||
}
|
||||
|
||||
static uerr_t ftp_retrieve_dirs PARAMS ((struct urlinfo *, struct fileinfo *,
|
||||
static uerr_t ftp_retrieve_dirs PARAMS ((struct url *, struct fileinfo *,
|
||||
ccon *));
|
||||
static uerr_t ftp_retrieve_glob PARAMS ((struct urlinfo *, ccon *, int));
|
||||
static uerr_t ftp_retrieve_glob PARAMS ((struct url *, ccon *, int));
|
||||
static struct fileinfo *delelement PARAMS ((struct fileinfo *,
|
||||
struct fileinfo **));
|
||||
static void freefileinfo PARAMS ((struct fileinfo *f));
|
||||
@ -1284,11 +1305,10 @@ static void freefileinfo PARAMS ((struct fileinfo *f));
|
||||
If opt.recursive is set, after all files have been retrieved,
|
||||
ftp_retrieve_dirs will be called to retrieve the directories. */
|
||||
static uerr_t
|
||||
ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
|
||||
{
|
||||
static int depth = 0;
|
||||
uerr_t err;
|
||||
char *olocal, *ofile;
|
||||
struct fileinfo *orig;
|
||||
long local_size;
|
||||
time_t tml;
|
||||
@ -1323,15 +1343,19 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
|
||||
while (f)
|
||||
{
|
||||
char *old_target, *ofile;
|
||||
|
||||
if (downloaded_exceeds_quota ())
|
||||
{
|
||||
--depth;
|
||||
return QUOTEXC;
|
||||
}
|
||||
olocal = u->local;
|
||||
ofile = u->file;
|
||||
u->file = f->name;
|
||||
u->local = url_filename (u);
|
||||
old_target = con->target;
|
||||
|
||||
ofile = xstrdup (u->file);
|
||||
url_set_file (u, f->name);
|
||||
|
||||
con->target = url_filename (u);
|
||||
err = RETROK;
|
||||
|
||||
dlthis = 1;
|
||||
@ -1343,7 +1367,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
I'm not implementing it now since files on an FTP server are much
|
||||
more likely than files on an HTTP server to legitimately have a
|
||||
.orig suffix. */
|
||||
if (!stat (u->local, &st))
|
||||
if (!stat (con->target, &st))
|
||||
{
|
||||
int eq_size;
|
||||
int cor_val;
|
||||
@ -1360,7 +1384,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
/* Remote file is older, file sizes can be compared and
|
||||
are both equal. */
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
|
||||
Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
|
||||
dlthis = 0;
|
||||
}
|
||||
else if (eq_size)
|
||||
@ -1368,7 +1392,7 @@ Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
|
||||
/* Remote file is newer or sizes cannot be matched */
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Remote file is newer than local file `%s' -- retrieving.\n\n"),
|
||||
u->local);
|
||||
con->target);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1396,30 +1420,30 @@ The sizes do not match (local %ld) -- retrieving.\n\n"), local_size);
|
||||
struct stat st;
|
||||
/* Check whether we already have the correct
|
||||
symbolic link. */
|
||||
int rc = lstat (u->local, &st);
|
||||
int rc = lstat (con->target, &st);
|
||||
if (rc == 0)
|
||||
{
|
||||
size_t len = strlen (f->linkto) + 1;
|
||||
if (S_ISLNK (st.st_mode))
|
||||
{
|
||||
char *link_target = (char *)alloca (len);
|
||||
size_t n = readlink (u->local, link_target, len);
|
||||
size_t n = readlink (con->target, link_target, len);
|
||||
if ((n == len - 1)
|
||||
&& (memcmp (link_target, f->linkto, n) == 0))
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Already have correct symlink %s -> %s\n\n"),
|
||||
u->local, f->linkto);
|
||||
con->target, f->linkto);
|
||||
dlthis = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
|
||||
u->local, f->linkto);
|
||||
con->target, f->linkto);
|
||||
/* Unlink before creating symlink! */
|
||||
unlink (u->local);
|
||||
if (symlink (f->linkto, u->local) == -1)
|
||||
unlink (con->target);
|
||||
if (symlink (f->linkto, con->target) == -1)
|
||||
logprintf (LOG_NOTQUIET, "symlink: %s\n",
|
||||
strerror (errno));
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
@ -1427,7 +1451,7 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
#else /* not HAVE_SYMLINK */
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("Symlinks not supported, skipping symlink `%s'.\n"),
|
||||
u->local);
|
||||
con->target);
|
||||
#endif /* not HAVE_SYMLINK */
|
||||
}
|
||||
else /* opt.retr_symlinks */
|
||||
@ -1458,7 +1482,7 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
|
||||
&& f->tstamp != -1
|
||||
&& dlthis
|
||||
&& file_exists_p (u->local))
|
||||
&& file_exists_p (con->target))
|
||||
{
|
||||
/* #### This code repeats in http.c and ftp.c. Move it to a
|
||||
function! */
|
||||
@ -1469,27 +1493,31 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
fl = opt.output_document;
|
||||
}
|
||||
else
|
||||
fl = u->local;
|
||||
fl = con->target;
|
||||
if (fl)
|
||||
touch (fl, f->tstamp);
|
||||
}
|
||||
else if (f->tstamp == -1)
|
||||
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), u->local);
|
||||
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
|
||||
|
||||
if (f->perms && f->type == FT_PLAINFILE && dlthis)
|
||||
chmod (u->local, f->perms);
|
||||
chmod (con->target, f->perms);
|
||||
else
|
||||
DEBUGP (("Unrecognized permissions for %s.\n", u->local));
|
||||
DEBUGP (("Unrecognized permissions for %s.\n", con->target));
|
||||
|
||||
xfree (con->target);
|
||||
con->target = old_target;
|
||||
|
||||
url_set_file (u, ofile);
|
||||
xfree (ofile);
|
||||
|
||||
xfree (u->local);
|
||||
u->local = olocal;
|
||||
u->file = ofile;
|
||||
/* Break on fatals. */
|
||||
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
|
||||
break;
|
||||
con->cmd &= ~ (DO_CWD | DO_LOGIN);
|
||||
f = f->next;
|
||||
} /* while */
|
||||
}
|
||||
|
||||
/* We do not want to call ftp_retrieve_dirs here */
|
||||
if (opt.recursive &&
|
||||
!(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
|
||||
@ -1506,51 +1534,62 @@ Already have correct symlink %s -> %s\n\n"),
|
||||
ftp_retrieve_glob on each directory entry. The function knows
|
||||
about excluded directories. */
|
||||
static uerr_t
|
||||
ftp_retrieve_dirs (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
||||
ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con)
|
||||
{
|
||||
char *odir;
|
||||
char *current_container = NULL;
|
||||
int current_length = 0;
|
||||
char *container = NULL;
|
||||
int container_size = 0;
|
||||
|
||||
for (; f; f = f->next)
|
||||
{
|
||||
int len;
|
||||
int size;
|
||||
char *odir, *newdir;
|
||||
|
||||
if (downloaded_exceeds_quota ())
|
||||
break;
|
||||
if (f->type != FT_DIRECTORY)
|
||||
continue;
|
||||
odir = u->dir;
|
||||
len = strlen (u->dir) + 1 + strlen (f->name) + 1;
|
||||
|
||||
/* Allocate u->dir off stack, but reallocate only if a larger
|
||||
string is needed. */
|
||||
if (len > current_length)
|
||||
current_container = (char *)alloca (len);
|
||||
u->dir = current_container;
|
||||
string is needed. It's a pity there's no "realloca" for an
|
||||
item on the bottom of the stack. */
|
||||
size = strlen (u->dir) + 1 + strlen (f->name) + 1;
|
||||
if (size > container_size)
|
||||
container = (char *)alloca (size);
|
||||
newdir = container;
|
||||
|
||||
odir = u->dir;
|
||||
if (*odir == '\0'
|
||||
|| (*odir == '/' && *(odir + 1) == '\0'))
|
||||
/* If ODIR is empty or just "/", simply append f->name to
|
||||
ODIR. (In the former case, to preserve u->dir being
|
||||
relative; in the latter case, to avoid double slash.) */
|
||||
sprintf (u->dir, "%s%s", odir, f->name);
|
||||
sprintf (newdir, "%s%s", odir, f->name);
|
||||
else
|
||||
/* Else, use a separator. */
|
||||
sprintf (u->dir, "%s/%s", odir, f->name);
|
||||
sprintf (newdir, "%s/%s", odir, f->name);
|
||||
|
||||
DEBUGP (("Composing new CWD relative to the initial directory.\n"));
|
||||
DEBUGP ((" odir = '%s'\n f->name = '%s'\n u->dir = '%s'\n\n",
|
||||
odir, f->name, u->dir));
|
||||
if (!accdir (u->dir, ALLABS))
|
||||
DEBUGP ((" odir = '%s'\n f->name = '%s'\n newdir = '%s'\n\n",
|
||||
odir, f->name, newdir));
|
||||
if (!accdir (newdir, ALLABS))
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
|
||||
u->dir = odir;
|
||||
Not descending to `%s' as it is excluded/not-included.\n"), newdir);
|
||||
continue;
|
||||
}
|
||||
|
||||
con->st &= ~DONE_CWD;
|
||||
|
||||
odir = xstrdup (u->dir); /* because url_set_dir will free
|
||||
u->dir. */
|
||||
url_set_dir (u, newdir);
|
||||
ftp_retrieve_glob (u, con, GETALL);
|
||||
url_set_dir (u, odir);
|
||||
xfree (odir);
|
||||
|
||||
/* Set the time-stamp? */
|
||||
u->dir = odir;
|
||||
}
|
||||
|
||||
if (opt.quota && opt.downloaded > opt.quota)
|
||||
return QUOTEXC;
|
||||
else
|
||||
@ -1567,7 +1606,7 @@ Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
|
||||
get the listing, so that the time-stamp is heeded); if it's GLOBALL,
|
||||
use globbing; if it's GETALL, download the whole directory. */
|
||||
static uerr_t
|
||||
ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
||||
ftp_retrieve_glob (struct url *u, ccon *con, int action)
|
||||
{
|
||||
struct fileinfo *orig, *start;
|
||||
uerr_t res;
|
||||
@ -1607,7 +1646,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
||||
matchres = fnmatch (u->file, f->name, 0);
|
||||
if (matchres == -1)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local,
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target,
|
||||
strerror (errno));
|
||||
break;
|
||||
}
|
||||
@ -1657,7 +1696,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
||||
of URL. Inherently, its capabilities are limited on what can be
|
||||
encoded into a URL. */
|
||||
uerr_t
|
||||
ftp_loop (struct urlinfo *u, int *dt)
|
||||
ftp_loop (struct url *u, int *dt)
|
||||
{
|
||||
ccon con; /* FTP connection */
|
||||
uerr_t res;
|
||||
@ -1686,7 +1725,7 @@ ftp_loop (struct urlinfo *u, int *dt)
|
||||
{
|
||||
char *filename = (opt.output_document
|
||||
? xstrdup (opt.output_document)
|
||||
: (u->local ? xstrdup (u->local)
|
||||
: (con.target ? xstrdup (con.target)
|
||||
: url_filename (u)));
|
||||
res = ftp_index (filename, u, f);
|
||||
if (res == FTPOK && opt.verbose)
|
||||
@ -1736,6 +1775,8 @@ ftp_loop (struct urlinfo *u, int *dt)
|
||||
CLOSE (RBUF_FD (&con.rbuf));
|
||||
FREE_MAYBE (con.id);
|
||||
con.id = NULL;
|
||||
FREE_MAYBE (con.target);
|
||||
con.target = NULL;
|
||||
return res;
|
||||
}
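The ftp_get_listing change above replaces the old trick of temporarily overwriting u->file with LIST_FILENAME by a call to the new file_merge() helper, which swaps the last component of a path for another name. file_merge() itself lives in utils.c and its body is not part of this diff, so the following is only a hypothetical sketch of the behaviour the caller relies on:

    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical sketch of a file_merge-style helper: replace the last
       component of BASE with FILE, e.g. "dir/old" + ".listing" -> "dir/.listing"
       and "old" + ".listing" -> ".listing".  Not the actual utils.c code. */
    static char *
    file_merge_sketch (const char *base, const char *file)
    {
      const char *slash = strrchr (base, '/');
      size_t dirlen = slash ? (size_t) (slash - base) + 1 : 0;
      char *result = malloc (dirlen + strlen (file) + 1);
      if (!result)
        return NULL;
      memcpy (result, base, dirlen);
      strcpy (result + dirlen, file);
      return result;
    }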
src/ftp.h | 19
@@ -46,7 +46,7 @@ uerr_t ftp_syst PARAMS ((struct rbuf *, enum stype *));
 uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
 uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
 
-struct urlinfo;
+struct url;
 
 /* File types. */
 enum ftype
@@ -98,19 +98,12 @@ enum wget_ftp_fstatus
      correct. */
 };
 
-typedef struct
-{
-  int st;               /* connection status */
-  int cmd;              /* command code */
-  struct rbuf rbuf;     /* control connection buffer */
-  long dltime;          /* time of the download */
-  enum stype rs;        /* remote system reported by ftp server */
-  char *id;             /* initial directory */
-} ccon;
-
 struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
-uerr_t ftp_loop PARAMS ((struct urlinfo *, int *));
+uerr_t ftp_loop PARAMS ((struct url *, int *));
+
+uerr_t ftp_index (const char *, struct url *, struct fileinfo *);
+
+char ftp_process_type PARAMS ((const char *));
 
-uerr_t ftp_index (const char *, struct urlinfo *, struct fileinfo *);
-
 #endif /* FTP_H */
src/host.c
@@ -327,7 +327,7 @@ same_host (const char *u1, const char *u2)
 /* Determine whether a URL is acceptable to be followed, according to
    a list of domains to accept. */
 int
-accept_domain (struct urlinfo *u)
+accept_domain (struct url *u)
 {
   assert (u->host != NULL);
   if (opt.domains)

src/host.h
@@ -20,7 +20,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
 #ifndef HOST_H
 #define HOST_H
 
-struct urlinfo;
+struct url;
 
 /* Function declarations */
 
@@ -31,7 +31,7 @@ void clean_hosts PARAMS ((void));
 
 char *realhost PARAMS ((const char *));
 int same_host PARAMS ((const char *, const char *));
-int accept_domain PARAMS ((struct urlinfo *));
+int accept_domain PARAMS ((struct url *));
 int sufmatch PARAMS ((const char **, const char *));
 
 char *ftp_getaddress PARAMS ((void));
src/http.c | 270
@ -464,16 +464,22 @@ struct http_stat
|
||||
long dltime; /* time of the download */
|
||||
int no_truncate; /* whether truncating the file is
|
||||
forbidden. */
|
||||
const char *referer; /* value of the referer header. */
|
||||
char **local_file; /* local file. */
|
||||
};
|
||||
|
||||
/* Free the elements of hstat X. */
|
||||
#define FREEHSTAT(x) do \
|
||||
{ \
|
||||
FREE_MAYBE ((x).newloc); \
|
||||
FREE_MAYBE ((x).remote_time); \
|
||||
FREE_MAYBE ((x).error); \
|
||||
(x).newloc = (x).remote_time = (x).error = NULL; \
|
||||
} while (0)
|
||||
static void
|
||||
free_hstat (struct http_stat *hs)
|
||||
{
|
||||
FREE_MAYBE (hs->newloc);
|
||||
FREE_MAYBE (hs->remote_time);
|
||||
FREE_MAYBE (hs->error);
|
||||
|
||||
/* Guard against being called twice. */
|
||||
hs->newloc = NULL;
|
||||
hs->remote_time = NULL;
|
||||
hs->error = NULL;
|
||||
}
|
||||
|
||||
static char *create_authorization_line PARAMS ((const char *, const char *,
|
||||
const char *, const char *,
|
||||
@ -499,23 +505,22 @@ time_t http_atotm PARAMS ((char *));
|
||||
response code correctly, it is not used in a sane way. The caller
|
||||
can do that, though.
|
||||
|
||||
If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
|
||||
and u->proxy->url will be given to the proxy server (bad naming,
|
||||
I'm afraid). */
|
||||
If PROXY is non-NULL, the connection will be made to the proxy
|
||||
server, and u->url will be requested. */
|
||||
static uerr_t
|
||||
gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
||||
{
|
||||
char *request, *type, *command, *path;
|
||||
char *request, *type, *command, *full_path;
|
||||
char *user, *passwd;
|
||||
char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
|
||||
char *pragma_h, *referer, *useragent, *range, *wwwauth;
|
||||
char *authenticate_h;
|
||||
char *proxyauth;
|
||||
char *all_headers;
|
||||
char *port_maybe;
|
||||
char *request_keep_alive;
|
||||
int sock, hcount, num_written, all_length, remport, statcode;
|
||||
int sock, hcount, num_written, all_length, statcode;
|
||||
long contlen, contrange;
|
||||
struct urlinfo *ou;
|
||||
struct url *conn;
|
||||
uerr_t err;
|
||||
FILE *fp;
|
||||
int auth_tried_already;
|
||||
@ -542,7 +547,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
/* initialize ssl_ctx on first run */
|
||||
if (!ssl_ctx)
|
||||
{
|
||||
err=init_ssl (&ssl_ctx);
|
||||
err = init_ssl (&ssl_ctx);
|
||||
if (err != 0)
|
||||
{
|
||||
switch (err)
|
||||
@ -579,12 +584,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
if (!(*dt & HEAD_ONLY))
|
||||
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
||||
know the local filename so we can save to it. */
|
||||
assert (u->local != NULL);
|
||||
assert (*hs->local_file != NULL);
|
||||
|
||||
authenticate_h = 0;
|
||||
auth_tried_already = 0;
|
||||
|
||||
inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
|
||||
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
|
||||
|
||||
again:
|
||||
/* We need to come back here when the initial attempt to retrieve
|
||||
@ -602,29 +607,29 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
hs->remote_time = NULL;
|
||||
hs->error = NULL;
|
||||
|
||||
/* Which structure to use to retrieve the original URL data. */
|
||||
if (u->proxy)
|
||||
ou = u->proxy;
|
||||
else
|
||||
ou = u;
|
||||
/* If we're using a proxy, we will be connecting to the proxy
|
||||
server. */
|
||||
conn = proxy ? proxy : u;
|
||||
|
||||
/* First: establish the connection. */
|
||||
if (inhibit_keep_alive
|
||||
||
|
||||
#ifndef HAVE_SSL
|
||||
!persistent_available_p (u->host, u->port)
|
||||
!persistent_available_p (conn->host, conn->port)
|
||||
#else
|
||||
!persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
|
||||
!persistent_available_p (conn->host, conn->port,
|
||||
u->scheme == SCHEME_HTTPS)
|
||||
#endif /* HAVE_SSL */
|
||||
)
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
|
||||
err = make_connection (&sock, u->host, u->port);
|
||||
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
|
||||
conn->host, conn->port);
|
||||
err = make_connection (&sock, conn->host, conn->port);
|
||||
switch (err)
|
||||
{
|
||||
case HOSTERR:
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
|
||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", conn->host, herrmsg (h_errno));
|
||||
return HOSTERR;
|
||||
break;
|
||||
case CONSOCKERR:
|
||||
@ -635,7 +640,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
case CONREFUSED:
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("Connection to %s:%hu refused.\n"), u->host, u->port);
|
||||
_("Connection to %s:%hu refused.\n"), conn->host,
|
||||
conn->port);
|
||||
CLOSE (sock);
|
||||
return CONREFUSED;
|
||||
case CONERROR:
|
||||
@ -653,7 +659,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_SSL
|
||||
if (u->scheme == SCHEME_HTTPS)
|
||||
if (conn->scheme == SCHEME_HTTPS)
|
||||
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
||||
{
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
@ -666,7 +672,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
}
|
||||
else
|
||||
{
|
||||
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
|
||||
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
|
||||
conn->host, conn->port);
|
||||
/* #### pc_last_fd should be accessed through an accessor
|
||||
function. */
|
||||
sock = pc_last_fd;
|
||||
@ -676,22 +683,20 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
DEBUGP (("Reusing fd %d.\n", sock));
|
||||
}
|
||||
|
||||
if (u->proxy)
|
||||
path = u->proxy->url;
|
||||
else
|
||||
path = u->path;
|
||||
|
||||
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
|
||||
|
||||
referer = NULL;
|
||||
if (ou->referer)
|
||||
if (hs->referer)
|
||||
{
|
||||
referer = (char *)alloca (9 + strlen (ou->referer) + 3);
|
||||
sprintf (referer, "Referer: %s\r\n", ou->referer);
|
||||
referer = (char *)alloca (9 + strlen (hs->referer) + 3);
|
||||
sprintf (referer, "Referer: %s\r\n", hs->referer);
|
||||
}
|
||||
|
||||
if (*dt & SEND_NOCACHE)
|
||||
pragma_h = "Pragma: no-cache\r\n";
|
||||
else
|
||||
pragma_h = "";
|
||||
|
||||
if (hs->restval)
|
||||
{
|
||||
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
|
||||
@ -714,9 +719,9 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
sprintf (useragent, "Wget/%s", version_string);
|
||||
}
|
||||
/* Construct the authentication, if userid is present. */
|
||||
user = ou->user;
|
||||
passwd = ou->passwd;
|
||||
search_netrc (ou->host, (const char **)&user, (const char **)&passwd, 0);
|
||||
user = u->user;
|
||||
passwd = u->passwd;
|
||||
search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
|
||||
user = user ? user : opt.http_user;
|
||||
passwd = passwd ? passwd : opt.http_passwd;
|
||||
|
||||
@ -750,12 +755,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
else
|
||||
{
|
||||
wwwauth = create_authorization_line (authenticate_h, user, passwd,
|
||||
command, ou->path);
|
||||
command, u->path);
|
||||
}
|
||||
}
|
||||
|
||||
proxyauth = NULL;
|
||||
if (u->proxy)
|
||||
if (proxy)
|
||||
{
|
||||
char *proxy_user, *proxy_passwd;
|
||||
/* For normal username and password, URL components override
|
||||
@ -770,31 +775,22 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
}
|
||||
else
|
||||
{
|
||||
proxy_user = u->user;
|
||||
proxy_passwd = u->passwd;
|
||||
proxy_user = proxy->user;
|
||||
proxy_passwd = proxy->passwd;
|
||||
}
|
||||
/* #### This is junky. Can't the proxy request, say, `Digest'
|
||||
authentication? */
|
||||
/* #### This does not appear right. Can't the proxy request,
|
||||
say, `Digest' authentication? */
|
||||
if (proxy_user && proxy_passwd)
|
||||
proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
|
||||
"Proxy-Authorization");
|
||||
}
|
||||
remhost = ou->host;
|
||||
remport = ou->port;
|
||||
|
||||
/* String of the form :PORT. Used only for non-standard ports. */
|
||||
port_maybe = NULL;
|
||||
if (1
|
||||
#ifdef HAVE_SSL
|
||||
&& remport != (u->scheme == SCHEME_HTTPS
|
||||
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
|
||||
#else
|
||||
&& remport != DEFAULT_HTTP_PORT
|
||||
#endif
|
||||
)
|
||||
if (u->port != scheme_default_port (u->scheme))
|
||||
{
|
||||
port_maybe = (char *)alloca (numdigit (remport) + 2);
|
||||
sprintf (port_maybe, ":%d", remport);
|
||||
port_maybe = (char *)alloca (numdigit (u->port) + 2);
|
||||
sprintf (port_maybe, ":%d", u->port);
|
||||
}
|
||||
|
||||
if (!inhibit_keep_alive)
|
||||
@ -803,18 +799,24 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
request_keep_alive = NULL;
|
||||
|
||||
if (opt.cookies)
|
||||
cookies = build_cookies_request (ou->host, ou->port, ou->path,
|
||||
cookies = build_cookies_request (u->host, u->port, u->path,
|
||||
#ifdef HAVE_SSL
|
||||
ou->scheme == SCHEME_HTTPS
|
||||
u->scheme == SCHEME_HTTPS
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
);
|
||||
|
||||
if (proxy)
|
||||
full_path = xstrdup (u->url);
|
||||
else
|
||||
full_path = url_full_path (u);
|
||||
|
||||
/* Allocate the memory for the request. */
|
||||
request = (char *)alloca (strlen (command) + strlen (path)
|
||||
request = (char *)alloca (strlen (command)
|
||||
+ strlen (full_path)
|
||||
+ strlen (useragent)
|
||||
+ strlen (remhost)
|
||||
+ strlen (u->host)
|
||||
+ (port_maybe ? strlen (port_maybe) : 0)
|
||||
+ strlen (HTTP_ACCEPT)
|
||||
+ (request_keep_alive
|
||||
@ -834,7 +836,8 @@ User-Agent: %s\r\n\
|
||||
Host: %s%s\r\n\
|
||||
Accept: %s\r\n\
|
||||
%s%s%s%s%s%s%s%s\r\n",
|
||||
command, path, useragent, remhost,
|
||||
command, full_path,
|
||||
useragent, u->host,
|
||||
port_maybe ? port_maybe : "",
|
||||
HTTP_ACCEPT,
|
||||
request_keep_alive ? request_keep_alive : "",
|
||||
@ -846,10 +849,12 @@ Accept: %s\r\n\
|
||||
pragma_h,
|
||||
opt.user_header ? opt.user_header : "");
|
||||
DEBUGP (("---request begin---\n%s---request end---\n", request));
|
||||
/* Free the temporary memory. */
|
||||
|
||||
/* Free the temporary memory. */
|
||||
FREE_MAYBE (wwwauth);
|
||||
FREE_MAYBE (proxyauth);
|
||||
FREE_MAYBE (cookies);
|
||||
xfree (full_path);
|
||||
|
||||
/* Send the request to server. */
|
||||
#ifdef HAVE_SSL
|
||||
@ -867,7 +872,7 @@ Accept: %s\r\n\
|
||||
return WRITEFAILED;
|
||||
}
|
||||
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
||||
u->proxy ? "Proxy" : "HTTP");
|
||||
proxy ? "Proxy" : "HTTP");
|
||||
contlen = contrange = -1;
|
||||
type = NULL;
|
||||
statcode = -1;
|
||||
@ -1075,9 +1080,9 @@ Accept: %s\r\n\
|
||||
/* The server has promised that it will not close the connection
|
||||
when we're done. This means that we can register it. */
|
||||
#ifndef HAVE_SSL
|
||||
register_persistent (u->host, u->port, sock);
|
||||
register_persistent (conn->host, conn->port, sock);
|
||||
#else
|
||||
register_persistent (u->host, u->port, sock, ssl);
|
||||
register_persistent (conn->host, conn->port, sock, ssl);
|
||||
#endif /* HAVE_SSL */
|
||||
|
||||
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
|
||||
@ -1086,7 +1091,7 @@ Accept: %s\r\n\
|
||||
/* Authorization is required. */
|
||||
FREE_MAYBE (type);
|
||||
type = NULL;
|
||||
FREEHSTAT (*hs);
|
||||
free_hstat (hs);
|
||||
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
||||
might be more bytes in the body. */
|
||||
if (auth_tried_already)
|
||||
@ -1163,16 +1168,17 @@ Accept: %s\r\n\
|
||||
text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
||||
already the file's suffix, tack on ".html". */
|
||||
{
|
||||
char* last_period_in_local_filename = strrchr(u->local, '.');
|
||||
char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
|
||||
|
||||
if (last_period_in_local_filename == NULL ||
|
||||
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
|
||||
strcasecmp(last_period_in_local_filename, ".html") == EQ))
|
||||
{
|
||||
size_t local_filename_len = strlen(u->local);
|
||||
size_t local_filename_len = strlen(*hs->local_file);
|
||||
|
||||
u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
|
||||
strcpy(u->local + local_filename_len, ".html");
|
||||
*hs->local_file = xrealloc(*hs->local_file,
|
||||
local_filename_len + sizeof(".html"));
|
||||
strcpy(*hs->local_file + local_filename_len, ".html");
|
||||
|
||||
*dt |= ADDED_HTML_EXTENSION;
|
||||
}
|
||||
@ -1224,7 +1230,7 @@ Accept: %s\r\n\
|
||||
_("\
|
||||
\n\
|
||||
Continued download failed on this file, which conflicts with `-c'.\n\
|
||||
Refusing to truncate existing file `%s'.\n\n"), u->local);
|
||||
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
|
||||
FREE_MAYBE (type);
|
||||
FREE_MAYBE (all_headers);
|
||||
CLOSE_INVALIDATE (sock);
|
||||
@ -1300,13 +1306,13 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
|
||||
/* Open the local file. */
|
||||
if (!opt.dfp)
|
||||
{
|
||||
mkalldirs (u->local);
|
||||
mkalldirs (*hs->local_file);
|
||||
if (opt.backups)
|
||||
rotate_backups (u->local);
|
||||
fp = fopen (u->local, hs->restval ? "ab" : "wb");
|
||||
rotate_backups (*hs->local_file);
|
||||
fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
|
||||
if (!fp)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
|
||||
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
|
||||
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
||||
might be more bytes in the body. */
|
||||
FREE_MAYBE (all_headers);
|
||||
@ -1375,7 +1381,8 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
|
||||
/* The genuine HTTP loop! This is the part where the retrieval is
|
||||
retried, and retried, and retried, and... */
|
||||
uerr_t
|
||||
http_loop (struct urlinfo *u, char **newloc, int *dt)
|
||||
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
||||
int *dt, struct url *proxy)
|
||||
{
|
||||
int count;
|
||||
int use_ts, got_head = 0; /* time-stamping info */
|
||||
@ -1388,6 +1395,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
||||
size_t filename_len;
|
||||
struct http_stat hstat; /* HTTP status */
|
||||
struct stat st;
|
||||
char *dummy = NULL;
|
||||
|
||||
/* This used to be done in main(), but it's a better idea to do it
|
||||
here so that we don't go through the hoops if we're just using
|
||||
@ -1407,34 +1415,46 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
||||
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
||||
|
||||
/* Determine the local filename. */
|
||||
if (!u->local)
|
||||
u->local = url_filename (u->proxy ? u->proxy : u);
|
||||
if (local_file && *local_file)
|
||||
hstat.local_file = local_file;
|
||||
else if (local_file)
|
||||
{
|
||||
*local_file = url_filename (u);
|
||||
hstat.local_file = local_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
dummy = url_filename (u);
|
||||
hstat.local_file = &dummy;
|
||||
}
|
||||
|
||||
if (!opt.output_document)
|
||||
locf = u->local;
|
||||
locf = *hstat.local_file;
|
||||
else
|
||||
locf = opt.output_document;
|
||||
|
||||
filename_len = strlen (u->local);
|
||||
hstat.referer = referer;
|
||||
|
||||
filename_len = strlen (*hstat.local_file);
|
||||
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
||||
|
||||
if (opt.noclobber && file_exists_p (u->local))
|
||||
if (opt.noclobber && file_exists_p (*hstat.local_file))
|
||||
{
|
||||
/* If opt.noclobber is turned on and file already exists, do not
|
||||
retrieve the file */
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
File `%s' already there, will not retrieve.\n"), u->local);
|
||||
File `%s' already there, will not retrieve.\n"), *hstat.local_file);
|
||||
/* If the file is there, we suppose it's retrieved OK. */
|
||||
*dt |= RETROKF;
|
||||
|
||||
/* #### Bogusness alert. */
|
||||
/* If its suffix is "html" or (yuck!) "htm", we suppose it's
|
||||
text/html, a harmless lie. */
|
||||
if (((suf = suffix (u->local)) != NULL)
|
||||
/* If its suffix is "html" or "htm", assume text/html. */
|
||||
if (((suf = suffix (*hstat.local_file)) != NULL)
|
||||
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
||||
*dt |= TEXTHTML;
|
||||
xfree (suf);
|
||||
/* Another harmless lie: */
|
||||
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
|
||||
@ -1461,7 +1481,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
in url.c. Replacing sprintf with inline calls to
|
||||
strcpy() and long_to_string() made a difference.
|
||||
--hniksic */
|
||||
memcpy (filename_plus_orig_suffix, u->local, filename_len);
|
||||
memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
|
||||
memcpy (filename_plus_orig_suffix + filename_len,
|
||||
".orig", sizeof (".orig"));
|
||||
|
||||
@ -1475,8 +1495,8 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
|
||||
if (!local_dot_orig_file_exists)
|
||||
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
||||
if (stat (u->local, &st) == 0)
|
||||
local_filename = u->local;
|
||||
if (stat (*hstat.local_file, &st) == 0)
|
||||
local_filename = *hstat.local_file;
|
||||
|
||||
if (local_filename != NULL)
|
||||
/* There was a local file, so we'll check later to see if the version
|
||||
@ -1503,7 +1523,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
/* Print fetch message, if opt.verbose. */
|
||||
if (opt.verbose)
|
||||
{
|
||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
||||
char *hurl = url_string (u, 1);
|
||||
char tmp[15];
|
||||
strcpy (tmp, " ");
|
||||
if (count > 1)
|
||||
@ -1545,22 +1565,22 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
Some proxies are notorious for caching incomplete data, so
|
||||
we require a fresh get.
|
||||
b) caching is explicitly inhibited. */
|
||||
if ((u->proxy && count > 1) /* a */
|
||||
|| !opt.allow_cache /* b */
|
||||
if ((proxy && count > 1) /* a */
|
||||
|| !opt.allow_cache /* b */
|
||||
)
|
||||
*dt |= SEND_NOCACHE;
|
||||
else
|
||||
*dt &= ~SEND_NOCACHE;
|
||||
|
||||
/* Try fetching the document, or at least its head. :-) */
|
||||
err = gethttp (u, &hstat, dt);
|
||||
/* Try fetching the document, or at least its head. */
|
||||
err = gethttp (u, &hstat, dt, proxy);
|
||||
|
||||
/* It's unfortunate that wget determines the local filename before finding
|
||||
out the Content-Type of the file. Barring a major restructuring of the
|
||||
code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
||||
u->local to tack on ".html". */
|
||||
*hstat.local_file to tack on ".html". */
|
||||
if (!opt.output_document)
|
||||
locf = u->local;
|
||||
locf = *hstat.local_file;
|
||||
else
|
||||
locf = opt.output_document;
|
||||
|
||||
@ -1577,29 +1597,32 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
/* Non-fatal errors continue executing the loop, which will
|
||||
bring them to "while" statement at the end, to judge
|
||||
whether the number of tries was exceeded. */
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
printwhat (count, opt.ntry);
|
||||
continue;
|
||||
break;
|
||||
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
|
||||
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
|
||||
/* Fatal errors just return from the function. */
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return err;
|
||||
break;
|
||||
case FWRITEERR: case FOPENERR:
|
||||
/* Another fatal error. */
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
||||
u->local, strerror (errno));
|
||||
FREEHSTAT (hstat);
|
||||
*hstat.local_file, strerror (errno));
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return err;
|
||||
break;
|
||||
case CONSSLERR:
|
||||
/* Another fatal error. */
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return err;
|
||||
break;
|
||||
case NEWLOCATION:
|
||||
@ -1609,14 +1632,18 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
logprintf (LOG_NOTQUIET,
|
||||
_("ERROR: Redirection (%d) without location.\n"),
|
||||
hstat.statcode);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return WRONGCODE;
|
||||
}
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return NEWLOCATION;
|
||||
break;
|
||||
case RETRUNNEEDED:
|
||||
/* The file was already fully retrieved. */
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
break;
|
||||
case RETRFINISHED:
|
||||
@ -1631,14 +1658,15 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
||||
if (!opt.verbose)
|
||||
{
|
||||
/* #### Ugly ugly ugly! */
|
||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
||||
char *hurl = url_string (u, 1);
|
||||
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
|
||||
xfree (hurl);
|
||||
}
|
||||
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
|
||||
tms, hstat.statcode, hstat.error);
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return WRONGCODE;
|
||||
}
|
||||
|
||||
@ -1681,7 +1709,8 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|
||||
logprintf (LOG_VERBOSE, _("\
|
||||
Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
||||
local_filename);
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
else if (tml >= tmr)
|
||||
@ -1691,7 +1720,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
logputs (LOG_VERBOSE,
|
||||
_("Remote file is newer, retrieving.\n"));
|
||||
}
|
||||
FREEHSTAT (hstat);
|
||||
free_hstat (&hstat);
|
||||
continue;
|
||||
}
|
||||
if ((tmr != (time_t) (-1))
|
||||
@ -1710,7 +1739,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
fl = opt.output_document;
|
||||
}
|
||||
else
|
||||
fl = u->local;
|
||||
fl = *hstat.local_file;
|
||||
if (fl)
|
||||
touch (fl, tmr);
|
||||
}
|
||||
@ -1719,13 +1748,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
if (opt.spider)
|
||||
{
|
||||
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
|
||||
/* It is now safe to free the remainder of hstat, since the
|
||||
strings within it will no longer be used. */
|
||||
FREEHSTAT (hstat);
|
||||
|
||||
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
|
||||
|
||||
if (hstat.len == hstat.contlen)
|
||||
@ -1748,6 +1774,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
else
|
||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
else if (hstat.res == 0) /* No read error */
|
||||
@ -1773,6 +1801,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
else
|
||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
||||
@ -1782,6 +1812,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
_("%s (%s) - Connection closed at byte %ld. "),
|
||||
tms, tmrate, hstat.len);
|
||||
printwhat (count, opt.ntry);
|
||||
free_hstat (&hstat);
|
||||
continue;
|
||||
}
|
||||
else if (!opt.kill_longer) /* meaning we got more than expected */
|
||||
@ -1801,6 +1832,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
else
|
||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||
|
||||
free_hstat (&hstat);
|
||||
FREE_MAYBE (dummy);
|
||||
return RETROK;
|
||||
}
|
||||
else /* the same, but not accepted */
|
||||
@ -1809,6 +1842,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
_("%s (%s) - Connection closed at byte %ld/%ld. "),
|
||||
tms, tmrate, hstat.len, hstat.contlen);
|
||||
printwhat (count, opt.ntry);
|
||||
free_hstat (&hstat);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@ -1820,6 +1854,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
_("%s (%s) - Read error at byte %ld (%s)."),
|
||||
tms, tmrate, hstat.len, strerror (errno));
|
||||
printwhat (count, opt.ntry);
|
||||
free_hstat (&hstat);
|
||||
continue;
|
||||
}
|
||||
else /* hstat.res == -1 and contlen is given */
|
||||
@ -1829,6 +1864,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
||||
tms, tmrate, hstat.len, hstat.contlen,
|
||||
strerror (errno));
|
||||
printwhat (count, opt.ntry);
|
||||
free_hstat (&hstat);
|
||||
continue;
|
||||
}
|
||||
}
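The http.c hunks above remove the old kludge of smuggling the proxy URL and the local file name inside struct url; both now travel as explicit parameters. A hedged sketch of what a call site looks like under the new http_loop signature (the signature is taken from the hunk above; variable names such as proxy_url are illustrative, and the real caller is retrieve_url() in retr.c):

    char *newloc = NULL;       /* filled in on a redirect */
    char *local_file = NULL;   /* filled in with the chosen local file name */
    int dt = 0;

    uerr_t err = http_loop (u, &newloc, &local_file, referer, &dt, proxy_url);
    if (err == NEWLOCATION && newloc)
      {
        /* The caller follows the redirect recorded in newloc here. */
      }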
src/main.c
@@ -727,7 +727,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
   /* Fill in the arguments. */
   for (i = 0; i < nurl; i++, optind++)
     {
-      char *rewritten = rewrite_url_maybe (argv[optind]);
+      char *rewritten = rewrite_shorthand_url (argv[optind]);
       if (rewritten)
        {
          printf ("Converted %s to %s\n", argv[optind], rewritten);
@@ -845,10 +845,12 @@ Can't timestamp and not clobber old files at the same time.\n"));
     {
       convert_all_links ();
     }
 
   log_close ();
   for (i = 0; i < nurl; i++)
-    free (url[i]);
+    xfree (url[i]);
   cleanup ();
 
 #ifdef DEBUG_MALLOC
   print_malloc_debug_stats ();
 #endif
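rewrite_shorthand_url() itself is defined in url.c and is not among the hunks shown here; per the src/ChangeLog entry above it is simply rewrite_url_maybe under a clearer name. A sketch of how main() consumes it, extrapolated from the hunk above (the else branch and the ownership handling are assumptions, not visible in this diff):

    for (i = 0; i < nurl; i++, optind++)
      {
        char *rewritten = rewrite_shorthand_url (argv[optind]);
        if (rewritten)
          {
            printf ("Converted %s to %s\n", argv[optind], rewritten);
            url[i] = rewritten;               /* assumed: keep the rewritten form */
          }
        else
          url[i] = xstrdup (argv[optind]);    /* assumed: copy, so xfree (url[i]) below is safe */
      }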
src/recur.c | 48
@ -120,9 +120,8 @@ recursive_retrieve (const char *file, const char *this_url)
int dt, inl, dash_p_leaf_HTML = FALSE;
int meta_disallow_follow;
int this_url_ftp; /* See below the explanation */
uerr_t err;
urlpos *url_list, *cur_url;
struct urlinfo *u;
struct url *u;

assert (this_url != NULL);
assert (file != NULL);
@ -140,9 +139,8 @@ recursive_retrieve (const char *file, const char *this_url)
hash_table_clear (undesirable_urls);
string_set_add (undesirable_urls, this_url);
/* Enter this_url to the hash table, in original and "enhanced" form. */
u = newurl ();
err = parseurl (this_url, u, 0);
if (err == URLOK)
u = url_parse (this_url, NULL);
if (u)
{
string_set_add (undesirable_urls, u->url);
if (opt.no_parent)
@ -156,7 +154,7 @@ recursive_retrieve (const char *file, const char *this_url)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
base_dir = NULL;
}
freeurl (u, 1);
url_free (u);
depth = 1;
first_time = 0;
}
@ -210,11 +208,10 @@ recursive_retrieve (const char *file, const char *this_url)
break;
/* Parse the URL for convenient use in other functions, as well
as to get the optimized form. It also checks URL integrity. */
u = newurl ();
if (parseurl (cur_url->url, u, 0) != URLOK)
u = url_parse (cur_url->url, NULL);
if (!u)
{
DEBUGP (("Yuck! A bad URL.\n"));
freeurl (u, 1);
continue;
}
assert (u->url != NULL);
@ -281,8 +278,8 @@ recursive_retrieve (const char *file, const char *this_url)
if (!(base_dir && frontcmp (base_dir, u->dir)))
{
/* Failing that, check for parent dir. */
struct urlinfo *ut = newurl ();
if (parseurl (this_url, ut, 0) != URLOK)
struct url *ut = url_parse (this_url, NULL);
if (!ut)
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
else if (!frontcmp (ut->dir, u->dir))
{
@ -291,7 +288,7 @@ recursive_retrieve (const char *file, const char *this_url)
string_set_add (undesirable_urls, constr);
inl = 1;
}
freeurl (ut, 1);
url_free (ut);
}
}
/* If the file does not match the acceptance list, or is on the
@ -343,7 +340,16 @@ recursive_retrieve (const char *file, const char *this_url)
if (!inl)
{
if (!opt.simple_check)
opt_url (u);
{
/* Find the "true" host. */
char *host = realhost (u->host);
xfree (u->host);
u->host = host;

/* Refresh the printed representation of the URL. */
xfree (u->url);
u->url = url_string (u, 0);
}
else
{
char *p;
@ -351,7 +357,7 @@ recursive_retrieve (const char *file, const char *this_url)
for (p = u->host; *p; p++)
*p = TOLOWER (*p);
xfree (u->url);
u->url = str_url (u, 0);
u->url = url_string (u, 0);
}
xfree (constr);
constr = xstrdup (u->url);
@ -473,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* Free filename and constr. */
FREE_MAYBE (filename);
FREE_MAYBE (constr);
freeurl (u, 1);
url_free (u);
/* Increment the pbuf for the appropriate size. */
}
if (opt.convert_links && !opt.delete_after)
@ -573,13 +579,9 @@ convert_all_links (void)
char *local_name;

/* The URL must be in canonical form to be compared. */
struct urlinfo *u = newurl ();
uerr_t res = parseurl (cur_url->url, u, 0);
if (res != URLOK)
{
freeurl (u, 1);
continue;
}
struct url *u = url_parse (cur_url->url, NULL);
if (!u)
continue;
/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
@ -608,7 +610,7 @@ convert_all_links (void)
cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL;
}
freeurl (u, 1);
url_free (u);
}
/* Convert the links in the file. */
convert_links (html->string, urls);
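Note on the -343/+340 hunk above: the old opt_url() call is gone and its work is now inlined. Purely as a reading aid, and not part of this patch, the inlined steps amount to the following hedged sketch, assuming realhost() and url_string() behave exactly as the surrounding diff uses them (the helper name and the header guesses are mine):

#include "wget.h"
#include "url.h"
#include "host.h"   /* assumed to declare realhost() */
#include "utils.h"

/* Illustrative helper: canonicalize the host of a parsed URL and
   refresh its cached string form, as the inlined code above does.  */
static void
canonicalize_parsed_url (struct url *u)
{
  char *host = realhost (u->host);  /* find the "true" host */
  xfree (u->host);
  u->host = host;

  xfree (u->url);                   /* refresh the printed representation */
  u->url = url_string (u, 0);
}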
139
src/retr.c
@ -51,9 +51,6 @@ extern int errno;
int global_download_count;

void logflush PARAMS ((void));

/* From http.c. */
uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));

/* Flags for show_progress(). */
enum spflags { SP_NONE, SP_INIT, SP_FINISH };
@ -314,9 +311,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
uerr_t result;
char *url;
int location_changed, dummy;
int local_use_proxy;
int use_proxy;
char *mynewloc, *proxy;
struct urlinfo *u;
struct url *u;
int up_error_code; /* url parse error code */
char *local_file;
struct hash_table *redirections = NULL;

/* If dt is NULL, just ignore it. */
@ -328,80 +327,74 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (file)
*file = NULL;

u = newurl ();
/* Parse the URL. */
result = parseurl (url, u, 0);
if (result != URLOK)
u = url_parse (url, &up_error_code);
if (!u)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
if (redirections)
string_set_free (redirections);
xfree (url);
return result;
return URLERROR;
}

if (!refurl)
refurl = opt.referer;

redirected:

/* Set the referer. */
if (refurl)
u->referer = xstrdup (refurl);
else
{
if (opt.referer)
u->referer = xstrdup (opt.referer);
else
u->referer = NULL;
}
result = NOCONERROR;
mynewloc = NULL;
local_file = NULL;

local_use_proxy = USE_PROXY_P (u);
if (local_use_proxy)
use_proxy = USE_PROXY_P (u);
if (use_proxy)
{
struct urlinfo *pu = newurl ();
struct url *proxy_url;

/* Copy the original URL to new location. */
memcpy (pu, u, sizeof (*u));
pu->proxy = NULL; /* A minor correction :) */
/* Initialize u to nil. */
memset (u, 0, sizeof (*u));
u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the
current scheme. */
proxy = getproxy (pu->scheme);
/* Get the proxy server for the current scheme. */
proxy = getproxy (u->scheme);
if (!proxy)
{
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
freeurl (u, 1);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}

/* Parse the proxy URL. */
result = parseurl (proxy, u, 0);
if (result != URLOK || u->scheme != SCHEME_HTTP)
proxy_url = url_parse (proxy, &up_error_code);
if (!proxy_url)
{
if (u->scheme == SCHEME_HTTP)
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "Error parsing proxy URL %s: %s.\n",
proxy, url_error (up_error_code));
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}
u->scheme = SCHEME_HTTP;
if (proxy_url->scheme != SCHEME_HTTP)
{
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
url_free (proxy_url);
if (redirections)
string_set_free (redirections);
xfree (url);
return PROXERR;
}

result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
url_free (proxy_url);
}

mynewloc = NULL;

if (u->scheme == SCHEME_HTTP
else if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL
|| u->scheme == SCHEME_HTTPS
#endif
)
result = http_loop (u, &mynewloc, dt);
{
result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
}
else if (u->scheme == SCHEME_FTP)
{
/* If this is a redirection, we must not allow recursive FTP
@ -412,13 +405,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
opt.recursive = 0;
result = ftp_loop (u, dt);
opt.recursive = oldrec;
#if 0
/* There is a possibility of having HTTP being redirected to
FTP. In these cases we must decide whether the text is HTML
according to the suffix. The HTML suffixes are `.html' and
`.htm', case-insensitive.

#### All of this is, of course, crap. These types should be
determined through mailcap. */
`.htm', case-insensitive. */
if (redirections && u->local && (u->scheme == SCHEME_FTP))
{
char *suf = suffix (u->local);
@ -426,16 +417,19 @@ retrieve_url (const char *origurl, char **file, char **newloc,
*dt |= TEXTHTML;
FREE_MAYBE (suf);
}
#endif
}
location_changed = (result == NEWLOCATION);
if (location_changed)
{
char *construced_newloc;
uerr_t newloc_result;
struct urlinfo *newloc_struct;
struct url *newloc_struct;

assert (mynewloc != NULL);

if (local_file)
xfree (local_file);

/* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs out
there break the rules and use relative URLs, and popular
@ -445,13 +439,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
mynewloc = construced_newloc;

/* Now, see if this new location makes sense. */
newloc_struct = newurl ();
newloc_result = parseurl (mynewloc, newloc_struct, 1);
if (newloc_result != URLOK)
newloc_struct = url_parse (mynewloc, NULL);
if (!newloc_struct)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
freeurl (newloc_struct, 1);
freeurl (u, 1);
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, "UNKNOWN");
url_free (newloc_struct);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
@ -473,14 +466,14 @@ retrieve_url (const char *origurl, char **file, char **newloc,
string_set_add (redirections, u->url);
}

/* The new location is OK. Let's check for redirection cycle by
/* The new location is OK. Check for redirection cycle by
peeking through the history of redirections. */
if (string_set_contains (redirections, newloc_struct->url))
{
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
mynewloc);
freeurl (newloc_struct, 1);
freeurl (u, 1);
url_free (newloc_struct);
url_free (u);
if (redirections)
string_set_free (redirections);
xfree (url);
@ -491,29 +484,27 @@ retrieve_url (const char *origurl, char **file, char **newloc,

xfree (url);
url = mynewloc;
freeurl (u, 1);
url_free (u);
u = newloc_struct;
goto redirected;
}

if (u->local)
if (local_file)
{
if (*dt & RETROKF)
{
register_download (url, u->local);
register_download (url, local_file);
if (*dt & TEXTHTML)
register_html (url, u->local);
register_html (url, local_file);
}
}

if (file)
{
if (u->local)
*file = xstrdup (u->local);
else
*file = NULL;
}
freeurl (u, 1);
*file = local_file ? local_file : NULL;
else
FREE_MAYBE (local_file);

url_free (u);
if (redirections)
string_set_free (redirections);
src/retr.h
@ -36,4 +36,12 @@ int downloaded_exceeds_quota PARAMS ((void));

void sleep_between_retrievals PARAMS ((int));

/* Because there's no http.h. */

struct url;

uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
int *, struct url *));


#endif /* RETR_H */
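To make the widened prototype above concrete: the proxy is now an explicit struct url argument, and the redirect target plus the local file name come back through the two char ** parameters. A hedged sketch of a caller, inferred from how retrieve_url uses the function in the hunks above (the wrapper name and the ownership assumptions are mine, not part of the patch):

#include "wget.h"
#include "url.h"
#include "retr.h"

/* Illustrative wrapper: fetch U over HTTP, via PROXY_URL when non-NULL.  */
static uerr_t
fetch_http (struct url *u, struct url *proxy_url, const char *referer, int *dt)
{
  char *redirected_to = NULL;  /* set when the result is NEWLOCATION */
  char *local_file = NULL;     /* file the document was saved under */
  uerr_t res = http_loop (u, &redirected_to, &local_file, referer, dt, proxy_url);

  FREE_MAYBE (redirected_to);
  FREE_MAYBE (local_file);
  return res;
}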
59
src/url.h
@ -25,6 +25,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443

/* Note: the ordering here is related to the order of elements in
`supported_schemes' in url.c. */

enum url_scheme {
SCHEME_HTTP,
#ifdef HAVE_SSL
@ -35,24 +38,27 @@ enum url_scheme {
};

/* Structure containing info on a URL. */
struct urlinfo
struct url
{
char *url; /* Unchanged URL */
char *url; /* Original URL */
enum url_scheme scheme; /* URL scheme */

char *host; /* Extracted hostname */
unsigned short port;
char ftp_type;
char *path, *dir, *file, *qstring;
/* Path, dir, file, and query string
(properly decoded) */
char *user, *passwd; /* Username and password */
struct urlinfo *proxy; /* The exact string to pass to proxy
server */
char *referer; /* The source from which the request
URI was obtained */
char *local; /* The local filename of the URL
document */
int port; /* Port number */

/* URL components (URL-quoted). */
char *path;
char *params;
char *query;
char *fragment;

/* Extracted path info (unquoted). */
char *dir;
char *file;

/* Username and password (unquoted). */
char *user;
char *passwd;
};

enum convert_options {
@ -104,19 +110,21 @@ typedef enum

char *encode_string PARAMS ((const char *));

struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int));
enum url_scheme url_detect_scheme PARAMS ((const char *));
struct url *url_parse PARAMS ((const char *, int *));
const char *url_error PARAMS ((int));
char *url_full_path PARAMS ((const struct url *));
void url_set_dir PARAMS ((struct url *, const char *));
void url_set_file PARAMS ((struct url *, const char *));
void url_free PARAMS ((struct url *));

enum url_scheme url_scheme PARAMS ((const char *));
int url_skip_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *));
int scheme_default_port PARAMS ((enum url_scheme));

int url_skip_uname PARAMS ((const char *));

uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int));
/* url_equal is not currently used. */
#if 0
int url_equal PARAMS ((const char *, const char *));
#endif /* 0 */
char *url_string PARAMS ((const struct url *, int));

urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
@ -126,8 +134,7 @@ char *uri_merge PARAMS ((const char *, const char *));

void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *));
void opt_url PARAMS ((struct urlinfo *));
char *url_filename PARAMS ((const struct url *));

char *getproxy PARAMS ((uerr_t));
int no_proxy_match PARAMS ((const char *, const char **));
@ -137,6 +144,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *));

downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));

char *rewrite_url_maybe PARAMS ((const char *));
char *rewrite_shorthand_url PARAMS ((const char *));

#endif /* URL_H */
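Taken together, the declarations above reduce URL handling to parse, inspect, free. A minimal usage sketch relying only on the prototypes and struct url fields shown in this header, with url_error() standing in for the removed uerrmsg() (the function is illustrative, not from the patch):

#include <stdio.h>
#include "wget.h"
#include "url.h"

/* Illustrative only: parse TEXT and print its pieces, or the parse error.  */
static void
dump_url (const char *text)
{
  int err;
  struct url *u = url_parse (text, &err);

  if (!u)
    {
      fprintf (stderr, "%s: %s.\n", text, url_error (err));
      return;
    }

  printf ("host=%s port=%d dir=%s file=%s\n", u->host, u->port, u->dir, u->file);
  printf ("canonical: %s\n", url_string (u, 0));
  url_free (u);
}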
78
src/utils.c
@ -404,30 +404,6 @@ datetime_str (time_t *tm)
ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
return output;
}

/* Returns an error message for ERRNUM. #### This requires more work.
This function, as well as the whole error system, is very
ill-conceived. */
const char *
uerrmsg (uerr_t errnum)
{
switch (errnum)
{
case URLUNKNOWN:
return _("Unknown/unsupported protocol");
break;
case URLBADPORT:
return _("Invalid port specification");
break;
case URLBADHOST:
return _("Invalid host name");
break;
default:
abort ();
/* $@#@#$ compiler. */
return NULL;
}
}

/* The Windows versions of the following two functions are defined in
mswindows.c. */
@ -464,6 +440,14 @@ fork_to_background (void)
}
#endif /* not WINDOWS */

char *
ps (char *orig)
{
char *r = xstrdup (orig);
path_simplify (r);
return r;
}

/* Canonicalize PATH, and return a new path. The new path differs from PATH
in that:
Multple `/'s are collapsed to a single `/'.
@ -479,7 +463,8 @@ fork_to_background (void)
Always use '/' as stub_char.
Don't check for local things using canon_stat.
Change the original string instead of strdup-ing.
React correctly when beginning with `./' and `../'. */
React correctly when beginning with `./' and `../'.
Don't zip out trailing slashes. */
void
path_simplify (char *path)
{
@ -545,20 +530,15 @@ path_simplify (char *path)
i = start + 1;
}

/* Check for trailing `/'. */
if (start && !path[i])
{
zero_last:
path[--i] = '\0';
break;
}

/* Check for `../', `./' or trailing `.' by itself. */
if (path[i] == '.')
{
/* Handle trailing `.' by itself. */
if (!path[i + 1])
goto zero_last;
{
path[--i] = '\0';
break;
}

/* Handle `./'. */
if (path[i + 1] == '/')
@ -579,12 +559,6 @@ path_simplify (char *path)
}
} /* path == '.' */
} /* while */

if (!*path)
{
*path = stub_char;
path[1] = '\0';
}
}

/* "Touch" FILE, i.e. make its atime and mtime equal to the time
@ -728,6 +702,30 @@ make_directory (const char *directory)
}
return 0;
}

/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
should be a file name. For example, file_merge("/foo/bar", "baz")
will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
"foo/bar/baz".

In other words, it's a simpler and gentler version of uri_merge_1. */

char *
file_merge (const char *base, const char *file)
{
char *result;
const char *cut = (const char *)strrchr (base, '/');

if (!cut)
cut = base + strlen (base);

result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
memcpy (result, base, cut - base);
result[cut - base] = '/';
strcpy (result + (cut - base) + 1, file);

return result;
}

static int in_acclist PARAMS ((const char *const *, const char *, int));
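Two behavioural points from the utils.c hunks above, shown as a hedged, untested sketch: file_merge() swaps the last component of BASE for FILE, and path_simplify() keeps trailing slashes now. The expected results in the comments follow the code and doc comments above, not an actual run; the program assumes it is built against this tree's utils.c.

#include <stdio.h>
#include "wget.h"
#include "utils.h"

int
main (void)
{
  char *a = file_merge ("/foo/bar", "baz");   /* expected "/foo/baz" */
  char *b = file_merge ("/foo/bar/", "baz");  /* expected "/foo/bar/baz" */

  /* `//' and `./' are still collapsed, but the trailing `/' survives.  */
  char path[] = "/foo//bar/./";
  path_simplify (path);                       /* expected "/foo/bar/" */

  printf ("%s\n%s\n%s\n", a, b, path);
  xfree (a);
  xfree (b);
  return 0;
}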
src/utils.h
@ -44,8 +44,6 @@ struct wget_timer;
char *time_str PARAMS ((time_t *));
char *datetime_str PARAMS ((time_t *));

const char *uerrmsg PARAMS ((uerr_t));

#ifdef DEBUG_MALLOC
void print_malloc_debug_stats ();
#endif
@ -63,6 +61,7 @@ int file_exists_p PARAMS ((const char *));
int file_non_directory_p PARAMS ((const char *));
int make_directory PARAMS ((const char *));
char *unique_name PARAMS ((const char *));
char *file_merge PARAMS ((const char *, const char *));

int acceptable PARAMS ((const char *));
int accdir PARAMS ((const char *s, enum accd));
src/wget.h
@ -285,9 +285,8 @@ typedef enum
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLHTTPS,
URLOK, URLHTTP, URLFTP, URLFILE, URLUNKNOWN, URLBADPORT,
URLBADHOST, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
FTPINVPASV, FTPNOPASV,
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,