mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Rewrite parsing and handling of URLs.
Published in <sxs4rnnlklo.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
f4dcb55851
commit
d5be8ecca4
@ -1,3 +1,7 @@
|
|||||||
|
2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* configure.in: Check for strpbrk().
|
||||||
|
|
||||||
2001-05-14 Herold Heiko <Heiko.Herold@previnet.it>
|
2001-05-14 Herold Heiko <Heiko.Herold@previnet.it>
|
||||||
|
|
||||||
* windows/Makefile.src:
|
* windows/Makefile.src:
|
||||||
|
@ -172,7 +172,7 @@ dnl Checks for library functions.
|
|||||||
dnl
|
dnl
|
||||||
AC_FUNC_ALLOCA
|
AC_FUNC_ALLOCA
|
||||||
AC_FUNC_MMAP
|
AC_FUNC_MMAP
|
||||||
AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp)
|
AC_CHECK_FUNCS(strdup strstr strcasecmp strncasecmp strpbrk)
|
||||||
AC_CHECK_FUNCS(gettimeofday mktime strptime)
|
AC_CHECK_FUNCS(gettimeofday mktime strptime)
|
||||||
AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
|
AC_CHECK_FUNCS(strerror snprintf vsnprintf select signal symlink access isatty)
|
||||||
AC_CHECK_FUNCS(uname gethostname)
|
AC_CHECK_FUNCS(uname gethostname)
|
||||||
|
@ -1,3 +1,53 @@
|
|||||||
|
2001-11-22 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* utils.c (path_simplify): Don't remove trailing slashes.
|
||||||
|
|
||||||
|
* ftp.c (ftp_get_listing): Use it.
|
||||||
|
|
||||||
|
* utils.c (file_merge): New function.
|
||||||
|
|
||||||
|
* url.c (opt_url): Removed.
|
||||||
|
|
||||||
|
* recur.c (recursive_retrieve): Inline "opt_url" logic.
|
||||||
|
|
||||||
|
* main.c (main): Use xfree(), not free().
|
||||||
|
|
||||||
|
* url.c (rewrite_url_maybe): Renamed to rewrite_shorthand_url.
|
||||||
|
|
||||||
|
* ftp.c (ccon): Move `ccon' typedef here, since it's only used
|
||||||
|
internally.
|
||||||
|
|
||||||
|
* config.h.in: Include a stub for HAVE_STRPBRK.
|
||||||
|
|
||||||
|
* cmpt.c (strpbrk): Include a replacement for systems without
|
||||||
|
strpbrk().
|
||||||
|
|
||||||
|
* ftp.c: Use url_set_dir and url_set_file when modifying the URL.
|
||||||
|
|
||||||
|
* url.c (url_set_dir): New function.
|
||||||
|
(url_set_file): Ditto.
|
||||||
|
|
||||||
|
* ftp-basic.c (ftp_process_type): Process FTP type here; the URL
|
||||||
|
parser makes the URL "params" available, so we can do that in this
|
||||||
|
function.
|
||||||
|
|
||||||
|
* retr.c: Ditto.
|
||||||
|
|
||||||
|
* ftp.c: Ditto; pass the local file information in `ccon'.
|
||||||
|
|
||||||
|
* http.c: Get rid of the ugly kludge that had URL being replaced
|
||||||
|
with the proxy URL when proxy retrieval was requested. Use a
|
||||||
|
separate parameter to http_loop and gethttp for the proxy URL.
|
||||||
|
|
||||||
|
* http.c: Changed to reflect the fact that local file, proxy, and
|
||||||
|
referer information are no longer stored in struct url. The local
|
||||||
|
file information is passed in `struct hstat' now.
|
||||||
|
|
||||||
|
* url.c: Reworked URL parsing to be more regular. Reencode the
|
||||||
|
URL using reencode_string.
|
||||||
|
Removed non-URL-related information from struct url. This
|
||||||
|
includes fields `proxy', `local', and `referer'.
|
||||||
|
|
||||||
2001-11-22 Jochen Hein <jochen@jochen.org>
|
2001-11-22 Jochen Hein <jochen@jochen.org>
|
||||||
|
|
||||||
* main.c (main): Split the copyright notice for easier
|
* main.c (main): Split the copyright notice for easier
|
||||||
|
18
src/cmpt.c
18
src/cmpt.c
@ -205,6 +205,24 @@ ret0:
|
|||||||
}
|
}
|
||||||
#endif /* not HAVE_STRSTR */
|
#endif /* not HAVE_STRSTR */
|
||||||
|
|
||||||
|
#ifndef HAVE_STRPBRK
/* Replacement strpbrk() for systems whose C library does not provide
   one.

   Scan S from left to right and return a pointer to the first
   character of S that also occurs in ACCEPT.  Return a null pointer
   if no character of S occurs in ACCEPT, which is always the case
   when either string is empty.  The terminating '\0' of S is never
   treated as a match.  */
char *
strpbrk (const char *s, const char *accept)
{
  for (; *s != '\0'; ++s)
    {
      const char *a;

      /* Compare the current character of S against every character
         of ACCEPT; the first hit wins.  */
      for (a = accept; *a != '\0'; ++a)
        if (*a == *s)
          /* The result points into S, but, as with the standard
             function, it is returned as a plain `char *'.  */
          return (char *) s;
    }

  /* Null pointer: nothing in S matched.  */
  return 0;
}
#endif /* not HAVE_STRPBRK */
|
||||||
|
|
||||||
#ifndef HAVE_MKTIME
|
#ifndef HAVE_MKTIME
|
||||||
/* From GNU libc 2.0. */
|
/* From GNU libc 2.0. */
|
||||||
|
|
||||||
|
@ -141,6 +141,9 @@ char *alloca ();
|
|||||||
/* Define if you have the strncasecmp function. */
|
/* Define if you have the strncasecmp function. */
|
||||||
#undef HAVE_STRNCASECMP
|
#undef HAVE_STRNCASECMP
|
||||||
|
|
||||||
|
/* Define if you have the strpbrk function. */
|
||||||
|
#undef HAVE_STRPBRK
|
||||||
|
|
||||||
/* Define if you have the strptime function. */
|
/* Define if you have the strptime function. */
|
||||||
#undef HAVE_STRPTIME
|
#undef HAVE_STRPTIME
|
||||||
|
|
||||||
|
@ -780,7 +780,7 @@ check_path_match (const char *cookie_path, const char *path)
|
|||||||
int
|
int
|
||||||
set_cookie_header_cb (const char *hdr, void *closure)
|
set_cookie_header_cb (const char *hdr, void *closure)
|
||||||
{
|
{
|
||||||
struct urlinfo *u = (struct urlinfo *)closure;
|
struct url *u = (struct url *)closure;
|
||||||
struct cookie *cookie;
|
struct cookie *cookie;
|
||||||
|
|
||||||
cookies_now = time (NULL);
|
cookies_now = time (NULL);
|
||||||
|
@ -633,6 +633,7 @@ ftp_pwd (struct rbuf *rbuf, char **pwd)
|
|||||||
/* All OK. */
|
/* All OK. */
|
||||||
return FTPOK;
|
return FTPOK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Sends the SIZE command to the server, and returns the value in 'size'.
|
/* Sends the SIZE command to the server, and returns the value in 'size'.
|
||||||
* If an error occurs, size is set to zero. */
|
* If an error occurs, size is set to zero. */
|
||||||
uerr_t
|
uerr_t
|
||||||
@ -690,3 +691,16 @@ ftp_size (struct rbuf *rbuf, const char *file, long int *size)
|
|||||||
/* All OK. */
|
/* All OK. */
|
||||||
return FTPOK;
|
return FTPOK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Work out the FTP transfer type requested by a URL's PARAMS string.

   When PARAMS is non-NULL, begins with "type=" (compared without
   regard to case) and has at least one character after the '=', the
   result is that character passed through TOUPPER.  In every other
   case the result is 'I', the default type.  */
char
ftp_process_type (const char *params)
{
  const char default_type = 'I';
  const char *value;

  if (!params)
    return default_type;
  if (strncasecmp (params, "type=", 5) != 0)
    return default_type;

  /* The prefix matched, so PARAMS holds at least five characters and
     VALUE points, at worst, at the terminating '\0'.  */
  value = params + 5;
  if (*value == '\0')
    return default_type;

  return TOUPPER (*value);
}
|
||||||
|
@ -796,7 +796,7 @@ Unsupported listing type, trying Unix listing parser.\n"));
|
|||||||
directories and files on the appropriate host. The references are
|
directories and files on the appropriate host. The references are
|
||||||
FTP. */
|
FTP. */
|
||||||
uerr_t
|
uerr_t
|
||||||
ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
|
ftp_index (const char *file, struct url *u, struct fileinfo *f)
|
||||||
{
|
{
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
char *upwd;
|
char *upwd;
|
||||||
|
219
src/ftp.c
219
src/ftp.c
@ -62,6 +62,18 @@ extern int h_errno;
|
|||||||
|
|
||||||
extern char ftp_last_respline[];
|
extern char ftp_last_respline[];
|
||||||
|
|
||||||
|
/* State of one FTP control connection, passed between the FTP
   retrieval routines in this file together with the URL being
   fetched. */
typedef struct
|
||||||
|
{
|
||||||
|
int st; /* connection status; used as a bit mask (flags such as
   ON_YOUR_OWN and DONE_CWD are cleared from it in this file) */
|
||||||
|
int cmd; /* command code; used as a bit mask of requested actions
   (DO_LOGIN, DO_CWD, DO_LIST, DO_RETR, LEAVE_PENDING) */
|
||||||
|
struct rbuf rbuf; /* control connection buffer; the control socket
   descriptor is read from it with RBUF_FD */
|
||||||
|
long dltime; /* time of the download (units not visible here --
   TODO confirm) */
|
||||||
|
enum stype rs; /* remote system reported by ftp server; handed to
   ftp_parse_ls when a listing is parsed */
|
||||||
|
char *id; /* initial directory; released with FREE_MAYBE in
   ftp_loop */
|
||||||
|
char *target; /* target file name: the local file the transfer is
   written to; ftp_loop_internal fills it in from url_filename when it
   is NULL */
|
||||||
|
} ccon;
|
||||||
|
|
||||||
|
|
||||||
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
|
/* Look for regexp "( *[0-9]+ *byte" (literal parenthesis) anywhere in
|
||||||
the string S, and return the number converted to long, if found, 0
|
the string S, and return the number converted to long, if found, 0
|
||||||
otherwise. */
|
otherwise. */
|
||||||
@ -108,7 +120,7 @@ ftp_expected_bytes (const char *s)
|
|||||||
connection to the server. It always closes the data connection,
|
connection to the server. It always closes the data connection,
|
||||||
and closes the control connection in case of error. */
|
and closes the control connection in case of error. */
|
||||||
static uerr_t
|
static uerr_t
|
||||||
getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
getftp (struct url *u, long *len, long restval, ccon *con)
|
||||||
{
|
{
|
||||||
int csock, dtsock, res;
|
int csock, dtsock, res;
|
||||||
uerr_t err;
|
uerr_t err;
|
||||||
@ -122,7 +134,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
|||||||
long expected_bytes = 0L;
|
long expected_bytes = 0L;
|
||||||
|
|
||||||
assert (con != NULL);
|
assert (con != NULL);
|
||||||
assert (u->local != NULL);
|
assert (con->target != NULL);
|
||||||
|
|
||||||
/* Debug-check of the sanity of the request by making sure that LIST
|
/* Debug-check of the sanity of the request by making sure that LIST
|
||||||
and RETR are never both requested (since we can handle only one
|
and RETR are never both requested (since we can handle only one
|
||||||
at a time). */
|
at a time). */
|
||||||
@ -144,6 +157,8 @@ getftp (struct urlinfo *u, long *len, long restval, ccon *con)
|
|||||||
csock = RBUF_FD (&con->rbuf);
|
csock = RBUF_FD (&con->rbuf);
|
||||||
else /* cmd & DO_LOGIN */
|
else /* cmd & DO_LOGIN */
|
||||||
{
|
{
|
||||||
|
char type_char;
|
||||||
|
|
||||||
/* Login to the server: */
|
/* Login to the server: */
|
||||||
|
|
||||||
/* First: Establish the control connection. */
|
/* First: Establish the control connection. */
|
||||||
@ -325,9 +340,10 @@ Error in server response, closing control connection.\n"));
|
|||||||
logputs (LOG_VERBOSE, _("done.\n"));
|
logputs (LOG_VERBOSE, _("done.\n"));
|
||||||
|
|
||||||
/* Fifth: Set the FTP type. */
|
/* Fifth: Set the FTP type. */
|
||||||
|
type_char = ftp_process_type (u->params);
|
||||||
if (!opt.server_response)
|
if (!opt.server_response)
|
||||||
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", TOUPPER (u->ftp_type));
|
logprintf (LOG_VERBOSE, "==> TYPE %c ... ", type_char);
|
||||||
err = ftp_type (&con->rbuf, TOUPPER (u->ftp_type));
|
err = ftp_type (&con->rbuf, type_char);
|
||||||
/* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
|
/* FTPRERR, WRITEFAILED, FTPUNKNOWNTYPE */
|
||||||
switch (err)
|
switch (err)
|
||||||
{
|
{
|
||||||
@ -351,7 +367,7 @@ Error in server response, closing control connection.\n"));
|
|||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("Unknown type `%c', closing control connection.\n"),
|
_("Unknown type `%c', closing control connection.\n"),
|
||||||
TOUPPER (u->ftp_type));
|
type_char);
|
||||||
CLOSE (csock);
|
CLOSE (csock);
|
||||||
rbuf_uninitialize (&con->rbuf);
|
rbuf_uninitialize (&con->rbuf);
|
||||||
return err;
|
return err;
|
||||||
@ -701,7 +717,7 @@ Error in server response, closing control connection.\n"));
|
|||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("\nREST failed; will not truncate `%s'.\n"),
|
_("\nREST failed; will not truncate `%s'.\n"),
|
||||||
u->local);
|
con->target);
|
||||||
CLOSE (csock);
|
CLOSE (csock);
|
||||||
closeport (dtsock);
|
closeport (dtsock);
|
||||||
rbuf_uninitialize (&con->rbuf);
|
rbuf_uninitialize (&con->rbuf);
|
||||||
@ -850,16 +866,16 @@ Error in server response, closing control connection.\n"));
|
|||||||
/* Open the file -- if opt.dfp is set, use it instead. */
|
/* Open the file -- if opt.dfp is set, use it instead. */
|
||||||
if (!opt.dfp || con->cmd & DO_LIST)
|
if (!opt.dfp || con->cmd & DO_LIST)
|
||||||
{
|
{
|
||||||
mkalldirs (u->local);
|
mkalldirs (con->target);
|
||||||
if (opt.backups)
|
if (opt.backups)
|
||||||
rotate_backups (u->local);
|
rotate_backups (con->target);
|
||||||
/* #### Is this correct? */
|
/* #### Is this correct? */
|
||||||
chmod (u->local, 0600);
|
chmod (con->target, 0600);
|
||||||
|
|
||||||
fp = fopen (u->local, restval ? "ab" : "wb");
|
fp = fopen (con->target, restval ? "ab" : "wb");
|
||||||
if (!fp)
|
if (!fp)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
|
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target, strerror (errno));
|
||||||
CLOSE (csock);
|
CLOSE (csock);
|
||||||
rbuf_uninitialize (&con->rbuf);
|
rbuf_uninitialize (&con->rbuf);
|
||||||
closeport (dtsock);
|
closeport (dtsock);
|
||||||
@ -928,7 +944,7 @@ Error in server response, closing control connection.\n"));
|
|||||||
if (res == -2)
|
if (res == -2)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
|
logprintf (LOG_NOTQUIET, _("%s: %s, closing control connection.\n"),
|
||||||
u->local, strerror (errno));
|
con->target, strerror (errno));
|
||||||
CLOSE (csock);
|
CLOSE (csock);
|
||||||
rbuf_uninitialize (&con->rbuf);
|
rbuf_uninitialize (&con->rbuf);
|
||||||
return FWRITEERR;
|
return FWRITEERR;
|
||||||
@ -993,10 +1009,10 @@ Error in server response, closing control connection.\n"));
|
|||||||
print it out. */
|
print it out. */
|
||||||
if (opt.server_response && (con->cmd & DO_LIST))
|
if (opt.server_response && (con->cmd & DO_LIST))
|
||||||
{
|
{
|
||||||
mkalldirs (u->local);
|
mkalldirs (con->target);
|
||||||
fp = fopen (u->local, "r");
|
fp = fopen (con->target, "r");
|
||||||
if (!fp)
|
if (!fp)
|
||||||
logprintf (LOG_ALWAYS, "%s: %s\n", u->local, strerror (errno));
|
logprintf (LOG_ALWAYS, "%s: %s\n", con->target, strerror (errno));
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char *line;
|
char *line;
|
||||||
@ -1020,7 +1036,7 @@ Error in server response, closing control connection.\n"));
|
|||||||
This loop either gets commands from con, or (if ON_YOUR_OWN is
|
This loop either gets commands from con, or (if ON_YOUR_OWN is
|
||||||
set), makes them up to retrieve the file given by the URL. */
|
set), makes them up to retrieve the file given by the URL. */
|
||||||
static uerr_t
|
static uerr_t
|
||||||
ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
ftp_loop_internal (struct url *u, struct fileinfo *f, ccon *con)
|
||||||
{
|
{
|
||||||
int count, orig_lp;
|
int count, orig_lp;
|
||||||
long restval, len;
|
long restval, len;
|
||||||
@ -1028,21 +1044,21 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
uerr_t err;
|
uerr_t err;
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
|
||||||
if (!u->local)
|
if (!con->target)
|
||||||
u->local = url_filename (u);
|
con->target = url_filename (u);
|
||||||
|
|
||||||
if (opt.noclobber && file_exists_p (u->local))
|
if (opt.noclobber && file_exists_p (con->target))
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE,
|
logprintf (LOG_VERBOSE,
|
||||||
_("File `%s' already there, not retrieving.\n"), u->local);
|
_("File `%s' already there, not retrieving.\n"), con->target);
|
||||||
/* If the file is there, we suppose it's retrieved OK. */
|
/* If the file is there, we suppose it's retrieved OK. */
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remove it if it's a link. */
|
/* Remove it if it's a link. */
|
||||||
remove_link (u->local);
|
remove_link (con->target);
|
||||||
if (!opt.output_document)
|
if (!opt.output_document)
|
||||||
locf = u->local;
|
locf = con->target;
|
||||||
else
|
else
|
||||||
locf = opt.output_document;
|
locf = opt.output_document;
|
||||||
|
|
||||||
@ -1100,7 +1116,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
/* Print fetch message, if opt.verbose. */
|
/* Print fetch message, if opt.verbose. */
|
||||||
if (opt.verbose)
|
if (opt.verbose)
|
||||||
{
|
{
|
||||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
char *hurl = url_string (u, 1);
|
||||||
char tmp[15];
|
char tmp[15];
|
||||||
strcpy (tmp, " ");
|
strcpy (tmp, " ");
|
||||||
if (count > 1)
|
if (count > 1)
|
||||||
@ -1175,7 +1191,7 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
/* Need to hide the password from the URL. The `if' is here
|
/* Need to hide the password from the URL. The `if' is here
|
||||||
so that we don't do the needless allocation every
|
so that we don't do the needless allocation every
|
||||||
time. */
|
time. */
|
||||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
char *hurl = url_string (u, 1);
|
||||||
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
|
logprintf (LOG_NONVERBOSE, "%s URL: %s [%ld] -> \"%s\" [%d]\n",
|
||||||
tms, hurl, len, locf, count);
|
tms, hurl, len, locf, count);
|
||||||
xfree (hurl);
|
xfree (hurl);
|
||||||
@ -1235,43 +1251,48 @@ ftp_loop_internal (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
/* Return the directory listing in a reusable format. The directory
|
/* Return the directory listing in a reusable format. The directory
|
||||||
is specified in u->dir. */
|
is specified in u->dir. */
|
||||||
uerr_t
|
uerr_t
|
||||||
ftp_get_listing (struct urlinfo *u, ccon *con, struct fileinfo **f)
|
ftp_get_listing (struct url *u, ccon *con, struct fileinfo **f)
|
||||||
{
|
{
|
||||||
uerr_t err;
|
uerr_t err;
|
||||||
char *olocal = u->local;
|
char *uf; /* url file name */
|
||||||
char *list_filename, *ofile;
|
char *lf; /* list file name */
|
||||||
|
char *old_target = con->target;
|
||||||
|
|
||||||
con->st &= ~ON_YOUR_OWN;
|
con->st &= ~ON_YOUR_OWN;
|
||||||
con->cmd |= (DO_LIST | LEAVE_PENDING);
|
con->cmd |= (DO_LIST | LEAVE_PENDING);
|
||||||
con->cmd &= ~DO_RETR;
|
con->cmd &= ~DO_RETR;
|
||||||
/* Get the listing filename. */
|
|
||||||
ofile = u->file;
|
/* Find the listing file name. We do it by taking the file name of
|
||||||
u->file = LIST_FILENAME;
|
the URL and replacing the last component with the listing file
|
||||||
list_filename = url_filename (u);
|
name. */
|
||||||
u->file = ofile;
|
uf = url_filename (u);
|
||||||
u->local = list_filename;
|
lf = file_merge (uf, LIST_FILENAME);
|
||||||
DEBUGP ((_("Using `%s' as listing tmp file.\n"), list_filename));
|
xfree (uf);
|
||||||
|
DEBUGP ((_("Using `%s' as listing tmp file.\n"), lf));
|
||||||
|
|
||||||
|
con->target = lf;
|
||||||
err = ftp_loop_internal (u, NULL, con);
|
err = ftp_loop_internal (u, NULL, con);
|
||||||
u->local = olocal;
|
con->target = old_target;
|
||||||
|
|
||||||
if (err == RETROK)
|
if (err == RETROK)
|
||||||
*f = ftp_parse_ls (list_filename, con->rs);
|
*f = ftp_parse_ls (lf, con->rs);
|
||||||
else
|
else
|
||||||
*f = NULL;
|
*f = NULL;
|
||||||
if (opt.remove_listing)
|
if (opt.remove_listing)
|
||||||
{
|
{
|
||||||
if (unlink (list_filename))
|
if (unlink (lf))
|
||||||
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
|
logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
|
||||||
else
|
else
|
||||||
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), list_filename);
|
logprintf (LOG_VERBOSE, _("Removed `%s'.\n"), lf);
|
||||||
}
|
}
|
||||||
xfree (list_filename);
|
xfree (lf);
|
||||||
con->cmd &= ~DO_LIST;
|
con->cmd &= ~DO_LIST;
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static uerr_t ftp_retrieve_dirs PARAMS ((struct urlinfo *, struct fileinfo *,
|
static uerr_t ftp_retrieve_dirs PARAMS ((struct url *, struct fileinfo *,
|
||||||
ccon *));
|
ccon *));
|
||||||
static uerr_t ftp_retrieve_glob PARAMS ((struct urlinfo *, ccon *, int));
|
static uerr_t ftp_retrieve_glob PARAMS ((struct url *, ccon *, int));
|
||||||
static struct fileinfo *delelement PARAMS ((struct fileinfo *,
|
static struct fileinfo *delelement PARAMS ((struct fileinfo *,
|
||||||
struct fileinfo **));
|
struct fileinfo **));
|
||||||
static void freefileinfo PARAMS ((struct fileinfo *f));
|
static void freefileinfo PARAMS ((struct fileinfo *f));
|
||||||
@ -1284,11 +1305,10 @@ static void freefileinfo PARAMS ((struct fileinfo *f));
|
|||||||
If opt.recursive is set, after all files have been retrieved,
|
If opt.recursive is set, after all files have been retrieved,
|
||||||
ftp_retrieve_dirs will be called to retrieve the directories. */
|
ftp_retrieve_dirs will be called to retrieve the directories. */
|
||||||
static uerr_t
|
static uerr_t
|
||||||
ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
ftp_retrieve_list (struct url *u, struct fileinfo *f, ccon *con)
|
||||||
{
|
{
|
||||||
static int depth = 0;
|
static int depth = 0;
|
||||||
uerr_t err;
|
uerr_t err;
|
||||||
char *olocal, *ofile;
|
|
||||||
struct fileinfo *orig;
|
struct fileinfo *orig;
|
||||||
long local_size;
|
long local_size;
|
||||||
time_t tml;
|
time_t tml;
|
||||||
@ -1323,15 +1343,19 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
|
|
||||||
while (f)
|
while (f)
|
||||||
{
|
{
|
||||||
|
char *old_target, *ofile;
|
||||||
|
|
||||||
if (downloaded_exceeds_quota ())
|
if (downloaded_exceeds_quota ())
|
||||||
{
|
{
|
||||||
--depth;
|
--depth;
|
||||||
return QUOTEXC;
|
return QUOTEXC;
|
||||||
}
|
}
|
||||||
olocal = u->local;
|
old_target = con->target;
|
||||||
ofile = u->file;
|
|
||||||
u->file = f->name;
|
ofile = xstrdup (u->file);
|
||||||
u->local = url_filename (u);
|
url_set_file (u, f->name);
|
||||||
|
|
||||||
|
con->target = url_filename (u);
|
||||||
err = RETROK;
|
err = RETROK;
|
||||||
|
|
||||||
dlthis = 1;
|
dlthis = 1;
|
||||||
@ -1343,7 +1367,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
I'm not implementing it now since files on an FTP server are much
|
I'm not implementing it now since files on an FTP server are much
|
||||||
more likely than files on an HTTP server to legitimately have a
|
more likely than files on an HTTP server to legitimately have a
|
||||||
.orig suffix. */
|
.orig suffix. */
|
||||||
if (!stat (u->local, &st))
|
if (!stat (con->target, &st))
|
||||||
{
|
{
|
||||||
int eq_size;
|
int eq_size;
|
||||||
int cor_val;
|
int cor_val;
|
||||||
@ -1360,7 +1384,7 @@ ftp_retrieve_list (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
|||||||
/* Remote file is older, file sizes can be compared and
|
/* Remote file is older, file sizes can be compared and
|
||||||
are both equal. */
|
are both equal. */
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
|
Remote file no newer than local file `%s' -- not retrieving.\n"), con->target);
|
||||||
dlthis = 0;
|
dlthis = 0;
|
||||||
}
|
}
|
||||||
else if (eq_size)
|
else if (eq_size)
|
||||||
@ -1368,7 +1392,7 @@ Remote file no newer than local file `%s' -- not retrieving.\n"), u->local);
|
|||||||
/* Remote file is newer or sizes cannot be matched */
|
/* Remote file is newer or sizes cannot be matched */
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Remote file is newer than local file `%s' -- retrieving.\n\n"),
|
Remote file is newer than local file `%s' -- retrieving.\n\n"),
|
||||||
u->local);
|
con->target);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1396,30 +1420,30 @@ The sizes do not match (local %ld) -- retrieving.\n\n"), local_size);
|
|||||||
struct stat st;
|
struct stat st;
|
||||||
/* Check whether we already have the correct
|
/* Check whether we already have the correct
|
||||||
symbolic link. */
|
symbolic link. */
|
||||||
int rc = lstat (u->local, &st);
|
int rc = lstat (con->target, &st);
|
||||||
if (rc == 0)
|
if (rc == 0)
|
||||||
{
|
{
|
||||||
size_t len = strlen (f->linkto) + 1;
|
size_t len = strlen (f->linkto) + 1;
|
||||||
if (S_ISLNK (st.st_mode))
|
if (S_ISLNK (st.st_mode))
|
||||||
{
|
{
|
||||||
char *link_target = (char *)alloca (len);
|
char *link_target = (char *)alloca (len);
|
||||||
size_t n = readlink (u->local, link_target, len);
|
size_t n = readlink (con->target, link_target, len);
|
||||||
if ((n == len - 1)
|
if ((n == len - 1)
|
||||||
&& (memcmp (link_target, f->linkto, n) == 0))
|
&& (memcmp (link_target, f->linkto, n) == 0))
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Already have correct symlink %s -> %s\n\n"),
|
Already have correct symlink %s -> %s\n\n"),
|
||||||
u->local, f->linkto);
|
con->target, f->linkto);
|
||||||
dlthis = 0;
|
dlthis = 0;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
|
logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"),
|
||||||
u->local, f->linkto);
|
con->target, f->linkto);
|
||||||
/* Unlink before creating symlink! */
|
/* Unlink before creating symlink! */
|
||||||
unlink (u->local);
|
unlink (con->target);
|
||||||
if (symlink (f->linkto, u->local) == -1)
|
if (symlink (f->linkto, con->target) == -1)
|
||||||
logprintf (LOG_NOTQUIET, "symlink: %s\n",
|
logprintf (LOG_NOTQUIET, "symlink: %s\n",
|
||||||
strerror (errno));
|
strerror (errno));
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
@ -1427,7 +1451,7 @@ Already have correct symlink %s -> %s\n\n"),
|
|||||||
#else /* not HAVE_SYMLINK */
|
#else /* not HAVE_SYMLINK */
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("Symlinks not supported, skipping symlink `%s'.\n"),
|
_("Symlinks not supported, skipping symlink `%s'.\n"),
|
||||||
u->local);
|
con->target);
|
||||||
#endif /* not HAVE_SYMLINK */
|
#endif /* not HAVE_SYMLINK */
|
||||||
}
|
}
|
||||||
else /* opt.retr_symlinks */
|
else /* opt.retr_symlinks */
|
||||||
@ -1458,7 +1482,7 @@ Already have correct symlink %s -> %s\n\n"),
|
|||||||
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
|
if (!(f->type == FT_SYMLINK && !opt.retr_symlinks)
|
||||||
&& f->tstamp != -1
|
&& f->tstamp != -1
|
||||||
&& dlthis
|
&& dlthis
|
||||||
&& file_exists_p (u->local))
|
&& file_exists_p (con->target))
|
||||||
{
|
{
|
||||||
/* #### This code repeats in http.c and ftp.c. Move it to a
|
/* #### This code repeats in http.c and ftp.c. Move it to a
|
||||||
function! */
|
function! */
|
||||||
@ -1469,27 +1493,31 @@ Already have correct symlink %s -> %s\n\n"),
|
|||||||
fl = opt.output_document;
|
fl = opt.output_document;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fl = u->local;
|
fl = con->target;
|
||||||
if (fl)
|
if (fl)
|
||||||
touch (fl, f->tstamp);
|
touch (fl, f->tstamp);
|
||||||
}
|
}
|
||||||
else if (f->tstamp == -1)
|
else if (f->tstamp == -1)
|
||||||
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), u->local);
|
logprintf (LOG_NOTQUIET, _("%s: corrupt time-stamp.\n"), con->target);
|
||||||
|
|
||||||
if (f->perms && f->type == FT_PLAINFILE && dlthis)
|
if (f->perms && f->type == FT_PLAINFILE && dlthis)
|
||||||
chmod (u->local, f->perms);
|
chmod (con->target, f->perms);
|
||||||
else
|
else
|
||||||
DEBUGP (("Unrecognized permissions for %s.\n", u->local));
|
DEBUGP (("Unrecognized permissions for %s.\n", con->target));
|
||||||
|
|
||||||
|
xfree (con->target);
|
||||||
|
con->target = old_target;
|
||||||
|
|
||||||
|
url_set_file (u, ofile);
|
||||||
|
xfree (ofile);
|
||||||
|
|
||||||
xfree (u->local);
|
|
||||||
u->local = olocal;
|
|
||||||
u->file = ofile;
|
|
||||||
/* Break on fatals. */
|
/* Break on fatals. */
|
||||||
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
|
if (err == QUOTEXC || err == HOSTERR || err == FWRITEERR)
|
||||||
break;
|
break;
|
||||||
con->cmd &= ~ (DO_CWD | DO_LOGIN);
|
con->cmd &= ~ (DO_CWD | DO_LOGIN);
|
||||||
f = f->next;
|
f = f->next;
|
||||||
} /* while */
|
}
|
||||||
|
|
||||||
/* We do not want to call ftp_retrieve_dirs here */
|
/* We do not want to call ftp_retrieve_dirs here */
|
||||||
if (opt.recursive &&
|
if (opt.recursive &&
|
||||||
!(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
|
!(opt.reclevel != INFINITE_RECURSION && depth >= opt.reclevel))
|
||||||
@ -1506,51 +1534,62 @@ Already have correct symlink %s -> %s\n\n"),
|
|||||||
ftp_retrieve_glob on each directory entry. The function knows
|
ftp_retrieve_glob on each directory entry. The function knows
|
||||||
about excluded directories. */
|
about excluded directories. */
|
||||||
static uerr_t
|
static uerr_t
|
||||||
ftp_retrieve_dirs (struct urlinfo *u, struct fileinfo *f, ccon *con)
|
ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con)
|
||||||
{
|
{
|
||||||
char *odir;
|
char *container = NULL;
|
||||||
char *current_container = NULL;
|
int container_size = 0;
|
||||||
int current_length = 0;
|
|
||||||
|
|
||||||
for (; f; f = f->next)
|
for (; f; f = f->next)
|
||||||
{
|
{
|
||||||
int len;
|
int size;
|
||||||
|
char *odir, *newdir;
|
||||||
|
|
||||||
if (downloaded_exceeds_quota ())
|
if (downloaded_exceeds_quota ())
|
||||||
break;
|
break;
|
||||||
if (f->type != FT_DIRECTORY)
|
if (f->type != FT_DIRECTORY)
|
||||||
continue;
|
continue;
|
||||||
odir = u->dir;
|
|
||||||
len = strlen (u->dir) + 1 + strlen (f->name) + 1;
|
|
||||||
/* Allocate u->dir off stack, but reallocate only if a larger
|
/* Allocate u->dir off stack, but reallocate only if a larger
|
||||||
string is needed. */
|
string is needed. It's a pity there's no "realloca" for an
|
||||||
if (len > current_length)
|
item on the bottom of the stack. */
|
||||||
current_container = (char *)alloca (len);
|
size = strlen (u->dir) + 1 + strlen (f->name) + 1;
|
||||||
u->dir = current_container;
|
if (size > container_size)
|
||||||
|
container = (char *)alloca (size);
|
||||||
|
newdir = container;
|
||||||
|
|
||||||
|
odir = u->dir;
|
||||||
if (*odir == '\0'
|
if (*odir == '\0'
|
||||||
|| (*odir == '/' && *(odir + 1) == '\0'))
|
|| (*odir == '/' && *(odir + 1) == '\0'))
|
||||||
/* If ODIR is empty or just "/", simply append f->name to
|
/* If ODIR is empty or just "/", simply append f->name to
|
||||||
ODIR. (In the former case, to preserve u->dir being
|
ODIR. (In the former case, to preserve u->dir being
|
||||||
relative; in the latter case, to avoid double slash.) */
|
relative; in the latter case, to avoid double slash.) */
|
||||||
sprintf (u->dir, "%s%s", odir, f->name);
|
sprintf (newdir, "%s%s", odir, f->name);
|
||||||
else
|
else
|
||||||
/* Else, use a separator. */
|
/* Else, use a separator. */
|
||||||
sprintf (u->dir, "%s/%s", odir, f->name);
|
sprintf (newdir, "%s/%s", odir, f->name);
|
||||||
|
|
||||||
DEBUGP (("Composing new CWD relative to the initial directory.\n"));
|
DEBUGP (("Composing new CWD relative to the initial directory.\n"));
|
||||||
DEBUGP ((" odir = '%s'\n f->name = '%s'\n u->dir = '%s'\n\n",
|
DEBUGP ((" odir = '%s'\n f->name = '%s'\n newdir = '%s'\n\n",
|
||||||
odir, f->name, u->dir));
|
odir, f->name, newdir));
|
||||||
if (!accdir (u->dir, ALLABS))
|
if (!accdir (newdir, ALLABS))
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
|
Not descending to `%s' as it is excluded/not-included.\n"), newdir);
|
||||||
u->dir = odir;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
con->st &= ~DONE_CWD;
|
con->st &= ~DONE_CWD;
|
||||||
|
|
||||||
|
odir = xstrdup (u->dir); /* because url_set_dir will free
|
||||||
|
u->dir. */
|
||||||
|
url_set_dir (u, newdir);
|
||||||
ftp_retrieve_glob (u, con, GETALL);
|
ftp_retrieve_glob (u, con, GETALL);
|
||||||
|
url_set_dir (u, odir);
|
||||||
|
xfree (odir);
|
||||||
|
|
||||||
/* Set the time-stamp? */
|
/* Set the time-stamp? */
|
||||||
u->dir = odir;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (opt.quota && opt.downloaded > opt.quota)
|
if (opt.quota && opt.downloaded > opt.quota)
|
||||||
return QUOTEXC;
|
return QUOTEXC;
|
||||||
else
|
else
|
||||||
@ -1567,7 +1606,7 @@ Not descending to `%s' as it is excluded/not-included.\n"), u->dir);
|
|||||||
get the listing, so that the time-stamp is heeded); if it's GLOBALL,
|
get the listing, so that the time-stamp is heeded); if it's GLOBALL,
|
||||||
use globbing; if it's GETALL, download the whole directory. */
|
use globbing; if it's GETALL, download the whole directory. */
|
||||||
static uerr_t
|
static uerr_t
|
||||||
ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
ftp_retrieve_glob (struct url *u, ccon *con, int action)
|
||||||
{
|
{
|
||||||
struct fileinfo *orig, *start;
|
struct fileinfo *orig, *start;
|
||||||
uerr_t res;
|
uerr_t res;
|
||||||
@ -1607,7 +1646,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
|||||||
matchres = fnmatch (u->file, f->name, 0);
|
matchres = fnmatch (u->file, f->name, 0);
|
||||||
if (matchres == -1)
|
if (matchres == -1)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local,
|
logprintf (LOG_NOTQUIET, "%s: %s\n", con->target,
|
||||||
strerror (errno));
|
strerror (errno));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -1657,7 +1696,7 @@ ftp_retrieve_glob (struct urlinfo *u, ccon *con, int action)
|
|||||||
of URL. Inherently, its capabilities are limited on what can be
|
of URL. Inherently, its capabilities are limited on what can be
|
||||||
encoded into a URL. */
|
encoded into a URL. */
|
||||||
uerr_t
|
uerr_t
|
||||||
ftp_loop (struct urlinfo *u, int *dt)
|
ftp_loop (struct url *u, int *dt)
|
||||||
{
|
{
|
||||||
ccon con; /* FTP connection */
|
ccon con; /* FTP connection */
|
||||||
uerr_t res;
|
uerr_t res;
|
||||||
@ -1686,7 +1725,7 @@ ftp_loop (struct urlinfo *u, int *dt)
|
|||||||
{
|
{
|
||||||
char *filename = (opt.output_document
|
char *filename = (opt.output_document
|
||||||
? xstrdup (opt.output_document)
|
? xstrdup (opt.output_document)
|
||||||
: (u->local ? xstrdup (u->local)
|
: (con.target ? xstrdup (con.target)
|
||||||
: url_filename (u)));
|
: url_filename (u)));
|
||||||
res = ftp_index (filename, u, f);
|
res = ftp_index (filename, u, f);
|
||||||
if (res == FTPOK && opt.verbose)
|
if (res == FTPOK && opt.verbose)
|
||||||
@ -1736,6 +1775,8 @@ ftp_loop (struct urlinfo *u, int *dt)
|
|||||||
CLOSE (RBUF_FD (&con.rbuf));
|
CLOSE (RBUF_FD (&con.rbuf));
|
||||||
FREE_MAYBE (con.id);
|
FREE_MAYBE (con.id);
|
||||||
con.id = NULL;
|
con.id = NULL;
|
||||||
|
FREE_MAYBE (con.target);
|
||||||
|
con.target = NULL;
|
||||||
return res;
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
19
src/ftp.h
19
src/ftp.h
@ -46,7 +46,7 @@ uerr_t ftp_syst PARAMS ((struct rbuf *, enum stype *));
|
|||||||
uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
|
uerr_t ftp_pwd PARAMS ((struct rbuf *, char **));
|
||||||
uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
|
uerr_t ftp_size PARAMS ((struct rbuf *, const char *, long int *));
|
||||||
|
|
||||||
struct urlinfo;
|
struct url;
|
||||||
|
|
||||||
/* File types. */
|
/* File types. */
|
||||||
enum ftype
|
enum ftype
|
||||||
@ -98,19 +98,12 @@ enum wget_ftp_fstatus
|
|||||||
correct. */
|
correct. */
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int st; /* connection status */
|
|
||||||
int cmd; /* command code */
|
|
||||||
struct rbuf rbuf; /* control connection buffer */
|
|
||||||
long dltime; /* time of the download */
|
|
||||||
enum stype rs; /* remote system reported by ftp server */
|
|
||||||
char *id; /* initial directory */
|
|
||||||
} ccon;
|
|
||||||
|
|
||||||
struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
|
struct fileinfo *ftp_parse_ls PARAMS ((const char *, const enum stype));
|
||||||
uerr_t ftp_loop PARAMS ((struct urlinfo *, int *));
|
uerr_t ftp_loop PARAMS ((struct url *, int *));
|
||||||
|
|
||||||
|
uerr_t ftp_index (const char *, struct url *, struct fileinfo *);
|
||||||
|
|
||||||
|
char ftp_process_type PARAMS ((const char *));
|
||||||
|
|
||||||
uerr_t ftp_index (const char *, struct urlinfo *, struct fileinfo *);
|
|
||||||
|
|
||||||
#endif /* FTP_H */
|
#endif /* FTP_H */
|
||||||
|
@ -327,7 +327,7 @@ same_host (const char *u1, const char *u2)
|
|||||||
/* Determine whether a URL is acceptable to be followed, according to
|
/* Determine whether a URL is acceptable to be followed, according to
|
||||||
a list of domains to accept. */
|
a list of domains to accept. */
|
||||||
int
|
int
|
||||||
accept_domain (struct urlinfo *u)
|
accept_domain (struct url *u)
|
||||||
{
|
{
|
||||||
assert (u->host != NULL);
|
assert (u->host != NULL);
|
||||||
if (opt.domains)
|
if (opt.domains)
|
||||||
|
@ -20,7 +20,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
#ifndef HOST_H
|
#ifndef HOST_H
|
||||||
#define HOST_H
|
#define HOST_H
|
||||||
|
|
||||||
struct urlinfo;
|
struct url;
|
||||||
|
|
||||||
/* Function declarations */
|
/* Function declarations */
|
||||||
|
|
||||||
@ -31,7 +31,7 @@ void clean_hosts PARAMS ((void));
|
|||||||
|
|
||||||
char *realhost PARAMS ((const char *));
|
char *realhost PARAMS ((const char *));
|
||||||
int same_host PARAMS ((const char *, const char *));
|
int same_host PARAMS ((const char *, const char *));
|
||||||
int accept_domain PARAMS ((struct urlinfo *));
|
int accept_domain PARAMS ((struct url *));
|
||||||
int sufmatch PARAMS ((const char **, const char *));
|
int sufmatch PARAMS ((const char **, const char *));
|
||||||
|
|
||||||
char *ftp_getaddress PARAMS ((void));
|
char *ftp_getaddress PARAMS ((void));
|
||||||
|
264
src/http.c
264
src/http.c
@ -464,16 +464,22 @@ struct http_stat
|
|||||||
long dltime; /* time of the download */
|
long dltime; /* time of the download */
|
||||||
int no_truncate; /* whether truncating the file is
|
int no_truncate; /* whether truncating the file is
|
||||||
forbidden. */
|
forbidden. */
|
||||||
|
const char *referer; /* value of the referer header. */
|
||||||
|
char **local_file; /* local file. */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Free the elements of hstat X. */
|
static void
|
||||||
#define FREEHSTAT(x) do \
|
free_hstat (struct http_stat *hs)
|
||||||
{ \
|
{
|
||||||
FREE_MAYBE ((x).newloc); \
|
FREE_MAYBE (hs->newloc);
|
||||||
FREE_MAYBE ((x).remote_time); \
|
FREE_MAYBE (hs->remote_time);
|
||||||
FREE_MAYBE ((x).error); \
|
FREE_MAYBE (hs->error);
|
||||||
(x).newloc = (x).remote_time = (x).error = NULL; \
|
|
||||||
} while (0)
|
/* Guard against being called twice. */
|
||||||
|
hs->newloc = NULL;
|
||||||
|
hs->remote_time = NULL;
|
||||||
|
hs->error = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
static char *create_authorization_line PARAMS ((const char *, const char *,
|
static char *create_authorization_line PARAMS ((const char *, const char *,
|
||||||
const char *, const char *,
|
const char *, const char *,
|
||||||
@ -499,23 +505,22 @@ time_t http_atotm PARAMS ((char *));
|
|||||||
response code correctly, it is not used in a sane way. The caller
|
response code correctly, it is not used in a sane way. The caller
|
||||||
can do that, though.
|
can do that, though.
|
||||||
|
|
||||||
If u->proxy is non-NULL, the URL u will be taken as a proxy URL,
|
If PROXY is non-NULL, the connection will be made to the proxy
|
||||||
and u->proxy->url will be given to the proxy server (bad naming,
|
server, and u->url will be requested. */
|
||||||
I'm afraid). */
|
|
||||||
static uerr_t
|
static uerr_t
|
||||||
gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy)
|
||||||
{
|
{
|
||||||
char *request, *type, *command, *path;
|
char *request, *type, *command, *full_path;
|
||||||
char *user, *passwd;
|
char *user, *passwd;
|
||||||
char *pragma_h, *referer, *useragent, *range, *wwwauth, *remhost;
|
char *pragma_h, *referer, *useragent, *range, *wwwauth;
|
||||||
char *authenticate_h;
|
char *authenticate_h;
|
||||||
char *proxyauth;
|
char *proxyauth;
|
||||||
char *all_headers;
|
char *all_headers;
|
||||||
char *port_maybe;
|
char *port_maybe;
|
||||||
char *request_keep_alive;
|
char *request_keep_alive;
|
||||||
int sock, hcount, num_written, all_length, remport, statcode;
|
int sock, hcount, num_written, all_length, statcode;
|
||||||
long contlen, contrange;
|
long contlen, contrange;
|
||||||
struct urlinfo *ou;
|
struct url *conn;
|
||||||
uerr_t err;
|
uerr_t err;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
int auth_tried_already;
|
int auth_tried_already;
|
||||||
@ -579,12 +584,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
if (!(*dt & HEAD_ONLY))
|
if (!(*dt & HEAD_ONLY))
|
||||||
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
|
||||||
know the local filename so we can save to it. */
|
know the local filename so we can save to it. */
|
||||||
assert (u->local != NULL);
|
assert (*hs->local_file != NULL);
|
||||||
|
|
||||||
authenticate_h = 0;
|
authenticate_h = 0;
|
||||||
auth_tried_already = 0;
|
auth_tried_already = 0;
|
||||||
|
|
||||||
inhibit_keep_alive = (!opt.http_keep_alive || u->proxy != NULL);
|
inhibit_keep_alive = !opt.http_keep_alive || proxy != NULL;
|
||||||
|
|
||||||
again:
|
again:
|
||||||
/* We need to come back here when the initial attempt to retrieve
|
/* We need to come back here when the initial attempt to retrieve
|
||||||
@ -602,29 +607,29 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
hs->remote_time = NULL;
|
hs->remote_time = NULL;
|
||||||
hs->error = NULL;
|
hs->error = NULL;
|
||||||
|
|
||||||
/* Which structure to use to retrieve the original URL data. */
|
/* If we're using a proxy, we will be connecting to the proxy
|
||||||
if (u->proxy)
|
server. */
|
||||||
ou = u->proxy;
|
conn = proxy ? proxy : u;
|
||||||
else
|
|
||||||
ou = u;
|
|
||||||
|
|
||||||
/* First: establish the connection. */
|
/* First: establish the connection. */
|
||||||
if (inhibit_keep_alive
|
if (inhibit_keep_alive
|
||||||
||
|
||
|
||||||
#ifndef HAVE_SSL
|
#ifndef HAVE_SSL
|
||||||
!persistent_available_p (u->host, u->port)
|
!persistent_available_p (conn->host, conn->port)
|
||||||
#else
|
#else
|
||||||
!persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
|
!persistent_available_p (conn->host, conn->port,
|
||||||
|
u->scheme == SCHEME_HTTPS)
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
|
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "),
|
||||||
err = make_connection (&sock, u->host, u->port);
|
conn->host, conn->port);
|
||||||
|
err = make_connection (&sock, conn->host, conn->port);
|
||||||
switch (err)
|
switch (err)
|
||||||
{
|
{
|
||||||
case HOSTERR:
|
case HOSTERR:
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", u->host, herrmsg (h_errno));
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", conn->host, herrmsg (h_errno));
|
||||||
return HOSTERR;
|
return HOSTERR;
|
||||||
break;
|
break;
|
||||||
case CONSOCKERR:
|
case CONSOCKERR:
|
||||||
@ -635,7 +640,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
case CONREFUSED:
|
case CONREFUSED:
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("Connection to %s:%hu refused.\n"), u->host, u->port);
|
_("Connection to %s:%hu refused.\n"), conn->host,
|
||||||
|
conn->port);
|
||||||
CLOSE (sock);
|
CLOSE (sock);
|
||||||
return CONREFUSED;
|
return CONREFUSED;
|
||||||
case CONERROR:
|
case CONERROR:
|
||||||
@ -653,7 +659,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
if (u->scheme == SCHEME_HTTPS)
|
if (conn->scheme == SCHEME_HTTPS)
|
||||||
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
||||||
{
|
{
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
@ -666,7 +672,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
|
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"),
|
||||||
|
conn->host, conn->port);
|
||||||
/* #### pc_last_fd should be accessed through an accessor
|
/* #### pc_last_fd should be accessed through an accessor
|
||||||
function. */
|
function. */
|
||||||
sock = pc_last_fd;
|
sock = pc_last_fd;
|
||||||
@ -676,22 +683,20 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
DEBUGP (("Reusing fd %d.\n", sock));
|
DEBUGP (("Reusing fd %d.\n", sock));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (u->proxy)
|
|
||||||
path = u->proxy->url;
|
|
||||||
else
|
|
||||||
path = u->path;
|
|
||||||
|
|
||||||
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
|
command = (*dt & HEAD_ONLY) ? "HEAD" : "GET";
|
||||||
|
|
||||||
referer = NULL;
|
referer = NULL;
|
||||||
if (ou->referer)
|
if (hs->referer)
|
||||||
{
|
{
|
||||||
referer = (char *)alloca (9 + strlen (ou->referer) + 3);
|
referer = (char *)alloca (9 + strlen (hs->referer) + 3);
|
||||||
sprintf (referer, "Referer: %s\r\n", ou->referer);
|
sprintf (referer, "Referer: %s\r\n", hs->referer);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*dt & SEND_NOCACHE)
|
if (*dt & SEND_NOCACHE)
|
||||||
pragma_h = "Pragma: no-cache\r\n";
|
pragma_h = "Pragma: no-cache\r\n";
|
||||||
else
|
else
|
||||||
pragma_h = "";
|
pragma_h = "";
|
||||||
|
|
||||||
if (hs->restval)
|
if (hs->restval)
|
||||||
{
|
{
|
||||||
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
|
range = (char *)alloca (13 + numdigit (hs->restval) + 4);
|
||||||
@ -714,9 +719,9 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
sprintf (useragent, "Wget/%s", version_string);
|
sprintf (useragent, "Wget/%s", version_string);
|
||||||
}
|
}
|
||||||
/* Construct the authentication, if userid is present. */
|
/* Construct the authentication, if userid is present. */
|
||||||
user = ou->user;
|
user = u->user;
|
||||||
passwd = ou->passwd;
|
passwd = u->passwd;
|
||||||
search_netrc (ou->host, (const char **)&user, (const char **)&passwd, 0);
|
search_netrc (u->host, (const char **)&user, (const char **)&passwd, 0);
|
||||||
user = user ? user : opt.http_user;
|
user = user ? user : opt.http_user;
|
||||||
passwd = passwd ? passwd : opt.http_passwd;
|
passwd = passwd ? passwd : opt.http_passwd;
|
||||||
|
|
||||||
@ -750,12 +755,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
wwwauth = create_authorization_line (authenticate_h, user, passwd,
|
wwwauth = create_authorization_line (authenticate_h, user, passwd,
|
||||||
command, ou->path);
|
command, u->path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
proxyauth = NULL;
|
proxyauth = NULL;
|
||||||
if (u->proxy)
|
if (proxy)
|
||||||
{
|
{
|
||||||
char *proxy_user, *proxy_passwd;
|
char *proxy_user, *proxy_passwd;
|
||||||
/* For normal username and password, URL components override
|
/* For normal username and password, URL components override
|
||||||
@ -770,31 +775,22 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
proxy_user = u->user;
|
proxy_user = proxy->user;
|
||||||
proxy_passwd = u->passwd;
|
proxy_passwd = proxy->passwd;
|
||||||
}
|
}
|
||||||
/* #### This is junky. Can't the proxy request, say, `Digest'
|
/* #### This does not appear right. Can't the proxy request,
|
||||||
authentication? */
|
say, `Digest' authentication? */
|
||||||
if (proxy_user && proxy_passwd)
|
if (proxy_user && proxy_passwd)
|
||||||
proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
|
proxyauth = basic_authentication_encode (proxy_user, proxy_passwd,
|
||||||
"Proxy-Authorization");
|
"Proxy-Authorization");
|
||||||
}
|
}
|
||||||
remhost = ou->host;
|
|
||||||
remport = ou->port;
|
|
||||||
|
|
||||||
/* String of the form :PORT. Used only for non-standard ports. */
|
/* String of the form :PORT. Used only for non-standard ports. */
|
||||||
port_maybe = NULL;
|
port_maybe = NULL;
|
||||||
if (1
|
if (u->port != scheme_default_port (u->scheme))
|
||||||
#ifdef HAVE_SSL
|
|
||||||
&& remport != (u->scheme == SCHEME_HTTPS
|
|
||||||
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
|
|
||||||
#else
|
|
||||||
&& remport != DEFAULT_HTTP_PORT
|
|
||||||
#endif
|
|
||||||
)
|
|
||||||
{
|
{
|
||||||
port_maybe = (char *)alloca (numdigit (remport) + 2);
|
port_maybe = (char *)alloca (numdigit (u->port) + 2);
|
||||||
sprintf (port_maybe, ":%d", remport);
|
sprintf (port_maybe, ":%d", u->port);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!inhibit_keep_alive)
|
if (!inhibit_keep_alive)
|
||||||
@ -803,18 +799,24 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
|||||||
request_keep_alive = NULL;
|
request_keep_alive = NULL;
|
||||||
|
|
||||||
if (opt.cookies)
|
if (opt.cookies)
|
||||||
cookies = build_cookies_request (ou->host, ou->port, ou->path,
|
cookies = build_cookies_request (u->host, u->port, u->path,
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
ou->scheme == SCHEME_HTTPS
|
u->scheme == SCHEME_HTTPS
|
||||||
#else
|
#else
|
||||||
0
|
0
|
||||||
#endif
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (proxy)
|
||||||
|
full_path = xstrdup (u->url);
|
||||||
|
else
|
||||||
|
full_path = url_full_path (u);
|
||||||
|
|
||||||
/* Allocate the memory for the request. */
|
/* Allocate the memory for the request. */
|
||||||
request = (char *)alloca (strlen (command) + strlen (path)
|
request = (char *)alloca (strlen (command)
|
||||||
|
+ strlen (full_path)
|
||||||
+ strlen (useragent)
|
+ strlen (useragent)
|
||||||
+ strlen (remhost)
|
+ strlen (u->host)
|
||||||
+ (port_maybe ? strlen (port_maybe) : 0)
|
+ (port_maybe ? strlen (port_maybe) : 0)
|
||||||
+ strlen (HTTP_ACCEPT)
|
+ strlen (HTTP_ACCEPT)
|
||||||
+ (request_keep_alive
|
+ (request_keep_alive
|
||||||
@ -834,7 +836,8 @@ User-Agent: %s\r\n\
|
|||||||
Host: %s%s\r\n\
|
Host: %s%s\r\n\
|
||||||
Accept: %s\r\n\
|
Accept: %s\r\n\
|
||||||
%s%s%s%s%s%s%s%s\r\n",
|
%s%s%s%s%s%s%s%s\r\n",
|
||||||
command, path, useragent, remhost,
|
command, full_path,
|
||||||
|
useragent, u->host,
|
||||||
port_maybe ? port_maybe : "",
|
port_maybe ? port_maybe : "",
|
||||||
HTTP_ACCEPT,
|
HTTP_ACCEPT,
|
||||||
request_keep_alive ? request_keep_alive : "",
|
request_keep_alive ? request_keep_alive : "",
|
||||||
@ -846,10 +849,12 @@ Accept: %s\r\n\
|
|||||||
pragma_h,
|
pragma_h,
|
||||||
opt.user_header ? opt.user_header : "");
|
opt.user_header ? opt.user_header : "");
|
||||||
DEBUGP (("---request begin---\n%s---request end---\n", request));
|
DEBUGP (("---request begin---\n%s---request end---\n", request));
|
||||||
|
|
||||||
/* Free the temporary memory. */
|
/* Free the temporary memory. */
|
||||||
FREE_MAYBE (wwwauth);
|
FREE_MAYBE (wwwauth);
|
||||||
FREE_MAYBE (proxyauth);
|
FREE_MAYBE (proxyauth);
|
||||||
FREE_MAYBE (cookies);
|
FREE_MAYBE (cookies);
|
||||||
|
xfree (full_path);
|
||||||
|
|
||||||
/* Send the request to server. */
|
/* Send the request to server. */
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
@ -867,7 +872,7 @@ Accept: %s\r\n\
|
|||||||
return WRITEFAILED;
|
return WRITEFAILED;
|
||||||
}
|
}
|
||||||
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
logprintf (LOG_VERBOSE, _("%s request sent, awaiting response... "),
|
||||||
u->proxy ? "Proxy" : "HTTP");
|
proxy ? "Proxy" : "HTTP");
|
||||||
contlen = contrange = -1;
|
contlen = contrange = -1;
|
||||||
type = NULL;
|
type = NULL;
|
||||||
statcode = -1;
|
statcode = -1;
|
||||||
@ -1075,9 +1080,9 @@ Accept: %s\r\n\
|
|||||||
/* The server has promised that it will not close the connection
|
/* The server has promised that it will not close the connection
|
||||||
when we're done. This means that we can register it. */
|
when we're done. This means that we can register it. */
|
||||||
#ifndef HAVE_SSL
|
#ifndef HAVE_SSL
|
||||||
register_persistent (u->host, u->port, sock);
|
register_persistent (conn->host, conn->port, sock);
|
||||||
#else
|
#else
|
||||||
register_persistent (u->host, u->port, sock, ssl);
|
register_persistent (conn->host, conn->port, sock, ssl);
|
||||||
#endif /* HAVE_SSL */
|
#endif /* HAVE_SSL */
|
||||||
|
|
||||||
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
|
if ((statcode == HTTP_STATUS_UNAUTHORIZED)
|
||||||
@ -1086,7 +1091,7 @@ Accept: %s\r\n\
|
|||||||
/* Authorization is required. */
|
/* Authorization is required. */
|
||||||
FREE_MAYBE (type);
|
FREE_MAYBE (type);
|
||||||
type = NULL;
|
type = NULL;
|
||||||
FREEHSTAT (*hs);
|
free_hstat (hs);
|
||||||
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
||||||
might be more bytes in the body. */
|
might be more bytes in the body. */
|
||||||
if (auth_tried_already)
|
if (auth_tried_already)
|
||||||
@ -1163,16 +1168,17 @@ Accept: %s\r\n\
|
|||||||
text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
text/html file. If some case-insensitive variation on ".htm[l]" isn't
|
||||||
already the file's suffix, tack on ".html". */
|
already the file's suffix, tack on ".html". */
|
||||||
{
|
{
|
||||||
char* last_period_in_local_filename = strrchr(u->local, '.');
|
char* last_period_in_local_filename = strrchr(*hs->local_file, '.');
|
||||||
|
|
||||||
if (last_period_in_local_filename == NULL ||
|
if (last_period_in_local_filename == NULL ||
|
||||||
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
|
!(strcasecmp(last_period_in_local_filename, ".htm") == EQ ||
|
||||||
strcasecmp(last_period_in_local_filename, ".html") == EQ))
|
strcasecmp(last_period_in_local_filename, ".html") == EQ))
|
||||||
{
|
{
|
||||||
size_t local_filename_len = strlen(u->local);
|
size_t local_filename_len = strlen(*hs->local_file);
|
||||||
|
|
||||||
u->local = xrealloc(u->local, local_filename_len + sizeof(".html"));
|
*hs->local_file = xrealloc(*hs->local_file,
|
||||||
strcpy(u->local + local_filename_len, ".html");
|
local_filename_len + sizeof(".html"));
|
||||||
|
strcpy(*hs->local_file + local_filename_len, ".html");
|
||||||
|
|
||||||
*dt |= ADDED_HTML_EXTENSION;
|
*dt |= ADDED_HTML_EXTENSION;
|
||||||
}
|
}
|
||||||
@ -1224,7 +1230,7 @@ Accept: %s\r\n\
|
|||||||
_("\
|
_("\
|
||||||
\n\
|
\n\
|
||||||
Continued download failed on this file, which conflicts with `-c'.\n\
|
Continued download failed on this file, which conflicts with `-c'.\n\
|
||||||
Refusing to truncate existing file `%s'.\n\n"), u->local);
|
Refusing to truncate existing file `%s'.\n\n"), *hs->local_file);
|
||||||
FREE_MAYBE (type);
|
FREE_MAYBE (type);
|
||||||
FREE_MAYBE (all_headers);
|
FREE_MAYBE (all_headers);
|
||||||
CLOSE_INVALIDATE (sock);
|
CLOSE_INVALIDATE (sock);
|
||||||
@ -1300,13 +1306,13 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
|
|||||||
/* Open the local file. */
|
/* Open the local file. */
|
||||||
if (!opt.dfp)
|
if (!opt.dfp)
|
||||||
{
|
{
|
||||||
mkalldirs (u->local);
|
mkalldirs (*hs->local_file);
|
||||||
if (opt.backups)
|
if (opt.backups)
|
||||||
rotate_backups (u->local);
|
rotate_backups (*hs->local_file);
|
||||||
fp = fopen (u->local, hs->restval ? "ab" : "wb");
|
fp = fopen (*hs->local_file, hs->restval ? "ab" : "wb");
|
||||||
if (!fp)
|
if (!fp)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s\n", u->local, strerror (errno));
|
logprintf (LOG_NOTQUIET, "%s: %s\n", *hs->local_file, strerror (errno));
|
||||||
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
CLOSE_INVALIDATE (sock); /* would be CLOSE_FINISH, but there
|
||||||
might be more bytes in the body. */
|
might be more bytes in the body. */
|
||||||
FREE_MAYBE (all_headers);
|
FREE_MAYBE (all_headers);
|
||||||
@ -1375,7 +1381,8 @@ Refusing to truncate existing file `%s'.\n\n"), u->local);
|
|||||||
/* The genuine HTTP loop! This is the part where the retrieval is
|
/* The genuine HTTP loop! This is the part where the retrieval is
|
||||||
retried, and retried, and retried, and... */
|
retried, and retried, and retried, and... */
|
||||||
uerr_t
|
uerr_t
|
||||||
http_loop (struct urlinfo *u, char **newloc, int *dt)
|
http_loop (struct url *u, char **newloc, char **local_file, const char *referer,
|
||||||
|
int *dt, struct url *proxy)
|
||||||
{
|
{
|
||||||
int count;
|
int count;
|
||||||
int use_ts, got_head = 0; /* time-stamping info */
|
int use_ts, got_head = 0; /* time-stamping info */
|
||||||
@ -1388,6 +1395,7 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
|||||||
size_t filename_len;
|
size_t filename_len;
|
||||||
struct http_stat hstat; /* HTTP status */
|
struct http_stat hstat; /* HTTP status */
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
char *dummy = NULL;
|
||||||
|
|
||||||
/* This used to be done in main(), but it's a better idea to do it
|
/* This used to be done in main(), but it's a better idea to do it
|
||||||
here so that we don't go through the hoops if we're just using
|
here so that we don't go through the hoops if we're just using
|
||||||
@ -1407,34 +1415,46 @@ http_loop (struct urlinfo *u, char **newloc, int *dt)
|
|||||||
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
logputs (LOG_VERBOSE, _("Warning: wildcards not supported in HTTP.\n"));
|
||||||
|
|
||||||
/* Determine the local filename. */
|
/* Determine the local filename. */
|
||||||
if (!u->local)
|
if (local_file && *local_file)
|
||||||
u->local = url_filename (u->proxy ? u->proxy : u);
|
hstat.local_file = local_file;
|
||||||
|
else if (local_file)
|
||||||
|
{
|
||||||
|
*local_file = url_filename (u);
|
||||||
|
hstat.local_file = local_file;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
dummy = url_filename (u);
|
||||||
|
hstat.local_file = &dummy;
|
||||||
|
}
|
||||||
|
|
||||||
if (!opt.output_document)
|
if (!opt.output_document)
|
||||||
locf = u->local;
|
locf = *hstat.local_file;
|
||||||
else
|
else
|
||||||
locf = opt.output_document;
|
locf = opt.output_document;
|
||||||
|
|
||||||
filename_len = strlen (u->local);
|
hstat.referer = referer;
|
||||||
|
|
||||||
|
filename_len = strlen (*hstat.local_file);
|
||||||
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
filename_plus_orig_suffix = alloca (filename_len + sizeof (".orig"));
|
||||||
|
|
||||||
if (opt.noclobber && file_exists_p (u->local))
|
if (opt.noclobber && file_exists_p (*hstat.local_file))
|
||||||
{
|
{
|
||||||
/* If opt.noclobber is turned on and file already exists, do not
|
/* If opt.noclobber is turned on and file already exists, do not
|
||||||
retrieve the file */
|
retrieve the file */
|
||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
File `%s' already there, will not retrieve.\n"), u->local);
|
File `%s' already there, will not retrieve.\n"), *hstat.local_file);
|
||||||
/* If the file is there, we suppose it's retrieved OK. */
|
/* If the file is there, we suppose it's retrieved OK. */
|
||||||
*dt |= RETROKF;
|
*dt |= RETROKF;
|
||||||
|
|
||||||
/* #### Bogusness alert. */
|
/* #### Bogusness alert. */
|
||||||
/* If its suffix is "html" or (yuck!) "htm", we suppose it's
|
/* If its suffix is "html" or "htm", assume text/html. */
|
||||||
text/html, a harmless lie. */
|
if (((suf = suffix (*hstat.local_file)) != NULL)
|
||||||
if (((suf = suffix (u->local)) != NULL)
|
|
||||||
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
&& (!strcmp (suf, "html") || !strcmp (suf, "htm")))
|
||||||
*dt |= TEXTHTML;
|
*dt |= TEXTHTML;
|
||||||
xfree (suf);
|
xfree (suf);
|
||||||
/* Another harmless lie: */
|
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1461,7 +1481,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
in url.c. Replacing sprintf with inline calls to
|
in url.c. Replacing sprintf with inline calls to
|
||||||
strcpy() and long_to_string() made a difference.
|
strcpy() and long_to_string() made a difference.
|
||||||
--hniksic */
|
--hniksic */
|
||||||
memcpy (filename_plus_orig_suffix, u->local, filename_len);
|
memcpy (filename_plus_orig_suffix, *hstat.local_file, filename_len);
|
||||||
memcpy (filename_plus_orig_suffix + filename_len,
|
memcpy (filename_plus_orig_suffix + filename_len,
|
||||||
".orig", sizeof (".orig"));
|
".orig", sizeof (".orig"));
|
||||||
|
|
||||||
@ -1475,8 +1495,8 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
|
|
||||||
if (!local_dot_orig_file_exists)
|
if (!local_dot_orig_file_exists)
|
||||||
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
/* Couldn't stat() <file>.orig, so try to stat() <file>. */
|
||||||
if (stat (u->local, &st) == 0)
|
if (stat (*hstat.local_file, &st) == 0)
|
||||||
local_filename = u->local;
|
local_filename = *hstat.local_file;
|
||||||
|
|
||||||
if (local_filename != NULL)
|
if (local_filename != NULL)
|
||||||
/* There was a local file, so we'll check later to see if the version
|
/* There was a local file, so we'll check later to see if the version
|
||||||
@ -1503,7 +1523,7 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
/* Print fetch message, if opt.verbose. */
|
/* Print fetch message, if opt.verbose. */
|
||||||
if (opt.verbose)
|
if (opt.verbose)
|
||||||
{
|
{
|
||||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
char *hurl = url_string (u, 1);
|
||||||
char tmp[15];
|
char tmp[15];
|
||||||
strcpy (tmp, " ");
|
strcpy (tmp, " ");
|
||||||
if (count > 1)
|
if (count > 1)
|
||||||
@ -1545,22 +1565,22 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
Some proxies are notorious for caching incomplete data, so
|
Some proxies are notorious for caching incomplete data, so
|
||||||
we require a fresh get.
|
we require a fresh get.
|
||||||
b) caching is explicitly inhibited. */
|
b) caching is explicitly inhibited. */
|
||||||
if ((u->proxy && count > 1) /* a */
|
if ((proxy && count > 1) /* a */
|
||||||
|| !opt.allow_cache /* b */
|
|| !opt.allow_cache /* b */
|
||||||
)
|
)
|
||||||
*dt |= SEND_NOCACHE;
|
*dt |= SEND_NOCACHE;
|
||||||
else
|
else
|
||||||
*dt &= ~SEND_NOCACHE;
|
*dt &= ~SEND_NOCACHE;
|
||||||
|
|
||||||
/* Try fetching the document, or at least its head. :-) */
|
/* Try fetching the document, or at least its head. */
|
||||||
err = gethttp (u, &hstat, dt);
|
err = gethttp (u, &hstat, dt, proxy);
|
||||||
|
|
||||||
/* It's unfortunate that wget determines the local filename before finding
|
/* It's unfortunate that wget determines the local filename before finding
|
||||||
out the Content-Type of the file. Barring a major restructuring of the
|
out the Content-Type of the file. Barring a major restructuring of the
|
||||||
code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
code, we need to re-set locf here, since gethttp() may have xrealloc()d
|
||||||
u->local to tack on ".html". */
|
*hstat.local_file to tack on ".html". */
|
||||||
if (!opt.output_document)
|
if (!opt.output_document)
|
||||||
locf = u->local;
|
locf = *hstat.local_file;
|
||||||
else
|
else
|
||||||
locf = opt.output_document;
|
locf = opt.output_document;
|
||||||
|
|
||||||
@ -1577,29 +1597,32 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
/* Non-fatal errors continue executing the loop, which will
|
/* Non-fatal errors continue executing the loop, which will
|
||||||
bring them to "while" statement at the end, to judge
|
bring them to "while" statement at the end, to judge
|
||||||
whether the number of tries was exceeded. */
|
whether the number of tries was exceeded. */
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
continue;
|
continue;
|
||||||
break;
|
break;
|
||||||
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
|
case HOSTERR: case CONREFUSED: case PROXERR: case AUTHFAILED:
|
||||||
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
|
case SSLERRCTXCREATE: case CONTNOTSUPPORTED:
|
||||||
/* Fatal errors just return from the function. */
|
/* Fatal errors just return from the function. */
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return err;
|
return err;
|
||||||
break;
|
break;
|
||||||
case FWRITEERR: case FOPENERR:
|
case FWRITEERR: case FOPENERR:
|
||||||
/* Another fatal error. */
|
/* Another fatal error. */
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
logprintf (LOG_NOTQUIET, _("Cannot write to `%s' (%s).\n"),
|
||||||
u->local, strerror (errno));
|
*hstat.local_file, strerror (errno));
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return err;
|
return err;
|
||||||
break;
|
break;
|
||||||
case CONSSLERR:
|
case CONSSLERR:
|
||||||
/* Another fatal error. */
|
/* Another fatal error. */
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
logprintf (LOG_NOTQUIET, _("Unable to establish SSL connection.\n"));
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return err;
|
return err;
|
||||||
break;
|
break;
|
||||||
case NEWLOCATION:
|
case NEWLOCATION:
|
||||||
@ -1609,14 +1632,18 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
logprintf (LOG_NOTQUIET,
|
logprintf (LOG_NOTQUIET,
|
||||||
_("ERROR: Redirection (%d) without location.\n"),
|
_("ERROR: Redirection (%d) without location.\n"),
|
||||||
hstat.statcode);
|
hstat.statcode);
|
||||||
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return NEWLOCATION;
|
return NEWLOCATION;
|
||||||
break;
|
break;
|
||||||
case RETRUNNEEDED:
|
case RETRUNNEEDED:
|
||||||
/* The file was already fully retrieved. */
|
/* The file was already fully retrieved. */
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
break;
|
break;
|
||||||
case RETRFINISHED:
|
case RETRFINISHED:
|
||||||
@ -1631,14 +1658,15 @@ File `%s' already there, will not retrieve.\n"), u->local);
|
|||||||
if (!opt.verbose)
|
if (!opt.verbose)
|
||||||
{
|
{
|
||||||
/* #### Ugly ugly ugly! */
|
/* #### Ugly ugly ugly! */
|
||||||
char *hurl = str_url (u->proxy ? u->proxy : u, 1);
|
char *hurl = url_string (u, 1);
|
||||||
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
|
logprintf (LOG_NONVERBOSE, "%s:\n", hurl);
|
||||||
xfree (hurl);
|
xfree (hurl);
|
||||||
}
|
}
|
||||||
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
|
logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"),
|
||||||
tms, hstat.statcode, hstat.error);
|
tms, hstat.statcode, hstat.error);
|
||||||
logputs (LOG_VERBOSE, "\n");
|
logputs (LOG_VERBOSE, "\n");
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1681,7 +1709,8 @@ Last-modified header invalid -- time-stamp ignored.\n"));
|
|||||||
logprintf (LOG_VERBOSE, _("\
|
logprintf (LOG_VERBOSE, _("\
|
||||||
Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
Server file no newer than local file `%s' -- not retrieving.\n\n"),
|
||||||
local_filename);
|
local_filename);
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (tml >= tmr)
|
else if (tml >= tmr)
|
||||||
@ -1691,7 +1720,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
logputs (LOG_VERBOSE,
|
logputs (LOG_VERBOSE,
|
||||||
_("Remote file is newer, retrieving.\n"));
|
_("Remote file is newer, retrieving.\n"));
|
||||||
}
|
}
|
||||||
FREEHSTAT (hstat);
|
free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if ((tmr != (time_t) (-1))
|
if ((tmr != (time_t) (-1))
|
||||||
@ -1710,7 +1739,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
fl = opt.output_document;
|
fl = opt.output_document;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fl = u->local;
|
fl = *hstat.local_file;
|
||||||
if (fl)
|
if (fl)
|
||||||
touch (fl, tmr);
|
touch (fl, tmr);
|
||||||
}
|
}
|
||||||
@ -1719,13 +1748,10 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
if (opt.spider)
|
if (opt.spider)
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
|
logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* It is now safe to free the remainder of hstat, since the
|
|
||||||
strings within it will no longer be used. */
|
|
||||||
FREEHSTAT (hstat);
|
|
||||||
|
|
||||||
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
|
tmrate = rate (hstat.len - hstat.restval, hstat.dltime, 0);
|
||||||
|
|
||||||
if (hstat.len == hstat.contlen)
|
if (hstat.len == hstat.contlen)
|
||||||
@ -1748,6 +1774,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
else
|
else
|
||||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||||
|
|
||||||
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (hstat.res == 0) /* No read error */
|
else if (hstat.res == 0) /* No read error */
|
||||||
@ -1773,6 +1801,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
else
|
else
|
||||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||||
|
|
||||||
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
else if (hstat.len < hstat.contlen) /* meaning we lost the
|
||||||
@ -1782,6 +1812,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
_("%s (%s) - Connection closed at byte %ld. "),
|
_("%s (%s) - Connection closed at byte %ld. "),
|
||||||
tms, tmrate, hstat.len);
|
tms, tmrate, hstat.len);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
|
free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else if (!opt.kill_longer) /* meaning we got more than expected */
|
else if (!opt.kill_longer) /* meaning we got more than expected */
|
||||||
@ -1801,6 +1832,8 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
else
|
else
|
||||||
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
downloaded_file(FILE_DOWNLOADED_NORMALLY, locf);
|
||||||
|
|
||||||
|
free_hstat (&hstat);
|
||||||
|
FREE_MAYBE (dummy);
|
||||||
return RETROK;
|
return RETROK;
|
||||||
}
|
}
|
||||||
else /* the same, but not accepted */
|
else /* the same, but not accepted */
|
||||||
@ -1809,6 +1842,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
_("%s (%s) - Connection closed at byte %ld/%ld. "),
|
_("%s (%s) - Connection closed at byte %ld/%ld. "),
|
||||||
tms, tmrate, hstat.len, hstat.contlen);
|
tms, tmrate, hstat.len, hstat.contlen);
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
|
free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1820,6 +1854,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
_("%s (%s) - Read error at byte %ld (%s)."),
|
_("%s (%s) - Read error at byte %ld (%s)."),
|
||||||
tms, tmrate, hstat.len, strerror (errno));
|
tms, tmrate, hstat.len, strerror (errno));
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
|
free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
else /* hstat.res == -1 and contlen is given */
|
else /* hstat.res == -1 and contlen is given */
|
||||||
@ -1829,6 +1864,7 @@ The sizes do not match (local %ld) -- retrieving.\n"), local_size);
|
|||||||
tms, tmrate, hstat.len, hstat.contlen,
|
tms, tmrate, hstat.len, hstat.contlen,
|
||||||
strerror (errno));
|
strerror (errno));
|
||||||
printwhat (count, opt.ntry);
|
printwhat (count, opt.ntry);
|
||||||
|
free_hstat (&hstat);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -727,7 +727,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
|
|||||||
/* Fill in the arguments. */
|
/* Fill in the arguments. */
|
||||||
for (i = 0; i < nurl; i++, optind++)
|
for (i = 0; i < nurl; i++, optind++)
|
||||||
{
|
{
|
||||||
char *rewritten = rewrite_url_maybe (argv[optind]);
|
char *rewritten = rewrite_shorthand_url (argv[optind]);
|
||||||
if (rewritten)
|
if (rewritten)
|
||||||
{
|
{
|
||||||
printf ("Converted %s to %s\n", argv[optind], rewritten);
|
printf ("Converted %s to %s\n", argv[optind], rewritten);
|
||||||
@ -845,10 +845,12 @@ Can't timestamp and not clobber old files at the same time.\n"));
|
|||||||
{
|
{
|
||||||
convert_all_links ();
|
convert_all_links ();
|
||||||
}
|
}
|
||||||
|
|
||||||
log_close ();
|
log_close ();
|
||||||
for (i = 0; i < nurl; i++)
|
for (i = 0; i < nurl; i++)
|
||||||
free (url[i]);
|
xfree (url[i]);
|
||||||
cleanup ();
|
cleanup ();
|
||||||
|
|
||||||
#ifdef DEBUG_MALLOC
|
#ifdef DEBUG_MALLOC
|
||||||
print_malloc_debug_stats ();
|
print_malloc_debug_stats ();
|
||||||
#endif
|
#endif
|
||||||
|
46
src/recur.c
46
src/recur.c
@ -120,9 +120,8 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
int dt, inl, dash_p_leaf_HTML = FALSE;
|
int dt, inl, dash_p_leaf_HTML = FALSE;
|
||||||
int meta_disallow_follow;
|
int meta_disallow_follow;
|
||||||
int this_url_ftp; /* See below the explanation */
|
int this_url_ftp; /* See below the explanation */
|
||||||
uerr_t err;
|
|
||||||
urlpos *url_list, *cur_url;
|
urlpos *url_list, *cur_url;
|
||||||
struct urlinfo *u;
|
struct url *u;
|
||||||
|
|
||||||
assert (this_url != NULL);
|
assert (this_url != NULL);
|
||||||
assert (file != NULL);
|
assert (file != NULL);
|
||||||
@ -140,9 +139,8 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
hash_table_clear (undesirable_urls);
|
hash_table_clear (undesirable_urls);
|
||||||
string_set_add (undesirable_urls, this_url);
|
string_set_add (undesirable_urls, this_url);
|
||||||
/* Enter this_url to the hash table, in original and "enhanced" form. */
|
/* Enter this_url to the hash table, in original and "enhanced" form. */
|
||||||
u = newurl ();
|
u = url_parse (this_url, NULL);
|
||||||
err = parseurl (this_url, u, 0);
|
if (u)
|
||||||
if (err == URLOK)
|
|
||||||
{
|
{
|
||||||
string_set_add (undesirable_urls, u->url);
|
string_set_add (undesirable_urls, u->url);
|
||||||
if (opt.no_parent)
|
if (opt.no_parent)
|
||||||
@ -156,7 +154,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
|
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
|
||||||
base_dir = NULL;
|
base_dir = NULL;
|
||||||
}
|
}
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
depth = 1;
|
depth = 1;
|
||||||
first_time = 0;
|
first_time = 0;
|
||||||
}
|
}
|
||||||
@ -210,11 +208,10 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
break;
|
break;
|
||||||
/* Parse the URL for convenient use in other functions, as well
|
/* Parse the URL for convenient use in other functions, as well
|
||||||
as to get the optimized form. It also checks URL integrity. */
|
as to get the optimized form. It also checks URL integrity. */
|
||||||
u = newurl ();
|
u = url_parse (cur_url->url, NULL);
|
||||||
if (parseurl (cur_url->url, u, 0) != URLOK)
|
if (!u)
|
||||||
{
|
{
|
||||||
DEBUGP (("Yuck! A bad URL.\n"));
|
DEBUGP (("Yuck! A bad URL.\n"));
|
||||||
freeurl (u, 1);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
assert (u->url != NULL);
|
assert (u->url != NULL);
|
||||||
@ -281,8 +278,8 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
if (!(base_dir && frontcmp (base_dir, u->dir)))
|
if (!(base_dir && frontcmp (base_dir, u->dir)))
|
||||||
{
|
{
|
||||||
/* Failing that, check for parent dir. */
|
/* Failing that, check for parent dir. */
|
||||||
struct urlinfo *ut = newurl ();
|
struct url *ut = url_parse (this_url, NULL);
|
||||||
if (parseurl (this_url, ut, 0) != URLOK)
|
if (!ut)
|
||||||
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
|
DEBUGP (("Double yuck! The *base* URL is broken.\n"));
|
||||||
else if (!frontcmp (ut->dir, u->dir))
|
else if (!frontcmp (ut->dir, u->dir))
|
||||||
{
|
{
|
||||||
@ -291,7 +288,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
string_set_add (undesirable_urls, constr);
|
string_set_add (undesirable_urls, constr);
|
||||||
inl = 1;
|
inl = 1;
|
||||||
}
|
}
|
||||||
freeurl (ut, 1);
|
url_free (ut);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* If the file does not match the acceptance list, or is on the
|
/* If the file does not match the acceptance list, or is on the
|
||||||
@ -343,7 +340,16 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
if (!inl)
|
if (!inl)
|
||||||
{
|
{
|
||||||
if (!opt.simple_check)
|
if (!opt.simple_check)
|
||||||
opt_url (u);
|
{
|
||||||
|
/* Find the "true" host. */
|
||||||
|
char *host = realhost (u->host);
|
||||||
|
xfree (u->host);
|
||||||
|
u->host = host;
|
||||||
|
|
||||||
|
/* Refresh the printed representation of the URL. */
|
||||||
|
xfree (u->url);
|
||||||
|
u->url = url_string (u, 0);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
char *p;
|
char *p;
|
||||||
@ -351,7 +357,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
for (p = u->host; *p; p++)
|
for (p = u->host; *p; p++)
|
||||||
*p = TOLOWER (*p);
|
*p = TOLOWER (*p);
|
||||||
xfree (u->url);
|
xfree (u->url);
|
||||||
u->url = str_url (u, 0);
|
u->url = url_string (u, 0);
|
||||||
}
|
}
|
||||||
xfree (constr);
|
xfree (constr);
|
||||||
constr = xstrdup (u->url);
|
constr = xstrdup (u->url);
|
||||||
@ -473,7 +479,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
|||||||
/* Free filename and constr. */
|
/* Free filename and constr. */
|
||||||
FREE_MAYBE (filename);
|
FREE_MAYBE (filename);
|
||||||
FREE_MAYBE (constr);
|
FREE_MAYBE (constr);
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
/* Increment the pbuf for the appropriate size. */
|
/* Increment the pbuf for the appropriate size. */
|
||||||
}
|
}
|
||||||
if (opt.convert_links && !opt.delete_after)
|
if (opt.convert_links && !opt.delete_after)
|
||||||
@ -573,13 +579,9 @@ convert_all_links (void)
|
|||||||
char *local_name;
|
char *local_name;
|
||||||
|
|
||||||
/* The URL must be in canonical form to be compared. */
|
/* The URL must be in canonical form to be compared. */
|
||||||
struct urlinfo *u = newurl ();
|
struct url *u = url_parse (cur_url->url, NULL);
|
||||||
uerr_t res = parseurl (cur_url->url, u, 0);
|
if (!u)
|
||||||
if (res != URLOK)
|
|
||||||
{
|
|
||||||
freeurl (u, 1);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
/* We decide the direction of conversion according to whether
|
/* We decide the direction of conversion according to whether
|
||||||
a URL was downloaded. Downloaded URLs will be converted
|
a URL was downloaded. Downloaded URLs will be converted
|
||||||
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
|
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
|
||||||
@ -608,7 +610,7 @@ convert_all_links (void)
|
|||||||
cur_url->convert = CO_CONVERT_TO_COMPLETE;
|
cur_url->convert = CO_CONVERT_TO_COMPLETE;
|
||||||
cur_url->local_name = NULL;
|
cur_url->local_name = NULL;
|
||||||
}
|
}
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
}
|
}
|
||||||
/* Convert the links in the file. */
|
/* Convert the links in the file. */
|
||||||
convert_links (html->string, urls);
|
convert_links (html->string, urls);
|
||||||
|
135
src/retr.c
135
src/retr.c
@ -51,9 +51,6 @@ extern int errno;
|
|||||||
int global_download_count;
|
int global_download_count;
|
||||||
|
|
||||||
void logflush PARAMS ((void));
|
void logflush PARAMS ((void));
|
||||||
|
|
||||||
/* From http.c. */
|
|
||||||
uerr_t http_loop PARAMS ((struct urlinfo *, char **, int *));
|
|
||||||
|
|
||||||
/* Flags for show_progress(). */
|
/* Flags for show_progress(). */
|
||||||
enum spflags { SP_NONE, SP_INIT, SP_FINISH };
|
enum spflags { SP_NONE, SP_INIT, SP_FINISH };
|
||||||
@ -314,9 +311,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
uerr_t result;
|
uerr_t result;
|
||||||
char *url;
|
char *url;
|
||||||
int location_changed, dummy;
|
int location_changed, dummy;
|
||||||
int local_use_proxy;
|
int use_proxy;
|
||||||
char *mynewloc, *proxy;
|
char *mynewloc, *proxy;
|
||||||
struct urlinfo *u;
|
struct url *u;
|
||||||
|
int up_error_code; /* url parse error code */
|
||||||
|
char *local_file;
|
||||||
struct hash_table *redirections = NULL;
|
struct hash_table *redirections = NULL;
|
||||||
|
|
||||||
/* If dt is NULL, just ignore it. */
|
/* If dt is NULL, just ignore it. */
|
||||||
@ -328,80 +327,74 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
if (file)
|
if (file)
|
||||||
*file = NULL;
|
*file = NULL;
|
||||||
|
|
||||||
u = newurl ();
|
u = url_parse (url, &up_error_code);
|
||||||
/* Parse the URL. */
|
if (!u)
|
||||||
result = parseurl (url, u, 0);
|
|
||||||
if (result != URLOK)
|
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, uerrmsg (result));
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
|
||||||
freeurl (u, 1);
|
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
return result;
|
return URLERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!refurl)
|
||||||
|
refurl = opt.referer;
|
||||||
|
|
||||||
redirected:
|
redirected:
|
||||||
|
|
||||||
/* Set the referer. */
|
result = NOCONERROR;
|
||||||
if (refurl)
|
mynewloc = NULL;
|
||||||
u->referer = xstrdup (refurl);
|
local_file = NULL;
|
||||||
else
|
|
||||||
{
|
|
||||||
if (opt.referer)
|
|
||||||
u->referer = xstrdup (opt.referer);
|
|
||||||
else
|
|
||||||
u->referer = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
local_use_proxy = USE_PROXY_P (u);
|
use_proxy = USE_PROXY_P (u);
|
||||||
if (local_use_proxy)
|
if (use_proxy)
|
||||||
{
|
{
|
||||||
struct urlinfo *pu = newurl ();
|
struct url *proxy_url;
|
||||||
|
|
||||||
/* Copy the original URL to new location. */
|
/* Get the proxy server for the current scheme. */
|
||||||
memcpy (pu, u, sizeof (*u));
|
proxy = getproxy (u->scheme);
|
||||||
pu->proxy = NULL; /* A minor correction :) */
|
|
||||||
/* Initialize u to nil. */
|
|
||||||
memset (u, 0, sizeof (*u));
|
|
||||||
u->proxy = pu;
|
|
||||||
/* Get the appropriate proxy server, appropriate for the
|
|
||||||
current scheme. */
|
|
||||||
proxy = getproxy (pu->scheme);
|
|
||||||
if (!proxy)
|
if (!proxy)
|
||||||
{
|
{
|
||||||
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
return PROXERR;
|
return PROXERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Parse the proxy URL. */
|
/* Parse the proxy URL. */
|
||||||
result = parseurl (proxy, u, 0);
|
proxy_url = url_parse (proxy, &up_error_code);
|
||||||
if (result != URLOK || u->scheme != SCHEME_HTTP)
|
if (!proxy_url)
|
||||||
{
|
{
|
||||||
if (u->scheme == SCHEME_HTTP)
|
logprintf (LOG_NOTQUIET, "Error parsing proxy URL %s: %s.\n",
|
||||||
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
|
proxy, url_error (up_error_code));
|
||||||
else
|
|
||||||
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
|
||||||
freeurl (u, 1);
|
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
return PROXERR;
|
return PROXERR;
|
||||||
}
|
}
|
||||||
u->scheme = SCHEME_HTTP;
|
if (proxy_url->scheme != SCHEME_HTTP)
|
||||||
|
{
|
||||||
|
logprintf (LOG_NOTQUIET, _("Error in proxy URL %s: Must be HTTP.\n"), proxy);
|
||||||
|
url_free (proxy_url);
|
||||||
|
if (redirections)
|
||||||
|
string_set_free (redirections);
|
||||||
|
xfree (url);
|
||||||
|
return PROXERR;
|
||||||
}
|
}
|
||||||
|
|
||||||
mynewloc = NULL;
|
result = http_loop (u, &mynewloc, &local_file, refurl, dt, proxy_url);
|
||||||
|
url_free (proxy_url);
|
||||||
if (u->scheme == SCHEME_HTTP
|
}
|
||||||
|
else if (u->scheme == SCHEME_HTTP
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
|| u->scheme == SCHEME_HTTPS
|
|| u->scheme == SCHEME_HTTPS
|
||||||
#endif
|
#endif
|
||||||
)
|
)
|
||||||
result = http_loop (u, &mynewloc, dt);
|
{
|
||||||
|
result = http_loop (u, &mynewloc, &local_file, refurl, dt, NULL);
|
||||||
|
}
|
||||||
else if (u->scheme == SCHEME_FTP)
|
else if (u->scheme == SCHEME_FTP)
|
||||||
{
|
{
|
||||||
/* If this is a redirection, we must not allow recursive FTP
|
/* If this is a redirection, we must not allow recursive FTP
|
||||||
@ -412,13 +405,11 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
opt.recursive = 0;
|
opt.recursive = 0;
|
||||||
result = ftp_loop (u, dt);
|
result = ftp_loop (u, dt);
|
||||||
opt.recursive = oldrec;
|
opt.recursive = oldrec;
|
||||||
|
#if 0
|
||||||
/* There is a possibility of having HTTP being redirected to
|
/* There is a possibility of having HTTP being redirected to
|
||||||
FTP. In these cases we must decide whether the text is HTML
|
FTP. In these cases we must decide whether the text is HTML
|
||||||
according to the suffix. The HTML suffixes are `.html' and
|
according to the suffix. The HTML suffixes are `.html' and
|
||||||
`.htm', case-insensitive.
|
`.htm', case-insensitive. */
|
||||||
|
|
||||||
#### All of this is, of course, crap. These types should be
|
|
||||||
determined through mailcap. */
|
|
||||||
if (redirections && u->local && (u->scheme == SCHEME_FTP))
|
if (redirections && u->local && (u->scheme == SCHEME_FTP))
|
||||||
{
|
{
|
||||||
char *suf = suffix (u->local);
|
char *suf = suffix (u->local);
|
||||||
@ -426,16 +417,19 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
*dt |= TEXTHTML;
|
*dt |= TEXTHTML;
|
||||||
FREE_MAYBE (suf);
|
FREE_MAYBE (suf);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
location_changed = (result == NEWLOCATION);
|
location_changed = (result == NEWLOCATION);
|
||||||
if (location_changed)
|
if (location_changed)
|
||||||
{
|
{
|
||||||
char *construced_newloc;
|
char *construced_newloc;
|
||||||
uerr_t newloc_result;
|
struct url *newloc_struct;
|
||||||
struct urlinfo *newloc_struct;
|
|
||||||
|
|
||||||
assert (mynewloc != NULL);
|
assert (mynewloc != NULL);
|
||||||
|
|
||||||
|
if (local_file)
|
||||||
|
xfree (local_file);
|
||||||
|
|
||||||
/* The HTTP specs only allow absolute URLs to appear in
|
/* The HTTP specs only allow absolute URLs to appear in
|
||||||
redirects, but a ton of boneheaded webservers and CGIs out
|
redirects, but a ton of boneheaded webservers and CGIs out
|
||||||
there break the rules and use relative URLs, and popular
|
there break the rules and use relative URLs, and popular
|
||||||
@ -445,13 +439,12 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
mynewloc = construced_newloc;
|
mynewloc = construced_newloc;
|
||||||
|
|
||||||
/* Now, see if this new location makes sense. */
|
/* Now, see if this new location makes sense. */
|
||||||
newloc_struct = newurl ();
|
newloc_struct = url_parse (mynewloc, NULL);
|
||||||
newloc_result = parseurl (mynewloc, newloc_struct, 1);
|
if (!newloc_struct)
|
||||||
if (newloc_result != URLOK)
|
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, uerrmsg (newloc_result));
|
logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, "UNKNOWN");
|
||||||
freeurl (newloc_struct, 1);
|
url_free (newloc_struct);
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
@ -473,14 +466,14 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
string_set_add (redirections, u->url);
|
string_set_add (redirections, u->url);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The new location is OK. Let's check for redirection cycle by
|
/* The new location is OK. Check for redirection cycle by
|
||||||
peeking through the history of redirections. */
|
peeking through the history of redirections. */
|
||||||
if (string_set_contains (redirections, newloc_struct->url))
|
if (string_set_contains (redirections, newloc_struct->url))
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
|
logprintf (LOG_NOTQUIET, _("%s: Redirection cycle detected.\n"),
|
||||||
mynewloc);
|
mynewloc);
|
||||||
freeurl (newloc_struct, 1);
|
url_free (newloc_struct);
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
xfree (url);
|
xfree (url);
|
||||||
@ -491,29 +484,27 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
|
|
||||||
xfree (url);
|
xfree (url);
|
||||||
url = mynewloc;
|
url = mynewloc;
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
u = newloc_struct;
|
u = newloc_struct;
|
||||||
goto redirected;
|
goto redirected;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (u->local)
|
if (local_file)
|
||||||
{
|
{
|
||||||
if (*dt & RETROKF)
|
if (*dt & RETROKF)
|
||||||
{
|
{
|
||||||
register_download (url, u->local);
|
register_download (url, local_file);
|
||||||
if (*dt & TEXTHTML)
|
if (*dt & TEXTHTML)
|
||||||
register_html (url, u->local);
|
register_html (url, local_file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file)
|
if (file)
|
||||||
{
|
*file = local_file ? local_file : NULL;
|
||||||
if (u->local)
|
|
||||||
*file = xstrdup (u->local);
|
|
||||||
else
|
else
|
||||||
*file = NULL;
|
FREE_MAYBE (local_file);
|
||||||
}
|
|
||||||
freeurl (u, 1);
|
url_free (u);
|
||||||
if (redirections)
|
if (redirections)
|
||||||
string_set_free (redirections);
|
string_set_free (redirections);
|
||||||
|
|
||||||
|
@ -36,4 +36,12 @@ int downloaded_exceeds_quota PARAMS ((void));
|
|||||||
|
|
||||||
void sleep_between_retrievals PARAMS ((int));
|
void sleep_between_retrievals PARAMS ((int));
|
||||||
|
|
||||||
|
/* Because there's no http.h. */
|
||||||
|
|
||||||
|
struct url;
|
||||||
|
|
||||||
|
uerr_t http_loop PARAMS ((struct url *, char **, char **, const char *,
|
||||||
|
int *, struct url *));
|
||||||
|
|
||||||
|
|
||||||
#endif /* RETR_H */
|
#endif /* RETR_H */
|
||||||
|
59
src/url.h
59
src/url.h
@ -25,6 +25,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
#define DEFAULT_FTP_PORT 21
|
#define DEFAULT_FTP_PORT 21
|
||||||
#define DEFAULT_HTTPS_PORT 443
|
#define DEFAULT_HTTPS_PORT 443
|
||||||
|
|
||||||
|
/* Note: the ordering here is related to the order of elements in
|
||||||
|
`supported_schemes' in url.c. */
|
||||||
|
|
||||||
enum url_scheme {
|
enum url_scheme {
|
||||||
SCHEME_HTTP,
|
SCHEME_HTTP,
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
@ -35,24 +38,27 @@ enum url_scheme {
|
|||||||
};
|
};
|
||||||
|
|
||||||
/* Structure containing info on a URL. */
|
/* Structure containing info on a URL. */
|
||||||
struct urlinfo
|
struct url
|
||||||
{
|
{
|
||||||
char *url; /* Unchanged URL */
|
char *url; /* Original URL */
|
||||||
enum url_scheme scheme; /* URL scheme */
|
enum url_scheme scheme; /* URL scheme */
|
||||||
|
|
||||||
char *host; /* Extracted hostname */
|
char *host; /* Extracted hostname */
|
||||||
unsigned short port;
|
int port; /* Port number */
|
||||||
char ftp_type;
|
|
||||||
char *path, *dir, *file, *qstring;
|
/* URL components (URL-quoted). */
|
||||||
/* Path, dir, file, and query string
|
char *path;
|
||||||
(properly decoded) */
|
char *params;
|
||||||
char *user, *passwd; /* Username and password */
|
char *query;
|
||||||
struct urlinfo *proxy; /* The exact string to pass to proxy
|
char *fragment;
|
||||||
server */
|
|
||||||
char *referer; /* The source from which the request
|
/* Extracted path info (unquoted). */
|
||||||
URI was obtained */
|
char *dir;
|
||||||
char *local; /* The local filename of the URL
|
char *file;
|
||||||
document */
|
|
||||||
|
/* Username and password (unquoted). */
|
||||||
|
char *user;
|
||||||
|
char *passwd;
|
||||||
};
|
};
|
||||||
|
|
||||||
enum convert_options {
|
enum convert_options {
|
||||||
@ -104,19 +110,21 @@ typedef enum
|
|||||||
|
|
||||||
char *encode_string PARAMS ((const char *));
|
char *encode_string PARAMS ((const char *));
|
||||||
|
|
||||||
struct urlinfo *newurl PARAMS ((void));
|
struct url *url_parse PARAMS ((const char *, int *));
|
||||||
void freeurl PARAMS ((struct urlinfo *, int));
|
const char *url_error PARAMS ((int));
|
||||||
enum url_scheme url_detect_scheme PARAMS ((const char *));
|
char *url_full_path PARAMS ((const struct url *));
|
||||||
|
void url_set_dir PARAMS ((struct url *, const char *));
|
||||||
|
void url_set_file PARAMS ((struct url *, const char *));
|
||||||
|
void url_free PARAMS ((struct url *));
|
||||||
|
|
||||||
|
enum url_scheme url_scheme PARAMS ((const char *));
|
||||||
int url_skip_scheme PARAMS ((const char *));
|
int url_skip_scheme PARAMS ((const char *));
|
||||||
int url_has_scheme PARAMS ((const char *));
|
int url_has_scheme PARAMS ((const char *));
|
||||||
|
int scheme_default_port PARAMS ((enum url_scheme));
|
||||||
|
|
||||||
int url_skip_uname PARAMS ((const char *));
|
int url_skip_uname PARAMS ((const char *));
|
||||||
|
|
||||||
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
|
char *url_string PARAMS ((const struct url *, int));
|
||||||
char *str_url PARAMS ((const struct urlinfo *, int));
|
|
||||||
/* url_equal is not currently used. */
|
|
||||||
#if 0
|
|
||||||
int url_equal PARAMS ((const char *, const char *));
|
|
||||||
#endif /* 0 */
|
|
||||||
|
|
||||||
urlpos *get_urls_file PARAMS ((const char *));
|
urlpos *get_urls_file PARAMS ((const char *));
|
||||||
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
|
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int *));
|
||||||
@ -126,8 +134,7 @@ char *uri_merge PARAMS ((const char *, const char *));
|
|||||||
|
|
||||||
void rotate_backups PARAMS ((const char *));
|
void rotate_backups PARAMS ((const char *));
|
||||||
int mkalldirs PARAMS ((const char *));
|
int mkalldirs PARAMS ((const char *));
|
||||||
char *url_filename PARAMS ((const struct urlinfo *));
|
char *url_filename PARAMS ((const struct url *));
|
||||||
void opt_url PARAMS ((struct urlinfo *));
|
|
||||||
|
|
||||||
char *getproxy PARAMS ((uerr_t));
|
char *getproxy PARAMS ((uerr_t));
|
||||||
int no_proxy_match PARAMS ((const char *, const char **));
|
int no_proxy_match PARAMS ((const char *, const char **));
|
||||||
@ -137,6 +144,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
|
|||||||
|
|
||||||
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
|
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
|
||||||
|
|
||||||
char *rewrite_url_maybe PARAMS ((const char *));
|
char *rewrite_shorthand_url PARAMS ((const char *));
|
||||||
|
|
||||||
#endif /* URL_H */
|
#endif /* URL_H */
|
||||||
|
78
src/utils.c
78
src/utils.c
@ -404,30 +404,6 @@ datetime_str (time_t *tm)
|
|||||||
ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
|
ptm->tm_hour, ptm->tm_min, ptm->tm_sec);
|
||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Returns an error message for ERRNUM. #### This requires more work.
|
|
||||||
This function, as well as the whole error system, is very
|
|
||||||
ill-conceived. */
|
|
||||||
const char *
|
|
||||||
uerrmsg (uerr_t errnum)
|
|
||||||
{
|
|
||||||
switch (errnum)
|
|
||||||
{
|
|
||||||
case URLUNKNOWN:
|
|
||||||
return _("Unknown/unsupported protocol");
|
|
||||||
break;
|
|
||||||
case URLBADPORT:
|
|
||||||
return _("Invalid port specification");
|
|
||||||
break;
|
|
||||||
case URLBADHOST:
|
|
||||||
return _("Invalid host name");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
abort ();
|
|
||||||
/* $@#@#$ compiler. */
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* The Windows versions of the following two functions are defined in
|
/* The Windows versions of the following two functions are defined in
|
||||||
mswindows.c. */
|
mswindows.c. */
|
||||||
@ -464,6 +440,14 @@ fork_to_background (void)
|
|||||||
}
|
}
|
||||||
#endif /* not WINDOWS */
|
#endif /* not WINDOWS */
|
||||||
|
|
||||||
|
char *
|
||||||
|
ps (char *orig)
|
||||||
|
{
|
||||||
|
char *r = xstrdup (orig);
|
||||||
|
path_simplify (r);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
|
||||||
/* Canonicalize PATH, and return a new path. The new path differs from PATH
|
/* Canonicalize PATH, and return a new path. The new path differs from PATH
|
||||||
in that:
|
in that:
|
||||||
Multiple `/'s are collapsed to a single `/'.
|
Multiple `/'s are collapsed to a single `/'.
|
||||||
@ -479,7 +463,8 @@ fork_to_background (void)
|
|||||||
Always use '/' as stub_char.
|
Always use '/' as stub_char.
|
||||||
Don't check for local things using canon_stat.
|
Don't check for local things using canon_stat.
|
||||||
Change the original string instead of strdup-ing.
|
Change the original string instead of strdup-ing.
|
||||||
React correctly when beginning with `./' and `../'. */
|
React correctly when beginning with `./' and `../'.
|
||||||
|
Don't zip out trailing slashes. */
|
||||||
void
|
void
|
||||||
path_simplify (char *path)
|
path_simplify (char *path)
|
||||||
{
|
{
|
||||||
@ -545,20 +530,15 @@ path_simplify (char *path)
|
|||||||
i = start + 1;
|
i = start + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Check for trailing `/'. */
|
|
||||||
if (start && !path[i])
|
|
||||||
{
|
|
||||||
zero_last:
|
|
||||||
path[--i] = '\0';
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Check for `../', `./' or trailing `.' by itself. */
|
/* Check for `../', `./' or trailing `.' by itself. */
|
||||||
if (path[i] == '.')
|
if (path[i] == '.')
|
||||||
{
|
{
|
||||||
/* Handle trailing `.' by itself. */
|
/* Handle trailing `.' by itself. */
|
||||||
if (!path[i + 1])
|
if (!path[i + 1])
|
||||||
goto zero_last;
|
{
|
||||||
|
path[--i] = '\0';
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
/* Handle `./'. */
|
/* Handle `./'. */
|
||||||
if (path[i + 1] == '/')
|
if (path[i + 1] == '/')
|
||||||
@ -579,12 +559,6 @@ path_simplify (char *path)
|
|||||||
}
|
}
|
||||||
} /* path == '.' */
|
} /* path == '.' */
|
||||||
} /* while */
|
} /* while */
|
||||||
|
|
||||||
if (!*path)
|
|
||||||
{
|
|
||||||
*path = stub_char;
|
|
||||||
path[1] = '\0';
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* "Touch" FILE, i.e. make its atime and mtime equal to the time
|
/* "Touch" FILE, i.e. make its atime and mtime equal to the time
|
||||||
@ -728,6 +702,30 @@ make_directory (const char *directory)
|
|||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Merge BASE with FILE. BASE can be a directory or a file name, FILE
|
||||||
|
should be a file name. For example, file_merge("/foo/bar", "baz")
|
||||||
|
will return "/foo/baz". file_merge("/foo/bar/", "baz") will return
|
||||||
|
"/foo/bar/baz".
|
||||||
|
|
||||||
|
In other words, it's a simpler and gentler version of uri_merge_1. */
|
||||||
|
|
||||||
|
char *
|
||||||
|
file_merge (const char *base, const char *file)
|
||||||
|
{
|
||||||
|
char *result;
|
||||||
|
const char *cut = (const char *)strrchr (base, '/');
|
||||||
|
|
||||||
|
if (!cut)
|
||||||
|
cut = base + strlen (base);
|
||||||
|
|
||||||
|
result = (char *)xmalloc (cut - base + 1 + strlen (file) + 1);
|
||||||
|
memcpy (result, base, cut - base);
|
||||||
|
result[cut - base] = '/';
|
||||||
|
strcpy (result + (cut - base) + 1, file);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static int in_acclist PARAMS ((const char *const *, const char *, int));
|
static int in_acclist PARAMS ((const char *const *, const char *, int));
|
||||||
|
|
||||||
|
@ -44,8 +44,6 @@ struct wget_timer;
|
|||||||
char *time_str PARAMS ((time_t *));
|
char *time_str PARAMS ((time_t *));
|
||||||
char *datetime_str PARAMS ((time_t *));
|
char *datetime_str PARAMS ((time_t *));
|
||||||
|
|
||||||
const char *uerrmsg PARAMS ((uerr_t));
|
|
||||||
|
|
||||||
#ifdef DEBUG_MALLOC
|
#ifdef DEBUG_MALLOC
|
||||||
void print_malloc_debug_stats ();
|
void print_malloc_debug_stats ();
|
||||||
#endif
|
#endif
|
||||||
@ -63,6 +61,7 @@ int file_exists_p PARAMS ((const char *));
|
|||||||
int file_non_directory_p PARAMS ((const char *));
|
int file_non_directory_p PARAMS ((const char *));
|
||||||
int make_directory PARAMS ((const char *));
|
int make_directory PARAMS ((const char *));
|
||||||
char *unique_name PARAMS ((const char *));
|
char *unique_name PARAMS ((const char *));
|
||||||
|
char *file_merge PARAMS ((const char *, const char *));
|
||||||
|
|
||||||
int acceptable PARAMS ((const char *));
|
int acceptable PARAMS ((const char *));
|
||||||
int accdir PARAMS ((const char *s, enum accd));
|
int accdir PARAMS ((const char *s, enum accd));
|
||||||
|
@ -285,9 +285,8 @@ typedef enum
|
|||||||
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
|
BINDERR, BINDOK, LISTENERR, ACCEPTERR, ACCEPTOK,
|
||||||
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
|
CONCLOSED, FTPOK, FTPLOGINC, FTPLOGREFUSED, FTPPORTERR,
|
||||||
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
|
FTPNSFOD, FTPRETROK, FTPUNKNOWNTYPE, FTPRERR,
|
||||||
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLHTTPS,
|
FTPREXC, FTPSRVERR, FTPRETRINT, FTPRESTFAIL, URLERROR,
|
||||||
URLOK, URLHTTP, URLFTP, URLFILE, URLUNKNOWN, URLBADPORT,
|
FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
|
||||||
URLBADHOST, FOPENERR, FWRITEERR, HOK, HLEXC, HEOF,
|
|
||||||
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
|
HERR, RETROK, RECLEVELEXC, FTPACCDENIED, WRONGCODE,
|
||||||
FTPINVPASV, FTPNOPASV,
|
FTPINVPASV, FTPNOPASV,
|
||||||
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,
|
CONTNOTSUPPORTED, RETRUNNEEDED, RETRFINISHED, READERR, TRYLIMEXC,
|
||||||
|
Loading…
Reference in New Issue
Block a user