
[svn] Committed my patches from <sxsbsw16sbu.fsf@florida.arsdigita.de>
and <sxsvgu824xk.fsf@florida.arsdigita.de>.
hniksic 2000-10-31 11:25:32 -08:00
parent 5f96643297
commit 0dd418242a
7 changed files with 93 additions and 58 deletions

ChangeLog

@@ -1,3 +1,20 @@
2000-10-31  Hrvoje Niksic  <hniksic@arsdigita.com>

        * ftp.c (getftp): Ditto.

        * http.c (gethttp): Rewind the stream when retrying from scratch.

2000-10-31  Hrvoje Niksic  <hniksic@arsdigita.com>

        * retr.c (retrieve_url): Use url_concat() to handle relative
        redirections instead of /ad hoc/ code.

        * url.c (url_concat): New function encapsulating weird
        construct().
        (urllen_http_hack): New function.
        (construct): When constructing new URLs, recognize that `?' does
        not form part of the file name in HTTP.

2000-10-13  Adrian Aichner  <adrian@xemacs.org>

        * retr.c: Add msec timing support for WINDOWS.
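Taken together, the retr.c and url.c entries above mean that a relative Location: target is now resolved against the directory of the original URL, and that a ?query on that URL no longer counts as part of the file name when deciding where the directory ends. A minimal self-contained C sketch of that joining rule (an illustration only, not wget's construct(); the helper names and example URLs are made up):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Length of URL, but stop at `?' for HTTP(S), mirroring the idea of
   urllen_http_hack() in the url.c hunk further down.  */
static size_t
url_len_no_query (const char *url)
{
  if (!strncmp (url, "http://", 7) || !strncmp (url, "https://", 8))
    {
      const char *q = strchr (url, '?');
      if (q)
        return (size_t) (q - url);
    }
  return strlen (url);
}

/* Join a relative reference onto BASE by cutting BASE back to its last
   `/' before any query string; an illustration, not wget's construct().  */
static char *
join_relative (const char *base, const char *rel)
{
  size_t i = url_len_no_query (base);
  while (i > 0 && base[i - 1] != '/')
    i--;
  char *res = malloc (i + strlen (rel) + 1);
  if (!res)
    return NULL;
  memcpy (res, base, i);
  strcpy (res + i, rel);
  return res;
}

int
main (void)
{
  /* Hypothetical redirect whose base URL carries a query string.  */
  char *abs = join_relative ("http://example.com/cgi/download.cgi?id=7",
                             "mirror.html");
  if (abs)
    puts (abs);               /* http://example.com/cgi/mirror.html */
  free (abs);
  return 0;
}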

ftp.c

@@ -648,6 +648,15 @@ Error in server response, closing control connection.\n"));
      expected_bytes = ftp_expected_bytes (ftp_last_respline);
    } /* cmd & DO_LIST */
  /* Some FTP servers return the total length of file after REST
     command, others just return the remaining size. */
  if (*len && restval && expected_bytes
      && (expected_bytes == *len - restval))
    {
      DEBUGP (("Lying FTP server found, adjusting.\n"));
      expected_bytes = *len;
    }
  /* If no transmission was required, then everything is OK. */
  if (!(cmd & (DO_LIST | DO_RETR)))
    return RETRFINISHED;
@@ -685,15 +694,15 @@ Error in server response, closing control connection.\n"));
        }
    }
  else
    fp = opt.dfp;
  /* Some FTP servers return the total length of file after REST command,
     others just return the remaining size. */
  if (*len && restval && expected_bytes
      && (expected_bytes == *len - restval))
    {
      DEBUGP (("Lying FTP server found, adjusting.\n"));
      expected_bytes = *len;
      fp = opt.dfp;
      if (!restval)
        {
          /* This will silently fail for streams that don't correspond
             to regular files, but that's OK. */
          rewind (fp);
          clearerr (fp);
        }
    }
  if (*len)
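The "Lying FTP server" check above normalizes expected_bytes: when the size announced by the server equals the already-known length minus the REST offset, the server evidently reported only the remaining bytes, so the value is bumped back up to the full length. A worked example with made-up numbers (the names follow the diff, the values are hypothetical):

#include <stdio.h>

int
main (void)
{
  long len = 10000;            /* full file length already known      */
  long restval = 6000;         /* resume offset sent with REST        */
  long expected_bytes = 4000;  /* size announced in the server reply  */

  /* Same test as in getftp(): the announced size matches the
     remaining part, so treat it as "remaining", not "total".  */
  if (len && restval && expected_bytes
      && expected_bytes == len - restval)
    {
      printf ("Lying FTP server found, adjusting.\n");
      expected_bytes = len;    /* normalize to the full length */
    }

  printf ("expected_bytes = %ld\n", expected_bytes);   /* 10000 */
  return 0;
}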

http.c

@@ -844,7 +844,16 @@ Accept: %s\r\n\
        }
    }
  else /* opt.dfp */
    {
      fp = opt.dfp;
      if (!hs->restval)
        {
          /* This will silently fail for streams that don't correspond
             to regular files, but that's OK. */
          rewind (fp);
          clearerr (fp);
        }
    }
  /* #### This confuses the code that checks for file size. There
     should be some overhead information. */
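With a caller-supplied output stream (opt.dfp, as with -O), a retry from scratch first rewinds the stream and clears its error flag so the fresh body overwrites the partial one; for a pipe the rewind is quietly useless, which the comment accepts. A small standalone sketch of the same pattern (the temporary file and payloads are hypothetical):

#include <stdio.h>
#include <string.h>

/* Write BODY to FP as one download "attempt"; a retry from scratch
   rewinds first so the new body overwrites the old one.  */
static void
attempt (FILE *fp, const char *body, int retrying)
{
  if (retrying)
    {
      /* Quietly a no-op for streams that are not regular files,
         just as the comment in the diff above says.  */
      rewind (fp);
      clearerr (fp);
    }
  fwrite (body, 1, strlen (body), fp);
  fflush (fp);
}

int
main (void)
{
  FILE *fp = tmpfile ();            /* stands in for opt.dfp */
  if (!fp)
    return 1;

  attempt (fp, "partial answ", 0);  /* interrupted first attempt */
  attempt (fp, "full answer\n", 1); /* retry from scratch        */

  char buf[64];
  rewind (fp);
  size_t n = fread (buf, 1, sizeof buf - 1, fp);
  buf[n] = '\0';
  fputs (buf, stdout);              /* prints "full answer" */
  fclose (fp);
  return 0;
}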

init.c

@@ -76,7 +76,6 @@ CMD_DECLARE (cmd_spec_dotstyle);
CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror);
CMD_DECLARE (cmd_spec_outputdocument);
CMD_DECLARE (cmd_spec_recursive);
CMD_DECLARE (cmd_spec_useragent);
@@ -139,7 +138,7 @@ static struct {
  { "noparent", &opt.no_parent, cmd_boolean },
  { "noproxy", &opt.no_proxy, cmd_vector },
  { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
  { "outputdocument", NULL, cmd_spec_outputdocument },
  { "outputdocument", &opt.output_document, cmd_string },
  { "pagerequisites", &opt.page_requisites, cmd_boolean },
  { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
  { "passwd", &opt.ftp_pass, cmd_string },
@@ -915,15 +914,6 @@ cmd_spec_mirror (const char *com, const char *val, void *closure)
  return 1;
}

static int
cmd_spec_outputdocument (const char *com, const char *val, void *closure)
{
  FREE_MAYBE (opt.output_document);
  opt.output_document = xstrdup (val);
  opt.ntry = 1;
  return 1;
}

static int
cmd_spec_recursive (const char *com, const char *val, void *closure)
{

retr.c

@@ -337,9 +337,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
 again:
  u = newurl ();
  /* Parse the URL. RFC2068 requires `Location' to contain an
     absoluteURI, but many sites break this requirement. #### We
     should be liberal and accept a relative location, too. */
  /* Parse the URL. */
  result = parseurl (url, u, already_redirected);
  if (result != URLOK)
    {
@@ -426,40 +424,26 @@ retrieve_url (const char *origurl, char **file, char **newloc,
  location_changed = (result == NEWLOCATION);
  if (location_changed)
    {
      /* Check for redirection to oneself. */
      if (mynewloc)
        {
          /* The HTTP specs only allow absolute URLs to appear in
             redirects, but a ton of boneheaded webservers and CGIs
             out there break the rules and use relative URLs, and
             popular browsers are lenient about this, so wget should
             be too. */
          char *construced_newloc = url_concat (url, mynewloc);
          free (mynewloc);
          mynewloc = construced_newloc;
        }
      /* Check for redirection to back to itself. */
      if (url_equal (url, mynewloc))
        {
          logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
                     mynewloc);
          return WRONGCODE;
        }
      if (mynewloc)
        {
          /* The HTTP specs only allow absolute URLs to appear in redirects, but
             a ton of boneheaded webservers and CGIs out there break the rules
             and use relative URLs, and popular browsers are lenient about this,
             so wget should be too. */
          if (strstr(mynewloc, "://") == NULL)
            /* Doesn't look like an absolute URL (this check will incorrectly
               think that rare relative URLs containing "://" later in the
               string are absolute). */
            {
              char *temp = malloc(strlen(url) + strlen(mynewloc) + 1);
              if (mynewloc[0] == '/')
                /* "Hostless absolute" URL. Convert to absolute. */
                sprintf(temp,"%s%s", url, mynewloc);
              else
                /* Relative URL. Convert to absolute. */
                sprintf(temp,"%s/%s", url, mynewloc);
              free(mynewloc);
              mynewloc = temp;
            }
          free (url);
          url = mynewloc;
        }
      freeurl (u, 1);
      already_redirected = 1;
      goto again;
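The block removed above resolved a relative Location by sprintf()-ing it onto the whole request URL, so the file component of the base stayed in the result; the replacement goes through url_concat() instead. A sketch contrasting the two joins on a made-up redirect (illustration only; the directory-aware version merely approximates what construct() does):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* The old ad hoc join: paste the relative reference onto the whole
   original URL, as the removed sprintf() calls did.  */
static char *
join_ad_hoc (const char *url, const char *loc)
{
  char *res = malloc (strlen (url) + strlen (loc) + 2);
  if (!res)
    return NULL;
  if (loc[0] == '/')
    sprintf (res, "%s%s", url, loc);   /* "hostless absolute" case */
  else
    sprintf (res, "%s/%s", url, loc);  /* relative case */
  return res;
}

/* A directory-aware join in the spirit of url_concat(): cut the base
   back to its last `/' before appending.  */
static char *
join_by_directory (const char *url, const char *loc)
{
  const char *slash = strrchr (url, '/');
  size_t keep = slash ? (size_t) (slash - url) + 1 : strlen (url);
  char *res = malloc (keep + strlen (loc) + 1);
  if (!res)
    return NULL;
  memcpy (res, url, keep);
  strcpy (res + keep, loc);
  return res;
}

int
main (void)
{
  /* Hypothetical redirect from a page one directory deep.  */
  const char *url = "http://example.com/docs/index.html";
  const char *loc = "manual.html";

  char *a = join_ad_hoc (url, loc);
  char *b = join_by_directory (url, loc);
  printf ("ad hoc: %s\n", a ? a : "");  /* .../docs/index.html/manual.html */
  printf ("concat: %s\n", b ? b : "");  /* .../docs/manual.html            */
  free (a);
  free (b);
  return 0;
}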

url.c

@@ -1266,6 +1266,23 @@ url_filename (const struct urlinfo *u)
  return name;
}

/* Like strlen(), except if `?' is present in the URL and its protocol
   is HTTP, act as if `?' is the end of the string. Needed for the
   correct implementation of `construct' below, at least until we code
   up proper parsing of URLs. */
static int
urllen_http_hack (const char *url)
{
  if ((!strncmp (url, "http://", 7)
       || !strncmp (url, "https://", 7)))
    {
      const char *q = strchr (url, '?');
      if (q)
        return q - url;
    }
  return strlen (url);
}

/* Construct an absolute URL, given a (possibly) relative one. This
   is more tricky than it might seem, but it works. */
static char *
@@ -1279,12 +1296,12 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
      if (*sub != '/')
        {
          for (i = strlen (url); i && url[i] != '/'; i--);
          for (i = urllen_http_hack (url); i && url[i] != '/'; i--);
          if (!i || (url[i] == url[i - 1]))
            {
              int l = strlen (url);
              int l = urllen_http_hack (url);
              char *t = (char *)alloca (l + 2);
              strcpy (t, url);
              memcpy (t, url, l);
              t[l] = '/';
              t[l + 1] = '\0';
              url = t;
@@ -1312,7 +1329,7 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
          while (fl);
          if (!url[i])
            {
              int l = strlen (url);
              int l = urllen_http_hack (url);
              char *t = (char *)alloca (l + 2);
              strcpy (t, url);
              t[l] = '/';
@@ -1334,6 +1351,13 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
    }
  return constr;
}

/* Like the function above, but with a saner caller interface. */
char *
url_concat (const char *base_url, const char *new_url)
{
  return construct (base_url, new_url, strlen (new_url), !has_proto (new_url));
}

/* Optimize URL by host, destructively replacing u->host with realhost
   (u->host). Do this regardless of opt.simple_check. */
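The new urllen_http_hack() is self-contained enough to exercise on its own. The snippet below copies it from the hunk above and adds a small driver with two hypothetical URLs, showing that the query string is discounted only for HTTP(S):

#include <stdio.h>
#include <string.h>

/* Copied from the url.c hunk above: strlen(), except that a `?' ends
   the string when the URL's protocol is HTTP.  */
static int
urllen_http_hack (const char *url)
{
  if ((!strncmp (url, "http://", 7)
       || !strncmp (url, "https://", 7)))
    {
      const char *q = strchr (url, '?');
      if (q)
        return q - url;
    }
  return strlen (url);
}

int
main (void)
{
  const char *a = "http://example.com/cgi/search.cgi?q=wget";
  const char *b = "ftp://example.com/pub/file?name";

  printf ("%d\n", urllen_http_hack (a));  /* stops at the `?'      */
  printf ("%d\n", urllen_http_hack (b));  /* full length, not HTTP */
  return 0;
}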

url.h

@@ -98,6 +98,8 @@ urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
void free_urlpos PARAMS ((urlpos *));
char *url_concat PARAMS ((const char *, const char *));
void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *));