1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Committed my patch from <sxs7l6ozghz.fsf@florida.arsdigita.de>.

This commit is contained in:
hniksic 2000-10-31 16:26:33 -08:00
parent 0dd418242a
commit f6715dd08d
2 changed files with 125 additions and 62 deletions

View File

@ -1,3 +1,9 @@
2000-11-01 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c (construct): Rewritten for clarity. Avoids the
unnecessary copying and stack-allocation the old version
performed.
2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com> 2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
* ftp.c (getftp): Ditto. * ftp.c (getftp): Ditto.

181
src/url.c
View File

@ -622,10 +622,10 @@ process_ftp_type (char *path)
return '\0'; return '\0';
} }
/* Return the URL as fine-formed string, with a proper protocol, port /* Return the URL as fine-formed string, with a proper protocol,
number, directory and optional user/password. If HIDE is non-zero, optional port number, directory and optional user/password. If
password will be hidden. The forbidden characters in the URL will HIDE is non-zero, password will be hidden. The forbidden
be cleansed. */ characters in the URL will be cleansed. */
char * char *
str_url (const struct urlinfo *u, int hide) str_url (const struct urlinfo *u, int hide)
{ {
@ -659,7 +659,7 @@ str_url (const struct urlinfo *u, int hide)
{ {
char *tmp = (char *)xmalloc (strlen (dir) + 3); char *tmp = (char *)xmalloc (strlen (dir) + 3);
/*sprintf (tmp, "%%2F%s", dir + 1);*/ /*sprintf (tmp, "%%2F%s", dir + 1);*/
*tmp = '%'; tmp[0] = '%';
tmp[1] = '2'; tmp[1] = '2';
tmp[2] = 'F'; tmp[2] = 'F';
strcpy (tmp + 3, dir + 1); strcpy (tmp + 3, dir + 1);
@ -1266,25 +1266,28 @@ url_filename (const struct urlinfo *u)
return name; return name;
} }
/* Like strlen(), except if `?' is present in the URL and its protocol /* Like strlen(), but allow the URL to be ended with '?'. */
is HTTP, act as if `?' is the end of the string. Needed for the
correct implementation of `construct' below, at least until we code
up proper parsing of URLs. */
static int static int
urllen_http_hack (const char *url) urlpath_length (const char *url)
{ {
if ((!strncmp (url, "http://", 7) const char *q = strchr (url, '?');
|| !strncmp (url, "https://", 7))) if (q)
{ return q - url;
const char *q = strchr (url, '?');
if (q)
return q - url;
}
return strlen (url); return strlen (url);
} }
/* Return a pointer to the last occurrence of character C in the
   closed range [B, E], or NULL if C does not occur there.

   Both endpoints are examined: E itself (the callers in this file
   pass a pointer to the URL delimiter, so it must be readable) and
   B, the first character of the range.  The earlier loop stopped
   before looking at *B, so a match at the very start of the range
   went unreported.

   An empty or reversed range (E < B) yields NULL without
   dereferencing anything.  */
static const char *
find_last_char (const char *b, const char *e, char c)
{
  const char *p;

  if (e < b)
    return NULL;

  /* Walk backwards from E; compare against B *before* decrementing so
     that we never form a pointer below the start of the range.  */
  for (p = e; ; p--)
    {
      if (*p == c)
        return p;
      if (p == b)
        break;
    }
  return NULL;
}
/* Construct an absolute URL, given a (possibly) relative one. This /* Construct an absolute URL, given a (possibly) relative one. This
is more tricky than it might seem, but it works. */ gets tricky if you want to cover all the "reasonable" cases, but
I'm satisfied with the result. */
static char * static char *
construct (const char *url, const char *sub, int subsize, int no_proto) construct (const char *url, const char *sub, int subsize, int no_proto)
{ {
@ -1292,62 +1295,116 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
if (no_proto) if (no_proto)
{ {
int i; const char *end = url + urlpath_length (url);
if (*sub != '/') if (*sub != '/')
{ {
for (i = urllen_http_hack (url); i && url[i] != '/'; i--); /* SUB is a relative URL: we need to replace everything
if (!i || (url[i] == url[i - 1])) after last slash (possibly empty) with SUB.
So, if URL is "whatever/foo/bar", and SUB is "qux/xyzzy",
our result should be "whatever/foo/qux/xyzzy". */
int need_explicit_slash = 0;
int span;
const char *start_insert;
const char *last_slash = find_last_char (url, end, '/'); /* the last slash. */
if (!last_slash)
{ {
int l = urllen_http_hack (url); /* No slash found at all. Append SUB to what we have,
char *t = (char *)alloca (l + 2); but we'll need a slash as a separator.
memcpy (t, url, l);
t[l] = '/'; Example: if url == "foo" and sub == "qux/xyzzy", then
t[l + 1] = '\0'; we cannot just append sub to url, because we'd get
url = t; "fooqux/xyzzy", whereas what we want is
i = l; "foo/qux/xyzzy".
To make sure the / gets inserted, we set
need_explicit_slash to 1. We also set start_insert
to end + 1, so that the length calculations work out
correctly for one more (slash) character. Accessing
that character is fine, since it will be the
delimiter, '\0' or '?'. */
/* example: "foo?..." */
/* ^ ('?' gets changed to '/') */
start_insert = end + 1;
need_explicit_slash = 1;
} }
constr = (char *)xmalloc (i + 1 + subsize + 1); else
strncpy (constr, url, i + 1); {
constr[i + 1] = '\0'; /* example: "whatever/foo/bar" */
strncat (constr, sub, subsize); /* ^ */
start_insert = last_slash + 1;
}
span = start_insert - url;
constr = (char *)xmalloc (span + subsize + 1);
if (span)
memcpy (constr, url, span);
if (need_explicit_slash)
constr[span - 1] = '/';
if (subsize)
memcpy (constr + span, sub, subsize);
constr[span + subsize] = '\0';
} }
else /* *sub == `/' */ else /* *sub == `/' */
{ {
int fl; /* SUB is an absolute path: we need to replace everything
after (and including) the FIRST slash with SUB.
i = 0; So, if URL is "http://host/whatever/foo/bar", and SUB is
do "/qux/xyzzy", our result should be
{ "http://host/qux/xyzzy". */
for (; url[i] && url[i] != '/'; i++); int span;
if (!url[i]) const char *slash, *start_insert;
break; const char *pos = url;
fl = (url[i] == url[i + 1] && url[i + 1] == '/'); int seen_slash_slash = 0;
if (fl) /* We're looking for the first slash, but want to ignore
i += 2; double slash. */
} again:
while (fl); slash = memchr (pos, '/', end - pos);
if (!url[i]) if (slash && !seen_slash_slash)
{ if (*(slash + 1) == '/')
int l = urllen_http_hack (url); {
char *t = (char *)alloca (l + 2); pos = slash + 2;
strcpy (t, url); seen_slash_slash = 1;
t[l] = '/'; goto again;
t[l + 1] = '\0'; }
url = t;
} /* At this point, SLASH is the location of the first / after
constr = (char *)xmalloc (i + 1 + subsize + 1); "//", or the first slash altogether. START_INSERT is the
strncpy (constr, url, i); pointer to the location where SUB will be inserted. When
constr[i] = '\0'; examining the last two examples, keep in mind that SUB
strncat (constr + i, sub, subsize); begins with '/'. */
constr[i + subsize] = '\0';
} /* *sub == `/' */ if (!slash && !seen_slash_slash)
/* example: "foo" */
/* ^ */
start_insert = url;
else if (!slash && seen_slash_slash)
/* example: "http://foo" */
/* ^ */
start_insert = end;
else if (slash && !seen_slash_slash)
/* example: "foo/bar" */
/* ^ */
start_insert = url;
else if (slash && seen_slash_slash)
/* example: "http://something/" */
/* ^ */
start_insert = slash;
span = start_insert - url;
constr = (char *)xmalloc (span + subsize + 1);
if (span)
memcpy (constr, url, span);
if (subsize)
memcpy (constr + span, sub, subsize);
constr[span + subsize] = '\0';
}
} }
else /* !no_proto */ else /* !no_proto */
{ {
constr = (char *)xmalloc (subsize + 1); constr = strdupdelim (sub, sub + subsize);
strncpy (constr, sub, subsize);
constr[subsize] = '\0';
} }
return constr; return constr;
} }