mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Preserve consecutive slashes in URLs. Make sure leading ".."'s are
not stripped away.
This commit is contained in:
parent
9d907933ad
commit
6ddd0006e4
@ -1,3 +1,14 @@
|
||||
2003-11-14 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* url.c (uri_merge): Merging "foo" and "bar" should result in
|
||||
"bar", not in "foo/bar".
|
||||
(path_simplify): Don't remove empty path elements; don't
|
||||
special-case leading slash.
|
||||
(path_simplify): Don't swallow ".."'s at the beginning of string.
|
||||
E.g. simplify "foo/../../bar" as "../bar", not as "bar".
|
||||
(append_uri_pathel): Defang ".." path element upon encountering
|
||||
it.
|
||||
|
||||
2003-11-13 Hrvoje Niksic <hniksic@xemacs.org>
|
||||
|
||||
* http.c (persistent_available_p): Don't attempt to talk to two
|
||||
|
191
src/url.c
191
src/url.c
@ -1469,22 +1469,31 @@ append_uri_pathel (const char *b, const char *e, int escaped_p,
|
||||
e = unescaped + strlen (unescaped);
|
||||
}
|
||||
|
||||
/* Defang ".." when found as component of path. Remember that path
|
||||
comes from the URL and might contain malicious input. */
|
||||
if (e - b == 2 && b[0] == '.' && b[1] == '.')
|
||||
{
|
||||
b = "%2E%2E";
|
||||
e = b + 6;
|
||||
}
|
||||
|
||||
/* Walk the PATHEL string and check how many characters we'll need
|
||||
to add for file quoting. */
|
||||
to quote. */
|
||||
quoted = 0;
|
||||
for (p = b; p < e; p++)
|
||||
if (FILE_CHAR_TEST (*p, mask))
|
||||
++quoted;
|
||||
|
||||
/* e-b is the string length. Each quoted char means two additional
|
||||
/* Calculate the length of the output string. e-b is the input
|
||||
string length. Each quoted char introduces two additional
|
||||
characters in the string, hence 2*quoted. */
|
||||
outlen = (e - b) + (2 * quoted);
|
||||
GROW (dest, outlen);
|
||||
|
||||
if (!quoted)
|
||||
{
|
||||
/* If there's nothing to quote, we don't need to go through the
|
||||
string the second time. */
|
||||
/* If there's nothing to quote, we can simply append the string
|
||||
without processing it again. */
|
||||
memcpy (TAIL (dest), b, outlen);
|
||||
}
|
||||
else
|
||||
@ -1623,29 +1632,6 @@ url_file_name (const struct url *u)
|
||||
xfree (fname);
|
||||
return unique;
|
||||
}
|
||||
|
||||
/* Return the length of URL's path. Path is considered to be
|
||||
terminated by one of '?', ';', '#', or by the end of the
|
||||
string. */
|
||||
static int
|
||||
path_length (const char *url)
|
||||
{
|
||||
const char *q = strpbrk_or_eos (url, "?;#");
|
||||
return q - url;
|
||||
}
|
||||
|
||||
/* Find the last occurrence of character C in the range [b, e), or
|
||||
NULL, if none are present. This is equivalent to strrchr(b, c),
|
||||
except that it accepts an END argument instead of requiring the
|
||||
string to be zero-terminated. Why is there no memrchr()? */
|
||||
static const char *
|
||||
find_last_char (const char *b, const char *e, char c)
|
||||
{
|
||||
for (; e > b; e--)
|
||||
if (*e == c)
|
||||
return e;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Resolve "." and ".." elements of PATH by destructively modifying
|
||||
PATH and return non-zero if PATH has been modified, zero otherwise.
|
||||
@ -1669,15 +1655,10 @@ find_last_char (const char *b, const char *e, char c)
|
||||
static int
|
||||
path_simplify (char *path)
|
||||
{
|
||||
char *h, *t, *end;
|
||||
|
||||
/* Preserve the leading '/'. */
|
||||
if (path[0] == '/')
|
||||
++path;
|
||||
|
||||
h = path; /* hare */
|
||||
t = path; /* tortoise */
|
||||
end = path + strlen (path);
|
||||
char *h = path; /* hare */
|
||||
char *t = path; /* tortoise */
|
||||
char *beg = path; /* boundary for backing the tortoise */
|
||||
char *end = path + strlen (path);
|
||||
|
||||
while (h < end)
|
||||
{
|
||||
@ -1691,28 +1672,27 @@ path_simplify (char *path)
|
||||
else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))
|
||||
{
|
||||
/* Handle "../" by retreating the tortoise by one path
|
||||
element -- but not past beggining of PATH. */
|
||||
if (t > path)
|
||||
element -- but not past beggining. */
|
||||
if (t > beg)
|
||||
{
|
||||
/* Move backwards until T hits the beginning of the
|
||||
previous path element or the beginning of path. */
|
||||
for (--t; t > path && t[-1] != '/'; t--)
|
||||
for (--t; t > beg && t[-1] != '/'; t--)
|
||||
;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If we're at the beginning, copy the "../" literally
|
||||
move the beginning so a later ".." doesn't remove
|
||||
it. */
|
||||
beg = t + 3;
|
||||
goto regular;
|
||||
}
|
||||
h += 3;
|
||||
}
|
||||
else if (*h == '/')
|
||||
{
|
||||
/* Ignore empty path elements. Supporting them well is hard
|
||||
(where do you save "http://x.com///y.html"?), and they
|
||||
don't bring any practical gain. Plus, they break our
|
||||
filesystem-influenced assumptions: allowing them would
|
||||
make "x/y//../z" simplify to "x/y/z", whereas most people
|
||||
would expect "x/z". */
|
||||
++h;
|
||||
}
|
||||
else
|
||||
{
|
||||
regular:
|
||||
/* A regular path element. If H hasn't advanced past T,
|
||||
simply skip to the next path element. Otherwise, copy
|
||||
the path element until the next slash. */
|
||||
@ -1741,6 +1721,30 @@ path_simplify (char *path)
|
||||
return t != h;
|
||||
}
|
||||
|
||||
/* Return the length of URL's path. Path is considered to be
|
||||
terminated by one of '?', ';', '#', or by the end of the
|
||||
string. */
|
||||
|
||||
static int
|
||||
path_length (const char *url)
|
||||
{
|
||||
const char *q = strpbrk_or_eos (url, "?;#");
|
||||
return q - url;
|
||||
}
|
||||
|
||||
/* Find the last occurrence of character C in the range [b, e), or
|
||||
NULL, if none are present. We might want to use memrchr (a GNU
|
||||
extension) under GNU libc. */
|
||||
|
||||
static const char *
|
||||
find_last_char (const char *b, const char *e, char c)
|
||||
{
|
||||
for (; e > b; e--)
|
||||
if (*e == c)
|
||||
return e;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Merge BASE with LINK and return the resulting URI.
|
||||
|
||||
Either of the URIs may be absolute or relative, complete with the
|
||||
@ -1748,8 +1752,10 @@ path_simplify (char *path)
|
||||
foreseeable cases. It only employs minimal URL parsing, without
|
||||
knowledge of the specifics of schemes.
|
||||
|
||||
Perhaps this function should call path_simplify so that the callers
|
||||
don't have to call url_parse unconditionally. */
|
||||
I briefly considered making this function call path_simplify after
|
||||
the merging process, as rfc1738 seems to suggest. This is a bad
|
||||
idea for several reasons: 1) it complexifies the code, and 2)
|
||||
url_parse has to simplify path anyway, so it's wasteful to boot. */
|
||||
|
||||
char *
|
||||
uri_merge (const char *base, const char *link)
|
||||
@ -1899,24 +1905,8 @@ uri_merge (const char *base, const char *link)
|
||||
const char *last_slash = find_last_char (base, end, '/');
|
||||
if (!last_slash)
|
||||
{
|
||||
/* No slash found at all. Append LINK to what we have,
|
||||
but we'll need a slash as a separator.
|
||||
|
||||
Example: if base == "foo" and link == "qux/xyzzy", then
|
||||
we cannot just append link to base, because we'd get
|
||||
"fooqux/xyzzy", whereas what we want is
|
||||
"foo/qux/xyzzy".
|
||||
|
||||
To make sure the / gets inserted, we set
|
||||
need_explicit_slash to 1. We also set start_insert
|
||||
to end + 1, so that the length calculations work out
|
||||
correctly for one more (slash) character. Accessing
|
||||
that character is fine, since it will be the
|
||||
delimiter, '\0' or '?'. */
|
||||
/* example: "foo?..." */
|
||||
/* ^ ('?' gets changed to '/') */
|
||||
start_insert = end + 1;
|
||||
need_explicit_slash = 1;
|
||||
/* No slash found at all. Replace what we have with LINK. */
|
||||
start_insert = base;
|
||||
}
|
||||
else if (last_slash && last_slash >= base + 2
|
||||
&& last_slash[-2] == ':' && last_slash[-1] == '/')
|
||||
@ -2095,10 +2085,10 @@ run_test (char *test, char *expected_result, int expected_change)
|
||||
if (modified != expected_change)
|
||||
{
|
||||
if (expected_change == 1)
|
||||
printf ("Expected no modification with path_simplify(\"%s\").\n",
|
||||
printf ("Expected modification with path_simplify(\"%s\").\n",
|
||||
test);
|
||||
else
|
||||
printf ("Expected modification with path_simplify(\"%s\").\n",
|
||||
printf ("Expected no modification with path_simplify(\"%s\").\n",
|
||||
test);
|
||||
}
|
||||
xfree (test_copy);
|
||||
@ -2111,24 +2101,28 @@ test_path_simplify (void)
|
||||
char *test, *result;
|
||||
int should_modify;
|
||||
} tests[] = {
|
||||
{ "", "", 0 },
|
||||
{ ".", "", 1 },
|
||||
{ "..", "", 1 },
|
||||
{ "foo", "foo", 0 },
|
||||
{ "foo/bar", "foo/bar", 0 },
|
||||
{ "foo///bar", "foo/bar", 1 },
|
||||
{ "foo/.", "foo/", 1 },
|
||||
{ "foo/./", "foo/", 1 },
|
||||
{ "foo./", "foo./", 0 },
|
||||
{ "foo/../bar", "bar", 1 },
|
||||
{ "foo/../bar/", "bar/", 1 },
|
||||
{ "foo/bar/..", "foo/", 1 },
|
||||
{ "foo/bar/../x", "foo/x", 1 },
|
||||
{ "foo/bar/../x/", "foo/x/", 1 },
|
||||
{ "foo/..", "", 1 },
|
||||
{ "foo/../..", "", 1 },
|
||||
{ "a/b/../../c", "c", 1 },
|
||||
{ "./a/../b", "b", 1 }
|
||||
{ "", "", 0 },
|
||||
{ ".", "", 1 },
|
||||
{ "./", "", 1 },
|
||||
{ "..", "..", 0 },
|
||||
{ "../", "../", 0 },
|
||||
{ "foo", "foo", 0 },
|
||||
{ "foo/bar", "foo/bar", 0 },
|
||||
{ "foo///bar", "foo///bar", 0 },
|
||||
{ "foo/.", "foo/", 1 },
|
||||
{ "foo/./", "foo/", 1 },
|
||||
{ "foo./", "foo./", 0 },
|
||||
{ "foo/../bar", "bar", 1 },
|
||||
{ "foo/../bar/", "bar/", 1 },
|
||||
{ "foo/bar/..", "foo/", 1 },
|
||||
{ "foo/bar/../x", "foo/x", 1 },
|
||||
{ "foo/bar/../x/", "foo/x/", 1 },
|
||||
{ "foo/..", "", 1 },
|
||||
{ "foo/../..", "..", 1 },
|
||||
{ "foo/../../..", "../..", 1 },
|
||||
{ "foo/../../bar/../../baz", "../../baz", 1 },
|
||||
{ "a/b/../../c", "c", 1 },
|
||||
{ "./a/../b", "b", 1 }
|
||||
};
|
||||
int i;
|
||||
|
||||
@ -2139,24 +2133,5 @@ test_path_simplify (void)
|
||||
int expected_change = tests[i].should_modify;
|
||||
run_test (test, expected_result, expected_change);
|
||||
}
|
||||
|
||||
/* Now run all the tests with a leading slash before the test case,
|
||||
to prove that the slash is being preserved. */
|
||||
for (i = 0; i < countof (tests); i++)
|
||||
{
|
||||
char *test, *expected_result;
|
||||
int expected_change = tests[i].should_modify;
|
||||
|
||||
test = xmalloc (1 + strlen (tests[i].test) + 1);
|
||||
sprintf (test, "/%s", tests[i].test);
|
||||
|
||||
expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
|
||||
sprintf (expected_result, "/%s", tests[i].result);
|
||||
|
||||
run_test (test, expected_result, expected_change);
|
||||
|
||||
xfree (test);
|
||||
xfree (expected_result);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user