1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Move path_simplify to url.c.

This commit is contained in:
hniksic 2001-12-14 07:46:00 -08:00
parent 0f120eee66
commit a3500d32d7
4 changed files with 235 additions and 224 deletions

View File

@ -1,3 +1,7 @@
2001-12-14 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c (path_simplify): Move here from utils.c, and make static.
2001-12-13 Hrvoje Niksic <hniksic@arsdigita.com> 2001-12-13 Hrvoje Niksic <hniksic@arsdigita.com>
* init.c (wgetrc_file_name): Print correct message when loading * init.c (wgetrc_file_name): Print correct message when loading

241
src/url.c
View File

@ -48,8 +48,6 @@ extern int errno;
/* Is X ".."? */ /* Is X ".."? */
#define DDOTP(x) ((*(x) == '.') && (*(x + 1) == '.') && (!*(x + 2))) #define DDOTP(x) ((*(x) == '.') && (*(x + 1) == '.') && (!*(x + 2)))
static int urlpath_length PARAMS ((const char *));
struct scheme_data struct scheme_data
{ {
char *leading_string; char *leading_string;
@ -70,7 +68,11 @@ static struct scheme_data supported_schemes[] =
{ NULL, -1, 0 } { NULL, -1, 0 }
}; };
/* Forward declarations: */
static char *construct_relative PARAMS ((const char *, const char *)); static char *construct_relative PARAMS ((const char *, const char *));
static int path_simplify PARAMS ((char *));
/* Support for encoding and decoding of URL strings. We determine /* Support for encoding and decoding of URL strings. We determine
@ -1373,18 +1375,20 @@ url_filename (const struct url *u)
return name; return name;
} }
/* Like strlen(), but allow the URL to be ended with '?'. */ /* Return the langth of URL's path. Path is considered to be
terminated by one of '?', ';', '#', or by the end of the
string. */
static int static int
urlpath_length (const char *url) path_length (const char *url)
{ {
const char *q = strpbrk_or_eos (url, "?;#"); const char *q = strpbrk_or_eos (url, "?;#");
return q - url; return q - url;
} }
/* Find the last occurrence of character C in the range [b, e), or /* Find the last occurrence of character C in the range [b, e), or
NULL, if none are present. This is almost completely equivalent to NULL, if none are present. This is equivalent to strrchr(b, c),
{ *e = '\0'; return strrchr(b); }, except that it doesn't change except that it accepts an END argument instead of requiring the
the contents of the string. */ string to be zero-terminated. Why is there no memrchr()? */
static const char * static const char *
find_last_char (const char *b, const char *e, char c) find_last_char (const char *b, const char *e, char c)
{ {
@ -1393,7 +1397,127 @@ find_last_char (const char *b, const char *e, char c)
return e; return e;
return NULL; return NULL;
} }
/* Resolve "." and ".." elements of PATH by destructively modifying
PATH. "." is resolved by removing that path element, and ".." is
resolved by removing the preceding path element. Leading and
trailing slashes are preserved.
Return non-zero if any changes have been made.
For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive
test examples are provided below. If you change anything in this
function, run test_path_simplify to make sure you haven't broken a
test case.
A previous version of this function was based on path_simplify()
from GNU Bash, but it has been rewritten for Wget 1.8.1. */
static int
path_simplify (char *path)
{
int change = 0;
char *p, *end;
if (path[0] == '/')
++path; /* preserve the leading '/'. */
p = path;
end = p + strlen (p) + 1; /* position past the terminating zero. */
while (1)
{
again:
/* P should point to the beginning of a path element. */
if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0'))
{
/* Handle "./foo" by moving "foo" two characters to the
left. */
if (*(p + 1) == '/')
{
change = 1;
memmove (p, p + 2, end - p);
end -= 2;
goto again;
}
else
{
change = 1;
*p = '\0';
break;
}
}
else if (*p == '.' && *(p + 1) == '.'
&& (*(p + 2) == '/' || *(p + 2) == '\0'))
{
/* Handle "../foo" by moving "foo" one path element to the
left. */
char *b = p; /* not p-1 because P can equal PATH */
/* Backtrack by one path element, but not past the beginning
of PATH. */
/* foo/bar/../baz */
/* ^ p */
/* ^ b */
if (b > path)
{
/* Move backwards until B hits the beginning of the
previous path element or the beginning of path. */
for (--b; b > path && *(b - 1) != '/'; b--)
;
}
change = 1;
if (*(p + 2) == '/')
{
memmove (b, p + 3, end - (p + 3));
end -= (p + 3) - b;
p = b;
}
else
{
*b = '\0';
break;
}
goto again;
}
else if (*p == '/')
{
/* Remove empty path elements. Not mandated by rfc1808 et
al, but empty path elements are not all that useful, and
the rest of Wget might not deal with them well. */
char *q = p;
while (*q == '/')
++q;
change = 1;
if (*q == '\0')
{
*p = '\0';
break;
}
memmove (p, q, end - q);
end -= q - p;
goto again;
}
/* Skip to the next path element. */
while (*p && *p != '/')
++p;
if (*p == '\0')
break;
/* Make sure P points to the beginning of the next path element,
which is location after the slash. */
++p;
}
return change;
}
/* Resolve the result of "linking" a base URI (BASE) to a /* Resolve the result of "linking" a base URI (BASE) to a
link-specified URI (LINK). link-specified URI (LINK).
@ -1405,8 +1529,8 @@ find_last_char (const char *b, const char *e, char c)
The parameters LINKLENGTH is useful if LINK is not zero-terminated. The parameters LINKLENGTH is useful if LINK is not zero-terminated.
See uri_merge for a gentler interface to this functionality. See uri_merge for a gentler interface to this functionality.
Perhaps this function should handle `./' and `../' so that the evil Perhaps this function should call path_simplify so that the callers
path_simplify can go. */ don't have to call url_parse unconditionally. */
static char * static char *
uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme) uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
{ {
@ -1414,7 +1538,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
if (no_scheme) if (no_scheme)
{ {
const char *end = base + urlpath_length (base); const char *end = base + path_length (base);
if (!*link) if (!*link)
{ {
@ -1723,6 +1847,10 @@ no_proxy_match (const char *host, const char **no_proxy)
return !sufmatch (no_proxy, host); return !sufmatch (no_proxy, host);
} }
/* Support for converting links for local viewing in downloaded HTML
files. This should be moved to another file, because it has
nothing to do with processing URLs. */
static void write_backup_file PARAMS ((const char *, downloaded_file_t)); static void write_backup_file PARAMS ((const char *, downloaded_file_t));
static const char *replace_attr PARAMS ((const char *, int, FILE *, static const char *replace_attr PARAMS ((const char *, int, FILE *,
const char *)); const char *));
@ -2253,3 +2381,96 @@ downloaded_files_free (void)
downloaded_files_hash = NULL; downloaded_files_hash = NULL;
} }
} }
#if 0
/* Debugging and testing support for path_simplify. */
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
ps (char *path)
{
char *copy = xstrdup (path);
path_simplify (copy);
return copy;
}
static void
run_test (char *test, char *expected_result, int expected_change)
{
char *test_copy = xstrdup (test);
int modified = path_simplify (test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
}
if (modified != expected_change)
{
if (expected_change == 1)
printf ("Expected no modification with path_simplify(\"%s\").\n",
test);
else
printf ("Expected modification with path_simplify(\"%s\").\n",
test);
}
xfree (test_copy);
}
static void
test_path_simplify (void)
{
static struct {
char *test, *result;
int should_modify;
} tests[] = {
{ "", "", 0 },
{ ".", "", 1 },
{ "..", "", 1 },
{ "foo", "foo", 0 },
{ "foo/bar", "foo/bar", 0 },
{ "foo///bar", "foo/bar", 1 },
{ "foo/.", "foo/", 1 },
{ "foo/./", "foo/", 1 },
{ "foo./", "foo./", 0 },
{ "foo/../bar", "bar", 1 },
{ "foo/../bar/", "bar/", 1 },
{ "foo/bar/..", "foo/", 1 },
{ "foo/bar/../x", "foo/x", 1 },
{ "foo/bar/../x/", "foo/x/", 1 },
{ "foo/..", "", 1 },
{ "foo/../..", "", 1 },
{ "a/b/../../c", "c", 1 },
{ "./a/../b", "b", 1 }
};
int i;
for (i = 0; i < ARRAY_SIZE (tests); i++)
{
char *test = tests[i].test;
char *expected_result = tests[i].result;
int expected_change = tests[i].should_modify;
run_test (test, expected_result, expected_change);
}
/* Now run all the tests with a leading slash before the test case,
to prove that the slash is being preserved. */
for (i = 0; i < ARRAY_SIZE (tests); i++)
{
char *test, *expected_result;
int expected_change = tests[i].should_modify;
test = xmalloc (1 + strlen (tests[i].test) + 1);
sprintf (test, "/%s", tests[i].test);
expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
sprintf (expected_result, "/%s", tests[i].result);
run_test (test, expected_result, expected_change);
xfree (test);
xfree (expected_result);
}
}
#endif

View File

@ -466,126 +466,6 @@ fork_to_background (void)
} }
#endif /* not WINDOWS */ #endif /* not WINDOWS */
/* Resolve "." and ".." elements of PATH by destructively modifying
PATH. "." is resolved by removing that path element, and ".." is
resolved by removing the preceding path element. Leading and
trailing slashes are preserved.
Return non-zero if any changes have been made.
For example, "a/b/c/./../d/.." will yield "a/b/". More exhaustive
test examples are provided below. If you change anything in this
function, run test_path_simplify to make sure you haven't broken a
test case.
A previous version of this function was based on path_simplify()
from GNU Bash, but it has been rewritten for Wget 1.8.1. */
int
path_simplify (char *path)
{
int change = 0;
char *p, *end;
if (path[0] == '/')
++path; /* preserve the leading '/'. */
p = path;
end = p + strlen (p) + 1; /* position past the terminating zero. */
while (1)
{
again:
/* P should point to the beginning of a path element. */
if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0'))
{
/* Handle "./foo" by moving "foo" two characters to the
left. */
if (*(p + 1) == '/')
{
change = 1;
memmove (p, p + 2, end - p);
end -= 2;
goto again;
}
else
{
change = 1;
*p = '\0';
break;
}
}
else if (*p == '.' && *(p + 1) == '.'
&& (*(p + 2) == '/' || *(p + 2) == '\0'))
{
/* Handle "../foo" by moving "foo" one path element to the
left. */
char *b = p; /* not p-1 because P can equal PATH */
/* Backtrack by one path element, but not past the beginning
of PATH. */
/* foo/bar/../baz */
/* ^ p */
/* ^ b */
if (b > path)
{
/* Move backwards until B hits the beginning of the
previous path element or the beginning of path. */
for (--b; b > path && *(b - 1) != '/'; b--)
;
}
change = 1;
if (*(p + 2) == '/')
{
memmove (b, p + 3, end - (p + 3));
end -= (p + 3) - b;
p = b;
}
else
{
*b = '\0';
break;
}
goto again;
}
else if (*p == '/')
{
/* Remove empty path elements. Not mandated by rfc1808 et
al, but empty path elements are not all that useful, and
the rest of Wget might not deal with them well. */
char *q = p;
while (*q == '/')
++q;
change = 1;
if (*q == '\0')
{
*p = '\0';
break;
}
memmove (p, q, end - q);
end -= q - p;
goto again;
}
/* Skip to the next path element. */
while (*p && *p != '/')
++p;
if (*p == '\0')
break;
/* Make sure P points to the beginning of the next path element,
which is location after the slash. */
++p;
}
return change;
}
/* "Touch" FILE, i.e. make its atime and mtime equal to the time /* "Touch" FILE, i.e. make its atime and mtime equal to the time
specified with TM. */ specified with TM. */
void void
@ -1807,96 +1687,3 @@ debug_test_md5 (char *buf)
return res; return res;
} }
#endif #endif
#if 0
/* Debugging and testing support for path_simplify. */
/* Debug: run path_simplify on PATH and return the result in a new
string. Useful for calling from the debugger. */
static char *
ps (char *path)
{
char *copy = xstrdup (path);
path_simplify (copy);
return copy;
}
static void
run_test (char *test, char *expected_result, int expected_change)
{
char *test_copy = xstrdup (test);
int modified = path_simplify (test_copy);
if (0 != strcmp (test_copy, expected_result))
{
printf ("Failed path_simplify(\"%s\"): expected \"%s\", got \"%s\".\n",
test, expected_result, test_copy);
}
if (modified != expected_change)
{
if (expected_change == 1)
printf ("Expected no modification with path_simplify(\"%s\").\n",
test);
else
printf ("Expected modification with path_simplify(\"%s\").\n",
test);
}
xfree (test_copy);
}
static void
test_path_simplify (void)
{
static struct {
char *test, *result;
int should_modify;
} tests[] = {
{ "", "", 0 },
{ ".", "", 1 },
{ "..", "", 1 },
{ "foo", "foo", 0 },
{ "foo/bar", "foo/bar", 0 },
{ "foo///bar", "foo/bar", 1 },
{ "foo/.", "foo/", 1 },
{ "foo/./", "foo/", 1 },
{ "foo./", "foo./", 0 },
{ "foo/../bar", "bar", 1 },
{ "foo/../bar/", "bar/", 1 },
{ "foo/bar/..", "foo/", 1 },
{ "foo/bar/../x", "foo/x", 1 },
{ "foo/bar/../x/", "foo/x/", 1 },
{ "foo/..", "", 1 },
{ "foo/../..", "", 1 },
{ "a/b/../../c", "c", 1 },
{ "./a/../b", "b", 1 }
};
int i;
for (i = 0; i < ARRAY_SIZE (tests); i++)
{
char *test = tests[i].test;
char *expected_result = tests[i].result;
int expected_change = tests[i].should_modify;
run_test (test, expected_result, expected_change);
}
/* Now run all the tests with a leading slash before the test case,
to prove that the slash is being preserved. */
for (i = 0; i < ARRAY_SIZE (tests); i++)
{
char *test, *expected_result;
int expected_change = tests[i].should_modify;
test = xmalloc (1 + strlen (tests[i].test) + 1);
sprintf (test, "/%s", tests[i].test);
expected_result = xmalloc (1 + strlen (tests[i].result) + 1);
sprintf (expected_result, "/%s", tests[i].result);
run_test (test, expected_result, expected_change);
xfree (test);
xfree (expected_result);
}
}
#endif

View File

@ -56,7 +56,6 @@ char **sepstring PARAMS ((const char *));
int frontcmp PARAMS ((const char *, const char *)); int frontcmp PARAMS ((const char *, const char *));
char *pwd_cuserid PARAMS ((char *)); char *pwd_cuserid PARAMS ((char *));
void fork_to_background PARAMS ((void)); void fork_to_background PARAMS ((void));
int path_simplify PARAMS ((char *));
void touch PARAMS ((const char *, time_t)); void touch PARAMS ((const char *, time_t));
int remove_link PARAMS ((const char *)); int remove_link PARAMS ((const char *));