1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Modified path_simplify not to rely on extensive use of memmove.

This commit is contained in:
hniksic 2003-09-21 17:23:44 -07:00
parent 9d333d074b
commit 3e9dc5b994
2 changed files with 55 additions and 81 deletions

View File

@ -1,3 +1,8 @@
2003-09-22 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (path_simplify): Instead of calls to memmove, handle "./"
and "../" by advancing pointers.
2003-09-22 Hrvoje Niksic <hniksic@xemacs.org> 2003-09-22 Hrvoje Niksic <hniksic@xemacs.org>
* retr.c (getproxy): Moved from url.c. * retr.c (getproxy): Moved from url.c.

131
src/url.c
View File

@ -1612,8 +1612,8 @@ find_last_char (const char *b, const char *e, char c)
/* Resolve "." and ".." elements of PATH by destructively modifying /* Resolve "." and ".." elements of PATH by destructively modifying
PATH. "." is resolved by removing that path element, and ".." is PATH. "." is resolved by removing that path element, and ".." is
resolved by removing the preceding path element. Leading and resolved by removing the preceding path element. Single leading
trailing slashes are preserved. and trailing slashes are preserved.
Return non-zero if any changes have been made. Return non-zero if any changes have been made.
@ -1628,108 +1628,77 @@ find_last_char (const char *b, const char *e, char c)
static int static int
path_simplify (char *path) path_simplify (char *path)
{ {
int change = 0; char *h, *t, *end;
char *p, *end;
/* Preserve the leading '/'. */
if (path[0] == '/') if (path[0] == '/')
++path; /* preserve the leading '/'. */ ++path;
p = path; h = path; /* hare */
end = p + strlen (p) + 1; /* position past the terminating zero. */ t = path; /* tortoise */
end = path + strlen (path);
while (1) while (h < end)
{ {
again: /* Hare should be at the beginning of a path element. */
/* P should point to the beginning of a path element. */
if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0')) if (h[0] == '.' && (h[1] == '/' || h[1] == '\0'))
{ {
/* Handle "./foo" by moving "foo" two characters to the /* Ignore "./". */
left. */ h += 2;
if (*(p + 1) == '/')
{
change = 1;
memmove (p, p + 2, end - (p + 2));
end -= 2;
goto again;
}
else
{
change = 1;
*p = '\0';
break;
}
} }
else if (*p == '.' && *(p + 1) == '.' else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))
&& (*(p + 2) == '/' || *(p + 2) == '\0'))
{ {
/* Handle "../foo" by moving "foo" one path element to the /* Handle "../" by retreating the tortoise by one path
left. */ element -- but not past beginning of PATH. */
char *b = p; /* not p-1 because P can equal PATH */
/* Backtrack by one path element, but not past the beginning if (t > path)
of PATH. */
/* foo/bar/../baz */
/* ^ p */
/* ^ b */
if (b > path)
{ {
/* Move backwards until B hits the beginning of the /* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */ previous path element or the beginning of path. */
for (--b; b > path && *(b - 1) != '/'; b--) for (--t; t > path && t[-1] != '/'; t--)
; ;
} }
h += 3;
change = 1; }
if (*(p + 2) == '/') else if (*h == '/')
{
/* Ignore empty path elements. Supporting them is hard (in
which directory do you save http://x.com///y.html?), and
they don't bring any practical gain. Plus, they break
our filesystem-influenced assumptions: allowing empty
path elements means that "x/y/../z" simplifies to
"x/y/z", whereas most people would expect "x/z". */
++h;
}
else
{
/* A regular path element. If H hasn't advanced past T,
simply skip to the next path element. Otherwise, copy
the path element until the next slash. */
if (t == h)
{ {
memmove (b, p + 3, end - (p + 3)); /* Skip the path element, including the slash. */
end -= (p + 3) - b; while (h < end && *h != '/')
p = b; t++, h++;
if (h < end)
t++, h++;
} }
else else
{ {
*b = '\0'; /* Copy the path element, including the final slash. */
break; while (h < end && *h != '/')
*t++ = *h++;
if (h < end)
*t++ = *h++;
} }
goto again;
} }
else if (*p == '/')
{
/* Remove empty path elements. Not mandated by rfc1808 et
al, but it seems like a good idea to get rid of them.
Supporting them properly is hard (in which directory do
you save http://x.com///y.html?) and they don't seem to
bring much gain. */
char *q = p;
while (*q == '/')
++q;
change = 1;
if (*q == '\0')
{
*p = '\0';
break;
}
memmove (p, q, end - q);
end -= q - p;
goto again;
}
/* Skip to the next path element. */
while (*p && *p != '/')
++p;
if (*p == '\0')
break;
/* Make sure P points to the beginning of the next path element,
which is location after the slash. */
++p;
} }
return change; if (t != h)
*t = '\0';
return t != h;
} }
/* Merge BASE with LINK and return the resulting URI. /* Merge BASE with LINK and return the resulting URI.