1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Modified path_simplify not to rely on extensive use of memmove.

This commit is contained in:
hniksic 2003-09-21 17:23:44 -07:00
parent 9d333d074b
commit 3e9dc5b994
2 changed files with 55 additions and 81 deletions

View File

@ -1,3 +1,8 @@
2003-09-22 Hrvoje Niksic <hniksic@xemacs.org>
* url.c (path_simplify): Instead of calls to memmove, handle "./"
and "../" by advancing pointers.
2003-09-22 Hrvoje Niksic <hniksic@xemacs.org> 2003-09-22 Hrvoje Niksic <hniksic@xemacs.org>
* retr.c (getproxy): Moved from url.c. * retr.c (getproxy): Moved from url.c.

127
src/url.c
View File

@ -1612,8 +1612,8 @@ find_last_char (const char *b, const char *e, char c)
/* Resolve "." and ".." elements of PATH by destructively modifying /* Resolve "." and ".." elements of PATH by destructively modifying
PATH. "." is resolved by removing that path element, and ".." is PATH. "." is resolved by removing that path element, and ".." is
resolved by removing the preceding path element. Leading and resolved by removing the preceding path element. Single leading
trailing slashes are preserved. and trailing slashes are preserved.
Return non-zero if any changes have been made. Return non-zero if any changes have been made.
@ -1628,108 +1628,77 @@ find_last_char (const char *b, const char *e, char c)
static int static int
path_simplify (char *path) path_simplify (char *path)
{ {
int change = 0; char *h, *t, *end;
char *p, *end;
/* Preserve the leading '/'. */
if (path[0] == '/') if (path[0] == '/')
++path; /* preserve the leading '/'. */ ++path;
p = path; h = path; /* hare */
end = p + strlen (p) + 1; /* position past the terminating zero. */ t = path; /* tortoise */
end = path + strlen (path);
while (1) while (h < end)
{ {
again: /* Hare should be at the beginning of a path element. */
/* P should point to the beginning of a path element. */
if (*p == '.' && (*(p + 1) == '/' || *(p + 1) == '\0')) if (h[0] == '.' && (h[1] == '/' || h[1] == '\0'))
{ {
/* Handle "./foo" by moving "foo" two characters to the /* Ignore "./". */
left. */ h += 2;
if (*(p + 1) == '/')
{
change = 1;
memmove (p, p + 2, end - (p + 2));
end -= 2;
goto again;
} }
else else if (h[0] == '.' && h[1] == '.' && (h[2] == '/' || h[2] == '\0'))
{ {
change = 1; /* Handle "../" by retreating the tortoise by one path
*p = '\0'; element -- but not past beginning of PATH. */
break;
}
}
else if (*p == '.' && *(p + 1) == '.'
&& (*(p + 2) == '/' || *(p + 2) == '\0'))
{
/* Handle "../foo" by moving "foo" one path element to the
left. */
char *b = p; /* not p-1 because P can equal PATH */
/* Backtrack by one path element, but not past the beginning if (t > path)
of PATH. */
/* foo/bar/../baz */
/* ^ p */
/* ^ b */
if (b > path)
{ {
/* Move backwards until B hits the beginning of the /* Move backwards until T hits the beginning of the
previous path element or the beginning of path. */ previous path element or the beginning of path. */
for (--b; b > path && *(b - 1) != '/'; b--) for (--t; t > path && t[-1] != '/'; t--)
; ;
} }
h += 3;
change = 1; }
if (*(p + 2) == '/') else if (*h == '/')
{ {
memmove (b, p + 3, end - (p + 3)); /* Ignore empty path elements. Supporting them is hard (in
end -= (p + 3) - b; which directory do you save http://x.com///y.html?), and
p = b; they don't bring any practical gain. Plus, they break
our filesystem-influenced assumptions: allowing empty
path elements means that "x/y//../z" simplifies to
"x/y/z", whereas most people would expect "x/z". */
++h;
} }
else else
{ {
*b = '\0'; /* A regular path element. If H hasn't advanced past T,
break; simply skip to the next path element. Otherwise, copy
} the path element until the next slash. */
if (t == h)
goto again;
}
else if (*p == '/')
{ {
/* Remove empty path elements. Not mandated by rfc1808 et /* Skip the path element, including the slash. */
al, but it seems like a good idea to get rid of them. while (h < end && *h != '/')
Supporting them properly is hard (in which directory do t++, h++;
you save http://x.com///y.html?) and they don't seem to if (h < end)
bring much gain. */ t++, h++;
char *q = p; }
while (*q == '/') else
++q;
change = 1;
if (*q == '\0')
{ {
*p = '\0'; /* Copy the path element, including the final slash. */
break; while (h < end && *h != '/')
*t++ = *h++;
if (h < end)
*t++ = *h++;
}
} }
memmove (p, q, end - q);
end -= q - p;
goto again;
} }
/* Skip to the next path element. */ if (t != h)
while (*p && *p != '/') *t = '\0';
++p;
if (*p == '\0')
break;
/* Make sure P points to the beginning of the next path element, return t != h;
which is location after the slash. */
++p;
}
return change;
} }
/* Merge BASE with LINK and return the resulting URI. /* Merge BASE with LINK and return the resulting URI.