[svn] Made sync_path more resilient to pathological values of u->file and u->dir.

2024-07-03 16:38:41 -04:00 · 2003-09-16 17:18:52 -07:00 · 2003-09-16 17:18:52 -07:00 · d4281f04b2
commit d4281f04b2
parent 4b1afddab3
2 changed files with 110 additions and 68 deletions
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,12 @@
 2003-09-17  Hrvoje Niksic  <hniksic@xemacs.org>
 	* url.c (url_escape_1): New function.
 	(url_escape): Use it.
 	(sync_path): Handle pathological cases where u->file and u->dir
 	contain really strange characters.
 	(ENCODE): Deleted.
 	(REENCODE): Deleted.
 2003-09-16  Hrvoje Niksic  <hniksic@xemacs.org>
 	* url.c (url_file_name): Don't reallocate FNAME if the file
--- a/src/url.c
+++ b/src/url.c
@@ -145,11 +145,14 @@ const static unsigned char urlchr_table[256] =
 #undef U
 #undef RU
-/* Decodes the forms %xy in a URL to the character the hexadecimal
+/* URL-unescape the string S.
-   code of which is xy.  xy are hexadecimal digits from
+
-   [0123456789ABCDEF] (case-insensitive).  If x or y are not
+   This is done by transforming the sequences "%HH" to the character
-   hex-digits or `%' precedes `\0', the sequence is inserted
+   represented by the hexadecimal digits HH.  If % is not followed by
-   literally.  */
+   two hexadecimal digits, it is inserted literally.
   The transformation is done in place.  If you need the original
   string intact, make a copy before calling this function.  */
 static void
 url_unescape (char *s)
@@ -177,10 +180,15 @@ url_unescape (char *s)
  *t = '\0';
 }
-/* Like url_escape, but return S if there are no unsafe chars.  */
+/* The core of url_escape_* functions.  Escapes the characters that
   match the provided mask in urlchr_table.
   If ALLOW_PASSTHROUGH is non-zero, a string with no unsafe chars
   will be returned unchanged.  If ALLOW_PASSTHROUGH is zero, a
   freshly allocated string will be returned in all cases.  */
 static char *
-url_escape_allow_passthrough (const char *s)
+url_escape_1 (const char *s, unsigned char mask, int allow_passthrough)
 {
  const char *p1;
  char *p2, *newstr;
@@ -188,11 +196,11 @@ url_escape_allow_passthrough (const char *s)
  int addition = 0;
  for (p1 = s; *p1; p1++)
-    if (URL_UNSAFE_CHAR (*p1))
+    if (urlchr_test (*p1, mask))
      addition += 2;		/* Two more characters (hex digits) */
  if (!addition)
-    return (char *)s;
+    return allow_passthrough ? (char *)s : xstrdup (s);
  newlen = (p1 - s) + addition;
  newstr = (char *)xmalloc (newlen + 1);
@@ -201,7 +209,8 @@ url_escape_allow_passthrough (const char *s)
  p2 = newstr;
  while (*p1)
    {
-      if (URL_UNSAFE_CHAR (*p1))
+      /* Quote the characters that match the test mask. */
      if (urlchr_test (*p1, mask))
 	{
 	  unsigned char c = *p1++;
 	  *p2++ = '%';
@@ -211,37 +220,29 @@ url_escape_allow_passthrough (const char *s)
      else
 	*p2++ = *p1++;
    }
-  *p2 = '\0';
  assert (p2 - newstr == newlen);
+  *p2 = '\0';
  return newstr;
 }
-/* Encode the unsafe characters (as determined by URL_UNSAFE_CHAR) in a
+/* URL-escape the unsafe characters (see urlchr_table) in a given
-   given string, returning a malloc-ed %XX encoded string.  */
+   string, returning a freshly allocated string.  */
-  
+
 char *
 url_escape (const char *s)
 {
-  char *encoded = url_escape_allow_passthrough (s);
+  return url_escape_1 (s, urlchr_unsafe, 0);
-  if (encoded != s)
-    return encoded;
-  else
-    return xstrdup (s);
 }
-/* Encode unsafe characters in PTR to %xx.  If such encoding is done,
+/* URL-escape the unsafe characters (see urlchr_table) in a given
-   the old value of PTR is freed and PTR is made to point to the newly
+   string.  If no characters are unsafe, S is returned.  */
   allocated storage.  */
-#define ENCODE(ptr) do {				\
+static char *
-  char *e_new = url_escape_allow_passthrough (ptr);	\
+url_escape_allow_passthrough (const char *s)
-  if (e_new != ptr)					\
+{
-    {							\
+  return url_escape_1 (s, urlchr_unsafe, 1);
-      xfree (ptr);					\
+}
-      ptr = e_new;					\
-    }							\
-} while (0)
 enum copy_method { CM_DECODE, CM_ENCODE, CM_PASSTHROUGH };
@@ -419,19 +420,6 @@ reencode_escapes (const char *s)
  assert (p2 - newstr == newlen);
  return newstr;
 }
-/* Run PTR_VAR through reencode_escapes.  If a new string is consed,
-   free PTR_VAR and make it point to the new storage.  Obviously,
-   PTR_VAR needs to be an lvalue.  */
-#define REENCODE(ptr_var) do {			\
-  char *rf_new = reencode_escapes (ptr_var);	\
-  if (rf_new != ptr_var)			\
-    {						\
-      xfree (ptr_var);				\
-      ptr_var = rf_new;				\
-    }						\
-} while (0)
 /* Returns the scheme type if the scheme is supported, or
   SCHEME_INVALID if not.  */
@@ -1145,38 +1133,83 @@ url_full_path (const struct url *url)
  return full_path;
 }
-/* Sync u->path and u->url with u->dir and u->file. */
+/* Escape unsafe and reserved characters, except for the slash
   characters.  */
 static char *
 url_escape_dir (const char *dir)
 {
  char *newdir = url_escape_1 (dir, urlchr_unsafe | urlchr_reserved, 1);
  char *h, *t;
  if (newdir == dir)
    return (char *)dir;
  /* Unescape slashes in NEWDIR. */
  h = newdir;			/* hare */
  t = newdir;			/* tortoise */
  for (; *h; h++, t++)
    {
      if (*h == '%' && h[1] == '2' && h[2] == 'F')
 	{
 	  *t = '/';
 	  h += 2;
 	}
      else
 	*t = *h;
    }
  *t = '\0';
  return newdir;
 }
 /* Sync u->path and u->url with u->dir and u->file.  Called after
   u->file or u->dir have been changed, typically by the FTP code.  */
 static void
-sync_path (struct url *url)
+sync_path (struct url *u)
 {
-  char *newpath;
+  char *newpath, *efile, *edir;
-  xfree (url->path);
+  xfree (u->path);
-  if (!*url->dir)
+  /* u->dir and u->file are not escaped.  URL-escape them before
-    {
+     reassembling them into u->path.  That way, if they contain
-      newpath = xstrdup (url->file);
+     separators like '?' or even if u->file contains slashes, the
-      REENCODE (newpath);
+     path will be correctly assembled.  (u->file can contain slashes
-    }
+     if the URL specifies it with %2f, or if an FTP server returns
     it.)  */
  edir = url_escape_dir (u->dir);
  efile = url_escape_1 (u->file, urlchr_unsafe | urlchr_reserved, 1);
  if (!*edir)
    newpath = xstrdup (efile);
  else
    {
-      int dirlen = strlen (url->dir);
+      int dirlen = strlen (edir);
-      int filelen = strlen (url->file);
+      int filelen = strlen (efile);
-      newpath = xmalloc (dirlen + 1 + filelen + 1);
+      /* Copy "DIR/FILE" to newpath. */
-      memcpy (newpath, url->dir, dirlen);
+      char *p = newpath = xmalloc (dirlen + 1 + filelen + 1);
-      newpath[dirlen] = '/';
+      memcpy (p, edir, dirlen);
-      memcpy (newpath + dirlen + 1, url->file, filelen);
+      p += dirlen;
-      newpath[dirlen + 1 + filelen] = '\0';
+      *p++ = '/';
-      REENCODE (newpath);
+      memcpy (p, efile, filelen);
      p += filelen;
      *p++ = '\0';
    }
-  url->path = newpath;
+  u->path = newpath;
-  /* Synchronize u->url. */
+  if (edir != u->dir)
-  xfree (url->url);
+    xfree (edir);
-  url->url = url_string (url, 0);
+  if (efile != u->file)
    xfree (efile);
  /* Regenerate u->url as well.  */
  xfree (u->url);
  u->url = url_string (u, 0);
 }
 /* Mutators.  Code in ftp.c insists on changing u->dir and u->file.
@@ -1396,10 +1429,10 @@ mkalldirs (const char *path)
 /* A growable string structure, used by url_file_name and friends.
   This should perhaps be moved to utils.c.
-   The idea is to have an easy way to construct a string by having
+   The idea is to have a convenient and efficient way to construct a
-   various functions append data to it.  Instead of passing the
+   string by having various functions append data to it.  Instead of
-   obligatory BASEVAR, SIZEVAR and TAILPOS to all the functions in
+   passing the obligatory BASEVAR, SIZEVAR and TAILPOS to all the
-   questions, we pass the pointer to this struct.  */
+   functions in questions, we pass the pointer to this struct.  */
 struct growable {
  char *base;