dotdot: introducing dot file path cleanup

RFC3986 details how a path part passed in as part of a URI should be "cleaned" from dot sequences before getting used. The described algorithm is now implemented in lib/dotdot.c with the accompanied test case in test 1395. Bug: http://curl.haxx.se/bug/view.cgi?id=1200 Reported-by: Alex Vinnik
2024-12-22 08:08:50 -05:00 · 2013-06-15 23:47:02 +02:00 · 2013-06-15 23:47:02 +02:00 · 7877619f85
commit 7877619f85
parent ec248b590d
9 changed files with 402 additions and 14 deletions
--- a/lib/Makefile.inc
+++ b/lib/Makefile.inc
@ -25,7 +25,7 @@ CSOURCES = file.c timeval.c base64.c hostip.c progress.c formdata.c	\
  http_proxy.c non-ascii.c asyn-ares.c asyn-thread.c curl_gssapi.c	\
  curl_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_ntlm_msgs.c		\
  curl_sasl.c curl_schannel.c curl_multibyte.c curl_darwinssl.c		\
-  hostcheck.c bundles.c conncache.c pipeline.c
+  hostcheck.c bundles.c conncache.c pipeline.c dotdot.c
 HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h	\
  progress.h formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h	\
@ -44,4 +44,4 @@ HHEADERS = arpa_telnet.h netrc.h file.h timeval.h qssl.h hostip.h	\
  asyn.h curl_ntlm.h curl_gssapi.h curl_ntlm_wb.h curl_ntlm_core.h	\
  curl_ntlm_msgs.h curl_sasl.h curl_schannel.h curl_multibyte.h		\
  curl_darwinssl.h hostcheck.h bundles.h conncache.h curl_setup_once.h	\
-  multihandle.h setup-vms.h pipeline.h
+  multihandle.h setup-vms.h pipeline.h dotdot.h
--- a/lib/dotdot.c
+++ b/lib/dotdot.c
@ -0,0 +1,170 @@
 /***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
 #include "curl_setup.h"
 #include "dotdot.h"
 #include "curl_memory.h"
 /* The last #include file should be: */
 #include "memdebug.h"
 /*
 * "Remove Dot Segments"
 * http://tools.ietf.org/html/rfc3986#section-5.2.4
 */
 /*
 * Curl_dedotdotify()
 *
 * This function gets a zero-terminated path with dot and dotdot sequences
 * passed in and strips them off according to the rules in RFC 3986 section
 * 5.2.5.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * an allocated dedotdotified output string
 */
 char *Curl_dedotdotify(char *input)
 {
  size_t inlen = strlen(input);
  char *clone;
  size_t clen = inlen; /* the length of the cloned input */
  char *out = malloc(inlen+1);
  char *outp;
  char *orgclone;
  char *queryp;
  if(!out)
    return NULL; /* out of memory */
  /* get a cloned copy of the input */
  clone = strdup(input);
  if(!clone) {
    free(out);
    return NULL;
  }
  orgclone = clone;
  outp = out;
  /*
   * To handle query-parts properly, we must find it and remove it during the
   * dotdot-operation and then append it again at the end to the output
   * string.
   */
  queryp = strchr(clone, '?');
  if(queryp)
    *queryp = 0;
  do {
    /*  A.  If the input buffer begins with a prefix of "../" or "./", then
        remove that prefix from the input buffer; otherwise, */
    if(!strncmp("./", clone, 2)) {
      clone+=2;
      clen-=2;
    }
    else if(!strncmp("../", clone, 3)) {
      clone+=3;
      clen-=3;
    }
    /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
        "."  is a complete path segment, then replace that prefix with "/" in
        the input buffer; otherwise, */
    else if(!strncmp("/./", clone, 3)) {
      clone+=2;
      clen-=2;
    }
    else if(!strcmp("/.", clone)) {
      clone[1]='/';
      clone++;
      clen-=1;
    }
    /*  C.  if the input buffer begins with a prefix of "/../" or "/..", where
        ".." is a complete path segment, then replace that prefix with "/" in
        the input buffer and remove the last segment and its preceding "/" (if
        any) from the output buffer; otherwise, */
    else if(!strncmp("/../", clone, 4)) {
      clone+=3;
      clen-=3;
      /* remove the last segment from the output buffer */
      while(outp > out) {
        outp--;
        if(*outp == '/')
          break;
      }
      *outp = 0; /* zero-terminate where it stops */
    }
    else if(!strcmp("/..", clone)) {
      clone[2]='/';
      clone+=2;
      clen-=2;
      /* remove the last segment from the output buffer */
      while(outp > out) {
        outp--;
        if(*outp == '/')
          break;
      }
      *outp = 0; /* zero-terminate where it stops */
    }
    /*  D.  if the input buffer consists only of "." or "..", then remove
        that from the input buffer; otherwise, */
    else if(!strcmp(".", clone) || !strcmp("..", clone)) {
      *clone=0;
    }
    else {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. */
      do {
        *outp++ = *clone++;
        clen--;
      } while(*clone && (*clone != '/'));
      *outp=0;
    }
  } while(*clone);
  if(queryp) {
    size_t qlen;
    /* There was a query part, append that to the output. The 'clone' string
       may now have been altered so we copy from the original input string
       from the correct index. */
    size_t oindex = queryp - orgclone;
    qlen = strlen(&input[oindex]);
    memcpy(outp, &input[oindex], qlen+1); /* include the ending zero byte */
  }
  free(orgclone);
  return out;
 }
--- a/lib/dotdot.h
+++ b/lib/dotdot.h
@ -0,0 +1,25 @@
 #ifndef HEADER_CURL_DOTDOT_H
 #define HEADER_CURL_DOTDOT_H
 /***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
 char *Curl_dedotdotify(char *input);
 #endif
--- a/lib/url.c
+++ b/lib/url.c
@ -124,6 +124,7 @@ int curl_win32_idn_to_ascii(const char *in, char **out);
 #include "conncache.h"
 #include "multihandle.h"
 #include "pipeline.h"
 #include "dotdot.h"
 #define _MPRINTF_REPLACE /* use our functions only */
 #include <curl/mprintf.h>
@ -3674,7 +3675,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
  char protobuf[16];
  const char *protop;
  CURLcode result;
-  bool fix_slash = FALSE;
+  bool rebuild_url = FALSE;
  *prot_missing = FALSE;
@ -3825,14 +3826,14 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
    memcpy(path+1, query, hostlen);
    path[0]='/'; /* prepend the missing slash */
-    fix_slash = TRUE;
+    rebuild_url = TRUE;
    *query=0; /* now cut off the hostname at the ? */
  }
  else if(!path[0]) {
    /* if there's no path set, use a single slash */
    strcpy(path, "/");
-    fix_slash = TRUE;
+    rebuild_url = TRUE;
  }
  /* If the URL is malformatted (missing a '/' after hostname before path) we
@ -3845,17 +3846,30 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
       is bigger than the path. Use +1 to move the zero byte too. */
    memmove(&path[1], path, strlen(path)+1);
    path[0] = '/';
-    fix_slash = TRUE;
+    rebuild_url = TRUE;
  }
  else {
    /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */
    char *newp = Curl_dedotdotify(path);
    if(strcmp(newp, path)) {
      rebuild_url = TRUE;
      free(data->state.pathbuffer);
      data->state.pathbuffer = newp;
      data->state.path = newp;
      path = newp;
    }
    else
      free(newp);
  }
  /*
-   * "fix_slash" means that the URL was malformatted so we need to generate an
+   * "rebuild_url" means that one or more URL components have been modified so
-   * updated version with the new slash inserted at the right place!  We need
+   * we need to generate an updated full version.  We need the corrected URL
-   * the corrected URL when communicating over HTTP proxy and we don't know at
+   * when communicating over HTTP proxy and we don't know at this point if
-   * this point if we're using a proxy or not.
+   * we're using a proxy or not.
   */
-  if(fix_slash) {
+  if(rebuild_url) {
    char *reurl;
    size_t plen = strlen(path); /* new path, should be 1 byte longer than
@ -3878,6 +3892,8 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
      data->change.url_alloc = FALSE;
    }
    infof(data, "Rebuilt URL to: %s\n", reurl);
    data->change.url = reurl;
    data->change.url_alloc = TRUE; /* free this later */
  }
--- a/tests/data/Makefile.am
+++ b/tests/data/Makefile.am
@ -93,7 +93,7 @@ test1200 test1201 test1202 test1203 test1204 test1205 test1206 test1207 \
 test1208 test1209 test1210 test1211 test1212 test1213 test1214 test1215 \
 test1216 test1217 test1218 test1219 \
 test1220 test1221 test1222 test1223 test1224 test1225 test1226 test1227 \
-test1228 test1229 test1230 \
+test1228 test1229 test1230 test1231 \
 \
 test1300 test1301 test1302 test1303 test1304 test1305 test1306 test1307 \
 test1308 test1309 test1310 test1311 test1312 test1313 test1314 test1315 \
--- a/tests/data/test1231
+++ b/tests/data/test1231
@ -0,0 +1,61 @@
 <testcase>
 <info>
 <keywords>
 HTTP
 HTTP GET
 dotdot removal
 </keywords>
 </info>
 #
 # Server-side
 <reply name="1">
 <data>
 HTTP/1.1 200 OK
 Content-Length: 6
 Connection: close
 -foo-
 </data>
 <data1>
 HTTP/1.1 200 OK
 Content-Length: 7
 Connection: close
 -cool-
 </data1>
 </reply>
 #
 # Client-side
 <client>
 <server>
 http
 </server>
 <name>
 HTTP URL with dotdot removal from path
 </name>
 <command>
 http://%HOSTIP:%HTTPPORT/../../hej/but/who/../1231?stupid=me/../1231#soo/../1231 http://%HOSTIP:%HTTPPORT/../../hej/but/who/../12310001#/../12310001
 </command>
 </client>
 #
 # Verify data after the test has been "shot"
 <verify>
 <strip>
 ^User-Agent:.*
 </strip>
 <protocol>
 GET /hej/but/1231?stupid=me/../1231 HTTP/1.1
 Host: %HOSTIP:%HTTPPORT
 Accept: */*
 GET /hej/but/12310001 HTTP/1.1
 Host: %HOSTIP:%HTTPPORT
 Accept: */*
 </protocol>
 </verify>
 </testcase>
--- a/tests/data/test1395
+++ b/tests/data/test1395
@ -0,0 +1,26 @@
 <testcase>
 <info>
 <keywords>
 unittest
 </keywords>
 </info>
 #
 # Client-side
 <client>
 <server>
 none
 </server>
 <features>
 unittest
 </features>
 <name>
 Curl_dedotdotify
 </name>
 <tool>
 unit1395
 </tool>
 </client>
 </testcase>
--- a/tests/unit/Makefile.inc
+++ b/tests/unit/Makefile.inc
@ -6,7 +6,7 @@ UNITFILES = curlcheck.h \
 # These are all unit test programs
 UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \
- unit1308 unit1309 unit1330 unit1394 unit1396
+ unit1308 unit1309 unit1330 unit1394 unit1395 unit1396
 unit1300_SOURCES = unit1300.c $(UNITFILES)
 unit1300_CPPFLAGS = $(AM_CPPFLAGS)
@ -44,5 +44,8 @@ unit1394_LDADD = @LIBMETALINK_LIBS@ $(top_builddir)/lib/libcurl.la @LIBCURL_LIBS
 unit1394_LDFLAGS = @LIBMETALINK_LDFLAGS@ $(top_builddir)/src/libcurltool.la
 unit1394_LIBS =
 unit1395_SOURCES = unit1395.c $(UNITFILES)
 unit1395_CPPFLAGS = $(AM_CPPFLAGS)
 unit1396_SOURCES = unit1396.c $(UNITFILES)
 unit1396_CPPFLAGS = $(AM_CPPFLAGS)
--- a/tests/unit/unit1395.c
+++ b/tests/unit/unit1395.c
@ -0,0 +1,87 @@
 /***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at http://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
 #include "curlcheck.h"
 #include "dotdot.h"
 #include "memdebug.h"
 static CURLcode unit_setup(void)
 {
  return CURLE_OK;
 }
 static void unit_stop(void)
 {
 }
 struct dotdot {
  const char *input;
  const char *output;
 };
 UNITTEST_START
  unsigned int i;
  int fails=0;
  struct dotdot pairs[] = {
    { "/a/b/c/./../../g", "/a/g" },
    { "mid/content=5/../6", "mid/6" },
    { "/hello/../moo", "/moo" },
    { "/1/../1", "/1" },
    { "/1/./1", "/1/1" },
    { "/1/..", "/" },
    { "/1/.", "/1/" },
    { "/1/./..", "/" },
    { "/1/./../2", "/2" },
    { "/hello/1/./../2", "/hello/2" },
    { "test/this", "test/this" },
    { "test/this/../now", "test/now" },
    { "/1../moo../foo", "/1../moo../foo"},
    { "/../../moo", "/moo"},
    { "/../../moo?andnot/../yay", "/moo?andnot/../yay"},
    { "/123?foo=/./&bar=/../", "/123?foo=/./&bar=/../"},
    { "/../moo/..?what", "/?what" },
  };
  for(i=0; i < sizeof(pairs)/sizeof(pairs[0]); i++) {
    char *out = Curl_dedotdotify((char *)pairs[i].input);
    if(strcmp(out, pairs[i].output)) {
      fprintf(stderr, "Test %d: '%s' gave '%s' instead of '%s'\n",
              i, pairs[i].input, out, pairs[i].output);
      fail("Test case output mismatched");
      fails++;
    }
    else
      fprintf(stderr, "Test %d: OK\n", i);
    free(out);
  }
  return fails;
 UNITTEST_STOP