From 4520534e6d5576f0647d03d6c573c5d7d45ccf6e Mon Sep 17 00:00:00 2001 From: Jay Satiro Date: Fri, 5 Feb 2016 01:44:27 -0500 Subject: [PATCH] tool_doswin: Improve sanitization processing - Add unit test 1604 to test the sanitize_file_name function. - Use -DCURL_STATICLIB when building libcurltool for unit testing. - Better detection of reserved DOS device names. - New flags to modify sanitize behavior: SANITIZE_ALLOW_COLONS: Allow colons SANITIZE_ALLOW_PATH: Allow path separators and colons SANITIZE_ALLOW_RESERVED: Allow reserved device names SANITIZE_ALLOW_TRUNCATE: Allow truncating a long filename - Restore sanitization of banned characters from user-specified outfile. Prior to this commit sanitization of a user-specified outfile was temporarily disabled in 2b6dadc because there was no way to allow path separators and colons through while replacing other banned characters. Now in such a case we call the sanitize function with SANITIZE_ALLOW_PATH which allows path separators and colons to pass through. Closes https://github.com/curl/curl/issues/624 Reported-by: Octavio Schroeder --- src/Makefile.am | 5 +- src/tool_cb_hdr.c | 39 ++-- src/tool_doswin.c | 456 +++++++++++++++++++++++++++++++--------- src/tool_doswin.h | 24 ++- src/tool_operate.c | 9 - src/tool_operhlp.c | 12 ++ src/tool_urlglob.c | 15 ++ tests/data/test1604 | 25 +++ tests/unit/Makefile.inc | 5 +- tests/unit/unit1604.c | 327 ++++++++++++++++++++++++++++ 10 files changed, 784 insertions(+), 133 deletions(-) create mode 100644 tests/data/test1604 create mode 100644 tests/unit/unit1604.c diff --git a/src/Makefile.am b/src/Makefile.am index 05297e388..535a740a5 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -75,8 +75,9 @@ curl_DEPENDENCIES = $(top_builddir)/lib/libcurl.la # if unit tests are enabled, build a static library to link them with if BUILD_UNITTESTS noinst_LTLIBRARIES = libcurltool.la -libcurltool_la_CPPFLAGS = $(LIBMETALINK_CPPFLAGS) $(AM_CPPFLAGS) -libcurltool_la_CFLAGS = -DUNITTESTS +libcurltool_la_CPPFLAGS = $(LIBMETALINK_CPPFLAGS) $(AM_CPPFLAGS) \ + -DCURL_STATICLIB -DUNITTESTS +libcurltool_la_CFLAGS = libcurltool_la_LDFLAGS = -static $(LINKFLAGS) libcurltool_la_SOURCES = $(curl_SOURCES) endif diff --git a/src/tool_cb_hdr.c b/src/tool_cb_hdr.c index f86397a23..5be02aad2 100644 --- a/src/tool_cb_hdr.c +++ b/src/tool_cb_hdr.c @@ -115,24 +115,18 @@ size_t tool_header_cb(void *ptr, size_t size, size_t nmemb, void *userdata) */ len = (ssize_t)cb - (p - str); filename = parse_filename(p, len); - if(!filename) - return failure; - -#if defined(MSDOS) || defined(WIN32) - if(sanitize_file_name(&filename)) { - free(filename); - return failure; + if(filename) { + outs->filename = filename; + outs->alloc_filename = TRUE; + outs->is_cd_filename = TRUE; + outs->s_isreg = TRUE; + outs->fopened = FALSE; + outs->stream = NULL; + hdrcbdata->honor_cd_filename = FALSE; + break; } -#endif /* MSDOS || WIN32 */ - - outs->filename = filename; - outs->alloc_filename = TRUE; - outs->is_cd_filename = TRUE; - outs->s_isreg = TRUE; - outs->fopened = FALSE; - outs->stream = NULL; - hdrcbdata->honor_cd_filename = FALSE; - break; + else + return failure; } } @@ -207,6 +201,17 @@ static char *parse_filename(const char *ptr, size_t len) if(copy != p) memmove(copy, p, strlen(p) + 1); +#if defined(MSDOS) || defined(WIN32) + { + char *sanitized; + SANITIZEcode sc = sanitize_file_name(&sanitized, copy, 0); + Curl_safefree(copy); + if(sc) + return NULL; + copy = sanitized; + } +#endif /* MSDOS || WIN32 */ + /* in case we built debug enabled, we allow an evironment variable * named CURL_TESTDIR to prefix the given file name to put it into a * specific directory diff --git a/src/tool_doswin.c b/src/tool_doswin.c index 6cad193ce..6e1fe0ed3 100644 --- a/src/tool_doswin.c +++ b/src/tool_doswin.c @@ -85,52 +85,113 @@ __pragma(warning(pop)) # include /* _use_lfn(f) prototype */ #endif +#ifndef UNITTESTS +static SANITIZEcode truncate_dryrun(const char *path, + const size_t truncate_pos); #ifdef MSDOS -static char *msdosify(const char *file_name); +static SANITIZEcode msdosify(char **const sanitized, const char *file_name, + int flags); #endif -static char *rename_if_dos_device_name(const char *file_name); +static SANITIZEcode rename_if_reserved_dos_device_name(char **const sanitized, + const char *file_name, + int flags); +#endif /* !UNITTESTS (static declarations used if no unit tests) */ /* -Sanitize *file_name. -Success: (CURLE_OK) *file_name points to a sanitized version of the original. - This function takes ownership of the original *file_name and frees it. -Failure: (!= CURLE_OK) *file_name is unchanged. +Sanitize a file or path name. + +All banned characters are replaced by underscores, for example: +f?*foo => f__foo +f:foo::$DATA => f_foo__$DATA +f:\foo:bar => f__foo_bar +f:\foo:bar => f:\foo:bar (flag SANITIZE_ALLOW_PATH) + +This function was implemented according to the guidelines in 'Naming Files, +Paths, and Namespaces' section 'Naming Conventions'. +https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx + +Flags +----- +SANITIZE_ALLOW_COLONS: Allow colons. +Without this flag colons are sanitized. + +SANITIZE_ALLOW_PATH: Allow path separators and colons. +Without this flag path separators and colons are sanitized. + +SANITIZE_ALLOW_RESERVED: Allow reserved device names. +Without this flag a reserved device name is renamed (COM1 => _COM1) unless it's +in a UNC prefixed path. + +SANITIZE_ALLOW_TRUNCATE: Allow truncating a long filename. +Without this flag if the sanitized filename or path will be too long an error +occurs. With this flag the filename --and not any other parts of the path-- may +be truncated to at least a single character. A filename followed by an +alternate data stream (ADS) cannot be truncated in any case. + +Success: (SANITIZE_ERR_OK) *sanitized points to a sanitized copy of file_name. +Failure: (!= SANITIZE_ERR_OK) *sanitized is NULL. */ -CURLcode sanitize_file_name(char **file_name) +SANITIZEcode sanitize_file_name(char **const sanitized, const char *file_name, + int flags) { + char *p, *target; size_t len; - char *p, *sanitized; - - /* Calculate the maximum length of a filename. - FILENAME_MAX is often the same as PATH_MAX, in other words it does not - discount the path information. PATH_MAX size is calculated based on: - */ - const size_t max_filename_len = PATH_MAX - 3 - 1; - - if(!file_name || !*file_name) - return CURLE_BAD_FUNCTION_ARGUMENT; - - len = strlen(*file_name); - - if(len >= max_filename_len) - len = max_filename_len - 1; - - sanitized = malloc(len + 1); + CURLcode res; + size_t max_sanitized_len; if(!sanitized) - return CURLE_OUT_OF_MEMORY; + return SANITIZE_ERR_BAD_ARGUMENT; - strncpy(sanitized, *file_name, len); - sanitized[len] = '\0'; + *sanitized = NULL; - for(p = sanitized; *p; ++p ) { + if(!file_name) + return SANITIZE_ERR_BAD_ARGUMENT; + + if((flags & SANITIZE_ALLOW_PATH)) { +#ifndef MSDOS + if((flags & SANITIZE_ALLOW_PATH) && + file_name[0] == '\\' && file_name[1] == '\\') + /* UNC prefixed path, eg \\?\C:\foo */ + max_sanitized_len = 32767-1; + else +#endif + max_sanitized_len = PATH_MAX-1; + } + else + /* The maximum length of a filename. + FILENAME_MAX is often the same as PATH_MAX, in other words it is 260 and + does not discount the path information therefore we shouldn't use it. */ + max_sanitized_len = (PATH_MAX-1 > 255) ? 255 : PATH_MAX-1; + + len = strlen(file_name); + if(len > max_sanitized_len) { + if(!(flags & SANITIZE_ALLOW_TRUNCATE) || + truncate_dryrun(file_name, max_sanitized_len)) + return SANITIZE_ERR_INVALID_PATH; + + len = max_sanitized_len; + } + + target = malloc(len + 1); + if(!target) + return SANITIZE_ERR_OUT_OF_MEMORY; + + strncpy(target, file_name, len); + target[len] = '\0'; + + /* replace control characters and other banned characters */ + for(p = target; *p; ++p) { const char *banned; - if(1 <= *p && *p <= 31) { + + if((1 <= *p && *p <= 31) || + (!(flags & (SANITIZE_ALLOW_COLONS|SANITIZE_ALLOW_PATH)) && *p == ':') || + (!(flags & SANITIZE_ALLOW_PATH) && (*p == '/' || *p == '\\'))) { *p = '_'; continue; } - for(banned = "|<>/\\\":?*"; *banned; ++banned) { + + for(banned = "|<>\"?*"; *banned; ++banned) { if(*p == *banned) { *p = '_'; break; @@ -138,39 +199,111 @@ CURLcode sanitize_file_name(char **file_name) } } -#ifdef MSDOS - /* msdosify checks for more banned characters for MSDOS, however it allows - for some path information to pass through. since we are sanitizing only a - filename and cannot allow a path it's important this call be done in - addition to and not instead of the banned character check above. */ - p = msdosify(sanitized); - if(!p) { - free(sanitized); - return CURLE_BAD_FUNCTION_ARGUMENT; + /* remove trailing spaces and periods if not allowing paths */ + if(!(flags & SANITIZE_ALLOW_PATH) && len) { + char *clip = NULL; + + p = &target[len]; + do { + --p; + if(*p != ' ' && *p != '.') + break; + clip = p; + } while(p != target); + + if(clip) { + *clip = '\0'; + len = clip - target; + } + } + +#ifdef MSDOS + res = msdosify(&p, target, flags); + free(target); + if(res) + return res; + target = p; + len = strlen(target); + + if(len > max_sanitized_len) { + free(target); + return SANITIZE_ERR_INVALID_PATH; } - sanitized = p; - len = strlen(sanitized); #endif - p = rename_if_dos_device_name(sanitized); - if(!p) { - free(sanitized); - return CURLE_BAD_FUNCTION_ARGUMENT; - } - sanitized = p; - len = strlen(sanitized); + if(!(flags & SANITIZE_ALLOW_RESERVED)) { + res = rename_if_reserved_dos_device_name(&p, target, flags); + free(target); + if(res) + return res; + target = p; + len = strlen(target); - /* dos_device_name rename will rename a device name, possibly changing the - length. If the length is too long now we can't truncate it because we - could end up with a device name. In practice this shouldn't be a problem - because device names are short, but you never know. */ - if(len >= max_filename_len) { - free(sanitized); - return CURLE_BAD_FUNCTION_ARGUMENT; + if(len > max_sanitized_len) { + free(target); + return SANITIZE_ERR_INVALID_PATH; + } } - *file_name = sanitized; - return CURLE_OK; + *sanitized = target; + return SANITIZE_ERR_OK; +} + + +/* +Test if truncating a path to a file will leave at least a single character in +the filename. Filenames suffixed by an alternate data stream can't be +truncated. This performs a dry run, nothing is modified. + +Good truncate_pos 9: C:\foo\bar => C:\foo\ba +Good truncate_pos 6: C:\foo => C:\foo +Good truncate_pos 5: C:\foo => C:\fo +Bad* truncate_pos 5: C:foo => C:foo +Bad truncate_pos 5: C:\foo:ads => C:\fo +Bad truncate_pos 9: C:\foo:ads => C:\foo:ad +Bad truncate_pos 5: C:\foo\bar => C:\fo +Bad truncate_pos 5: C:\foo\ => C:\fo +Bad truncate_pos 7: C:\foo\ => C:\foo\ +Error truncate_pos 7: C:\foo => (pos out of range) +Bad truncate_pos 1: C:\foo\ => C + +* C:foo is ambiguous, C could end up being a drive or file therefore something + like C:superlongfilename can't be truncated. + +Returns +SANITIZE_ERR_OK: Good -- 'path' can be truncated +SANITIZE_ERR_INVALID_PATH: Bad -- 'path' cannot be truncated +!= SANITIZE_ERR_OK && != SANITIZE_ERR_INVALID_PATH: Error +*/ +SANITIZEcode truncate_dryrun(const char *path, const size_t truncate_pos) +{ + size_t len; + + if(!path) + return SANITIZE_ERR_BAD_ARGUMENT; + + len = strlen(path); + + if(truncate_pos > len) + return SANITIZE_ERR_BAD_ARGUMENT; + + if(!len || !truncate_pos) + return SANITIZE_ERR_INVALID_PATH; + + if(strpbrk(&path[truncate_pos - 1], "\\/:")) + return SANITIZE_ERR_INVALID_PATH; + + /* C:\foo can be truncated but C:\foo:ads can't */ + if(truncate_pos > 1) { + const char *p = &path[truncate_pos - 1]; + do { + --p; + if(*p == ':') + return SANITIZE_ERR_INVALID_PATH; + } while(p != path && *p != '\\' && *p != '/'); + } + + return SANITIZE_ERR_OK; } /* The functions msdosify, rename_if_dos_device_name and __crt0_glob_function @@ -180,16 +313,24 @@ CURLcode sanitize_file_name(char **file_name) /* Extra sanitization MSDOS for file_name. -Returns a copy of file_name that is sanitized by MSDOS standards. -Warning: path information may pass through. For sanitizing a filename use -sanitize_file_name which calls this function after sanitizing path info. + +This is a supporting function for sanitize_file_name. + +Warning: This is an MSDOS legacy function and was purposely written in a way +that some path information may pass through. For example drive letter names +(C:, D:, etc) are allowed to pass through. For sanitizing a filename use +sanitize_file_name. + +Success: (SANITIZE_ERR_OK) *sanitized points to a sanitized copy of file_name. +Failure: (!= SANITIZE_ERR_OK) *sanitized is NULL. */ -#ifdef MSDOS -static char *msdosify(const char *file_name) +#if defined(MSDOS) || defined(UNITTESTS) +SANITIZEcode msdosify(char **const sanitized, const char *file_name, + int flags) { char dos_name[PATH_MAX]; static const char illegal_chars_dos[] = ".+, ;=[]" /* illegal in DOS */ - "|<>\\\":?*"; /* illegal in DOS & W95 */ + "|<>/\\\":?*"; /* illegal in DOS & W95 */ static const char *illegal_chars_w95 = &illegal_chars_dos[8]; int idx, dot_idx; const char *s = file_name; @@ -198,6 +339,19 @@ static char *msdosify(const char *file_name) const char *illegal_aliens = illegal_chars_dos; size_t len = sizeof(illegal_chars_dos) - 1; + if(!sanitized) + return SANITIZE_ERR_BAD_ARGUMENT; + + *sanitized = NULL; + + if(!file_name) + return SANITIZE_ERR_BAD_ARGUMENT; + + if(strlen(file_name) > PATH_MAX-1 && + (!(flags & SANITIZE_ALLOW_TRUNCATE) || + truncate_dryrun(file_name, PATH_MAX-1))) + return SANITIZE_ERR_INVALID_PATH; + /* Support for Windows 9X VFAT systems, when available. */ if(_use_lfn(file_name)) { illegal_aliens = illegal_chars_w95; @@ -207,22 +361,35 @@ static char *msdosify(const char *file_name) /* Get past the drive letter, if any. */ if(s[0] >= 'A' && s[0] <= 'z' && s[1] == ':') { *d++ = *s++; - *d++ = *s++; + *d = ((flags & (SANITIZE_ALLOW_COLONS|SANITIZE_ALLOW_PATH))) ? ':' : '_'; + ++d, ++s; } for(idx = 0, dot_idx = -1; *s && d < dlimit; s++, d++) { if(memchr(illegal_aliens, *s, len)) { + + if((flags & (SANITIZE_ALLOW_COLONS|SANITIZE_ALLOW_PATH)) && *s == ':') + *d = ':'; + else if((flags & SANITIZE_ALLOW_PATH) && (*s == '/' || *s == '\\')) + *d = *s; /* Dots are special: DOS doesn't allow them as the leading character, and a file name cannot have more than a single dot. We leave the first non-leading dot alone, unless it comes too close to the beginning of the name: we want sh.lex.c to become sh_lex.c, not sh.lex-c. */ - if(*s == '.') { - if(idx == 0 && (s[1] == '/' || (s[1] == '.' && s[2] == '/'))) { + else if(*s == '.') { + if((flags & SANITIZE_ALLOW_PATH) && idx == 0 && + (s[1] == '/' || s[1] == '\\' || + (s[1] == '.' && (s[2] == '/' || s[2] == '\\')))) { /* Copy "./" and "../" verbatim. */ *d++ = *s++; - if(*s == '.') + if(d == dlimit) + break; + if(*s == '.') { *d++ = *s++; + if(d == dlimit) + break; + } *d = *s; } else if(idx == 0) @@ -244,12 +411,22 @@ static char *msdosify(const char *file_name) else if(*s == '+' && s[1] == '+') { if(idx - 2 == dot_idx) { /* .c++, .h++ etc. */ *d++ = 'x'; + if(d == dlimit) + break; *d = 'x'; } else { /* libg++ etc. */ - memcpy (d, "plus", 4); - d += 3; + if(dlimit - d < 4) { + *d++ = 'x'; + if(d == dlimit) + break; + *d = 'x'; + } + else { + memcpy (d, "plus", 4); + d += 3; + } } s++; idx++; @@ -259,56 +436,90 @@ static char *msdosify(const char *file_name) } else *d = *s; - if(*s == '/') { + if(*s == '/' || *s == '\\') { idx = 0; dot_idx = -1; } else idx++; } - *d = '\0'; - return strdup(dos_name); + + if(*s) { + /* dos_name is truncated, check that truncation requirements are met, + specifically truncating a filename suffixed by an alternate data stream + or truncating the entire filename is not allowed. */ + if(!(flags & SANITIZE_ALLOW_TRUNCATE) || strpbrk(s, "\\/:") || + truncate_dryrun(dos_name, d - dos_name)) + return SANITIZE_ERR_INVALID_PATH; + } + + *sanitized = strdup(dos_name); + return (*sanitized ? SANITIZE_ERR_OK : SANITIZE_ERR_OUT_OF_MEMORY); } -#endif +#endif /* MSDOS || UNITTESTS */ /* -Rename file_name if it's a representation of a device name. -Returns a copy of file_name, and the copy will have contents different from the -original if a device name was found. +Rename file_name if it's a reserved dos device name. + +This is a supporting function for sanitize_file_name. + +Warning: This is an MSDOS legacy function and was purposely written in a way +that some path information may pass through. For example drive letter names +(C:, D:, etc) are allowed to pass through. For sanitizing a filename use +sanitize_file_name. + +Success: (SANITIZE_ERR_OK) *sanitized points to a sanitized copy of file_name. +Failure: (!= SANITIZE_ERR_OK) *sanitized is NULL. */ -static char *rename_if_dos_device_name(const char *file_name) +SANITIZEcode rename_if_reserved_dos_device_name(char **const sanitized, + const char *file_name, + int flags) { /* We could have a file whose name is a device on MS-DOS. Trying to * retrieve such a file would fail at best and wedge us at worst. We need * to rename such files. */ char *p, *base; - struct_stat st_buf; char fname[PATH_MAX]; +#ifdef MSDOS + struct_stat st_buf; +#endif + + if(!sanitized) + return SANITIZE_ERR_BAD_ARGUMENT; + + *sanitized = NULL; + + if(!file_name) + return SANITIZE_ERR_BAD_ARGUMENT; + + /* Ignore UNC prefixed paths, they are allowed to contain a reserved name. */ +#ifndef MSDOS + if((flags & SANITIZE_ALLOW_PATH) && + file_name[0] == '\\' && file_name[1] == '\\') { + size_t len = strlen(file_name); + *sanitized = malloc(len + 1); + if(!*sanitized) + return SANITIZE_ERR_OUT_OF_MEMORY; + strncpy(*sanitized, file_name, len + 1); + return SANITIZE_ERR_OK; + } +#endif + + if(strlen(file_name) > PATH_MAX-1 && + (!(flags & SANITIZE_ALLOW_TRUNCATE) || + truncate_dryrun(file_name, PATH_MAX-1))) + return SANITIZE_ERR_INVALID_PATH; strncpy(fname, file_name, PATH_MAX-1); fname[PATH_MAX-1] = '\0'; base = basename(fname); - if(((stat(base, &st_buf)) == 0) && (S_ISCHR(st_buf.st_mode))) { - size_t blen = strlen(base); - if(strlen(fname) == PATH_MAX-1) { - /* Make room for the '_' */ - blen--; - base[blen] = '\0'; - } - /* Prepend a '_'. */ - memmove(base + 1, base, blen + 1); - base[0] = '_'; - } - - /* The above stat check does not identify devices for me in Windows 7. For - example a stat on COM1 returns a regular file S_IFREG. According to MSDN - stat doc that is the correct behavior, so I assume the above code is - legacy, maybe MSDOS or DJGPP specific? */ - - /* Rename devices. - Examples: CON => _CON, CON.EXT => CON_EXT, CON:ADS => CON_ADS */ + /* Rename reserved device names that are known to be accessible without \\.\ + Examples: CON => _CON, CON.EXT => CON_EXT, CON:ADS => CON_ADS + https://support.microsoft.com/en-us/kb/74496 + https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247.aspx + */ for(p = fname; p; p = (p == fname && fname != base ? base : NULL)) { size_t p_len; int x = (curl_strnequal(p, "CON", 3) || @@ -323,31 +534,70 @@ static char *rename_if_dos_device_name(const char *file_name) continue; /* the devices may be accessible with an extension or ADS, for - example CON.AIR and CON:AIR both access console */ - if(p[x] == '.' || p[x] == ':') { + example CON.AIR and 'CON . AIR' and CON:AIR access console */ + + for(; p[x] == ' '; ++x) + ; + + if(p[x] == '.') { p[x] = '_'; continue; } + else if(p[x] == ':') { + if(!(flags & (SANITIZE_ALLOW_COLONS|SANITIZE_ALLOW_PATH))) { + p[x] = '_'; + continue; + } + ++x; + } else if(p[x]) /* no match */ continue; + /* p points to 'CON' or 'CON ' or 'CON:', etc */ p_len = strlen(p); + /* Prepend a '_' */ if(strlen(fname) == PATH_MAX-1) { - /* Make room for the '_' */ - p_len--; + --p_len; + if(!(flags & SANITIZE_ALLOW_TRUNCATE) || truncate_dryrun(p, p_len)) + return SANITIZE_ERR_INVALID_PATH; p[p_len] = '\0'; } - /* Prepend a '_'. */ memmove(p + 1, p, p_len + 1); p[0] = '_'; + ++p_len; /* if fname was just modified then the basename pointer must be updated */ if(p == fname) base = basename(fname); } - return strdup(fname); + /* This is the legacy portion from rename_if_dos_device_name that checks for + reserved device names. It only works on MSDOS. On Windows XP the stat + check errors with EINVAL if the device name is reserved. On Windows + Vista/7/8 it sets mode S_IFREG (regular file or device). According to MSDN + stat doc the latter behavior is correct, but that doesn't help us identify + whether it's a reserved device name and not a regular file name. */ +#ifdef MSDOS + if(base && ((stat(base, &st_buf)) == 0) && (S_ISCHR(st_buf.st_mode))) { + /* Prepend a '_' */ + size_t blen = strlen(base); + if(blen) { + if(strlen(fname) == PATH_MAX-1) { + --blen; + if(!(flags & SANITIZE_ALLOW_TRUNCATE) || truncate_dryrun(base, blen)) + return SANITIZE_ERR_INVALID_PATH; + base[blen] = '\0'; + } + memmove(base + 1, base, blen + 1); + base[0] = '_'; + ++blen; + } + } +#endif + + *sanitized = strdup(fname); + return (*sanitized ? SANITIZE_ERR_OK : SANITIZE_ERR_OUT_OF_MEMORY); } #if defined(MSDOS) && (defined(__DJGPP__) || defined(__GO32__)) diff --git a/src/tool_doswin.h b/src/tool_doswin.h index 3607e9035..f649ef023 100644 --- a/src/tool_doswin.h +++ b/src/tool_doswin.h @@ -25,7 +25,29 @@ #if defined(MSDOS) || defined(WIN32) -CURLcode sanitize_file_name(char **filename); +#define SANITIZE_ALLOW_COLONS (1<<0) /* Allow colons */ +#define SANITIZE_ALLOW_PATH (1<<1) /* Allow path separators and colons */ +#define SANITIZE_ALLOW_RESERVED (1<<2) /* Allow reserved device names */ +#define SANITIZE_ALLOW_TRUNCATE (1<<3) /* Allow truncating a long filename */ + +typedef enum { + SANITIZE_ERR_OK = 0, /* 0 - OK */ + SANITIZE_ERR_INVALID_PATH, /* 1 - the path is invalid */ + SANITIZE_ERR_BAD_ARGUMENT, /* 2 - bad function parameter */ + SANITIZE_ERR_OUT_OF_MEMORY, /* 3 - out of memory */ + SANITIZE_ERR_LAST /* never use! */ +} SANITIZEcode; + +SANITIZEcode sanitize_file_name(char **const sanitized, const char *file_name, + int flags); +#ifdef UNITTESTS +SANITIZEcode truncate_dryrun(const char *path, const size_t truncate_pos); +SANITIZEcode msdosify(char **const sanitized, const char *file_name, + int flags); +SANITIZEcode rename_if_reserved_dos_device_name(char **const sanitized, + const char *file_name, + int flags); +#endif /* UNITTESTS */ #if defined(MSDOS) && (defined(__DJGPP__) || defined(__GO32__)) diff --git a/src/tool_operate.c b/src/tool_operate.c index b8ce2c45c..69d60769d 100644 --- a/src/tool_operate.c +++ b/src/tool_operate.c @@ -543,15 +543,6 @@ static CURLcode operate_do(struct GlobalConfig *global, result = get_url_file_name(&outfile, this_url); if(result) goto show_error; - -#if defined(MSDOS) || defined(WIN32) - result = sanitize_file_name(&outfile); - if(result) { - Curl_safefree(outfile); - goto show_error; - } -#endif /* MSDOS || WIN32 */ - if(!*outfile && !config->content_disposition) { helpf(global->errors, "Remote file name has no length!\n"); result = CURLE_WRITE_ERROR; diff --git a/src/tool_operhlp.c b/src/tool_operhlp.c index 387dcb628..fb344f65d 100644 --- a/src/tool_operhlp.c +++ b/src/tool_operhlp.c @@ -29,6 +29,7 @@ #include "tool_cfgable.h" #include "tool_convert.h" +#include "tool_doswin.h" #include "tool_operhlp.h" #include "tool_metalink.h" @@ -151,6 +152,17 @@ CURLcode get_url_file_name(char **filename, const char *url) if(!*filename) return CURLE_OUT_OF_MEMORY; +#if defined(MSDOS) || defined(WIN32) + { + char *sanitized; + SANITIZEcode sc = sanitize_file_name(&sanitized, *filename, 0); + Curl_safefree(*filename); + if(sc) + return CURLE_URL_MALFORMAT; + *filename = sanitized; + } +#endif /* MSDOS || WIN32 */ + /* in case we built debug enabled, we allow an environment variable * named CURL_TESTDIR to prefix the given file name to put it into a * specific directory diff --git a/src/tool_urlglob.c b/src/tool_urlglob.c index 0b714af80..39cb32d6c 100644 --- a/src/tool_urlglob.c +++ b/src/tool_urlglob.c @@ -24,6 +24,8 @@ #define ENABLE_CURLX_PRINTF /* use our own printf() functions */ #include "curlx.h" +#include "tool_cfgable.h" +#include "tool_doswin.h" #include "tool_urlglob.h" #include "tool_vms.h" @@ -666,6 +668,19 @@ CURLcode glob_match_url(char **result, char *filename, URLGlob *glob) stringlen += appendlen; } target[stringlen]= '\0'; + +#if defined(MSDOS) || defined(WIN32) + { + char *sanitized; + SANITIZEcode sc = sanitize_file_name(&sanitized, target, + SANITIZE_ALLOW_PATH); + Curl_safefree(target); + if(sc) + return CURLE_URL_MALFORMAT; + target = sanitized; + } +#endif /* MSDOS || WIN32 */ + *result = target; return CURLE_OK; } diff --git a/tests/data/test1604 b/tests/data/test1604 new file mode 100644 index 000000000..cf207750d --- /dev/null +++ b/tests/data/test1604 @@ -0,0 +1,25 @@ + + + +unittest + + + +# +# Client-side + + +none + + +unittest + + +Test WIN32/MSDOS filename sanitization + + +unit1604 + + + + diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc index 056a8fbf2..c5a152373 100644 --- a/tests/unit/Makefile.inc +++ b/tests/unit/Makefile.inc @@ -7,7 +7,7 @@ UNITFILES = curlcheck.h \ # These are all unit test programs UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \ unit1308 unit1309 unit1330 unit1394 unit1395 unit1396 unit1397 unit1398 \ - unit1600 unit1601 unit1602 unit1603 + unit1600 unit1601 unit1602 unit1603 unit1604 unit1300_SOURCES = unit1300.c $(UNITFILES) unit1300_CPPFLAGS = $(AM_CPPFLAGS) @@ -69,3 +69,6 @@ unit1602_CPPFLAGS = $(AM_CPPFLAGS) unit1603_SOURCES = unit1603.c $(UNITFILES) unit1603_CPPFLAGS = $(AM_CPPFLAGS) +unit1604_SOURCES = unit1604.c $(UNITFILES) +unit1604_CPPFLAGS = $(AM_CPPFLAGS) $(LIBMETALINK_CPPFLAGS) + diff --git a/tests/unit/unit1604.c b/tests/unit/unit1604.c new file mode 100644 index 000000000..218c627fd --- /dev/null +++ b/tests/unit/unit1604.c @@ -0,0 +1,327 @@ +/*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | + * / __| | | | |_) | | + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * + * Copyright (C) 1998 - 2016, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms + * are also available at http://curl.haxx.se/docs/copyright.html. + * + * You may opt to use, copy, modify, merge, publish, distribute and/or sell + * copies of the Software, and permit persons to whom the Software is + * furnished to do so, under the terms of the COPYING file. + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY + * KIND, either express or implied. + * + ***************************************************************************/ +#include "curlcheck.h" + +#include "tool_cfgable.h" +#include "tool_doswin.h" + +#include +#include +#include + +#include "memdebug.h" /* LAST include file */ + +static CURLcode unit_setup(void) +{ + return SANITIZE_ERR_OK; +} + +static void unit_stop(void) +{ + +} + +#if defined(MSDOS) || defined(WIN32) + +static char *getflagstr(int flags) { + char *buf = malloc(256); + fail_unless(buf, "out of memory"); + sprintf(buf, "%s,%s,%s,%s", + ((flags & SANITIZE_ALLOW_COLONS) ? "SANITIZE_ALLOW_COLONS" : ""), + ((flags & SANITIZE_ALLOW_PATH) ? "SANITIZE_ALLOW_PATH" : ""), + ((flags & SANITIZE_ALLOW_RESERVED) ? "SANITIZE_ALLOW_RESERVED" : ""), + ((flags & SANITIZE_ALLOW_TRUNCATE) ? "SANITIZE_ALLOW_TRUNCATE" : "")); + return buf; +} + +static char *getcurlcodestr(int cc) { + char *buf = malloc(256); + fail_unless(buf, "out of memory"); + sprintf(buf, "%s (%d)", + (cc == SANITIZE_ERR_OK ? "SANITIZE_ERR_OK" : + cc == SANITIZE_ERR_BAD_ARGUMENT ? "SANITIZE_ERR_BAD_ARGUMENT" : + cc == SANITIZE_ERR_INVALID_PATH ? "SANITIZE_ERR_INVALID_PATH" : + cc == SANITIZE_ERR_OUT_OF_MEMORY ? "SANITIZE_ERR_OUT_OF_MEMORY" : + "unexpected error code - add name"), + cc); + return buf; +} + +struct data { + const char *input; + int flags; + const char *expected_output; + CURLcode expected_result; +}; + +UNITTEST_START + +{ /* START sanitize_file_name */ + struct data data[] = { + { "", 0, + "", SANITIZE_ERR_OK + }, + { "normal filename", 0, + "normal filename", SANITIZE_ERR_OK + }, + { "control\tchar", 0, + "control_char", SANITIZE_ERR_OK + }, + { "banned*char", 0, + "banned_char", SANITIZE_ERR_OK + }, + { "f:foo", 0, + "f_foo", SANITIZE_ERR_OK + }, + { "f:foo", SANITIZE_ALLOW_COLONS, + "f:foo", SANITIZE_ERR_OK + }, + { "f:foo", SANITIZE_ALLOW_PATH, + "f:foo", SANITIZE_ERR_OK + }, + { "f:\\foo", 0, + "f__foo", SANITIZE_ERR_OK + }, + { "f:\\foo", SANITIZE_ALLOW_PATH, + "f:\\foo", SANITIZE_ERR_OK + }, + { "f:/foo", 0, + "f__foo", SANITIZE_ERR_OK + }, + { "f:/foo", SANITIZE_ALLOW_PATH, + "f:/foo", SANITIZE_ERR_OK + }, + { "foo:bar", 0, + "foo_bar", SANITIZE_ERR_OK + }, + { "foo|<>/bar\\\":?*baz", 0, + "foo____bar_____baz", SANITIZE_ERR_OK + }, + { "f:foo::$DATA", 0, + "f_foo__$DATA", SANITIZE_ERR_OK + }, + { "con . air", 0, + "con _ air", SANITIZE_ERR_OK + }, + { "con.air", 0, + "con_air", SANITIZE_ERR_OK + }, + { "con:/x", 0, + "con__x", SANITIZE_ERR_OK + }, + { "file . . . . .. .", 0, + "file", SANITIZE_ERR_OK + }, + { "foo . . ? . . ", 0, + "foo . . _", SANITIZE_ERR_OK + }, + { "com1", 0, + "_com1", SANITIZE_ERR_OK + }, + { "com1", SANITIZE_ALLOW_RESERVED, + "com1", SANITIZE_ERR_OK + }, + { "f:\\com1", 0, + "f__com1", SANITIZE_ERR_OK + }, + { "f:\\com1", SANITIZE_ALLOW_PATH, + "f:\\_com1", SANITIZE_ERR_OK + }, + { "f:\\com1", SANITIZE_ALLOW_RESERVED, + "f__com1", SANITIZE_ERR_OK + }, + { "f:\\com1", SANITIZE_ALLOW_RESERVED | SANITIZE_ALLOW_COLONS, + "f:_com1", SANITIZE_ERR_OK + }, + { "f:\\com1", SANITIZE_ALLOW_RESERVED | SANITIZE_ALLOW_PATH, + "f:\\com1", SANITIZE_ERR_OK + }, + { "com1:\\com1", SANITIZE_ALLOW_PATH, + "_com1:\\_com1", SANITIZE_ERR_OK + }, + { "com1:\\com1", SANITIZE_ALLOW_RESERVED | SANITIZE_ALLOW_PATH, + "com1:\\com1", SANITIZE_ERR_OK + }, + { "com1:\\com1", SANITIZE_ALLOW_RESERVED, + "com1__com1", SANITIZE_ERR_OK + }, + { "CoM1", 0, + "_CoM1", SANITIZE_ERR_OK + }, + { "CoM1", SANITIZE_ALLOW_RESERVED, + "CoM1", SANITIZE_ERR_OK + }, + { "COM56", 0, + "COM56", SANITIZE_ERR_OK + }, + /* At the moment we expect a maximum path length of 259. I assume MSDOS + has variable max path lengths depending on compiler that are shorter + so currently these "good" truncate tests won't run on MSDOS */ +#ifndef MSDOS + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE, + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFFFF", SANITIZE_ERR_OK + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFF\\FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE | SANITIZE_ALLOW_PATH, + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFF\\FFFFF", SANITIZE_ERR_OK + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFF\\FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE, + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFF_F", SANITIZE_ERR_OK + }, +#endif /* !MSDOS */ + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + 0, + NULL, SANITIZE_ERR_INVALID_PATH + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFFF\\FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE, + NULL, SANITIZE_ERR_INVALID_PATH + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFFFFFFFFFFFFFFFFFFFFFFFF\\FFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE | SANITIZE_ALLOW_PATH, + NULL, SANITIZE_ERR_INVALID_PATH + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FFF\\FFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE | SANITIZE_ALLOW_PATH, + NULL, SANITIZE_ERR_INVALID_PATH + }, + { "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA" + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB" + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC" + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD" + "EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE" + "FF\\F:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + SANITIZE_ALLOW_TRUNCATE | SANITIZE_ALLOW_PATH, + NULL, SANITIZE_ERR_INVALID_PATH + }, + { NULL, 0, + NULL, SANITIZE_ERR_BAD_ARGUMENT + }, + }; + + size_t i; + + for(i = 0; i < sizeof data / sizeof data[0]; ++i) { + char *output = NULL; + char *flagstr = NULL; + char *received_ccstr = NULL; + char *expected_ccstr = NULL; + + CURLcode res = sanitize_file_name(&output, data[i].input, data[i].flags); + + if(res == data[i].expected_result && + ((!output && !data[i].expected_output) || + (output && data[i].expected_output && + !strcmp(output, data[i].expected_output)))) { /* OK */ + free(output); + continue; + } + + flagstr = getflagstr(data[i].flags); + received_ccstr = getcurlcodestr(res); + expected_ccstr = getcurlcodestr(data[i].expected_result); + + unitfail++; + fprintf(stderr, "\n" + "%s:%d sanitize_file_name failed.\n" + "input: %s\n" + "flags: %s\n" + "output: %s\n" + "result: %s\n" + "expected output: %s\n" + "expected result: %s\n", + __FILE__, __LINE__, + data[i].input, + flagstr, + (output ? output : "(null)"), + received_ccstr, + (data[i].expected_output ? data[i].expected_output : "(null)"), + expected_ccstr); + + free(output); + free(flagstr); + free(received_ccstr); + free(expected_ccstr); + } +} /* END sanitize_file_name */ + +#else +UNITTEST_START + +{ + fprintf(stderr, "Skipped test not for this platform\n"); +} +#endif /* MSDOS || WIN32 */ + +UNITTEST_STOP