curl_multibyte: always return a heap-allocated copy of string

- Change the Windows char <-> UTF-8 conversion functions to return an allocated copy of the passed in string instead of the original. Prior to this change the curlx_convert_ functions would, as what I assume was an optimization, not make a copy of the passed in string if no conversion was required. No conversion is required in non-UNICODE Windows builds since our tchar strings are type char and remain in whatever the passed in encoding is, which is assumed to be UTF-8 but may be other encoding. In contrast the UNICODE Windows builds require conversion (wchar <-> char) and do return a copy. That inconsistency could lead to programming errors where the developer expects a copy, and does not realize that won't happen in all cases. Closes https://github.com/curl/curl/pull/6602
2021-02-13 00:51:28 -05:00 · 2021-02-13 00:51:28 -05:00 · 09363500b9
parent f65d7889b5
commit 09363500b9
2 changed files with 25 additions and 22 deletions
--- a/lib/curl_multibyte.c
+++ b/lib/curl_multibyte.c
@ -21,7 +21,11 @@
 ***************************************************************************/

 /*
- * This file is 'mem-include-scan' clean. See test 1132.
+ * This file is 'mem-include-scan' clean, which means memdebug.h and
+ * curl_memory.h are purposely not included in this file. See test 1132.
+ *
+ * The functions in this file are curlx functions which are not tracked by the
+ * curl memory tracker memdebug.
 */

 #include "curl_setup.h"
--- a/lib/curl_multibyte.h
+++ b/lib/curl_multibyte.h
@ -31,7 +31,6 @@

 wchar_t *curlx_convert_UTF8_to_wchar(const char *str_utf8);
 char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
-
 #endif /* WIN32 */

 /*
@ -40,29 +39,23 @@ char *curlx_convert_wchar_to_UTF8(const wchar_t *str_w);
 * preprocessor conditional directives needed by code using these
 * to differentiate UNICODE from non-UNICODE builds.
 *
- * When building with UNICODE defined, these two macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return a pointer to a newly allocated memory area holding result.
- * When the result is no longer needed, allocated memory is intended
- * to be free'ed with curlx_unicodefree().
+ * In the case of a non-UNICODE build the tchar strings are char strings that
+ * are duplicated via strdup and remain in whatever the passed in encoding is,
+ * which is assumed to be UTF-8 but may be other encoding. Therefore the
+ * significance of the conversion functions is primarily for UNICODE builds.
 *
- * When building without UNICODE defined, this macros
- * curlx_convert_UTF8_to_tchar() and curlx_convert_tchar_to_UTF8()
- * return the pointer received as argument. curlx_unicodefree() does
- * no actual free'ing of this pointer it is simply set to NULL.
+ * Allocated memory should be free'd with curlx_unicodefree().
+ *
+ * Note: Because these are curlx functions their memory usage is not tracked
+ * by the curl memory tracker memdebug. You'll notice that curlx function-like
+ * macros call free and strdup in parentheses, eg (strdup)(ptr), and that's to
+ * ensure that the curl memdebug override macros do not replace them.
 */

 #if defined(UNICODE) && defined(WIN32)

 #define curlx_convert_UTF8_to_tchar(ptr) curlx_convert_UTF8_to_wchar((ptr))
 #define curlx_convert_tchar_to_UTF8(ptr) curlx_convert_wchar_to_UTF8((ptr))
-#define curlx_unicodefree(ptr)                          \
-  do {                                                  \
-    if(ptr) {                                           \
-      (free)(ptr);                                        \
-      (ptr) = NULL;                                     \
-    }                                                   \
-  } while(0)

 typedef union {
  unsigned short       *tchar_ptr;
@ -73,10 +66,8 @@ typedef union {

 #else

-#define curlx_convert_UTF8_to_tchar(ptr) (ptr)
-#define curlx_convert_tchar_to_UTF8(ptr) (ptr)
-#define curlx_unicodefree(ptr) \
-  do {(ptr) = NULL;} while(0)
+#define curlx_convert_UTF8_to_tchar(ptr) (strdup)(ptr)
+#define curlx_convert_tchar_to_UTF8(ptr) (strdup)(ptr)

 typedef union {
  char                *tchar_ptr;
@ -87,4 +78,12 @@ typedef union {

 #endif /* UNICODE && WIN32 */

+#define curlx_unicodefree(ptr)                          \
+  do {                                                  \
+    if(ptr) {                                           \
+      (free)(ptr);                                      \
+      (ptr) = NULL;                                     \
+    }                                                   \
+  } while(0)
+
 #endif /* HEADER_CURL_MULTIBYTE_H */