From 1230b724ca76d4313c16f4c781b696c20971dabf Mon Sep 17 00:00:00 2001 From: hniksic Date: Thu, 5 May 2005 06:47:20 -0700 Subject: [PATCH] [svn] Document what is meant by "non-printable" in the context of escnonprint. --- src/ChangeLog | 6 +++++ src/log.c | 65 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/src/ChangeLog b/src/ChangeLog index 908fc155..25e41d94 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,9 @@ +2005-05-05 Hrvoje Niksic + + * log.c (copy_and_escape): Replace the FOR_URI argument with a + slightly more general mechanism for specifying different kinds of + escape. + 2005-05-05 Hrvoje Niksic * ftp-basic.c (ftp_response): Fix printing FTP server response. diff --git a/src/log.c b/src/log.c index 27d2cd70..196325c9 100644 --- a/src/log.c +++ b/src/log.c @@ -635,46 +635,65 @@ count_nonprint (const char *source) return cnt; } -/* Copy SOURCE to DEST, escaping non-printable characters. If FOR_URI - is 0, they are escaped as \ooo; otherwise, they are escaped as - %xx. +/* Copy SOURCE to DEST, escaping non-printable characters. + + Non-printable refers to anything outside the non-control ASCII + range (32-126) which means that, for example, CR, LF, and TAB are + considered non-printable along with ESC and other control chars. + This is by design: it makes sure that messages from remote servers + cannot be used to deceive the users by mimicking Wget's output. + Disallowing non-ASCII characters is another necessary security + measure, which makes sure that remote servers cannot garble the + screen or guess the local charset and perform homographic attacks. + + Of course, the above means that escnonprint must only be used in + decidedly ASCII-only context, such as when printing host names, + responses from HTTP headers, messages coming from FTP servers, and + the like. + + ESCAPE is the character used to introduce the escape sequence. + BASE should be the base of the escape sequence, and must be either + 8 for octal or 16 for hex. DEST must point to a location with sufficient room to store an encoded version of SOURCE. */ static void -copy_and_escape (const char *source, char *dest, int for_uri) +copy_and_escape (const char *source, char *dest, char escape, int base) { const char *from; char *to; - /* Copy the string, escaping non-printable chars. */ - if (!for_uri) + /* Copy the string from SOURCE to DEST, escaping non-printable chars. */ + switch (base) { + case 8: for (from = source, to = dest; *from; from++) if (ISPRINT (*from)) *to++ = *from; else { const unsigned char c = *from; - *to++ = '\\'; + *to++ = escape; *to++ = '0' + (c >> 6); *to++ = '0' + ((c >> 3) & 7); *to++ = '0' + (c & 7); } - } - else - { + break; + case 16: for (from = source, to = dest; *from; from++) if (ISPRINT (*from)) *to++ = *from; else { const unsigned char c = *from; - *to++ = '%'; + *to++ = escape; *to++ = XNUM_TO_DIGIT (c >> 4); *to++ = XNUM_TO_DIGIT (c & 0xf); } + break; + default: + abort (); } *to = '\0'; } @@ -687,10 +706,12 @@ struct ringel { static struct ringel ring[RING_SIZE]; /* ring data */ static const char * -escnonprint_internal (const char *str, int for_uri) +escnonprint_internal (const char *str, char escape, int base) { static int ringpos; /* current ring position */ + assert (base == 8 || base == 16); + int nprcnt = count_nonprint (str); if (nprcnt == 0) /* If there are no non-printable chars in STR, don't bother @@ -702,11 +723,11 @@ escnonprint_internal (const char *str, int for_uri) simply r->X instead of ring[ringpos].X. */ struct ringel *r = ring + ringpos; - /* Every non-printable character is replaced with "\ooo", - i.e. with three *additional* chars (two in URI-mode). Size - must also include the length of the original string and an + /* Every non-printable character is replaced with the escape char + and three (or two, depending on BASE) *additional* chars. Size + must also include the length of the original string and one additional char for the terminating \0. */ - int needed_size = strlen (str) + 1 + (for_uri ? 2 * nprcnt : 3 * nprcnt); + int needed_size = strlen (str) + 1 + (base == 8 ? 3 * nprcnt : 2 * nprcnt); /* If the current buffer is uninitialized or too small, (re)allocate it. */ @@ -716,7 +737,7 @@ escnonprint_internal (const char *str, int for_uri) r->size = needed_size; } - copy_and_escape (str, r->buffer, for_uri); + copy_and_escape (str, r->buffer, escape, base); ringpos = (ringpos + 1) % RING_SIZE; return r->buffer; } @@ -724,7 +745,8 @@ escnonprint_internal (const char *str, int for_uri) /* Return a pointer to a static copy of STR with the non-printable characters escaped as \ooo. If there are no non-printable - characters in STR, STR is returned. + characters in STR, STR is returned. See copy_and_escape for more + information on which characters are considered non-printable. NOTE: since this function can return a pointer to static data, be careful to copy its result before calling it again. However, to be @@ -736,12 +758,13 @@ escnonprint_internal (const char *str, int for_uri) const char * escnonprint (const char *str) { - return escnonprint_internal (str, 0); + return escnonprint_internal (str, '\\', 8); } /* Return a pointer to a static copy of STR with the non-printable characters escaped as %XX. If there are no non-printable - characters in STR, STR is returned. + characters in STR, STR is returned. See copy_and_escape for more + information on which characters are considered non-printable. This function returns a pointer to static data which will be overwritten by subsequent calls -- see escnonprint for details. */ @@ -749,7 +772,7 @@ escnonprint (const char *str) const char * escnonprint_uri (const char *str) { - return escnonprint_internal (str, 1); + return escnonprint_internal (str, '%', 16); } void