diff --git a/src/ChangeLog b/src/ChangeLog index 277a0edb..762067f0 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,35 @@ +2005-03-03 Hrvoje Niksic + + * retr.c (retrieve_url): Escape location header. + + * http.c (print_server_response_1): Escape server response when + printing it. + (gethttp): Escape host name, status message, location header, and + content type. + (http_loop): Escape error message from server. + + * host.c (lookup_host): Escape host name when printing it. + + * ftp.c (getftp): Escape user name when printing it. + (getftp): Escape remote file and directory for printing. + (getftp): Escape server listing when printing it. + (ftp_retrieve_list): Escape link name and file name. + (ftp_retrieve_glob): Escape file name. + + * ftp-basic.c (ftp_response): Escape server response when printing + it. + + * cookies.c (parse_set_cookies): Escape the cookie field when + printing it. + (parse_set_cookies): Escape contents of remote header. + (cookie_handle_set_cookie): Escape host name and cookie domain. + + * connect.c (connect_to_ip): Escape the host name. + + * log.c (escnonprint): New function, used for printing strings + coming from the server that possibly contain non-ASCII characters. + (escnonprint_uri): Ditto. + 2005-02-24 Hrvoje Niksic * ftp.c (getftp): Ditto. diff --git a/src/connect.c b/src/connect.c index ff7a741c..40efc363 100644 --- a/src/connect.c +++ b/src/connect.c @@ -269,8 +269,8 @@ connect_to_ip (const ip_address *ip, int port, const char *print) { const char *txt_addr = pretty_print_address (ip); if (print && 0 != strcmp (print, txt_addr)) - logprintf (LOG_VERBOSE, - _("Connecting to %s|%s|:%d... "), print, txt_addr, port); + logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "), + escnonprint (print), txt_addr, port); else logprintf (LOG_VERBOSE, _("Connecting to %s:%d... "), txt_addr, port); } diff --git a/src/cookies.c b/src/cookies.c index 98ab423c..fe9761fd 100644 --- a/src/cookies.c +++ b/src/cookies.c @@ -616,7 +616,8 @@ parse_set_cookies (const char *sc, char *name; BOUNDED_TO_ALLOCA (name_b, name_e, name); logprintf (LOG_NOTQUIET, - _("Error in Set-Cookie, field `%s'"), name); + _("Error in Set-Cookie, field `%s'"), + escnonprint (name)); } state = S_ERROR; break; @@ -640,7 +641,7 @@ parse_set_cookies (const char *sc, if (!silent) logprintf (LOG_NOTQUIET, _("Syntax error in Set-Cookie: %s at position %d.\n"), - sc, p - sc); + escnonprint (sc), p - sc); return NULL; } @@ -862,7 +863,7 @@ cookie_handle_set_cookie (struct cookie_jar *jar, { logprintf (LOG_NOTQUIET, "Cookie coming from %s attempted to set domain to %s\n", - host, cookie->domain); + escnonprint (host), escnonprint (cookie->domain)); xfree (cookie->domain); goto copy_domain; } diff --git a/src/ftp-basic.c b/src/ftp-basic.c index a246e37c..011268b2 100644 --- a/src/ftp-basic.c +++ b/src/ftp-basic.c @@ -67,9 +67,9 @@ ftp_response (int fd, char **ret_line) if (!line) return FTPRERR; if (opt.server_response) - logputs (LOG_NOTQUIET, line); + logputs (LOG_NOTQUIET, escnonprint (line)); else - DEBUGP (("%s", line)); + DEBUGP (("%s", escnonprint (line))); if (ISDIGIT (line[0]) && ISDIGIT (line[1]) && ISDIGIT (line[2]) && line[3] == ' ') { diff --git a/src/ftp.c b/src/ftp.c index c1e2d002..47face71 100644 --- a/src/ftp.c +++ b/src/ftp.c @@ -292,7 +292,7 @@ getftp (struct url *u, wgint *len, wgint restval, ccon *con) con->csock = -1; /* Second: Login with proper USER/PASS sequence. */ - logprintf (LOG_VERBOSE, _("Logging in as %s ... "), user); + logprintf (LOG_VERBOSE, _("Logging in as %s ... "), escnonprint (user)); if (opt.server_response) logputs (LOG_ALWAYS, "\n"); err = ftp_login (csock, logname, passwd); @@ -551,7 +551,7 @@ Error in server response, closing control connection.\n")); } if (!opt.server_response) - logprintf (LOG_VERBOSE, "==> CWD %s ... ", target); + logprintf (LOG_VERBOSE, "==> CWD %s ... ", escnonprint (target)); err = ftp_cwd (csock, target); /* FTPRERR, WRITEFAILED, FTPNSFOD */ switch (err) @@ -575,7 +575,7 @@ Error in server response, closing control connection.\n")); case FTPNSFOD: logputs (LOG_VERBOSE, "\n"); logprintf (LOG_NOTQUIET, _("No such directory `%s'.\n\n"), - u->dir); + escnonprint (u->dir)); fd_close (csock); con->csock = -1; return err; @@ -599,7 +599,7 @@ Error in server response, closing control connection.\n")); if (opt.verbose) { if (!opt.server_response) - logprintf (LOG_VERBOSE, "==> SIZE %s ... ", u->file); + logprintf (LOG_VERBOSE, "==> SIZE %s ... ", escnonprint (u->file)); } err = ftp_size (csock, u->file, len); @@ -760,7 +760,8 @@ Error in server response, closing control connection.\n")); if (restval && (cmd & DO_RETR)) { if (!opt.server_response) - logprintf (LOG_VERBOSE, "==> REST %s ... ", number_to_static_string (restval)); + logprintf (LOG_VERBOSE, "==> REST %s ... ", + number_to_static_string (restval)); err = ftp_rest (csock, restval); /* FTPRERR, WRITEFAILED, FTPRESTFAIL */ @@ -822,7 +823,7 @@ Error in server response, closing control connection.\n")); { if (restval) logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_VERBOSE, "==> RETR %s ... ", u->file); + logprintf (LOG_VERBOSE, "==> RETR %s ... ", escnonprint (u->file)); } } @@ -852,7 +853,8 @@ Error in server response, closing control connection.\n")); break; case FTPNSFOD: logputs (LOG_VERBOSE, "\n"); - logprintf (LOG_NOTQUIET, _("No such file `%s'.\n\n"), u->file); + logprintf (LOG_NOTQUIET, _("No such file `%s'.\n\n"), + escnonprint (u->file)); fd_close (dtsock); fd_close (local_sock); return err; @@ -1114,7 +1116,7 @@ Error in server response, closing control connection.\n")); no-buffering on opt.lfile. */ while ((line = read_whole_line (fp))) { - logprintf (LOG_ALWAYS, "%s\n", line); + logprintf (LOG_ALWAYS, "%s\n", escnonprint (line)); xfree (line); } fclose (fp); @@ -1536,19 +1538,18 @@ The sizes do not match (local %s) -- retrieving.\n\n"), { logprintf (LOG_VERBOSE, _("\ Already have correct symlink %s -> %s\n\n"), - con->target, f->linkto); + con->target, escnonprint (f->linkto)); dlthis = 0; break; } } } logprintf (LOG_VERBOSE, _("Creating symlink %s -> %s\n"), - con->target, f->linkto); + con->target, escnonprint (f->linkto)); /* Unlink before creating symlink! */ unlink (con->target); if (symlink (f->linkto, con->target) == -1) - logprintf (LOG_NOTQUIET, "symlink: %s\n", - strerror (errno)); + logprintf (LOG_NOTQUIET, "symlink: %s\n", strerror (errno)); logputs (LOG_VERBOSE, "\n"); } /* have f->linkto */ #else /* not HAVE_SYMLINK */ @@ -1566,7 +1567,7 @@ Already have correct symlink %s -> %s\n\n"), case FT_DIRECTORY: if (!opt.recursive) logprintf (LOG_NOTQUIET, _("Skipping directory `%s'.\n"), - f->name); + escnonprint (f->name)); break; case FT_PLAINFILE: /* Call the retrieve loop. */ @@ -1575,7 +1576,7 @@ Already have correct symlink %s -> %s\n\n"), break; case FT_UNKNOWN: logprintf (LOG_NOTQUIET, _("%s: unknown/unsupported file type.\n"), - f->name); + escnonprint (f->name)); break; } /* switch */ @@ -1680,7 +1681,8 @@ ftp_retrieve_dirs (struct url *u, struct fileinfo *f, ccon *con) if (!accdir (newdir, ALLABS)) { logprintf (LOG_VERBOSE, _("\ -Not descending to `%s' as it is excluded/not-included.\n"), newdir); +Not descending to `%s' as it is excluded/not-included.\n"), + escnonprint (newdir)); continue; } @@ -1743,7 +1745,8 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) { if (f->type != FT_DIRECTORY && !acceptable (f->name)) { - logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), f->name); + logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), + escnonprint (f->name)); f = delelement (f, &start); } else @@ -1756,7 +1759,8 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) { if (has_insecure_name_p (f->name)) { - logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), f->name); + logprintf (LOG_VERBOSE, _("Rejecting `%s'.\n"), + escnonprint (f->name)); f = delelement (f, &start); } else @@ -1802,7 +1806,8 @@ ftp_retrieve_glob (struct url *u, ccon *con, int action) /* No luck. */ /* #### This message SUCKS. We should see what was the reason that nothing was retrieved. */ - logprintf (LOG_VERBOSE, _("No matches on pattern `%s'.\n"), u->file); + logprintf (LOG_VERBOSE, _("No matches on pattern `%s'.\n"), + escnonprint (u->file)); } else /* GETONE or GETALL */ { diff --git a/src/host.c b/src/host.c index 6f96845c..b550d786 100644 --- a/src/host.c +++ b/src/host.c @@ -723,7 +723,7 @@ lookup_host (const char *host, int flags) /* No luck with the cache; resolve HOST. */ if (!silent && !numeric_address) - logprintf (LOG_VERBOSE, _("Resolving %s... "), host); + logprintf (LOG_VERBOSE, _("Resolving %s... "), escnonprint (host)); #ifdef ENABLE_IPV6 { diff --git a/src/http.c b/src/http.c index 6bc4f3be..1afe586b 100644 --- a/src/http.c +++ b/src/http.c @@ -687,7 +687,7 @@ print_server_response_1 (const char *prefix, const char *b, const char *e) if (b < e && e[-1] == '\r') --e; BOUNDED_TO_ALLOCA (b, e, ln); - logprintf (LOG_VERBOSE, "%s%s\n", prefix, ln); + logprintf (LOG_VERBOSE, "%s%s\n", prefix, escnonprint (ln)); } /* Print the server response, line by line, omitting the trailing CR @@ -1306,7 +1306,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) sock = pconn.socket; using_ssl = pconn.ssl; logprintf (LOG_VERBOSE, _("Reusing existing connection to %s:%d.\n"), - pconn.host, pconn.port); + escnonprint (pconn.host), pconn.port); DEBUGP (("Reusing fd %d.\n", sock)); } } @@ -1377,11 +1377,11 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { failed_tunnel: logprintf (LOG_NOTQUIET, _("Proxy tunneling failed: %s"), - message ? message : "?"); + message ? escnonprint (message) : "?"); xfree_null (message); return CONSSLERR; } - xfree (message); + xfree_null (message); /* SOCK is now *really* connected to u->host, so update CONN to reflect this. That way register_persistent will @@ -1458,7 +1458,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) message = NULL; statcode = response_status (resp, &message); if (!opt.server_response) - logprintf (LOG_VERBOSE, "%2d %s\n", statcode, message ? message : ""); + logprintf (LOG_VERBOSE, "%2d %s\n", statcode, + message ? escnonprint (message) : ""); else { logprintf (LOG_VERBOSE, "\n"); @@ -1604,7 +1605,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) { logprintf (LOG_VERBOSE, _("Location: %s%s\n"), - hs->newloc ? hs->newloc : _("unspecified"), + hs->newloc ? escnonprint_uri (hs->newloc) : _("unspecified"), hs->newloc ? _(" [following]") : ""); if (keep_alive) skip_short_body (sock, contlen); @@ -1691,7 +1692,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy) logputs (LOG_VERBOSE, opt.ignore_length ? _("ignored") : _("unspecified")); if (type) - logprintf (LOG_VERBOSE, " [%s]\n", type); + logprintf (LOG_VERBOSE, " [%s]\n", escnonprint (type)); else logputs (LOG_VERBOSE, "\n"); } @@ -2105,7 +2106,7 @@ File `%s' already there, will not retrieve.\n"), *hstat.local_file); xfree (hurl); } logprintf (LOG_NOTQUIET, _("%s ERROR %d: %s.\n"), - tms, hstat.statcode, hstat.error); + tms, hstat.statcode, escnonprint (hstat.error)); logputs (LOG_VERBOSE, "\n"); free_hstat (&hstat); xfree_null (dummy); @@ -2190,7 +2191,8 @@ The sizes do not match (local %s) -- retrieving.\n"), if (opt.spider) { - logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, hstat.error); + logprintf (LOG_NOTQUIET, "%d %s\n\n", hstat.statcode, + escnonprint (hstat.error)); xfree_null (dummy); return RETROK; } diff --git a/src/log.c b/src/log.c index 2511513f..5477093f 100644 --- a/src/log.c +++ b/src/log.c @@ -615,6 +615,140 @@ log_dump_context (void) fflush (fp); } +/* String escape functions. */ + +/* Return the number of non-printable characters in SOURCE. + + Non-printable characters are determined as per safe-ctype.h, + i.e. the non-printable characters of the "C" locale. This code is + meant to be used to protect the user from binary characters in + (normally ASCII) server messages. */ + +static int +count_nonprint (const char *source) +{ + const char *p; + int cnt; + for (p = source, cnt = 0; *p; p++) + if (!ISPRINT (*p)) + ++cnt; + return cnt; +} + +/* Copy SOURCE to DEST, escaping non-printable characters. If FOR_URI + is 0, they are escaped as \ooo; otherwise, they are escaped as + %xx. + + DEST must point to a location with sufficient room to store an + encoded version of SOURCE. */ + +static void +copy_and_escape (const char *source, char *dest, int for_uri) +{ + const char *from; + char *to; + + /* Copy the string, escaping non-printable chars. */ + if (!for_uri) + { + for (from = source, to = dest; *from; from++) + if (ISPRINT (*from)) + *to++ = *from; + else + { + const unsigned char c = *from; + *to++ = '\\'; + *to++ = '0' + (c >> 6); + *to++ = '0' + ((c >> 3) & 7); + *to++ = '0' + (c & 7); + } + } + else + { + for (from = source, to = dest; *from; from++) + if (ISPRINT (*from)) + *to++ = *from; + else + { + const unsigned char c = *from; + *to++ = '%'; + *to++ = XNUM_TO_DIGIT (c >> 4); + *to++ = XNUM_TO_DIGIT (c & 0xf); + } + } + *to = '\0'; +} + +#define RING_SIZE 3 +struct ringel { + char *buffer; + int size; +}; + +static const char * +escnonprint_internal (const char *str, int for_uri) +{ + static struct ringel ring[RING_SIZE]; /* ring data */ + static int ringpos; /* current ring position */ + + int nprcnt = count_nonprint (str); + if (nprcnt == 0) + /* If there are no non-printable chars in STR, don't bother + copying anything, just return STR. */ + return str; + + { + /* Set up a pointer to the current ring position, so we can write + simply r->X instead of ring[ringpos].X. */ + struct ringel *r = ring + ringpos; + + /* Every non-printable character is replaced with "\ooo", + i.e. with three *additional* chars (two in URI-mode). Size + must also include the length of the original string and an + additional char for the terminating \0. */ + int needed_size = strlen (str) + 1 + for_uri ? (2 * nprcnt) : (3 * nprcnt); + + /* If the current buffer is uninitialized or too small, + (re)allocate it. */ + if (r->buffer == NULL || r->size < needed_size) + r->buffer = xrealloc (r->buffer, needed_size); + + copy_and_escape (str, r->buffer, for_uri); + ringpos = (ringpos + 1) % RING_SIZE; + return r->buffer; + } +} + +/* Return a pointer to a static copy of STR with the non-printable + characters escaped as \ooo. If there are no non-printable + characters in STR, STR is returned. + + NOTE: since this function can return a pointer to static data, be + careful to copy its result before calling it again. However, to be + more useful with printf, it maintains an internal ring of static + buffers to return. Currently the ring size is 3, which means you + can print up to three values in the same printf; if more is needed, + bump RING_SIZE. */ + +const char * +escnonprint (const char *str) +{ + return escnonprint_internal (str, 0); +} + +/* Return a pointer to a static copy of STR with the non-printable + characters escaped as %XX. If there are no non-printable + characters in STR, STR is returned. + + This function returns a pointer to static data which will be + overwritten by subsequent calls -- see escnonprint for details. */ + +const char * +escnonprint_uri (const char *str) +{ + return escnonprint_internal (str, 1); +} + /* When SIGHUP or SIGUSR1 are received, the output is redirected elsewhere. Such redirection is only allowed once. */ enum { RR_NONE, RR_REQUESTED, RR_DONE } redirect_request = RR_NONE; diff --git a/src/log.h b/src/log.h index 173f81e3..d2961994 100644 --- a/src/log.h +++ b/src/log.h @@ -52,4 +52,7 @@ void log_init PARAMS ((const char *, int)); void log_close PARAMS ((void)); void log_request_redirect_output PARAMS ((const char *)); +const char *escnonprint PARAMS ((const char *)); +const char *escnonprint_uri PARAMS ((const char *)); + #endif /* LOG_H */ diff --git a/src/retr.c b/src/retr.c index 339a2fa0..ef142bce 100644 --- a/src/retr.c +++ b/src/retr.c @@ -699,7 +699,7 @@ retrieve_url (const char *origurl, char **file, char **newloc, newloc_parsed = url_parse (mynewloc, &up_error_code); if (!newloc_parsed) { - logprintf (LOG_NOTQUIET, "%s: %s.\n", mynewloc, + logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), url_error (up_error_code)); url_free (u); xfree (url); diff --git a/src/string_t.c b/src/string_t.c index 90417a22..b94f1203 100644 --- a/src/string_t.c +++ b/src/string_t.c @@ -31,9 +31,13 @@ #include "config.h" +#define _GNU_SOURCE /* to get iswblank */ + #include #include +#include #include +#include #include "wget.h" @@ -182,12 +186,14 @@ string_destroy (struct string_t *str) memset (str, 0, sizeof (*str)); } +#if 0 /* unused */ static void string_append_delim (struct string_t *dst) { assert_valid_string (dst); string_cat (dst, line_delim, line_delim_len); } +#endif static int is_line_delim (const wchar_t *wsz)