From b60131a399b1215674e5728e0e77f8fe8e07d6fe Mon Sep 17 00:00:00 2001 From: Ander Juaristi Date: Wed, 1 Jul 2015 00:21:05 +0200 Subject: [PATCH] Added support for HSTS. * Makefile.am: Added new source files hsts.c and hsts.h. * http.c (parse_strict_transport_security): new function for STS header parsing. (gethttp): update the HSTS store. * http.h: new include "hsts.h". * init.c: new options --hsts and --hsts-file. * main.c (get_hsts_database, load_hsts, save_hsts): new functions. New options --no-hsts and --hsts-file added to help. (main): load and save HSTS store. * options.h: new variables for supporting --hsts and --hsts-file. * retr.c (retrieve_url): rewrite the URI according to the HSTS policy before entering http_loop. * test.c, test.h: new unit tests for HSTS. * utils.c, utils.h (countchars): new function. * wget.h: new preprocessor check. * hsts.c, hsts.h: new files with the HSTS engine implementation. Added support for HTTP Strict Transport Security (HSTS), as defined by RFC 6797. --- src/Makefile.am | 4 +- src/hsts.c | 744 ++++++++++++++++++++++++++++++++++++++++++++++++ src/hsts.h | 53 ++++ src/http.c | 84 ++++++ src/http.h | 2 + src/init.c | 9 + src/main.c | 97 ++++++- src/options.h | 5 + src/retr.c | 16 ++ src/test.c | 7 + src/test.h | 4 + src/wget.h | 1 + 12 files changed, 1020 insertions(+), 6 deletions(-) create mode 100644 src/hsts.c create mode 100644 src/hsts.h diff --git a/src/Makefile.am b/src/Makefile.am index 449a27f7..050f58e9 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -48,12 +48,12 @@ EXTRA_DIST = css.l css.c css_.c build_info.c.in bin_PROGRAMS = wget wget_SOURCES = connect.c convert.c cookies.c ftp.c \ css_.c css-url.c \ - ftp-basic.c ftp-ls.c hash.c host.c html-parse.c html-url.c \ + ftp-basic.c ftp-ls.c hash.c host.c hsts.c html-parse.c html-url.c \ http.c init.c log.c main.c netrc.c progress.c ptimer.c \ recur.c res.c retr.c spider.c url.c warc.c \ utils.c exits.c build_info.c $(IRI_OBJ) $(METALINK_OBJ) \ css-url.h css-tokens.h connect.h convert.h cookies.h \ - ftp.h hash.h host.h html-parse.h html-url.h \ + ftp.h hash.h host.h hsts.h html-parse.h html-url.h \ http.h http-ntlm.h init.h log.h mswindows.h netrc.h \ options.h progress.h ptimer.h recur.h res.h retr.h \ spider.h ssl.h sysdep.h url.h warc.h utils.h wget.h iri.h \ diff --git a/src/hsts.c b/src/hsts.c new file mode 100644 index 00000000..873707a0 --- /dev/null +++ b/src/hsts.c @@ -0,0 +1,744 @@ +/* HTTP Strict Transport Security (HSTS) support. + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2015 Free Software + Foundation, Inc. + +This file is part of GNU Wget. + +GNU Wget is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + +GNU Wget is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Wget. If not, see . + +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ +#include "wget.h" + +#ifdef HAVE_HSTS +#include "hsts.h" +#include "host.h" /* for is_valid_ip_address() */ +#include "init.h" /* for home_dir() */ +#include "utils.h" +#include "hash.h" +#include "c-ctype.h" +#ifdef TESTING +#include "test.h" +#include /* for unlink(), used only in tests */ +#endif + +#include +#include +#include +#include + +struct hsts_store { + struct hash_table *table; + time_t last_mtime; +}; + +struct hsts_kh { + char *host; + int explicit_port; +}; + +struct hsts_kh_info { + time_t created; + time_t max_age; + bool include_subdomains; +}; + +enum hsts_kh_match { + NO_MATCH, + SUPERDOMAIN_MATCH, + CONGRUENT_MATCH +}; + +#define hsts_is_host_name_valid(host) (!is_valid_ip_address (host)) +#define hsts_is_scheme_valid(scheme) (scheme == SCHEME_HTTPS) +#define hsts_is_host_eligible(scheme, host) \ + (hsts_is_scheme_valid (scheme) && hsts_is_host_name_valid (host)) + +#define DEFAULT_HTTP_PORT 80 +#define DEFAULT_SSL_PORT 443 +#define CHECK_EXPLICIT_PORT(p1, p2) (p1 == 0 || p1 == p2) +#define MAKE_EXPLICIT_PORT(s, p) (s == SCHEME_HTTPS ? (p == DEFAULT_SSL_PORT ? 0 : p) \ + : (p == DEFAULT_HTTP_PORT ? 0 : p)) + +/* Hashing and comparison functions for the hash table */ + +static unsigned long +hsts_hash_func (const void *key) +{ + struct hsts_kh *k = (struct hsts_kh *) key; + const char *h = NULL; + unsigned int hash = k->explicit_port; + + for (h = k->host; *h; h++) + hash = hash * 31 + *h; + + return hash; +} + +static int +hsts_cmp_func (const void *h1, const void *h2) +{ + struct hsts_kh *kh1 = (struct hsts_kh *) h1, + *kh2 = (struct hsts_kh *) h2; + + return (!strcmp (kh1->host, kh2->host)) && (kh1->explicit_port == kh2->explicit_port); +} + +/* Private functions. Feel free to make some of these public when needed. */ + +static struct hsts_kh_info * +hsts_find_entry (hsts_store_t store, + const char *host, int explicit_port, + enum hsts_kh_match *match_type, + struct hsts_kh *kh) +{ + struct hsts_kh *k = NULL; + struct hsts_kh_info *khi = NULL; + enum hsts_kh_match match = NO_MATCH; + char *pos = NULL; + char *org_ptr = NULL; + + k = (struct hsts_kh *) xnew (struct hsts_kh); + k->host = xstrdup_lower (host); + k->explicit_port = explicit_port; + + /* save pointer so that we don't get into trouble later when freeing */ + org_ptr = k->host; + + khi = (struct hsts_kh_info *) hash_table_get (store->table, k); + if (khi) + { + match = CONGRUENT_MATCH; + goto end; + } + + while (match == NO_MATCH && + (pos = strchr (k->host, '.')) && pos - k->host > 0 && + strchr (pos + 1, '.')) + { + k->host += (pos - k->host + 1); + khi = (struct hsts_kh_info *) hash_table_get (store->table, k); + if (khi) + match = SUPERDOMAIN_MATCH; + } + +end: + /* restore pointer or we'll get a SEGV */ + k->host = org_ptr; + xfree (k->host); + + /* copy parameters to previous frame */ + if (match_type) + *match_type = match; + if (kh) + memcpy (kh, k, sizeof (struct hsts_kh)); + + xfree (k); + return khi; +} + +static bool +hsts_new_entry_internal (hsts_store_t store, + const char *host, int port, + time_t created, time_t max_age, + bool include_subdomains, + bool check_validity, + bool check_expired, + bool check_duplicates) +{ + struct hsts_kh *kh = xnew (struct hsts_kh); + struct hsts_kh_info *khi = xnew0 (struct hsts_kh_info); + bool success = false; + + kh->host = xstrdup_lower (host); + kh->explicit_port = MAKE_EXPLICIT_PORT (SCHEME_HTTPS, port); + + khi->created = created; + khi->max_age = max_age; + khi->include_subdomains = include_subdomains; + + /* Check validity */ + if (check_validity && !hsts_is_host_name_valid (host)) + goto bail; + + if (check_expired && ((khi->created + khi->max_age) < khi->created)) + goto bail; + + if (check_duplicates && hash_table_contains (store->table, kh)) + goto bail; + + /* Now store the new entry */ + hash_table_put (store->table, kh, khi); + success = true; + +bail: + if (!success) + { + /* abort! */ + xfree (kh->host); + xfree (kh); + xfree (khi); + } + + return success; +} + +/* + Creates a new entry, but does not check whether that entry already exists. + This function assumes that check has already been done by the caller. + */ +static bool +hsts_add_entry (hsts_store_t store, + const char *host, int port, + time_t max_age, bool include_subdomains) +{ + time_t t = time (NULL); + + /* It might happen time() returned -1 */ + return (t < 0 ? + false : + hsts_new_entry_internal (store, host, port, t, max_age, include_subdomains, false, true, false)); +} + +/* Creates a new entry, unless an identical one already exists. */ +static bool +hsts_new_entry (hsts_store_t store, + const char *host, int port, + time_t created, time_t max_age, + bool include_subdomains) +{ + return hsts_new_entry_internal (store, host, port, created, max_age, include_subdomains, true, true, true); +} + +static void +hsts_remove_entry (hsts_store_t store, struct hsts_kh *kh) +{ + if (hash_table_remove (store->table, kh)) + xfree (kh->host); +} + +static bool +hsts_store_merge (hsts_store_t store, + const char *host, int port, + time_t created, time_t max_age, + bool include_subdomains) +{ + enum hsts_kh_match match_type = NO_MATCH; + struct hsts_kh_info *khi = NULL; + bool success = false; + + port = MAKE_EXPLICIT_PORT (SCHEME_HTTPS, port); + khi = hsts_find_entry (store, host, port, &match_type, NULL); + if (khi && match_type == CONGRUENT_MATCH && created > khi->created) + { + /* update the entry with the new info */ + khi->created = created; + khi->max_age = max_age; + khi->include_subdomains = include_subdomains; + + success = true; + } + + return success; +} + +static bool +hsts_read_database (hsts_store_t store, const char *file, bool merge_with_existing_entries) +{ + FILE *fp = NULL; + char *line = NULL, *p; + size_t len = 0; + int items_read; + bool result = false; + bool (*func)(hsts_store_t, const char *, int, time_t, time_t, bool); + + char host[256]; + int port; + time_t created, max_age; + int include_subdomains; + + func = (merge_with_existing_entries ? hsts_store_merge : hsts_new_entry); + + fp = fopen (file, "r"); + if (fp) + { + while (getline (&line, &len, fp) > 0) + { + for (p = line; c_isspace (*p); p++) + ; + + if (*p == '#') + continue; + + items_read = sscanf (p, "%255s %d %d %lu %lu", + host, + &port, + &include_subdomains, + (unsigned long *) &created, + (unsigned long *) &max_age); + + if (items_read == 5) + func (store, host, port, created, max_age, !!include_subdomains); + } + + xfree (line); + fclose (fp); + + result = true; + } + + return result; +} + +static void +hsts_store_dump (hsts_store_t store, const char *filename) +{ + FILE *fp = NULL; + hash_table_iterator it; + + fp = fopen (filename, "w"); + if (fp) + { + /* Print preliminary comments. We don't care if any of these fail. */ + fputs ("# HSTS 1.0 Known Hosts database for GNU Wget.\n", fp); + fputs ("# Edit at your own risk.\n", fp); + fputs ("# [:]\t\t\t\n", fp); + + /* Now cycle through the HSTS store in memory and dump the entries */ + for (hash_table_iterate (store->table, &it); hash_table_iter_next (&it);) + { + struct hsts_kh *kh = (struct hsts_kh *) it.key; + struct hsts_kh_info *khi = (struct hsts_kh_info *) it.value; + + if (fprintf (fp, "%s\t%d\t%d\t%lu\t%lu\n", + kh->host, kh->explicit_port, khi->include_subdomains, + khi->created, khi->max_age) < 0) + { + logprintf (LOG_ALWAYS, "Could not write the HSTS database correctly.\n"); + break; + } + } + + fclose (fp); + } +} + +/* HSTS API */ + +/* + Changes the given URLs according to the HSTS policy. + + If there's no host in the store that either congruently + or not, matches the given URL, no changes are made. + Returns true if the URL was changed, or false + if it was left intact. + */ +bool +hsts_match (hsts_store_t store, struct url *u) +{ + bool url_changed = false; + struct hsts_kh_info *entry = NULL; + struct hsts_kh *kh = xnew(struct hsts_kh); + enum hsts_kh_match match = NO_MATCH; + int port = MAKE_EXPLICIT_PORT (u->scheme, u->port); + + /* avoid doing any computation if we're already in HTTPS */ + if (!hsts_is_scheme_valid (u->scheme)) + { + entry = hsts_find_entry (store, u->host, port, &match, kh); + if (entry) + { + if ((entry->created + entry->max_age) >= time(NULL)) + { + if ((match == CONGRUENT_MATCH) || + (match == SUPERDOMAIN_MATCH && entry->include_subdomains)) + { + /* we found a matching Known HSTS Host + rewrite the URL */ + u->scheme = SCHEME_HTTPS; + if (u->port == 80) + u->port = 443; + url_changed = true; + } + } + else + hsts_remove_entry (store, kh); + } + } + + xfree(kh); + + return url_changed; +} + +/* + Add a new HSTS Known Host to the HSTS store. + + If the host already exists, its information is updated, + or it'll be removed from the store if max_age is zero. + + Bear in mind that the store is kept in memory, and will not + be written to disk until hsts_store_save is called. + This function regrows the in-memory HSTS store if necessary. + + Currently, for a host to be taken into consideration, + two conditions have to be met: + - Connection must be through a secure channel (HTTPS). + - The host must not be an IPv4 or IPv6 address. + + The RFC 6797 states that hosts that match IPv4 or IPv6 format + should be discarded at URI rewrite time. But we short-circuit + that check here, since there's no point in storing a host that + will never be matched. + + Returns true if a new entry was actually created, or false + if an existing entry was updated/deleted. */ +bool +hsts_store_entry (hsts_store_t store, + enum url_scheme scheme, const char *host, int port, + time_t max_age, bool include_subdomains) +{ + bool result = false; + enum hsts_kh_match match = NO_MATCH; + struct hsts_kh *kh = xnew(struct hsts_kh); + struct hsts_kh_info *entry = NULL; + time_t t = 0; + + if (hsts_is_host_eligible (scheme, host)) + { + port = MAKE_EXPLICIT_PORT (scheme, port); + entry = hsts_find_entry (store, host, port, &match, kh); + if (entry && match == CONGRUENT_MATCH) + { + if (max_age == 0) + hsts_remove_entry (store, kh); + else if (max_age > 0) + { + entry->include_subdomains = include_subdomains; + + if (entry->max_age != max_age) + { + /* RFC 6797 states that 'max_age' is a TTL relative to the reception of the STS header + so we have to update the 'created' field too */ + t = time (NULL); + if (t != -1) + entry->created = t; + entry->max_age = max_age; + } + } + /* we ignore negative max_ages */ + } + else if (entry == NULL || match == SUPERDOMAIN_MATCH) + { + /* Either we didn't find a matching host, + or we got a superdomain match. + In either case, we create a new entry. + + We have to perform an explicit check because it might + happen we got a non-existent entry with max_age == 0. + */ + result = hsts_add_entry (store, host, port, max_age, include_subdomains); + } + /* we ignore new entries with max_age == 0 */ + } + + xfree(kh); + + return result; +} + +hsts_store_t +hsts_store_open (const char *filename) +{ + hsts_store_t store = NULL; + struct stat st; + + store = xnew0 (struct hsts_store); + store->table = hash_table_new (0, hsts_hash_func, hsts_cmp_func); + store->last_mtime = 0; + + if (file_exists_p (filename)) + { + if (stat (filename, &st) == 0) + store->last_mtime = st.st_mtime; + + if (!hsts_read_database (store, filename, false)) + { + /* abort! */ + hsts_store_close (store); + xfree (store); + store = NULL; + } + } + + return store; +} + +void +hsts_store_save (hsts_store_t store, const char *filename) +{ + struct stat st; + + if (filename && hash_table_count (store->table) > 0) + { + /* If the file has changed, merge the changes with our in-memory data + before dumping them to the file. + Otherwise we could potentially overwrite the data stored by other Wget processes. + */ + if (store->last_mtime && stat (filename, &st) == 0 && st.st_mtime > store->last_mtime) + hsts_read_database (store, filename, true); + + /* now dump to the file */ + hsts_store_dump (store, filename); + } +} + +void +hsts_store_close (hsts_store_t store) +{ + hash_table_iterator it; + + /* free all the host fields */ + for (hash_table_iterate (store->table, &it); hash_table_iter_next (&it);) + { + xfree (((struct hsts_kh *) it.key)->host); + xfree (it.key); + xfree (it.value); + } + + hash_table_destroy (store->table); +} + +#ifdef TESTING +/* I know I'm really evil because I'm writing macros + that change control flow. But we're testing, who will tell? :D + */ +#define TEST_URL_RW(s, u, p) do { \ + if (test_url_rewrite (s, u, p, true)) \ + return test_url_rewrite (s, u, p, true); \ + } while (0) + +#define TEST_URL_NORW(s, u, p) do { \ + if (test_url_rewrite (s, u, p, false)) \ + return test_url_rewrite (s, u, p, false); \ + } while (0) + +static char * +get_hsts_store_filename (void) +{ + char *home = NULL, *filename = NULL; + FILE *fp = NULL; + + home = home_dir (); + if (home) + { + filename = aprintf ("%s/.wget-hsts-test", home); + fp = fopen (filename, "w"); + if (fp) + fclose (fp); + } + + return filename; +} + +static hsts_store_t +open_hsts_test_store (void) +{ + char *filename = NULL; + hsts_store_t table = NULL; + + filename = get_hsts_store_filename (); + table = hsts_store_open (filename); + xfree (filename); + + return table; +} + +static void +close_hsts_test_store (void) +{ + char *filename = NULL; + + filename = get_hsts_store_filename (); + unlink (filename); + xfree (filename); +} + +static const char* +test_url_rewrite (hsts_store_t s, const char *url, int port, bool rewrite) +{ + bool result; + struct url u; + + u.host = xstrdup (url); + u.port = port; + u.scheme = SCHEME_HTTP; + + result = hsts_match (s, &u); + + if (rewrite) + { + if (port == 80) + mu_assert("URL: port should've been rewritten to 443", u.port == 443); + else + mu_assert("URL: port should've been left intact", u.port == port); + mu_assert("URL: scheme should've been rewritten to HTTPS", u.scheme == SCHEME_HTTPS); + mu_assert("result should've been true", result == true); + } + else + { + mu_assert("URL: port should've been left intact", u.port == port); + mu_assert("URL: scheme should've been left intact", u.scheme == SCHEME_HTTP); + mu_assert("result should've been false", result == false); + } + + xfree (u.host); + return NULL; +} + +const char * +test_hsts_new_entry (void) +{ + enum hsts_kh_match match = NO_MATCH; + struct hsts_kh_info *khi = NULL; + + hsts_store_t s = open_hsts_test_store (); + mu_assert("Could not open the HSTS store. This could be due to lack of memory.", s != NULL); + + bool created = hsts_store_entry (s, SCHEME_HTTP, "www.foo.com", 80, 1234, true); + mu_assert("No entry should have been created.", created == false); + + created = hsts_store_entry (s, SCHEME_HTTPS, "www.foo.com", 443, 1234, true); + mu_assert("A new entry should have been created", created == true); + + khi = hsts_find_entry (s, "www.foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been a congruent match", match == CONGRUENT_MATCH); + mu_assert("No valid HSTS info was returned", khi != NULL); + mu_assert("Variable 'max_age' should be 1234", khi->max_age == 1234); + mu_assert("Variable 'include_subdomains' should be asserted", khi->include_subdomains == true); + + khi = hsts_find_entry (s, "b.www.foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been a superdomain match", match == SUPERDOMAIN_MATCH); + mu_assert("No valid HSTS info was returned", khi != NULL); + mu_assert("Variable 'max_age' should be 1234", khi->max_age == 1234); + mu_assert("Variable 'include_subdomains' should be asserted", khi->include_subdomains == true); + + khi = hsts_find_entry (s, "ww.foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been no match", match == NO_MATCH); + + khi = hsts_find_entry (s, "foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been no match", match == NO_MATCH); + + khi = hsts_find_entry (s, ".foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been no match", match == NO_MATCH); + + khi = hsts_find_entry (s, ".www.foo.com", MAKE_EXPLICIT_PORT (SCHEME_HTTPS, 443), &match, NULL); + mu_assert("Should've been no match", match == NO_MATCH); + + hsts_store_close (s); + close_hsts_test_store (); + + return NULL; +} + +const char* +test_hsts_url_rewrite_superdomain (void) +{ + hsts_store_t s; + bool created; + + s = open_hsts_test_store (); + mu_assert("Could not open the HSTS store", s != NULL); + + created = hsts_store_entry (s, SCHEME_HTTPS, "www.foo.com", 443, time(NULL) + 1234, true); + mu_assert("A new entry should've been created", created == true); + + TEST_URL_RW (s, "www.foo.com", 80); + TEST_URL_RW (s, "bar.www.foo.com", 80); + + hsts_store_close (s); + close_hsts_test_store (); + + return NULL; +} + +const char* +test_hsts_url_rewrite_congruent (void) +{ + hsts_store_t s; + bool created; + + s = open_hsts_test_store (); + mu_assert("Could not open the HSTS store", s != NULL); + + created = hsts_store_entry (s, SCHEME_HTTPS, "foo.com", 443, time(NULL) + 1234, false); + mu_assert("A new entry should've been created", created == true); + + TEST_URL_RW (s, "foo.com", 80); + TEST_URL_NORW (s, "www.foo.com", 80); + + hsts_store_close (s); + close_hsts_test_store (); + + return NULL; +} + +const char* +test_hsts_read_database (void) +{ + hsts_store_t table; + char *home = home_dir(); + char *file = NULL; + FILE *fp = NULL; + + if (home) + { + file = aprintf ("%s/.wget-hsts-testing", home); + fp = fopen (file, "w"); + if (fp) + { + fputs ("# dummy comment\n", fp); + fputs ("foo.example.com\t0\t1\t1434224817\t123123123\n", fp); + fputs ("bar.example.com\t0\t0\t1434224817\t456456456\n", fp); + fputs ("test.example.com\t8080\t0\t1434224817\t789789789\n", fp); + fclose (fp); + + table = hsts_store_open (file); + + TEST_URL_RW (table, "foo.example.com", 80); + TEST_URL_RW (table, "www.foo.example.com", 80); + TEST_URL_RW (table, "bar.example.com", 80); + + TEST_URL_NORW(table, "www.bar.example.com", 80); + + TEST_URL_RW (table, "test.example.com", 8080); + + hsts_store_close (table); + unlink (file); + } + } + + return NULL; +} +#endif /* TESTING */ +#endif /* HAVE_HSTS */ diff --git a/src/hsts.h b/src/hsts.h new file mode 100644 index 00000000..9a7043da --- /dev/null +++ b/src/hsts.h @@ -0,0 +1,53 @@ +/* Declarations for hsts.c + Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, + 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2015 Free Software + Foundation, Inc. + +This file is part of GNU Wget. + +GNU Wget is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + +GNU Wget is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with Wget. If not, see . + +Additional permission under GNU GPL version 3 section 7 + +If you modify this program, or any covered work, by linking or +combining it with the OpenSSL project's OpenSSL library (or a +modified version of that library), containing parts covered by the +terms of the OpenSSL or SSLeay licenses, the Free Software Foundation +grants you additional permission to convey the resulting work. +Corresponding Source for a non-source form of such a combination +shall include the source code for the parts of OpenSSL used as well +as that of the covered work. */ +#include "wget.h" + +#ifdef HAVE_HSTS + +#ifndef HSTS_H +#define HSTS_H + +#include "url.h" + +typedef struct hsts_store *hsts_store_t; + +hsts_store_t hsts_store_open (const char *); + +void hsts_store_save (hsts_store_t, const char *); +void hsts_store_close (hsts_store_t); + +bool hsts_store_entry (hsts_store_t, + enum url_scheme, const char *, int, + time_t, bool); +bool hsts_match (hsts_store_t, struct url *); + +#endif /* HSTS_H */ +#endif /* HAVE_HSTS */ diff --git a/src/http.c b/src/http.c index fe0aebd8..1e6f8003 100644 --- a/src/http.c +++ b/src/http.c @@ -42,6 +42,7 @@ as that of the covered work. */ #include "hash.h" #include "http.h" +#include "hsts.h" #include "utils.h" #include "url.h" #include "host.h" @@ -1257,6 +1258,55 @@ parse_content_disposition (const char *hdr, char **filename) return false; } +#ifdef HAVE_HSTS +static bool +parse_strict_transport_security (const char *header, time_t *max_age, bool *include_subdomains) +{ + param_token name, value; + const char *c_max_age = NULL; + bool is = false; /* includeSubDomains */ + bool is_url_encoded = false; + bool success = false; + + if (header) + { + /* Process the STS header. Keys should be matched case-insensitively. */ + for (; extract_param (&header, &name, &value, ';', &is_url_encoded); is_url_encoded = false) + { + if (BOUNDED_EQUAL_NO_CASE(name.b, name.e, "max-age")) + c_max_age = strdupdelim (value.b, value.e); + else if (BOUNDED_EQUAL_NO_CASE(name.b, name.e, "includeSubDomains")) + is = true; + } + + /* pass the parsed values over */ + if (c_max_age) + { + /* If the string value goes out of a long's bounds, strtol() will return LONG_MIN or LONG_MAX. + * In theory, the HSTS engine should be able to handle it. + * Also, time_t is normally defined as a long, so this should not break. + */ + if (max_age) + *max_age = (time_t) strtol (c_max_age, NULL, 10); + if (include_subdomains) + *include_subdomains = is; + + DEBUGP(("Parsed Strict-Transport-Security max-age = %s, includeSubDomains = %s\n", + c_max_age, (is ? "true" : "false"))); + + success = true; + } + else + { + /* something weird happened */ + logprintf (LOG_VERBOSE, "Could not parse String-Transport-Security header\n"); + success = false; + } + } + + return success; +} +#endif /* Persistent connections. Currently, we cache the most recently used connection as persistent, provided that the HTTP server agrees to @@ -2842,6 +2892,17 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, FILE *fp; int err; uerr_t retval; +#ifdef HAVE_HSTS +#ifdef TESTING + /* we don't link against main.o when we're testing */ + hsts_store_t hsts_store = NULL; +#else + extern hsts_store_t hsts_store; +#endif + const char *hsts_params; + time_t max_age; + bool include_subdomains; +#endif int sock = -1; @@ -3319,6 +3380,29 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, else hs->error = xstrdup (message); +#ifdef HAVE_HSTS + if (opt.hsts && hsts_store) + { + hsts_params = resp_header_strdup (resp, "Strict-Transport-Security"); + if (parse_strict_transport_security (hsts_params, &max_age, &include_subdomains)) + { + /* process strict transport security */ + if (hsts_store_entry (hsts_store, u->scheme, u->host, u->port, max_age, include_subdomains)) + DEBUGP(("Added new HSTS host: %s:%u (max-age: %u, includeSubdomains: %s)\n", + u->host, + u->port, + (unsigned int) max_age, + (include_subdomains ? "true" : "false"))); + else + DEBUGP(("Updated HSTS host: %s:%u (max-age: %u, includeSubdomains: %s)\n", + u->host, + u->port, + (unsigned int) max_age, + (include_subdomains ? "true" : "false"))); + } + } +#endif + type = resp_header_strdup (resp, "Content-Type"); if (type) { diff --git a/src/http.h b/src/http.h index 05cdda49..71cb0f01 100644 --- a/src/http.h +++ b/src/http.h @@ -31,6 +31,8 @@ as that of the covered work. */ #ifndef HTTP_H #define HTTP_H +#include "hsts.h" + struct url; uerr_t http_loop (struct url *, struct url *, char **, char **, const char *, diff --git a/src/init.c b/src/init.c index ed4171e3..da17614f 100644 --- a/src/init.c +++ b/src/init.c @@ -194,6 +194,10 @@ static const struct { { "ftpuser", &opt.ftp_user, cmd_string }, { "glob", &opt.ftp_glob, cmd_boolean }, { "header", NULL, cmd_spec_header }, +#ifdef HAVE_HSTS + { "hsts", &opt.hsts, cmd_boolean }, + { "hsts-file", &opt.hsts_file, cmd_file }, +#endif { "htmlextension", &opt.adjust_extension, cmd_boolean }, /* deprecated */ { "htmlify", NULL, cmd_spec_htmlify }, { "httpkeepalive", &opt.http_keep_alive, cmd_boolean }, @@ -450,6 +454,11 @@ defaults (void) opt.start_pos = -1; opt.show_progress = -1; opt.noscroll = false; + +#ifdef HAVE_HSTS + /* HSTS is enabled by default */ + opt.hsts = true; +#endif } /* Return the user's home directory (strdup-ed), or NULL if none is diff --git a/src/main.c b/src/main.c index 79a37b5b..64907119 100644 --- a/src/main.c +++ b/src/main.c @@ -54,6 +54,7 @@ as that of the covered work. */ #include "convert.h" #include "spider.h" #include "http.h" /* for save_cookies */ +#include "hsts.h" /* for initializing hsts_store to NULL */ #include "ptimer.h" #include "warc.h" #include "version.h" @@ -141,6 +142,63 @@ i18n_initialize (void) #endif /* ENABLE_NLS */ } +#ifdef HAVE_HSTS +/* make the HSTS store global */ +hsts_store_t hsts_store; + +static char* +get_hsts_database (void) +{ + char *home; + + if (opt.hsts_file) + return xstrdup (opt.hsts_file); + + home = home_dir (); + if (home) + return aprintf ("%s/.wget-hsts", home); + + return NULL; +} + +static void +load_hsts (void) +{ + if (!hsts_store) + { + char *filename = get_hsts_database (); + + if (filename) + { + hsts_store = hsts_store_open (filename); + + if (!hsts_store) + logprintf (LOG_NOTQUIET, "ERROR: could not open HSTS store at '%s'. " + "HSTS will be disabled.\n", + filename); + } + else + logprintf (LOG_NOTQUIET, "ERROR: could not open HSTS store. HSTS will be disabled.\n"); + + xfree (filename); + } +} + +static void +save_hsts (void) +{ + if (hsts_store) + { + char *filename = get_hsts_database (); + + hsts_store_save (hsts_store, filename); + hsts_store_close (hsts_store); + + xfree (filename); + } +} +#endif + /* Definition of command-line options. */ static void _Noreturn print_help (void); @@ -230,6 +288,10 @@ static struct cmdline_option option_data[] = { "header", 0, OPT_VALUE, "header", -1 }, { "help", 'h', OPT_FUNCALL, (void *)print_help, no_argument }, { "host-directories", 0, OPT_BOOLEAN, "addhostdir", -1 }, +#ifdef HAVE_HSTS + { "hsts", 0, OPT_BOOLEAN, "hsts", -1}, + { "hsts-file", 0, OPT_VALUE, "hsts-file", -1 }, +#endif { "html-extension", 'E', OPT_BOOLEAN, "adjustextension", -1 }, /* deprecated */ { "htmlify", 0, OPT_BOOLEAN, "htmlify", -1 }, { "http-keep-alive", 0, OPT_BOOLEAN, "httpkeepalive", -1 }, @@ -714,6 +776,16 @@ HTTPS (SSL/TLS) options:\n"), "\n", #endif /* HAVE_SSL */ +#ifdef HAVE_HSTS + N_("\ +HSTS options:\n"), + N_("\ + --no-hsts disable HSTS\n"), + N_("\ + --hsts-file path of HSTS database (will override default)\n"), + "\n", +#endif + N_("\ FTP options:\n"), #ifdef __VMS @@ -1690,6 +1762,18 @@ outputting to a regular file.\n")); signal (SIGWINCH, progress_handle_sigwinch); #endif +#ifdef HAVE_HSTS + hsts_store = NULL; + + /* Load the HSTS database. + Maybe all the URLs are FTP(S), in which case HSTS would not be needed, + but this is the best place to do it, and it shouldn't be a critical + performance hit. + */ + if (opt.hsts) + load_hsts (); +#endif + /* Retrieve the URLs from argument list. */ for (t = url; *t; t++) { @@ -1727,10 +1811,10 @@ outputting to a regular file.\n")); opt.follow_ftp = old_follow_ftp; } else - { - retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, - &dt, opt.recursive, iri, true); - } + { + retrieve_url (url_parsed, *t, &filename, &redirected_URL, NULL, + &dt, opt.recursive, iri, true); + } if (opt.delete_after && filename != NULL && file_exists_p (filename)) { @@ -1848,6 +1932,11 @@ outputting to a regular file.\n")); if (opt.cookies_output) save_cookies (); +#ifdef HAVE_HSTS + if (opt.hsts && hsts_store) + save_hsts (); +#endif + if (opt.convert_links && !opt.delete_after) convert_all_links (); diff --git a/src/options.h b/src/options.h index 1ede7b34..f8244ea5 100644 --- a/src/options.h +++ b/src/options.h @@ -295,6 +295,11 @@ struct options bool show_all_dns_entries; /* Show all the DNS entries when resolving a name. */ bool report_bps; /*Output bandwidth in bits format*/ + +#ifdef HAVE_HSTS + bool hsts; + char *hsts_file; +#endif }; extern struct options opt; diff --git a/src/retr.c b/src/retr.c index f60da6ef..896b58fe 100644 --- a/src/retr.c +++ b/src/retr.c @@ -56,6 +56,7 @@ as that of the covered work. */ #include "ptimer.h" #include "html-url.h" #include "iri.h" +#include "hsts.h" /* Total size of downloaded files. Used to enforce quota. */ SUM_SIZE_INT total_downloaded_bytes; @@ -799,6 +800,20 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, #endif || (proxy_url && proxy_url->scheme == SCHEME_HTTP)) { +#ifdef HAVE_HSTS +#ifdef TESTING + /* we don't link against main.o when we're testing */ + hsts_store_t hsts_store = NULL; +#else + extern hsts_store_t hsts_store; +#endif + + if (opt.hsts && hsts_store) + { + if (hsts_match (hsts_store, u)) + logprintf (LOG_VERBOSE, "URL transformed to HTTPS due to an HSTS policy\n"); + } +#endif result = http_loop (u, orig_parsed, &mynewloc, &local_file, refurl, dt, proxy_url, iri); } @@ -988,6 +1003,7 @@ retrieve_url (struct url * orig_parsed, const char *origurl, char **file, bail: if (register_status) inform_exit_status (result); + return result; } diff --git a/src/test.c b/src/test.c index 9599ac40..5278925e 100644 --- a/src/test.c +++ b/src/test.c @@ -62,6 +62,12 @@ all_tests(void) mu_run_test (test_append_uri_pathel); mu_run_test (test_are_urls_equal); mu_run_test (test_is_robots_txt_url); +#ifdef HAVE_HSTS + mu_run_test (test_hsts_new_entry); + mu_run_test (test_hsts_url_rewrite_superdomain); + mu_run_test (test_hsts_url_rewrite_congruent); + mu_run_test (test_hsts_read_database); +#endif return NULL; } @@ -73,6 +79,7 @@ main (int argc _GL_UNUSED, const char *argv[]) { const char *result; + printf ("[DEBUG] Testing...\n\n"); #ifdef ENABLE_NLS /* Set the current locale. */ setlocale (LC_ALL, ""); diff --git a/src/test.h b/src/test.h index 6d5cbbe5..f74c1620 100644 --- a/src/test.h +++ b/src/test.h @@ -56,6 +56,10 @@ const char *test_append_uri_pathel(void); const char *test_are_urls_equal(void); const char *test_subdir_p(void); const char *test_dir_matches_p(void); +const char *test_hsts_new_entry(void); +const char *test_hsts_url_rewrite_superdomain(void); +const char *test_hsts_url_rewrite_congruent(void); +const char *test_hsts_read_database(void); #endif /* TEST_H */ diff --git a/src/wget.h b/src/wget.h index 2caa03e9..e2c6b30b 100644 --- a/src/wget.h +++ b/src/wget.h @@ -54,6 +54,7 @@ as that of the covered work. */ /* Is OpenSSL or GNUTLS available? */ #if defined HAVE_LIBSSL || defined HAVE_LIBSSL32 || defined HAVE_LIBGNUTLS # define HAVE_SSL +# define HAVE_HSTS /* There's no sense in enabling HSTS without SSL */ #endif /* `gettext (FOO)' is long to write, so we use `_(FOO)'. If NLS is