From 291cbc639b0325067c8d074203727601520da1d7 Mon Sep 17 00:00:00 2001 From: Tatsuhiro Tsujikawa Date: Thu, 14 Feb 2013 00:28:55 +0900 Subject: [PATCH] shrpx: Use patricia trie for cert lookup --- src/shrpx_ssl.cc | 166 ++++++++++++++++++++++++++---------------- src/shrpx_ssl.h | 24 +++--- src/shrpx_ssl_test.cc | 29 +++++++- src/util.h | 40 ++++++++-- 4 files changed, 177 insertions(+), 82 deletions(-) diff --git a/src/shrpx_ssl.cc b/src/shrpx_ssl.cc index 1fd3e07..6baa9fa 100644 --- a/src/shrpx_ssl.cc +++ b/src/shrpx_ssl.cc @@ -513,7 +513,8 @@ CertLookupTree* cert_lookup_tree_new() CertLookupTree *tree = new CertLookupTree(); CertNode *root = new CertNode(); root->ssl_ctx = 0; - root->c = 0; + root->str = 0; + root->first = root->last = 0; tree->root = root; return tree; } @@ -525,11 +526,6 @@ void cert_node_del(CertNode *node) eoi = node->next.end(); i != eoi; ++i) { cert_node_del(*i); } - for(std::vector >::iterator i = - node->wildcard_certs.begin(), eoi = node->wildcard_certs.end(); - i != eoi; ++i) { - delete [] (*i).first; - } delete node; } } // namespace @@ -537,58 +533,85 @@ void cert_node_del(CertNode *node) void cert_lookup_tree_del(CertLookupTree *lt) { cert_node_del(lt->root); + for(std::vector::iterator i = lt->hosts.begin(), + eoi = lt->hosts.end(); i != eoi; ++i) { + delete [] *i; + } delete lt; } namespace { -// The |offset| is the index in the hostname we are examining. +// The |offset| is the index in the hostname we are examining. We are +// going to scan from |offset| in backwards. void cert_lookup_tree_add_cert(CertLookupTree *lt, CertNode *node, SSL_CTX *ssl_ctx, - const char *hostname, size_t len, int offset) + char *hostname, size_t len, int offset) { - if(offset == -1) { - if(!node->ssl_ctx) { - node->ssl_ctx = ssl_ctx; - } - return; - } int i, next_len = node->next.size(); - char c = util::lowcase(hostname[offset]); + char c = hostname[offset]; + CertNode *cn = 0; for(i = 0; i < next_len; ++i) { - if(node->next[i]->c == c) { + cn = node->next[i]; + if(cn->str[cn->first] == c) { break; } } if(i == next_len) { - CertNode *parent = node; - int j; - for(j = offset; j >= 0; --j) { - if(hostname[j] == '*') { - // We assume hostname as wildcard hostname when first '*' is - // encountered. Note that as per RFC 6125 (6.4.3), there are - // some restrictions for wildcard hostname. We just ignore - // these rules here but do the proper check when we do the - // match. - char *hostcopy = strdup(hostname); - for(int k = 0; hostcopy[k]; ++k) { - hostcopy[k] = util::lowcase(hostcopy[k]); - } - parent->wildcard_certs.push_back(std::make_pair(hostcopy, ssl_ctx)); - break; - } + if(c == '*') { + // We assume hostname as wildcard hostname when first '*' is + // encountered. Note that as per RFC 6125 (6.4.3), there are + // some restrictions for wildcard hostname. We just ignore + // these rules here but do the proper check when we do the + // match. + node->wildcard_certs.push_back(std::make_pair(hostname, ssl_ctx)); + } else { + int j; CertNode *new_node = new CertNode(); - new_node->ssl_ctx = 0; - new_node->c = util::lowcase(hostname[j]); - parent->next.push_back(new_node); - parent = new_node; - } - if(j == -1) { - // non-wildcard hostname, exact match case. - parent->ssl_ctx = ssl_ctx; + new_node->str = hostname; + new_node->first = offset; + // If wildcard is found, set the region before it because we + // don't include it in [first, last). + for(j = offset; j >= 0 && hostname[j] != '*'; --j); + new_node->last = j; + if(j == -1) { + new_node->ssl_ctx = ssl_ctx; + } else { + new_node->ssl_ctx = 0; + new_node->wildcard_certs.push_back(std::make_pair(hostname, ssl_ctx)); + } + node->next.push_back(new_node); } } else { - cert_lookup_tree_add_cert(lt, node->next[i], ssl_ctx, - hostname, len, offset-1); + int j; + for(i = cn->first, j = offset; i > cn->last && j >= 0 && + cn->str[i] == hostname[j]; --i, --j); + if(i == cn->last) { + if(j == -1) { + // same hostname, we don't overwrite exiting ssl_ctx + } else { + // The existing hostname is a suffix of this hostname. + // Continue matching at potion j. + cert_lookup_tree_add_cert(lt, cn, ssl_ctx, hostname, len, j); + } + } else { + CertNode *new_node = new CertNode(); + new_node->ssl_ctx = cn->ssl_ctx; + new_node->str = cn->str; + new_node->first = i; + new_node->last = cn->last; + new_node->wildcard_certs.swap(cn->wildcard_certs); + cn->next.push_back(new_node); + + cn->last = i; + if(j == -1) { + // This hostname is a suffix of the existing hostname. + cn->ssl_ctx = ssl_ctx; + } else { + // This hostname and existing one share suffix. + cn->ssl_ctx = 0; + cert_lookup_tree_add_cert(lt, cn, ssl_ctx, hostname, len, j); + } + } } } } // namespace @@ -599,38 +622,53 @@ void cert_lookup_tree_add_cert(CertLookupTree *lt, SSL_CTX *ssl_ctx, if(len == 0) { return; } - cert_lookup_tree_add_cert(lt, lt->root, ssl_ctx, hostname, len, len-1); + // Copy hostname including terminal NULL + char *host_copy = new char[len + 1]; + for(size_t i = 0; i < len; ++i) { + host_copy[i] = util::lowcase(hostname[i]); + } + host_copy[len] = '\0'; + lt->hosts.push_back(host_copy); + cert_lookup_tree_add_cert(lt, lt->root, ssl_ctx, host_copy, len, len-1); } namespace { - SSL_CTX* cert_lookup_tree_lookup(CertLookupTree *lt, CertNode *node, const char *hostname, size_t len, int offset) { - if(offset == -1) { - if(node->ssl_ctx) { - return node->ssl_ctx; + int i, j; + for(i = node->first, j = offset; i > node->last && j >= 0 && + node->str[i] == util::lowcase(hostname[j]); --i, --j); + if(i == node->last) { + if(j == -1) { + if(node->ssl_ctx) { + // exact match + return node->ssl_ctx; + } else { + // Do not perform wildcard-match because '*' must match at least + // one character. + return 0; + } } else { - // Do not perform wildcard-match because '*' must match at least - // one character. + for(std::vector >::iterator i = + node->wildcard_certs.begin(), eoi = node->wildcard_certs.end(); + i != eoi; ++i) { + if(tls_hostname_match((*i).first, hostname)) { + return (*i).second; + } + } + char c = util::lowcase(hostname[j]); + for(std::vector::iterator i = node->next.begin(), + eoi = node->next.end(); i != eoi; ++i) { + if((*i)->str[(*i)->first] == c) { + return cert_lookup_tree_lookup(lt, *i, hostname, len, j); + } + } return 0; } + } else { + return 0; } - for(std::vector >::iterator i = - node->wildcard_certs.begin(), eoi = node->wildcard_certs.end(); - i != eoi; ++i) { - if(tls_hostname_match((*i).first, hostname)) { - return (*i).second; - } - } - char c = util::lowcase(hostname[offset]); - for(std::vector::iterator i = node->next.begin(), - eoi = node->next.end(); i != eoi; ++i) { - if((*i)->c == c) { - return cert_lookup_tree_lookup(lt, *i, hostname, len, offset-1); - } - } - return 0; } } // namespace diff --git a/src/shrpx_ssl.h b/src/shrpx_ssl.h index 56665b8..60374fa 100644 --- a/src/shrpx_ssl.h +++ b/src/shrpx_ssl.h @@ -65,14 +65,15 @@ void get_altnames(X509 *cert, std::string& common_name); // CertLookupTree forms lookup tree to get SSL_CTX whose DNS or -// commonName matches hostname in query. The tree is trie data -// structure form from the tail of the hostname pattern. Each CertNode -// contains one ASCII character in the c member and the next member -// contains the following CertNode pointers ('following' means -// character before the current one). The CertNode where a hostname -// pattern ends contains its SSL_CTX pointer in the ssl_ctx member. -// For wildcard hostname pattern, we store the its pattern and SSL_CTX -// in CertNode one before first "*" found from the tail. +// commonName matches hostname in query. The tree is patricia trie +// data structure formed from the tail of the hostname pattern. Each +// CertNode contains part of hostname str member in range [first, +// last) member and the next member contains the following CertNode +// pointers ('following' means character before the current one). The +// CertNode where a hostname pattern ends contains its SSL_CTX pointer +// in the ssl_ctx member. For wildcard hostname pattern, we store the +// its pattern and SSL_CTX in CertNode one before first "*" found from +// the tail. // // When querying SSL_CTX with particular hostname, we match from its // tail in our lookup tree. If the query goes to the first character @@ -89,14 +90,17 @@ struct CertNode { // list of wildcard domain name and its SSL_CTX pair, the wildcard // '*' appears in this position. std::vector > wildcard_certs; - // ASCII byte in this position - char c; // Next CertNode index of CertLookupTree::nodes std::vector next; + char *str; + // [first, last) in the reverse direction in str, first >= + // last. This indices only work for str member. + int first, last; }; struct CertLookupTree { std::vector certs; + std::vector hosts; CertNode *root; }; diff --git a/src/shrpx_ssl_test.cc b/src/shrpx_ssl_test.cc index 75eaaa7..26b9982 100644 --- a/src/shrpx_ssl_test.cc +++ b/src/shrpx_ssl_test.cc @@ -34,15 +34,27 @@ void test_shrpx_ssl_create_lookup_tree(void) { ssl::CertLookupTree* tree = ssl::cert_lookup_tree_new(); SSL_CTX *ctxs[] = {SSL_CTX_new(TLSv1_method()), + SSL_CTX_new(TLSv1_method()), + SSL_CTX_new(TLSv1_method()), + SSL_CTX_new(TLSv1_method()), + SSL_CTX_new(TLSv1_method()), + SSL_CTX_new(TLSv1_method()), SSL_CTX_new(TLSv1_method()), SSL_CTX_new(TLSv1_method()), SSL_CTX_new(TLSv1_method()), SSL_CTX_new(TLSv1_method())}; + const char *hostnames[] = { "example.com", "www.example.org", "*www.example.org", "x*.host.domain", - "*yy.host.domain"}; + "*yy.host.domain", + "spdylay.sourceforge.net", + "sourceforge.net", + "sourceforge.net", // duplicate + "*.foo.bar", // oo.bar is suffix of *.foo.bar + "oo.bar" + }; int num = sizeof(ctxs)/sizeof(ctxs[0]); for(int i = 0; i < num; ++i) { ssl::cert_lookup_tree_add_cert(tree, ctxs[i], hostnames[i], @@ -61,12 +73,23 @@ void test_shrpx_ssl_create_lookup_tree(void) CU_ASSERT(ctxs[3] == ssl::cert_lookup_tree_lookup(tree, h3, strlen(h3))); // Does not match *yy.host.domain, because * must match at least 1 // character. - const char h4[] = "yy.host.domain"; + const char h4[] = "yy.Host.domain"; CU_ASSERT(0 == ssl::cert_lookup_tree_lookup(tree, h4, strlen(h4))); const char h5[] = "zyy.host.domain"; CU_ASSERT(ctxs[4] == ssl::cert_lookup_tree_lookup(tree, h5, strlen(h5))); CU_ASSERT(0 == ssl::cert_lookup_tree_lookup(tree, "", 0)); - + CU_ASSERT(ctxs[5] == ssl::cert_lookup_tree_lookup(tree, hostnames[5], + strlen(hostnames[5]))); + CU_ASSERT(ctxs[6] == ssl::cert_lookup_tree_lookup(tree, hostnames[6], + strlen(hostnames[6]))); + const char h6[] = "pdylay.sourceforge.net"; + for(int i = 0; i < 7; ++i) { + CU_ASSERT(0 == ssl::cert_lookup_tree_lookup(tree, h6 + i, strlen(h6) - i)); + } + const char h7[] = "x.foo.bar"; + CU_ASSERT(ctxs[8] == ssl::cert_lookup_tree_lookup(tree, h7, strlen(h7))); + CU_ASSERT(ctxs[9] == ssl::cert_lookup_tree_lookup(tree, hostnames[9], + strlen(hostnames[9]))); ssl::cert_lookup_tree_del(tree); for(int i = 0; i < num; ++i) { SSL_CTX_free(ctxs[i]); diff --git a/src/util.h b/src/util.h index 8f1d768..10d9cf5 100644 --- a/src/util.h +++ b/src/util.h @@ -300,11 +300,41 @@ char lowcase(char c); inline char lowcase(char c) { - if('A' <= c && c <= 'Z') { - return c-'A'+'a'; - } else { - return c; - } + static unsigned char tbl[] = { + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + 64, 'a', 'b', 'c', 'd', 'e', 'f', 'g', + 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', + 'x', 'y', 'z', 91, 92, 93, 94, 95, + 96, 97, 98, 99, 100, 101, 102, 103, + 104, 105, 106, 107, 108, 109, 110, 111, + 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, + 128, 129, 130, 131, 132, 133, 134, 135, + 136, 137, 138, 139, 140, 141, 142, 143, + 144, 145, 146, 147, 148, 149, 150, 151, + 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, + 168, 169, 170, 171, 172, 173, 174, 175, + 176, 177, 178, 179, 180, 181, 182, 183, + 184, 185, 186, 187, 188, 189, 190, 191, + 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, + 208, 209, 210, 211, 212, 213, 214, 215, + 216, 217, 218, 219, 220, 221, 222, 223, + 224, 225, 226, 227, 228, 229, 230, 231, + 232, 233, 234, 235, 236, 237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, + 248, 249, 250, 251, 252, 253, 254, 255, + }; + return tbl[static_cast(c)]; } template