psl: use latest psl and refresh it periodically

The latest psl is cached in the multi or share handle. Once the cached copy
is more than 72 hours old, it is refreshed before its next use.
New share lock CURL_LOCK_DATA_PSL controls the psl cache sharing.
If the latest psl is not available, the builtin psl is used.

Reported-by: Yaakov Selkowitz
Fixes #2553
Closes #2601
This commit is contained in:
Patrick Monnerat 2018-05-28 20:29:15 +02:00
parent 536e9f8289
commit 8541d02c96
14 changed files with 254 additions and 29 deletions

View File

@ -87,6 +87,15 @@ existed before this.
Note that when you use the multi interface, all easy handles added to the same
multi handle will share connection cache by default without using this option.
.IP CURL_LOCK_DATA_PSL
The Public Suffix List stored in the share object is made available to all
easy handles bound to the latter. Since the Public Suffix List is periodically
refreshed, this avoids updates in too many different contexts.
\fBCURL_LOCK_DATA_PSL\fP exists since 7.61.0.
Note that when you use the multi interface, all easy handles added to the same
multi handle will share PSL cache by default without using this option.
.RE
.IP CURLSHOPT_UNSHARE
This option does the opposite of \fICURLSHOPT_SHARE\fP. It specifies that

View File

@ -780,6 +780,7 @@ CURL_LOCK_DATA_CONNECT 7.10.3
CURL_LOCK_DATA_COOKIE 7.10.3
CURL_LOCK_DATA_DNS 7.10.3
CURL_LOCK_DATA_NONE 7.10.3
CURL_LOCK_DATA_PSL 7.61.0
CURL_LOCK_DATA_SHARE 7.10.4
CURL_LOCK_DATA_SSL_SESSION 7.10.3
CURL_LOCK_TYPE_CONNECT 7.10 - 7.10.2

View File

@ -2582,6 +2582,7 @@ typedef enum {
CURL_LOCK_DATA_DNS,
CURL_LOCK_DATA_SSL_SESSION,
CURL_LOCK_DATA_CONNECT,
CURL_LOCK_DATA_PSL,
CURL_LOCK_DATA_LAST
} curl_lock_data;

View File

@ -54,7 +54,7 @@ LIB_CFILES = file.c timeval.c base64.c hostip.c progress.c formdata.c \
http_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_sasl.c rand.c \
curl_multibyte.c hostcheck.c conncache.c pipeline.c dotdot.c \
x509asn1.c http2.c smb.c curl_endian.c curl_des.c system_win32.c \
mime.c sha256.c setopt.c curl_path.c curl_ctype.c curl_range.c
mime.c sha256.c setopt.c curl_path.c curl_ctype.c curl_range.c psl.c
LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \
formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h if2ip.h \
@ -74,7 +74,7 @@ LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \
curl_setup_once.h multihandle.h setup-vms.h pipeline.h dotdot.h \
x509asn1.h http2.h sigpipe.h smb.h curl_endian.h curl_des.h \
curl_printf.h system_win32.h rand.h mime.h curl_sha256.h setopt.h \
curl_path.h curl_ctype.h curl_range.h
curl_path.h curl_ctype.h curl_range.h psl.h
LIB_RCFILES = libcurl.rc

View File

@ -84,12 +84,9 @@ Example set of cookies:
#if !defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_COOKIES)
#ifdef USE_LIBPSL
# include <libpsl.h>
#endif
#include "urldata.h"
#include "cookie.h"
#include "psl.h"
#include "strtok.h"
#include "sendf.h"
#include "slist.h"
@ -406,6 +403,12 @@ static void remove_expired(struct CookieInfo *cookies)
}
}
/*
 * bad_domain()
 *
 * Returns TRUE when 'domain' has no dot in it and is not "localhost"
 * (compared case-insensitively); returns FALSE otherwise.
 */
static bool bad_domain(const char *domain)
{
  bool has_dot = strchr(domain, '.') != NULL;

  /* The "localhost" comparison is only evaluated for dot-less names. */
  return !(has_dot || strcasecompare(domain, "localhost"));
}
/****************************************************************************
*
* Curl_cookie_add()
@ -442,10 +445,6 @@ Curl_cookie_add(struct Curl_easy *data,
bool badcookie = FALSE; /* cookies are good by default. mmmmm yummy */
size_t myhash;
#ifdef USE_LIBPSL
const psl_ctx_t *psl;
#endif
#ifdef CURL_DISABLE_VERBOSE_STRINGS
(void)data;
#endif
@ -585,13 +584,8 @@ Curl_cookie_add(struct Curl_easy *data,
* TLD or otherwise "protected" suffix. To reduce risk, we require a
* dot OR the exact host name being "localhost".
*/
{
const char *dotp;
/* check for more dots */
dotp = strchr(whatptr, '.');
if(!dotp && !strcasecompare("localhost", whatptr))
domain = ":";
}
if(bad_domain(whatptr))
domain = ":";
#endif
is_ip = isip(domain ? domain : whatptr);
@ -890,14 +884,21 @@ Curl_cookie_add(struct Curl_easy *data,
remove_expired(c);
#ifdef USE_LIBPSL
/* Check if the domain is a Public Suffix and if yes, ignore the cookie.
This needs a libpsl compiled with builtin data. */
/* Check if the domain is a Public Suffix and if yes, ignore the cookie. */
if(domain && co->domain && !isip(co->domain)) {
psl = psl_builtin();
if(psl && !psl_is_cookie_domain_acceptable(psl, domain, co->domain)) {
infof(data,
"cookie '%s' dropped, domain '%s' must not set cookies for '%s'\n",
co->name, domain, co->domain);
const psl_ctx_t *psl = Curl_psl_use(data);
int acceptable;
if(psl) {
acceptable = psl_is_cookie_domain_acceptable(psl, domain, co->domain);
Curl_psl_release(data);
}
else
acceptable = !bad_domain(domain);
if(!acceptable) {
infof(data, "cookie '%s' dropped, domain '%s' must not "
"set cookies for '%s'\n", co->name, domain, co->domain);
freecookie(co);
return NULL;
}

View File

@ -31,6 +31,7 @@
#include "progress.h"
#include "easyif.h"
#include "share.h"
#include "psl.h"
#include "multiif.h"
#include "sendf.h"
#include "timeval.h"
@ -409,6 +410,14 @@ CURLMcode curl_multi_add_handle(struct Curl_multi *multi,
else
data->state.conn_cache = &multi->conn_cache;
#ifdef USE_LIBPSL
/* Do the same for PSL. */
if(data->share && (data->share->specifier & (1 << CURL_LOCK_DATA_PSL)))
data->psl = &data->share->psl;
else
data->psl = &multi->psl;
#endif
/* This adds the new entry at the 'end' of the doubly-linked circular
list of Curl_easy structs to try and maintain a FIFO queue so
the pipelined requests are in order. */
@ -727,6 +736,12 @@ CURLMcode curl_multi_remove_handle(struct Curl_multi *multi,
data->easy_conn = NULL;
}
#ifdef USE_LIBPSL
/* Remove the PSL association. */
if(data->psl == &multi->psl)
data->psl = NULL;
#endif
data->multi = NULL; /* clear the association to this multi handle */
/* make sure there's no pending message in the queue sent from this easy
@ -2220,6 +2235,11 @@ CURLMcode curl_multi_cleanup(struct Curl_multi *multi)
data->state.conn_cache = NULL;
data->multi = NULL; /* clear the association */
#ifdef USE_LIBPSL
if(data->psl == &multi->psl)
data->psl = NULL;
#endif
data = nextdata;
}
@ -2232,6 +2252,7 @@ CURLMcode curl_multi_cleanup(struct Curl_multi *multi)
Curl_llist_destroy(&multi->pending, NULL);
Curl_hash_destroy(&multi->hostcache);
Curl_psl_destroy(&multi->psl);
/* Free the blacklists by setting them to NULL */
Curl_pipeline_set_site_blacklist(NULL, &multi->pipelining_site_bl);

View File

@ -7,7 +7,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -23,6 +23,7 @@
***************************************************************************/
#include "conncache.h"
#include "psl.h"
struct Curl_message {
struct curl_llist_element list;
@ -97,6 +98,11 @@ struct Curl_multi {
/* Hostname cache */
struct curl_hash hostcache;
#ifdef USE_LIBPSL
/* PSL cache. */
struct PslCache psl;
#endif
/* timetree points to the splay-tree of time nodes to figure out expire
times of all currently set timers */
struct Curl_tree *timetree;

111
lib/psl.c Normal file
View File

@ -0,0 +1,111 @@
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
#include "curl_setup.h"
#include <curl/curl.h>
#ifdef USE_LIBPSL
#include "psl.h"
#include "share.h"
/* The last 3 #include files should be in this order */
#include "curl_printf.h"
#include "curl_memory.h"
#include "memdebug.h"
/*
 * Curl_psl_destroy()
 *
 * Empties a PSL cache. Does nothing when the cache holds no PSL. Otherwise
 * the PSL is handed to psl_free() only if the cache flags it as dynamic,
 * and the cache is then reset to "no PSL, not dynamic".
 */
void Curl_psl_destroy(struct PslCache *pslcache)
{
  if(!pslcache->psl)
    return; /* nothing cached */

  if(pslcache->dynamic)
    psl_free((psl_ctx_t *) pslcache->psl);

  pslcache->dynamic = FALSE;
  pslcache->psl = NULL;
}
static time_t now_seconds(void)
{
struct curltime now = Curl_now();
return now.tv_sec;
}
/*
 * Curl_psl_use()
 *
 * Get the PSL from the cache associated with the given easy handle,
 * (re)loading it first if the cache is empty or its expiry time is reached.
 *
 * Returns NULL if the handle has no associated PSL cache or if no PSL could
 * be obtained; in that case no PSL lock is left held by this function.
 * Otherwise returns the cached PSL with the CURL_LOCK_DATA_PSL lock still
 * taken in shared mode: the caller must call Curl_psl_release() when done
 * with the returned pointer.
 */
const psl_ctx_t *Curl_psl_use(struct Curl_easy *easy)
{
struct PslCache *pslcache = easy->psl;
const psl_ctx_t *psl;
time_t now;
/* No cache bound to this handle: nothing to use and nothing to lock. */
if(!pslcache)
return NULL;
Curl_share_lock(easy, CURL_LOCK_DATA_PSL, CURL_LOCK_ACCESS_SHARED);
now = now_seconds();
if(!pslcache->psl || pslcache->expires <= now) {
/* Drop the shared lock before asking for the exclusive one, giving other
   threads a chance to do the job: avoids deadlock. */
Curl_share_unlock(easy, CURL_LOCK_DATA_PSL);
/* Update cache: this needs an exclusive lock. */
Curl_share_lock(easy, CURL_LOCK_DATA_PSL, CURL_LOCK_ACCESS_SINGLE);
/* Recheck in case another thread did the job while no lock was held. */
now = now_seconds();
if(!pslcache->psl || pslcache->expires <= now) {
/* Defaults describe the built-in PSL: not to be freed, never expiring. */
bool dynamic = FALSE;
time_t expires = TIME_T_MAX;
#if defined(PSL_VERSION_NUMBER) && PSL_VERSION_NUMBER >= 0x001000
/* This libpsl version provides psl_latest(): try it first. */
psl = psl_latest(NULL);
dynamic = psl != NULL;
/* Take care of possible time computation overflow. */
expires = now < TIME_T_MAX - PSL_TTL? now + PSL_TTL: TIME_T_MAX;
/* Only get the built-in PSL if we do not already have the "latest".
   Note that this "if" controls the psl_builtin() call located after the
   #endif; without psl_latest() support that call is unconditional. */
if(!psl && !pslcache->dynamic)
#endif
psl = psl_builtin();
/* Replace the cached PSL only if a new one was obtained. If not, the
   cache is left untouched, including its expiry time. */
if(psl) {
Curl_psl_destroy(pslcache);
pslcache->psl = psl;
pslcache->dynamic = dynamic;
pslcache->expires = expires;
}
}
Curl_share_unlock(easy, CURL_LOCK_DATA_PSL); /* Release exclusive lock. */
/* Get back to shared mode: this is the lock handed over to the caller. */
Curl_share_lock(easy, CURL_LOCK_DATA_PSL, CURL_LOCK_ACCESS_SHARED);
}
psl = pslcache->psl;
/* No PSL to return: the caller will not release, so unlock here. */
if(!psl)
Curl_share_unlock(easy, CURL_LOCK_DATA_PSL);
return psl;
}
/*
 * Curl_psl_release()
 *
 * Release the CURL_LOCK_DATA_PSL lock for the given easy handle. This is the
 * counterpart of a Curl_psl_use() call that returned a non-NULL PSL.
 */
void Curl_psl_release(struct Curl_easy *easy)
{
Curl_share_unlock(easy, CURL_LOCK_DATA_PSL);
}
#endif /* USE_LIBPSL */

47
lib/psl.h Normal file
View File

@ -0,0 +1,47 @@
#ifndef HEADER_PSL_H
#define HEADER_PSL_H
/***************************************************************************
* _ _ ____ _
* Project ___| | | | _ \| |
* / __| | | | |_) | |
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
* are also available at https://curl.haxx.se/docs/copyright.html.
*
* You may opt to use, copy, modify, merge, publish, distribute and/or sell
* copies of the Software, and permit persons to whom the Software is
* furnished to do so, under the terms of the COPYING file.
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
* KIND, either express or implied.
*
***************************************************************************/
#ifdef USE_LIBPSL
#include <libpsl.h>
/* PSL time to live before a refresh: 72 hours, expressed in seconds. */
#define PSL_TTL (72 * 3600) /* PSL time to live before a refresh. */
/* A cached Public Suffix List together with its refresh bookkeeping. */
struct PslCache {
const psl_ctx_t *psl; /* The PSL, or NULL if none is cached. */
time_t expires; /* Time (in seconds) at which this PSL must be refreshed. */
bool dynamic; /* PSL should be released when no longer needed. */
};
/* Get the PSL cached for this easy handle, refreshing it if needed.
   A non-NULL result must be given back with Curl_psl_release(). */
const psl_ctx_t *Curl_psl_use(struct Curl_easy *easy);
/* Release the PSL lock for this easy handle. */
void Curl_psl_release(struct Curl_easy *easy);
/* Empty a PSL cache. */
void Curl_psl_destroy(struct PslCache *pslcache);
#else
/* Without USE_LIBPSL: using a PSL yields NULL, the other calls expand to
   nothing. */
#define Curl_psl_use(easy) NULL
#define Curl_psl_release(easy)
#define Curl_psl_destroy(pslcache)
#endif /* USE_LIBPSL */
#endif /* HEADER_PSL_H */

View File

@ -1942,6 +1942,11 @@ CURLcode Curl_vsetopt(struct Curl_easy *data, CURLoption option,
if(data->share->sslsession == data->state.session)
data->state.session = NULL;
#ifdef USE_LIBPSL
if(data->psl == &data->share->psl)
data->psl = data->multi? &data->multi->psl: NULL;
#endif
data->share->dirty--;
Curl_share_unlock(data, CURL_LOCK_DATA_SHARE);
@ -1973,8 +1978,12 @@ CURLcode Curl_vsetopt(struct Curl_easy *data, CURLoption option,
data->set.general_ssl.max_ssl_sessions = data->share->max_ssl_sessions;
data->state.session = data->share->sslsession;
}
Curl_share_unlock(data, CURL_LOCK_DATA_SHARE);
#ifdef USE_LIBPSL
if(data->share->specifier & (1 << CURL_LOCK_DATA_PSL))
data->psl = &data->share->psl;
#endif
Curl_share_unlock(data, CURL_LOCK_DATA_SHARE);
}
/* check for host cache not needed,
* it will be done by curl_easy_perform */

View File

@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -25,6 +25,7 @@
#include <curl/curl.h>
#include "urldata.h"
#include "share.h"
#include "psl.h"
#include "vtls/vtls.h"
#include "curl_memory.h"
@ -106,6 +107,12 @@ curl_share_setopt(struct Curl_share *share, CURLSHoption option, ...)
res = CURLSHE_NOMEM;
break;
case CURL_LOCK_DATA_PSL:
#ifndef USE_LIBPSL
res = CURLSHE_NOT_BUILT_IN;
#endif
break;
default:
res = CURLSHE_BAD_OPTION;
}
@ -205,6 +212,8 @@ curl_share_cleanup(struct Curl_share *share)
}
#endif
Curl_psl_destroy(&share->psl);
if(share->unlockfunc)
share->unlockfunc(NULL, CURL_LOCK_DATA_SHARE, share->clientdata);
free(share);

View File

@ -7,7 +7,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
* Copyright (C) 1998 - 2017, Daniel Stenberg, <daniel@haxx.se>, et al.
* Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@ -25,6 +25,7 @@
#include "curl_setup.h"
#include <curl/curl.h>
#include "cookie.h"
#include "psl.h"
#include "urldata.h"
#include "conncache.h"
@ -49,6 +50,9 @@ struct Curl_share {
#if !defined(CURL_DISABLE_HTTP) && !defined(CURL_DISABLE_COOKIES)
struct CookieInfo *cookies;
#endif
#ifdef USE_LIBPSL
struct PslCache psl;
#endif
struct curl_ssl_session *sslsession;
size_t max_ssl_sessions;

View File

@ -80,6 +80,7 @@
#define RESP_TIMEOUT (1800*1000)
#include "cookie.h"
#include "psl.h"
#include "formdata.h"
#ifdef HAVE_NETINET_IN_H
@ -1736,6 +1737,9 @@ struct Curl_easy {
struct to which this "belongs" when used
by the easy interface */
struct Curl_share *share; /* Share, handles global variable mutexing */
#ifdef USE_LIBPSL
struct PslCache *psl; /* The associated PSL cache. */
#endif
struct SingleRequest req; /* Request-specific data */
struct UserDefined set; /* values set by the libcurl user */
struct DynamicStatic change; /* possibly modified userdefined data */

View File

@ -1591,8 +1591,10 @@
d c 4
d CURL_LOCK_DATA_CONNECT...
d c 5
d CURL_LOCK_DATA_LAST...
d CURL_LOCK_DATA_PSL...
d c 6
d CURL_LOCK_DATA_LAST...
d c 7
*
d curl_lock_access...
d s 10i 0 based(######ptr######) Enum