diff --git a/src/http.c b/src/http.c index 1daa852a..b601e63f 100644 --- a/src/http.c +++ b/src/http.c @@ -1810,155 +1810,23 @@ initialize_proxy_configuration (struct url *u, struct request *req, request_set_header (req, "Proxy-Authorization", *proxyauth, rel_value); } -/* Retrieve a document through HTTP protocol. It recognizes status - code, and correctly handles redirections. It closes the network - socket. If it receives an error from the functions below it, it - will print it if there is enough information to do so (almost - always), returning the error to the caller (i.e. http_loop). - - Various HTTP parameters are stored to hs. - - If PROXY is non-NULL, the connection will be made to the proxy - server, and u->url will be requested. */ static uerr_t -gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, - struct iri *iri, int count) +establish_connection (struct url *u, struct url **conn_ref, + struct http_stat *hs, struct url *proxy, + char **proxyauth, + struct request **req_ref, bool *using_ssl, + bool inhibit_keep_alive, + int *sock_ref) { - struct request *req; - - char *type; - char *user, *passwd; - char *proxyauth; - int statcode; - int write_error; - wgint contlen, contrange; - struct url *conn; - FILE *fp; - int err; - - int sock = -1; - - /* Set to 1 when the authorization has already been sent and should - not be tried again. */ - bool auth_finished = false; - - /* Set to 1 when just globally-set Basic authorization has been sent; - * should prevent further Basic negotiations, but not other - * mechanisms. */ - bool basic_auth_finished = false; - - /* Whether NTLM authentication is used for this request. */ - bool ntlm_seen = false; - - /* Whether our connection to the remote host is through SSL. */ - bool using_ssl = false; - - /* Whether a HEAD request will be issued (as opposed to GET or - POST). */ - bool head_only = !!(*dt & HEAD_ONLY); - - char *head; - struct response *resp; - char hdrval[512]; - char *message; - - /* Declare WARC variables. */ - bool warc_enabled = (opt.warc_filename != NULL); - FILE *warc_tmp = NULL; - char warc_timestamp_str [21]; - char warc_request_uuid [48]; - ip_address *warc_ip = NULL; - off_t warc_payload_offset = -1; - - /* Whether this connection will be kept alive after the HTTP request - is done. */ - bool keep_alive; - - /* Is the server using the chunked transfer encoding? */ - bool chunked_transfer_encoding = false; - - /* Whether keep-alive should be inhibited. */ - bool inhibit_keep_alive = - !opt.http_keep_alive || opt.ignore_length; - - /* Headers sent when using POST. */ - wgint body_data_size = 0; - bool host_lookup_failed = false; + int sock = *sock_ref; + struct request *req = *req_ref; + struct url *conn = *conn_ref; + struct response *resp; + int write_error; + int statcode; -#ifdef HAVE_SSL - if (u->scheme == SCHEME_HTTPS) - { - /* Initialize the SSL context. After this has once been done, - it becomes a no-op. */ - if (!ssl_init ()) - { - scheme_disable (SCHEME_HTTPS); - logprintf (LOG_NOTQUIET, - _("Disabling SSL due to encountered errors.\n")); - return SSLINITFAILED; - } - } -#endif /* HAVE_SSL */ - - /* Initialize certain elements of struct http_stat. */ - hs->len = 0; - hs->contlen = -1; - hs->res = -1; - hs->rderrmsg = NULL; - hs->newloc = NULL; - xfree(hs->remote_time); - hs->error = NULL; - hs->message = NULL; - - conn = u; - - { - uerr_t ret; - req = initialize_request (u, hs, dt, proxy, inhibit_keep_alive, - &basic_auth_finished, &body_data_size, - &user, &passwd, &ret); - if (req == NULL) - return ret; - } - retry_with_auth: - /* We need to come back here when the initial attempt to retrieve - without authorization header fails. (Expected to happen at least - for the Digest authorization scheme.) */ - - if (opt.cookies) - request_set_header (req, "Cookie", - cookie_header (wget_cookie_jar, - u->host, u->port, u->path, -#ifdef HAVE_SSL - u->scheme == SCHEME_HTTPS -#else - 0 -#endif - ), - rel_value); - - /* Add the user headers. */ - if (opt.user_headers) - { - int i; - for (i = 0; opt.user_headers[i]; i++) - request_set_user_header (req, opt.user_headers[i]); - } - - proxyauth = NULL; - if (proxy) - { - conn = proxy; - initialize_proxy_configuration (u, req, proxy, &proxyauth); - } - keep_alive = true; - - /* Establish the connection. */ - - if (inhibit_keep_alive) - keep_alive = false; - else + if (! inhibit_keep_alive) { /* Look for a persistent connection to target host, unless a proxy is used. The exception is when SSL is in use, in which @@ -1980,7 +1848,7 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, { int family = socket_family (pconn.socket, ENDPOINT_PEER); sock = pconn.socket; - using_ssl = pconn.ssl; + *using_ssl = pconn.ssl; #if ENABLE_IPV6 if (family == AF_INET6) logprintf (LOG_VERBOSE, _("Reusing existing connection to [%s]:%d.\n"), @@ -2030,6 +1898,8 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, #ifdef HAVE_SSL if (proxy && u->scheme == SCHEME_HTTPS) { + char *head; + char *message; /* When requesting SSL URLs through proxies, use the CONNECT method to request passthrough. */ struct request *connreq = request_new ("CONNECT", @@ -2038,11 +1908,11 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, if (proxyauth) { request_set_header (connreq, "Proxy-Authorization", - proxyauth, rel_value); + *proxyauth, rel_value); /* Now that PROXYAUTH is part of the CONNECT request, zero it out so we don't send proxy authorization with the regular request below. */ - proxyauth = NULL; + *proxyauth = NULL; } request_set_header (connreq, "Host", aprintf ("%s:%d", u->host, u->port), @@ -2122,10 +1992,169 @@ gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, request_free (req); return VERIFCERTERR; } - using_ssl = true; + *using_ssl = true; } #endif /* HAVE_SSL */ } + *conn_ref = conn; + *req_ref = req; + *sock_ref = sock; + return RETROK; +} + +/* Retrieve a document through HTTP protocol. It recognizes status + code, and correctly handles redirections. It closes the network + socket. If it receives an error from the functions below it, it + will print it if there is enough information to do so (almost + always), returning the error to the caller (i.e. http_loop). + + Various HTTP parameters are stored to hs. + + If PROXY is non-NULL, the connection will be made to the proxy + server, and u->url will be requested. */ +static uerr_t +gethttp (struct url *u, struct http_stat *hs, int *dt, struct url *proxy, + struct iri *iri, int count) +{ + struct request *req; + + char *type; + char *user, *passwd; + char *proxyauth; + int statcode; + int write_error; + wgint contlen, contrange; + struct url *conn; + FILE *fp; + int err; + + int sock = -1; + + /* Set to 1 when the authorization has already been sent and should + not be tried again. */ + bool auth_finished = false; + + /* Set to 1 when just globally-set Basic authorization has been sent; + * should prevent further Basic negotiations, but not other + * mechanisms. */ + bool basic_auth_finished = false; + + /* Whether NTLM authentication is used for this request. */ + bool ntlm_seen = false; + + /* Whether our connection to the remote host is through SSL. */ + bool using_ssl = false; + + /* Whether a HEAD request will be issued (as opposed to GET or + POST). */ + bool head_only = !!(*dt & HEAD_ONLY); + + char *head; + struct response *resp; + char hdrval[512]; + char *message; + + /* Declare WARC variables. */ + bool warc_enabled = (opt.warc_filename != NULL); + FILE *warc_tmp = NULL; + char warc_timestamp_str [21]; + char warc_request_uuid [48]; + ip_address *warc_ip = NULL; + off_t warc_payload_offset = -1; + + /* Whether this connection will be kept alive after the HTTP request + is done. */ + bool keep_alive; + + /* Is the server using the chunked transfer encoding? */ + bool chunked_transfer_encoding = false; + + /* Whether keep-alive should be inhibited. */ + bool inhibit_keep_alive = + !opt.http_keep_alive || opt.ignore_length; + + /* Headers sent when using POST. */ + wgint body_data_size = 0; + +#ifdef HAVE_SSL + if (u->scheme == SCHEME_HTTPS) + { + /* Initialize the SSL context. After this has once been done, + it becomes a no-op. */ + if (!ssl_init ()) + { + scheme_disable (SCHEME_HTTPS); + logprintf (LOG_NOTQUIET, + _("Disabling SSL due to encountered errors.\n")); + return SSLINITFAILED; + } + } +#endif /* HAVE_SSL */ + + /* Initialize certain elements of struct http_stat. */ + hs->len = 0; + hs->contlen = -1; + hs->res = -1; + hs->rderrmsg = NULL; + hs->newloc = NULL; + xfree(hs->remote_time); + hs->error = NULL; + hs->message = NULL; + + conn = u; + + { + uerr_t ret; + req = initialize_request (u, hs, dt, proxy, inhibit_keep_alive, + &basic_auth_finished, &body_data_size, + &user, &passwd, &ret); + if (req == NULL) + return ret; + } + retry_with_auth: + /* We need to come back here when the initial attempt to retrieve + without authorization header fails. (Expected to happen at least + for the Digest authorization scheme.) */ + + if (opt.cookies) + request_set_header (req, "Cookie", + cookie_header (wget_cookie_jar, + u->host, u->port, u->path, +#ifdef HAVE_SSL + u->scheme == SCHEME_HTTPS +#else + 0 +#endif + ), + rel_value); + + /* Add the user headers. */ + if (opt.user_headers) + { + int i; + for (i = 0; opt.user_headers[i]; i++) + request_set_user_header (req, opt.user_headers[i]); + } + + proxyauth = NULL; + if (proxy) + { + conn = proxy; + initialize_proxy_configuration (u, req, proxy, &proxyauth); + } + keep_alive = true; + + /* Establish the connection. */ + if (inhibit_keep_alive) + keep_alive = false; + + { + uerr_t err = establish_connection (u, &conn, hs, proxy, &proxyauth, &req, + &using_ssl, inhibit_keep_alive, &sock); + if (err != RETROK) + return err; + } + /* Open the temporary file where we will write the request. */ if (warc_enabled)