1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] My patch "persistent connection tweaks".

Published in <sxshf531qhj.fsf@florida.arsdigita.de>.

(Applied with the addition of correct calculation for the
length of the request.)
This commit is contained in:
hniksic 2000-11-19 15:42:13 -08:00
parent b0b1c815c1
commit b27144fcce
3 changed files with 149 additions and 46 deletions

View File

@ -1,3 +1,16 @@
2000-11-20 Hrvoje Niksic <hniksic@arsdigita.com>
* recur.c (recursive_retrieve): Print the "so we don't load"
debugging message only if we really don't load.
* http.c (gethttp): Inhibit keep-alive if proxy is being used.
(gethttp): Don't request keep-alive if keep-alive is inhibited.
2000-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
* http.c (gethttp): Make the HTTP persistent connections more
robust.
2000-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
* retr.c (get_contents): If use_expected, make sure that the

View File

@ -264,31 +264,87 @@ http_process_connection (const char *hdr, void *arg)
return 1; return 1;
} }
/* Persistent connections (pc). */ /* Persistent connections (pc). Currently, we cache the most recently
used connection as persistent, provided that the HTTP server agrees
to make it such. The persistence data is stored in the variables
below. Ideally, it would be in a structure, and it should be
possible to cache an arbitrary fixed number of these connections.
I think the code is quite easy to extend in that direction. */
/* Whether the persistent connection is active. */
static int pc_active_p;
/* Host and port of the last persistent connection. */
static unsigned char pc_last_host[4]; static unsigned char pc_last_host[4];
static unsigned short pc_last_port; static unsigned short pc_last_port;
/* File descriptor of the last persistent connection. */
static int pc_last_fd; static int pc_last_fd;
static void /* Mark the persistent connection as invalid. This is used by the
register_persistent (const char *host, unsigned short port, int fd) CLOSE_* macros after they forcefully close a registered persistent
{ connection. */
if (!store_hostaddress (pc_last_host, host))
return;
pc_last_port = port;
pc_last_fd = fd;
}
static void static void
invalidate_persistent (void) invalidate_persistent (void)
{ {
pc_last_port = 0; pc_active_p = 0;
DEBUGP (("Invalidating fd %d from further reuse.\n", pc_last_fd));
} }
/* Remember FD as the persistent connection to HOST:PORT so that a
   later request to the same host and port can reuse it.

   FD should be a TCP/IP connection to HOST:PORT.  For HTTP, call this
   only after the response has been received and the server has
   promised to keep the connection alive.

   If FD is already the registered connection, nothing happens.  If a
   different connection is currently registered, it is closed and
   invalidated before FD takes its place.  */
static void
register_persistent (const char *host, unsigned short port, int fd)
{
  int stored;

  /* FD is already the active persistent connection; leave the
     registration untouched.  */
  if (pc_active_p && pc_last_fd == fd)
    return;

  if (pc_active_p)
    {
      /* Another persistent connection is still registered.  This
         happens when we connected to a different host in the
         meantime and now want to register that connection instead,
         so drop the old one first.  */
      CLOSE (pc_last_fd);
      invalidate_persistent ();
    }

  /* The address lookup is expected to succeed here because the
     result for HOST should already be in the cache.  */
  stored = store_hostaddress (pc_last_host, host);
  assert (stored);

  pc_last_fd = fd;
  pc_last_port = port;
  pc_active_p = 1;
  DEBUGP (("Registered fd %d for persistent reuse.\n", fd));
}
/* Return non-zero if a persistent connection is available for
connecting to HOST:PORT. */
static int static int
persistent_available_p (const char *host, unsigned short port) persistent_available_p (const char *host, unsigned short port)
{ {
unsigned char this_host[4]; unsigned char this_host[4];
if (!pc_active_p)
return 0;
if (port != pc_last_port) if (port != pc_last_port)
return 0; return 0;
if (!store_hostaddress (this_host, host)) if (!store_hostaddress (this_host, host))
@ -297,6 +353,7 @@ persistent_available_p (const char *host, unsigned short port)
return 0; return 0;
if (!test_socket_open (pc_last_fd)) if (!test_socket_open (pc_last_fd))
{ {
CLOSE (pc_last_fd);
invalidate_persistent (); invalidate_persistent ();
return 0; return 0;
} }
@ -312,24 +369,24 @@ persistent_available_p (const char *host, unsigned short port)
In case of keep_alive, CLOSE_FINISH should leave the connection In case of keep_alive, CLOSE_FINISH should leave the connection
open, while CLOSE_INVALIDATE should still close it. open, while CLOSE_INVALIDATE should still close it.
The semantic difference between the flags `keep_alive' and Note that the semantics of the flag `keep_alive' is "this
`reused_connection' is that keep_alive defines the state of HTTP: connection *will* be reused (the server has promised not to close
whether the connection *will* be preservable. reused_connection, the connection once we're done)", while the semantics of
on the other hand, reflects the present: whether the *current* `pc_active_p && (fd) == pc_last_fd' is "we're *now* using an
connection is the result of preserving. */ active, registered connection". */
#define CLOSE_FINISH(fd) do { \ #define CLOSE_FINISH(fd) do { \
if (!keep_alive) \ if (!keep_alive) \
{ \ { \
CLOSE (fd); \ CLOSE (fd); \
if (reused_connection) \ if (pc_active_p && (fd) == pc_last_fd) \
invalidate_persistent (); \ invalidate_persistent (); \
} \ } \
} while (0) } while (0)
#define CLOSE_INVALIDATE(fd) do { \ #define CLOSE_INVALIDATE(fd) do { \
CLOSE (fd); \ CLOSE (fd); \
if (reused_connection) \ if (pc_active_p && (fd) == pc_last_fd) \
invalidate_persistent (); \ invalidate_persistent (); \
} while (0) } while (0)
@ -388,6 +445,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
char *proxyauth; char *proxyauth;
char *all_headers; char *all_headers;
char *host_port; char *host_port;
char *request_keep_alive;
int host_port_len; int host_port_len;
int sock, hcount, num_written, all_length, remport, statcode; int sock, hcount, num_written, all_length, remport, statcode;
long contlen, contrange; long contlen, contrange;
@ -396,8 +454,17 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
FILE *fp; FILE *fp;
int auth_tried_already; int auth_tried_already;
struct rbuf rbuf; struct rbuf rbuf;
int keep_alive, http_keep_alive_1, http_keep_alive_2;
int reused_connection; /* Whether this connection will be kept alive after the HTTP request
is done. */
int keep_alive;
/* Flags that detect the two ways of specifying HTTP keep-alive
response. */
int http_keep_alive_1, http_keep_alive_2;
/* Whether keep-alive should be inhibited. */
int inhibit_keep_alive;
if (!(*dt & HEAD_ONLY)) if (!(*dt & HEAD_ONLY))
/* If we're doing a GET on the URL, as opposed to just a HEAD, we need to /* If we're doing a GET on the URL, as opposed to just a HEAD, we need to
@ -407,12 +474,13 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
authenticate_h = 0; authenticate_h = 0;
auth_tried_already = 0; auth_tried_already = 0;
inhibit_keep_alive = (u->proxy != NULL);
again: again:
/* We need to come back here when the initial attempt to retrieve /* We need to come back here when the initial attempt to retrieve
without authorization header fails. */ without authorization header fails. */
keep_alive = 0; keep_alive = 0;
http_keep_alive_1 = http_keep_alive_2 = 0; http_keep_alive_1 = http_keep_alive_2 = 0;
reused_connection = 0;
/* Initialize certain elements of struct http_stat. */ /* Initialize certain elements of struct http_stat. */
hs->len = 0L; hs->len = 0L;
@ -429,7 +497,8 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
ou = u; ou = u;
/* First: establish the connection. */ /* First: establish the connection. */
if (u->proxy || !persistent_available_p (u->host, u->port)) if (inhibit_keep_alive
|| !persistent_available_p (u->host, u->port))
{ {
logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port); logprintf (LOG_VERBOSE, _("Connecting to %s:%hu... "), u->host, u->port);
err = make_connection (&sock, u->host, u->port); err = make_connection (&sock, u->host, u->port);
@ -469,8 +538,10 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
else else
{ {
logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port); logprintf (LOG_VERBOSE, _("Reusing connection to %s:%hu.\n"), u->host, u->port);
/* #### pc_last_fd should be accessed through an accessor
function. */
sock = pc_last_fd; sock = pc_last_fd;
reused_connection = 1; DEBUGP (("Reusing fd %d.\n", sock));
} }
if (u->proxy) if (u->proxy)
@ -492,12 +563,13 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
if (hs->restval) if (hs->restval)
{ {
range = (char *)alloca (13 + numdigit (hs->restval) + 4); range = (char *)alloca (13 + numdigit (hs->restval) + 4);
/* #### Gag me! Some servers (e.g. WebSitePro) have been known /* Gag me! Some servers (e.g. WebSitePro) have been known to
to misinterpret the following `Range' format, and return the respond to the following `Range' format by generating a
document as multipart/x-byte-ranges MIME type! multipart/x-byte-ranges MIME document! This MIME type was
present in an old draft of the byteranges specification.
#### TODO: Interpret MIME types, recognize bullshits similar HTTP/1.1 specifies a multipart/byte-ranges MIME type, but
the one described above, and deal with them! */ only if multiple non-overlapping ranges are requested --
which Wget never does. */
sprintf (range, "Range: bytes=%ld-\r\n", hs->restval); sprintf (range, "Range: bytes=%ld-\r\n", hs->restval);
} }
else else
@ -562,11 +634,18 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
host_port_len = sprintf (host_port, ":%d", remport); host_port_len = sprintf (host_port, ":%d", remport);
} }
if (!inhibit_keep_alive)
request_keep_alive = "Connection: Keep-Alive\r\n";
else
request_keep_alive = NULL;
/* Allocate the memory for the request. */ /* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path) request = (char *)alloca (strlen (command) + strlen (path)
+ strlen (useragent) + strlen (useragent)
+ strlen (remhost) + host_port_len + strlen (remhost) + host_port_len
+ strlen (HTTP_ACCEPT) + strlen (HTTP_ACCEPT)
+ (request_keep_alive
? strlen (request_keep_alive) : 0)
+ (referer ? strlen (referer) : 0) + (referer ? strlen (referer) : 0)
+ (wwwauth ? strlen (wwwauth) : 0) + (wwwauth ? strlen (wwwauth) : 0)
+ (proxyauth ? strlen (proxyauth) : 0) + (proxyauth ? strlen (proxyauth) : 0)
@ -580,11 +659,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
User-Agent: %s\r\n\ User-Agent: %s\r\n\
Host: %s%s\r\n\ Host: %s%s\r\n\
Accept: %s\r\n\ Accept: %s\r\n\
Connection: Keep-Alive\r\n\ %s%s%s%s%s%s%s\r\n",
%s%s%s%s%s%s\r\n",
command, path, useragent, remhost, command, path, useragent, remhost,
host_port ? host_port : "", host_port ? host_port : "",
HTTP_ACCEPT, referer ? referer : "", HTTP_ACCEPT,
request_keep_alive ? request_keep_alive : "",
referer ? referer : "",
wwwauth ? wwwauth : "", wwwauth ? wwwauth : "",
proxyauth ? proxyauth : "", proxyauth ? proxyauth : "",
range ? range : "", range ? range : "",
@ -767,6 +847,9 @@ Connection: Keep-Alive\r\n\
goto done_header; goto done_header;
} }
} }
/* Check for keep-alive related responses. */
if (!inhibit_keep_alive)
{
/* Check for the `Keep-Alive' header. */ /* Check for the `Keep-Alive' header. */
if (!http_keep_alive_1) if (!http_keep_alive_1)
{ {
@ -781,6 +864,7 @@ Connection: Keep-Alive\r\n\
&http_keep_alive_2)) &http_keep_alive_2))
goto done_header; goto done_header;
} }
}
done_header: done_header:
free (hdr); free (hdr);
} }
@ -789,8 +873,13 @@ Connection: Keep-Alive\r\n\
if (contlen != -1 if (contlen != -1
&& (http_keep_alive_1 || http_keep_alive_2)) && (http_keep_alive_1 || http_keep_alive_2))
{
assert (inhibit_keep_alive == 0);
keep_alive = 1; keep_alive = 1;
if (keep_alive && !reused_connection) }
if (keep_alive)
/* The server has promised that it will not close the connection
when we're done. This means that we can register it. */
register_persistent (u->host, u->port, sock); register_persistent (u->host, u->port, sock);
if ((statcode == HTTP_STATUS_UNAUTHORIZED) if ((statcode == HTTP_STATUS_UNAUTHORIZED)

View File

@ -518,6 +518,7 @@ recursive_retrieve (const char *file, const char *this_url)
cur_url->local_name = xstrdup (filename); cur_url->local_name = xstrdup (filename);
} }
} }
else
DEBUGP (("%s already in list, so we don't load.\n", constr)); DEBUGP (("%s already in list, so we don't load.\n", constr));
/* Free filename and constr. */ /* Free filename and constr. */
FREE_MAYBE (filename); FREE_MAYBE (filename);