1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Clean up handling of schemes.

Published in <sxswv0n7h7s.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-11-18 16:12:05 -08:00
parent 303f406997
commit f178e6c613
8 changed files with 164 additions and 145 deletions

View File

@ -1,3 +1,7 @@
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Clean up handling of URL schemes.
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com> 2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Get rid of `protostrings'. * url.c: Get rid of `protostrings'.

View File

@ -278,12 +278,12 @@ same_host (const char *u1, const char *u2)
char *real1, *real2; char *real1, *real2;
/* Skip protocol, if present. */ /* Skip protocol, if present. */
u1 += skip_proto (u1); u1 += url_skip_scheme (u1);
u2 += skip_proto (u2); u2 += url_skip_scheme (u2);
/* Skip username and password, if present. */ /* Skip username and password, if present. */
u1 += skip_uname (u1); u1 += url_skip_uname (u1);
u2 += skip_uname (u2); u2 += url_skip_uname (u2);
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++); for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
p1 = strdupdelim (s, u1); p1 = strdupdelim (s, u1);

View File

@ -301,7 +301,7 @@ static void
handle_link (struct collect_urls_closure *closure, const char *link_uri, handle_link (struct collect_urls_closure *closure, const char *link_uri,
struct taginfo *tag, int attrid) struct taginfo *tag, int attrid)
{ {
int no_proto = !has_proto (link_uri); int no_scheme = !url_has_scheme (link_uri);
urlpos *newel; urlpos *newel;
const char *base = closure->base ? closure->base : closure->parent_base; const char *base = closure->base ? closure->base : closure->parent_base;
@ -324,10 +324,10 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
if (!base) if (!base)
{ {
if (no_proto) if (no_scheme)
{ {
/* We have no base, and the link does not have a protocol or /* We have no base, and the link does not have a host
a host attached to it. Nothing we can do. */ attached to it. Nothing we can do. */
/* #### Should we print a warning here? Wget 1.5.x used to. */ /* #### Should we print a warning here? Wget 1.5.x used to. */
return; return;
} }
@ -349,11 +349,11 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text; newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
newel->size = tag->attrs[attrid].value_raw_size; newel->size = tag->attrs[attrid].value_raw_size;
/* A URL is relative if the host and protocol are not named, and the /* A URL is relative if the host is not named, and the name does not
name does not start with `/'. */ start with `/'. */
if (no_proto && *link_uri != '/') if (no_scheme && *link_uri != '/')
newel->link_relative_p = 1; newel->link_relative_p = 1;
else if (!no_proto) else if (!no_scheme)
newel->link_complete_p = 1; newel->link_complete_p = 1;
if (closure->tail) if (closure->tail)

View File

@ -614,7 +614,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
#ifndef HAVE_SSL #ifndef HAVE_SSL
!persistent_available_p (u->host, u->port) !persistent_available_p (u->host, u->port)
#else #else
!persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0)) !persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
#endif /* HAVE_SSL */ #endif /* HAVE_SSL */
) )
{ {
@ -653,7 +653,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
break; break;
} }
#ifdef HAVE_SSL #ifdef HAVE_SSL
if (u->proto == URLHTTPS) if (u->scheme == SCHEME_HTTPS)
if (connect_ssl (&ssl, ssl_ctx,sock) != 0) if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
{ {
logputs (LOG_VERBOSE, "\n"); logputs (LOG_VERBOSE, "\n");
@ -786,7 +786,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
port_maybe = NULL; port_maybe = NULL;
if (1 if (1
#ifdef HAVE_SSL #ifdef HAVE_SSL
&& remport != (u->proto == URLHTTPS && remport != (u->scheme == SCHEME_HTTPS
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT) ? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
#else #else
&& remport != DEFAULT_HTTP_PORT && remport != DEFAULT_HTTP_PORT
@ -804,7 +804,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
if (opt.cookies) if (opt.cookies)
cookies = build_cookies_request (ou->host, ou->port, ou->path, cookies = build_cookies_request (ou->host, ou->port, ou->path,
ou->proto == URLHTTPS); #ifdef HAVE_SSL
ou->scheme == SCHEME_HTTPS
#else
0
#endif
);
/* Allocate the memory for the request. */ /* Allocate the memory for the request. */
request = (char *)alloca (strlen (command) + strlen (path) request = (char *)alloca (strlen (command) + strlen (path)
@ -848,7 +853,7 @@ Accept: %s\r\n\
/* Send the request to server. */ /* Send the request to server. */
#ifdef HAVE_SSL #ifdef HAVE_SSL
if (u->proto == URLHTTPS) if (u->scheme == SCHEME_HTTPS)
num_written = ssl_iwrite (ssl, request, strlen (request)); num_written = ssl_iwrite (ssl, request, strlen (request));
else else
#endif /* HAVE_SSL */ #endif /* HAVE_SSL */
@ -871,7 +876,7 @@ Accept: %s\r\n\
/* Before reading anything, initialize the rbuf. */ /* Before reading anything, initialize the rbuf. */
rbuf_initialize (&rbuf, sock); rbuf_initialize (&rbuf, sock);
#ifdef HAVE_SSL #ifdef HAVE_SSL
if (u->proto == URLHTTPS) if (u->scheme == SCHEME_HTTPS)
rbuf.ssl = ssl; rbuf.ssl = ssl;
else else
rbuf.ssl = NULL; rbuf.ssl = NULL;

View File

@ -187,7 +187,7 @@ recursive_retrieve (const char *file, const char *this_url)
that the retrieval is done through proxy. In that case, FTP that the retrieval is done through proxy. In that case, FTP
links will be followed by default and recursion will not be links will be followed by default and recursion will not be
turned off when following them. */ turned off when following them. */
this_url_ftp = (urlproto (this_url) == URLFTP); this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
/* Get the URL-s from an HTML file: */ /* Get the URL-s from an HTML file: */
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url, url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
@ -217,12 +217,6 @@ recursive_retrieve (const char *file, const char *this_url)
freeurl (u, 1); freeurl (u, 1);
continue; continue;
} }
if (u->proto == URLFILE)
{
DEBUGP (("Nothing to do with file:// around here.\n"));
freeurl (u, 1);
continue;
}
assert (u->url != NULL); assert (u->url != NULL);
constr = xstrdup (u->url); constr = xstrdup (u->url);
@ -254,7 +248,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* If it is FTP, and FTP is not followed, chuck it out. */ /* If it is FTP, and FTP is not followed, chuck it out. */
if (!inl) if (!inl)
if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp) if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
{ {
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n")); DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
string_set_add (undesirable_urls, constr); string_set_add (undesirable_urls, constr);
@ -262,7 +256,7 @@ recursive_retrieve (const char *file, const char *this_url)
} }
/* If it is absolute link and they are not followed, chuck it /* If it is absolute link and they are not followed, chuck it
out. */ out. */
if (!inl && u->proto != URLFTP) if (!inl && u->scheme != SCHEME_FTP)
if (opt.relative_only && !cur_url->link_relative_p) if (opt.relative_only && !cur_url->link_relative_p)
{ {
DEBUGP (("It doesn't really look like a relative link.\n")); DEBUGP (("It doesn't really look like a relative link.\n"));
@ -281,7 +275,7 @@ recursive_retrieve (const char *file, const char *this_url)
if (!inl && opt.no_parent if (!inl && opt.no_parent
/* If the new URL is FTP and the old was not, ignore /* If the new URL is FTP and the old was not, ignore
opt.no_parent. */ opt.no_parent. */
&& !(!this_url_ftp && u->proto == URLFTP)) && !(!this_url_ftp && u->scheme == SCHEME_FTP))
{ {
/* Check for base_dir first. */ /* Check for base_dir first. */
if (!(base_dir && frontcmp (base_dir, u->dir))) if (!(base_dir && frontcmp (base_dir, u->dir)))
@ -368,7 +362,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* This line is bogus. */ /* This line is bogus. */
/*string_set_add (undesirable_urls, constr);*/ /*string_set_add (undesirable_urls, constr);*/
if (!inl && !((u->proto == URLFTP) && !this_url_ftp)) if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
if (!opt.spanhost && this_url && !same_host (this_url, constr)) if (!opt.spanhost && this_url && !same_host (this_url, constr))
{ {
DEBUGP (("This is not the same hostname as the parent's.\n")); DEBUGP (("This is not the same hostname as the parent's.\n"));
@ -377,7 +371,7 @@ recursive_retrieve (const char *file, const char *this_url)
} }
} }
/* What about robots.txt? */ /* What about robots.txt? */
if (!inl && opt.use_robots && u->proto == URLHTTP) if (!inl && opt.use_robots && u->scheme == SCHEME_HTTP)
{ {
struct robot_specs *specs = res_get_specs (u->host, u->port); struct robot_specs *specs = res_get_specs (u->host, u->port);
if (!specs) if (!specs)
@ -418,7 +412,7 @@ recursive_retrieve (const char *file, const char *this_url)
string_set_add (undesirable_urls, constr); string_set_add (undesirable_urls, constr);
/* Automatically followed FTPs will *not* be downloaded /* Automatically followed FTPs will *not* be downloaded
recursively. */ recursively. */
if (u->proto == URLFTP) if (u->scheme == SCHEME_FTP)
{ {
/* Don't you adore side-effects? */ /* Don't you adore side-effects? */
opt.recursive = 0; opt.recursive = 0;
@ -428,7 +422,7 @@ recursive_retrieve (const char *file, const char *this_url)
/* Retrieve it. */ /* Retrieve it. */
retrieve_url (constr, &filename, &newloc, retrieve_url (constr, &filename, &newloc,
canon_this_url ? canon_this_url : this_url, &dt); canon_this_url ? canon_this_url : this_url, &dt);
if (u->proto == URLFTP) if (u->scheme == SCHEME_FTP)
{ {
/* Restore... */ /* Restore... */
opt.recursive = 1; opt.recursive = 1;

View File

@ -300,7 +300,7 @@ rate (long bytes, long msecs, int pad)
return res; return res;
} }
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \ #define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme) \
&& no_proxy_match((u)->host, \ && no_proxy_match((u)->host, \
(const char **)opt.no_proxy)) (const char **)opt.no_proxy))
@ -366,8 +366,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
memset (u, 0, sizeof (*u)); memset (u, 0, sizeof (*u));
u->proxy = pu; u->proxy = pu;
/* Get the appropriate proxy server, appropriate for the /* Get the appropriate proxy server, appropriate for the
current protocol. */ current scheme. */
proxy = getproxy (pu->proto); proxy = getproxy (pu->scheme);
if (!proxy) if (!proxy)
{ {
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n")); logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
@ -379,9 +379,9 @@ retrieve_url (const char *origurl, char **file, char **newloc,
} }
/* Parse the proxy URL. */ /* Parse the proxy URL. */
result = parseurl (proxy, u, 0); result = parseurl (proxy, u, 0);
if (result != URLOK || u->proto != URLHTTP) if (result != URLOK || u->scheme != SCHEME_HTTP)
{ {
if (u->proto == URLHTTP) if (u->scheme == SCHEME_HTTP)
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result)); logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
else else
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy); logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
@ -391,19 +391,18 @@ retrieve_url (const char *origurl, char **file, char **newloc,
xfree (url); xfree (url);
return PROXERR; return PROXERR;
} }
u->proto = URLHTTP; u->scheme = SCHEME_HTTP;
} }
assert (u->proto != URLFILE); /* #### Implement me! */
mynewloc = NULL; mynewloc = NULL;
if (u->proto == URLHTTP if (u->scheme == SCHEME_HTTP
#ifdef HAVE_SSL #ifdef HAVE_SSL
|| u->proto == URLHTTPS || u->scheme == SCHEME_HTTPS
#endif #endif
) )
result = http_loop (u, &mynewloc, dt); result = http_loop (u, &mynewloc, dt);
else if (u->proto == URLFTP) else if (u->scheme == SCHEME_FTP)
{ {
/* If this is a redirection, we must not allow recursive FTP /* If this is a redirection, we must not allow recursive FTP
retrieval, so we save recursion to oldrec, and restore it retrieval, so we save recursion to oldrec, and restore it
@ -420,7 +419,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
#### All of this is, of course, crap. These types should be #### All of this is, of course, crap. These types should be
determined through mailcap. */ determined through mailcap. */
if (redirections && u->local && (u->proto == URLFTP )) if (redirections && u->local && (u->scheme == SCHEME_FTP))
{ {
char *suf = suffix (u->local); char *suf = suffix (u->local);
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm"))) if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))

202
src/url.c
View File

@ -49,21 +49,21 @@ extern int errno;
static int urlpath_length PARAMS ((const char *)); static int urlpath_length PARAMS ((const char *));
struct proto struct scheme_data
{ {
char *name; enum url_scheme scheme;
uerr_t ind; char *leading_string;
unsigned short port; int default_port;
}; };
/* Supported protocols: */ /* Supported schemes: */
static struct proto sup_protos[] = static struct scheme_data supported_schemes[] =
{ {
{ "http://", URLHTTP, DEFAULT_HTTP_PORT }, { SCHEME_HTTP, "http://", DEFAULT_HTTP_PORT },
#ifdef HAVE_SSL #ifdef HAVE_SSL
{ "https://",URLHTTPS, DEFAULT_HTTPS_PORT}, { SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
#endif #endif
{ "ftp://", URLFTP, DEFAULT_FTP_PORT } { SCHEME_FTP, "ftp://", DEFAULT_FTP_PORT }
}; };
static void parse_dir PARAMS ((const char *, char **, char **)); static void parse_dir PARAMS ((const char *, char **, char **));
@ -229,39 +229,28 @@ encode_string (const char *s)
} \ } \
} while (0) } while (0)
/* Returns the protocol type if URL's protocol is supported, or /* Returns the scheme type if the scheme is supported, or
URLUNKNOWN if not. */ SCHEME_INVALID if not. */
uerr_t enum url_scheme
urlproto (const char *url) url_scheme (const char *url)
{ {
int i; int i;
for (i = 0; i < ARRAY_SIZE (sup_protos); i++) for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
if (!strncasecmp (url, sup_protos[i].name, strlen (sup_protos[i].name))) if (!strncasecmp (url, supported_schemes[i].leading_string,
return sup_protos[i].ind; strlen (supported_schemes[i].leading_string)))
for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++); return supported_schemes[i].scheme;
if (url[i] == ':') return SCHEME_INVALID;
{
for (++i; url[i] && url[i] != '/'; i++)
if (!ISDIGIT (url[i]))
return URLBADPORT;
if (url[i - 1] == ':')
return URLFTP;
else
return URLHTTP;
}
else
return URLHTTP;
} }
/* Skip the protocol part of the URL, e.g. `http://'. If no protocol /* Return the number of characters needed to skip the scheme part of
part is found, returns 0. */ the URL, e.g. `http://'. If no scheme is found, returns 0. */
int int
skip_proto (const char *url) url_skip_scheme (const char *url)
{ {
const char *p = url; const char *p = url;
/* Skip protocol name. We allow `-' and `+' because of `whois++', /* Skip the scheme name. We allow `-' and `+' because of `whois++',
etc. */ etc. */
while (ISALNUM (*p) || *p == '-' || *p == '+') while (ISALNUM (*p) || *p == '-' || *p == '+')
++p; ++p;
@ -277,10 +266,10 @@ skip_proto (const char *url)
return p - url; return p - url;
} }
/* Returns 1 if the URL begins with a protocol (supported or /* Returns 1 if the URL begins with a scheme (supported or
unsupported), 0 otherwise. */ unsupported), 0 otherwise. */
int int
has_proto (const char *url) url_has_scheme (const char *url)
{ {
const char *p = url; const char *p = url;
while (ISALNUM (*p) || *p == '-' || *p == '+') while (ISALNUM (*p) || *p == '-' || *p == '+')
@ -290,11 +279,11 @@ has_proto (const char *url)
/* Skip the username and password, if present here. The function /* Skip the username and password, if present here. The function
should be called *not* with the complete URL, but with the part should be called *not* with the complete URL, but with the part
right after the protocol. right after the scheme.
If no username and password are found, return 0. */ If no username and password are found, return 0. */
int int
skip_uname (const char *url) url_skip_uname (const char *url)
{ {
const char *p; const char *p;
const char *q = NULL; const char *q = NULL;
@ -317,7 +306,7 @@ newurl (void)
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo)); u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
memset (u, 0, sizeof (*u)); memset (u, 0, sizeof (*u));
u->proto = URLUNKNOWN; u->scheme = SCHEME_INVALID;
return u; return u;
} }
@ -344,10 +333,14 @@ freeurl (struct urlinfo *u, int complete)
return; return;
} }
enum url_parse_error {
PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
};
/* Extract the given URL of the form /* Extract the given URL of the form
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)? (http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
1. hostname (terminated with `/' or `:') 1. hostname (terminated with `/' or `:')
2. port number (terminated with `/'), or chosen for the protocol 2. port number (terminated with `/'), or chosen for the scheme
3. dirname (everything after hostname) 3. dirname (everything after hostname)
Most errors are handled. No allocation is done, you must supply Most errors are handled. No allocation is done, you must supply
pointers to allocated memory. pointers to allocated memory.
@ -367,36 +360,36 @@ parseurl (const char *url, struct urlinfo *u, int strict)
{ {
int i, l, abs_ftp; int i, l, abs_ftp;
int recognizable; /* Recognizable URL is the one where int recognizable; /* Recognizable URL is the one where
the protocol name was explicitly the scheme was explicitly named,
named, i.e. it wasn't deduced from i.e. it wasn't deduced from the URL
the URL format. */ format. */
uerr_t type; uerr_t type;
DEBUGP (("parseurl (\"%s\") -> ", url)); DEBUGP (("parseurl (\"%s\") -> ", url));
recognizable = has_proto (url); recognizable = url_has_scheme (url);
if (strict && !recognizable) if (strict && !recognizable)
return URLUNKNOWN; return URLUNKNOWN;
for (i = 0, l = 0; i < ARRAY_SIZE (sup_protos); i++) for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
{ {
l = strlen (sup_protos[i].name); l = strlen (supported_schemes[i].leading_string);
if (!strncasecmp (sup_protos[i].name, url, l)) if (!strncasecmp (supported_schemes[i].leading_string, url, l))
break; break;
} }
/* If protocol is recognizable, but unsupported, bail out, else /* If scheme is recognizable, but unsupported, bail out, else
suppose unknown. */ suppose unknown. */
if (recognizable && i == ARRAY_SIZE (sup_protos)) if (recognizable && i == ARRAY_SIZE (supported_schemes))
return URLUNKNOWN; return URLUNKNOWN;
else if (i == ARRAY_SIZE (sup_protos)) else if (i == ARRAY_SIZE (supported_schemes))
type = URLUNKNOWN; type = URLUNKNOWN;
else else
u->proto = type = sup_protos[i].ind; u->scheme = type = supported_schemes[i].scheme;
if (type == URLUNKNOWN) if (type == URLUNKNOWN)
l = 0; l = 0;
/* Allow a username and password to be specified (i.e. just skip /* Allow a username and password to be specified (i.e. just skip
them for now). */ them for now). */
if (recognizable) if (recognizable)
l += skip_uname (url + l); l += url_skip_uname (url + l);
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++); for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
if (i == l) if (i == l)
return URLBADHOST; return URLBADHOST;
@ -413,7 +406,10 @@ parseurl (const char *url, struct urlinfo *u, int strict)
if (ISDIGIT (url[++i])) /* A port number */ if (ISDIGIT (url[++i])) /* A port number */
{ {
if (type == URLUNKNOWN) if (type == URLUNKNOWN)
u->proto = type = URLHTTP; {
type = URLHTTP;
u->scheme = SCHEME_HTTP;
}
for (; url[i] && url[i] != '/'; i++) for (; url[i] && url[i] != '/'; i++)
if (ISDIGIT (url[i])) if (ISDIGIT (url[i]))
u->port = 10 * u->port + (url[i] - '0'); u->port = 10 * u->port + (url[i] - '0');
@ -424,21 +420,27 @@ parseurl (const char *url, struct urlinfo *u, int strict)
DEBUGP (("port %hu -> ", u->port)); DEBUGP (("port %hu -> ", u->port));
} }
else if (type == URLUNKNOWN) /* or a directory */ else if (type == URLUNKNOWN) /* or a directory */
u->proto = type = URLFTP; {
type = URLFTP;
u->scheme = SCHEME_FTP;
}
else /* or just a misformed port number */ else /* or just a misformed port number */
return URLBADPORT; return URLBADPORT;
} }
else if (type == URLUNKNOWN) else if (type == URLUNKNOWN)
u->proto = type = URLHTTP; {
type = URLHTTP;
u->scheme = SCHEME_HTTP;
}
if (!u->port) if (!u->port)
{ {
int ind; int ind;
for (ind = 0; ind < ARRAY_SIZE (sup_protos); ind++) for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
if (sup_protos[ind].ind == type) if (supported_schemes[ind].scheme == u->scheme)
break; break;
if (ind == ARRAY_SIZE (sup_protos)) if (ind == ARRAY_SIZE (supported_schemes))
return URLUNKNOWN; return URLUNKNOWN;
u->port = sup_protos[ind].port; u->port = supported_schemes[ind].default_port;
} }
/* Some delimiter troubles... */ /* Some delimiter troubles... */
if (url[i] == '/' && url[i - 1] != ':') if (url[i] == '/' && url[i - 1] != ':')
@ -480,7 +482,7 @@ parseurl (const char *url, struct urlinfo *u, int strict)
if (l > 1 && u->dir[l - 1] == '/') if (l > 1 && u->dir[l - 1] == '/')
u->dir[l - 1] = '\0'; u->dir[l - 1] = '\0';
/* Re-create the path: */ /* Re-create the path: */
abs_ftp = (u->proto == URLFTP && *u->dir == '/'); abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/", /* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */ abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
strcpy (u->path, abs_ftp ? "%2F" : "/"); strcpy (u->path, abs_ftp ? "%2F" : "/");
@ -574,11 +576,10 @@ parse_uname (const char *url, char **user, char **passwd)
*user = NULL; *user = NULL;
*passwd = NULL; *passwd = NULL;
/* Look for the end of the protocol string. */ /* Look for the end of the scheme identifier. */
l = skip_proto (url); l = url_skip_scheme (url);
if (!l) if (!l)
return URLUNKNOWN; return URLUNKNOWN;
/* Add protocol offset. */
url += l; url += l;
/* Is there an `@' character? */ /* Is there an `@' character? */
for (p = url; *p && *p != '/'; p++) for (p = url; *p && *p != '/'; p++)
@ -623,26 +624,27 @@ process_ftp_type (char *path)
return '\0'; return '\0';
} }
/* Return the URL as fine-formed string, with a proper protocol, optional port /* Recreate the URL string from the data in urlinfo. This can be used
number, directory and optional user/password. If `hide' is non-zero (as it to create a "canonical" representation of the URL. If `hide' is
is when we're calling this on a URL we plan to print, but not when calling it non-zero (as it is when we're calling this on a URL we plan to
to canonicalize a URL for use within the program), password will be hidden. print, but not when calling it to canonicalize a URL for use within
The forbidden characters in the URL will be cleansed. */ the program), password will be hidden. The forbidden characters in
the URL will be cleansed. */
char * char *
str_url (const struct urlinfo *u, int hide) str_url (const struct urlinfo *u, int hide)
{ {
char *res, *host, *user, *passwd, *proto_name, *dir, *file; char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
int i, l, ln, lu, lh, lp, lf, ld; int i, l, ln, lu, lh, lp, lf, ld;
unsigned short proto_default_port; unsigned short default_port;
/* Look for the protocol name. */ /* Look for the scheme. */
for (i = 0; i < ARRAY_SIZE (sup_protos); i++) for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
if (sup_protos[i].ind == u->proto) if (supported_schemes[i].scheme == u->scheme)
break; break;
if (i == ARRAY_SIZE (sup_protos)) if (i == ARRAY_SIZE (supported_schemes))
return NULL; return NULL;
proto_name = sup_protos[i].name; scheme_name = supported_schemes[i].leading_string;
proto_default_port = sup_protos[i].port; default_port = supported_schemes[i].default_port;
host = encode_string (u->host); host = encode_string (u->host);
dir = encode_string (u->dir); dir = encode_string (u->dir);
file = encode_string (u->file); file = encode_string (u->file);
@ -660,7 +662,7 @@ str_url (const struct urlinfo *u, int hide)
else else
passwd = encode_string (u->passwd); passwd = encode_string (u->passwd);
} }
if (u->proto == URLFTP && *dir == '/') if (u->scheme == SCHEME_FTP && *dir == '/')
{ {
char *tmp = (char *)xmalloc (strlen (dir) + 3); char *tmp = (char *)xmalloc (strlen (dir) + 3);
/*sprintf (tmp, "%%2F%s", dir + 1);*/ /*sprintf (tmp, "%%2F%s", dir + 1);*/
@ -672,19 +674,19 @@ str_url (const struct urlinfo *u, int hide)
dir = tmp; dir = tmp;
} }
ln = strlen (proto_name); ln = strlen (scheme_name);
lu = user ? strlen (user) : 0; lu = user ? strlen (user) : 0;
lp = passwd ? strlen (passwd) : 0; lp = passwd ? strlen (passwd) : 0;
lh = strlen (host); lh = strlen (host);
ld = strlen (dir); ld = strlen (dir);
lf = strlen (file); lf = strlen (file);
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */ res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
/* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name, /* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
(user ? user : ""), (passwd ? ":" : ""), (user ? user : ""), (passwd ? ":" : ""),
(passwd ? passwd : ""), (user ? "@" : ""), (passwd ? passwd : ""), (user ? "@" : ""),
host, u->port, dir, *dir ? "/" : "", file); */ host, u->port, dir, *dir ? "/" : "", file); */
l = 0; l = 0;
memcpy (res, proto_name, ln); memcpy (res, scheme_name, ln);
l += ln; l += ln;
if (user) if (user)
{ {
@ -700,7 +702,7 @@ str_url (const struct urlinfo *u, int hide)
} }
memcpy (res + l, host, lh); memcpy (res + l, host, lh);
l += lh; l += lh;
if (u->port != proto_default_port) if (u->port != default_port)
{ {
res[l++] = ':'; res[l++] = ':';
long_to_string (res + l, (long)u->port); long_to_string (res + l, (long)u->port);
@ -1123,7 +1125,7 @@ find_last_char (const char *b, const char *e, char c)
Either of the URIs may be absolute or relative, complete with the Either of the URIs may be absolute or relative, complete with the
host name, or path only. This tries to behave "reasonably" in all host name, or path only. This tries to behave "reasonably" in all
foreseeable cases. It employs little specific knowledge about foreseeable cases. It employs little specific knowledge about
protocols or URL-specific stuff -- it just works on strings. schemes or URL-specific stuff -- it just works on strings.
The parameters LINKLENGTH is useful if LINK is not zero-terminated. The parameters LINKLENGTH is useful if LINK is not zero-terminated.
See uri_merge for a gentler interface to this functionality. See uri_merge for a gentler interface to this functionality.
@ -1131,11 +1133,11 @@ find_last_char (const char *b, const char *e, char c)
#### This function should handle `./' and `../' so that the evil #### This function should handle `./' and `../' so that the evil
path_simplify can go. */ path_simplify can go. */
static char * static char *
uri_merge_1 (const char *base, const char *link, int linklength, int no_proto) uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
{ {
char *constr; char *constr;
if (no_proto) if (no_scheme)
{ {
const char *end = base + urlpath_length (base); const char *end = base + urlpath_length (base);
@ -1252,7 +1254,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
constr[span + linklength] = '\0'; constr[span + linklength] = '\0';
} }
} }
else /* !no_proto */ else /* !no_scheme */
{ {
constr = strdupdelim (link, link + linklength); constr = strdupdelim (link, link + linklength);
} }
@ -1265,7 +1267,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
char * char *
uri_merge (const char *base, const char *link) uri_merge (const char *base, const char *link)
{ {
return uri_merge_1 (base, link, strlen (link), !has_proto (link)); return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
} }
/* Optimize URL by host, destructively replacing u->host with realhost /* Optimize URL by host, destructively replacing u->host with realhost
@ -1283,22 +1285,28 @@ opt_url (struct urlinfo *u)
u->url = str_url (u, 0); u->url = str_url (u, 0);
} }
/* Returns proxy host address, in accordance with PROTO. */ /* Returns proxy host address, in accordance with SCHEME. */
char * char *
getproxy (uerr_t proto) getproxy (enum url_scheme scheme)
{ {
char *proxy; char *proxy = NULL;
if (proto == URLHTTP) switch (scheme)
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy"); {
else if (proto == URLFTP) case SCHEME_HTTP:
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy"); proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
break;
#ifdef HAVE_SSL #ifdef HAVE_SSL
else if (proto == URLHTTPS) case SCHEME_HTTPS:
proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy"); proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
#endif /* HAVE_SSL */ break;
else #endif
proxy = NULL; case SCHEME_FTP:
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
break;
case SCHEME_INVALID:
break;
}
if (!proxy || !*proxy) if (!proxy || !*proxy)
return NULL; return NULL;
return proxy; return proxy;

View File

@ -25,12 +25,21 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#define DEFAULT_FTP_PORT 21 #define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443 #define DEFAULT_HTTPS_PORT 443
enum url_scheme {
SCHEME_HTTP,
#ifdef HAVE_SSL
SCHEME_HTTPS,
#endif
SCHEME_FTP,
SCHEME_INVALID
};
/* Structure containing info on a URL. */ /* Structure containing info on a URL. */
struct urlinfo struct urlinfo
{ {
char *url; /* Unchanged URL */ char *url; /* Unchanged URL */
uerr_t proto; /* URL protocol */ enum url_scheme scheme; /* URL scheme */
char *host; /* Extracted hostname */ char *host; /* Extracted hostname */
unsigned short port; unsigned short port;
char ftp_type; char ftp_type;
@ -97,10 +106,10 @@ char *encode_string PARAMS ((const char *));
struct urlinfo *newurl PARAMS ((void)); struct urlinfo *newurl PARAMS ((void));
void freeurl PARAMS ((struct urlinfo *, int)); void freeurl PARAMS ((struct urlinfo *, int));
uerr_t urlproto PARAMS ((const char *)); enum url_scheme url_scheme PARAMS ((const char *));
int skip_proto PARAMS ((const char *)); int url_skip_scheme PARAMS ((const char *));
int has_proto PARAMS ((const char *)); int url_has_scheme PARAMS ((const char *));
int skip_uname PARAMS ((const char *)); int url_skip_uname PARAMS ((const char *));
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int)); uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
char *str_url PARAMS ((const struct urlinfo *, int)); char *str_url PARAMS ((const struct urlinfo *, int));