mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Clean up handling of schemes.
Published in <sxswv0n7h7s.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
303f406997
commit
f178e6c613
@ -1,3 +1,7 @@
|
||||
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* url.c: Clean up handling of URL schemes.
|
||||
|
||||
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* url.c: Get rid of `protostrings'.
|
||||
|
@ -278,12 +278,12 @@ same_host (const char *u1, const char *u2)
|
||||
char *real1, *real2;
|
||||
|
||||
/* Skip protocol, if present. */
|
||||
u1 += skip_proto (u1);
|
||||
u2 += skip_proto (u2);
|
||||
u1 += url_skip_scheme (u1);
|
||||
u2 += url_skip_scheme (u2);
|
||||
|
||||
/* Skip username ans password, if present. */
|
||||
u1 += skip_uname (u1);
|
||||
u2 += skip_uname (u2);
|
||||
u1 += url_skip_uname (u1);
|
||||
u2 += url_skip_uname (u2);
|
||||
|
||||
for (s = u1; *u1 && *u1 != '/' && *u1 != ':'; u1++);
|
||||
p1 = strdupdelim (s, u1);
|
||||
|
@ -301,7 +301,7 @@ static void
|
||||
handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
||||
struct taginfo *tag, int attrid)
|
||||
{
|
||||
int no_proto = !has_proto (link_uri);
|
||||
int no_scheme = !url_has_scheme (link_uri);
|
||||
urlpos *newel;
|
||||
|
||||
const char *base = closure->base ? closure->base : closure->parent_base;
|
||||
@ -324,10 +324,10 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
||||
|
||||
if (!base)
|
||||
{
|
||||
if (no_proto)
|
||||
if (no_scheme)
|
||||
{
|
||||
/* We have no base, and the link does not have a protocol or
|
||||
a host attached to it. Nothing we can do. */
|
||||
/* We have no base, and the link does not have a host
|
||||
attached to it. Nothing we can do. */
|
||||
/* #### Should we print a warning here? Wget 1.5.x used to. */
|
||||
return;
|
||||
}
|
||||
@ -349,11 +349,11 @@ handle_link (struct collect_urls_closure *closure, const char *link_uri,
|
||||
newel->pos = tag->attrs[attrid].value_raw_beginning - closure->text;
|
||||
newel->size = tag->attrs[attrid].value_raw_size;
|
||||
|
||||
/* A URL is relative if the host and protocol are not named, and the
|
||||
name does not start with `/'. */
|
||||
if (no_proto && *link_uri != '/')
|
||||
/* A URL is relative if the host is not named, and the name does not
|
||||
start with `/'. */
|
||||
if (no_scheme && *link_uri != '/')
|
||||
newel->link_relative_p = 1;
|
||||
else if (!no_proto)
|
||||
else if (!no_scheme)
|
||||
newel->link_complete_p = 1;
|
||||
|
||||
if (closure->tail)
|
||||
|
17
src/http.c
17
src/http.c
@ -614,7 +614,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
#ifndef HAVE_SSL
|
||||
!persistent_available_p (u->host, u->port)
|
||||
#else
|
||||
!persistent_available_p (u->host, u->port, (u->proto==URLHTTPS ? 1 : 0))
|
||||
!persistent_available_p (u->host, u->port, u->scheme == SCHEME_HTTPS)
|
||||
#endif /* HAVE_SSL */
|
||||
)
|
||||
{
|
||||
@ -653,7 +653,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_SSL
|
||||
if (u->proto == URLHTTPS)
|
||||
if (u->scheme == SCHEME_HTTPS)
|
||||
if (connect_ssl (&ssl, ssl_ctx,sock) != 0)
|
||||
{
|
||||
logputs (LOG_VERBOSE, "\n");
|
||||
@ -786,7 +786,7 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
port_maybe = NULL;
|
||||
if (1
|
||||
#ifdef HAVE_SSL
|
||||
&& remport != (u->proto == URLHTTPS
|
||||
&& remport != (u->scheme == SCHEME_HTTPS
|
||||
? DEFAULT_HTTPS_PORT : DEFAULT_HTTP_PORT)
|
||||
#else
|
||||
&& remport != DEFAULT_HTTP_PORT
|
||||
@ -804,7 +804,12 @@ gethttp (struct urlinfo *u, struct http_stat *hs, int *dt)
|
||||
|
||||
if (opt.cookies)
|
||||
cookies = build_cookies_request (ou->host, ou->port, ou->path,
|
||||
ou->proto == URLHTTPS);
|
||||
#ifdef HAVE_SSL
|
||||
ou->scheme == SCHEME_HTTPS
|
||||
#else
|
||||
0
|
||||
#endif
|
||||
);
|
||||
|
||||
/* Allocate the memory for the request. */
|
||||
request = (char *)alloca (strlen (command) + strlen (path)
|
||||
@ -848,7 +853,7 @@ Accept: %s\r\n\
|
||||
|
||||
/* Send the request to server. */
|
||||
#ifdef HAVE_SSL
|
||||
if (u->proto == URLHTTPS)
|
||||
if (u->scheme == SCHEME_HTTPS)
|
||||
num_written = ssl_iwrite (ssl, request, strlen (request));
|
||||
else
|
||||
#endif /* HAVE_SSL */
|
||||
@ -871,7 +876,7 @@ Accept: %s\r\n\
|
||||
/* Before reading anything, initialize the rbuf. */
|
||||
rbuf_initialize (&rbuf, sock);
|
||||
#ifdef HAVE_SSL
|
||||
if (u->proto == URLHTTPS)
|
||||
if (u->scheme == SCHEME_HTTPS)
|
||||
rbuf.ssl = ssl;
|
||||
else
|
||||
rbuf.ssl = NULL;
|
||||
|
22
src/recur.c
22
src/recur.c
@ -187,7 +187,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
that the retrieval is done through proxy. In that case, FTP
|
||||
links will be followed by default and recursion will not be
|
||||
turned off when following them. */
|
||||
this_url_ftp = (urlproto (this_url) == URLFTP);
|
||||
this_url_ftp = (url_scheme (this_url) == SCHEME_FTP);
|
||||
|
||||
/* Get the URL-s from an HTML file: */
|
||||
url_list = get_urls_html (file, canon_this_url ? canon_this_url : this_url,
|
||||
@ -217,12 +217,6 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
freeurl (u, 1);
|
||||
continue;
|
||||
}
|
||||
if (u->proto == URLFILE)
|
||||
{
|
||||
DEBUGP (("Nothing to do with file:// around here.\n"));
|
||||
freeurl (u, 1);
|
||||
continue;
|
||||
}
|
||||
assert (u->url != NULL);
|
||||
constr = xstrdup (u->url);
|
||||
|
||||
@ -254,7 +248,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
|
||||
/* If it is FTP, and FTP is not followed, chuck it out. */
|
||||
if (!inl)
|
||||
if (u->proto == URLFTP && !opt.follow_ftp && !this_url_ftp)
|
||||
if (u->scheme == SCHEME_FTP && !opt.follow_ftp && !this_url_ftp)
|
||||
{
|
||||
DEBUGP (("Uh, it is FTP but i'm not in the mood to follow FTP.\n"));
|
||||
string_set_add (undesirable_urls, constr);
|
||||
@ -262,7 +256,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
}
|
||||
/* If it is absolute link and they are not followed, chuck it
|
||||
out. */
|
||||
if (!inl && u->proto != URLFTP)
|
||||
if (!inl && u->scheme != SCHEME_FTP)
|
||||
if (opt.relative_only && !cur_url->link_relative_p)
|
||||
{
|
||||
DEBUGP (("It doesn't really look like a relative link.\n"));
|
||||
@ -281,7 +275,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
if (!inl && opt.no_parent
|
||||
/* If the new URL is FTP and the old was not, ignore
|
||||
opt.no_parent. */
|
||||
&& !(!this_url_ftp && u->proto == URLFTP))
|
||||
&& !(!this_url_ftp && u->scheme == SCHEME_FTP))
|
||||
{
|
||||
/* Check for base_dir first. */
|
||||
if (!(base_dir && frontcmp (base_dir, u->dir)))
|
||||
@ -368,7 +362,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
/* This line is bogus. */
|
||||
/*string_set_add (undesirable_urls, constr);*/
|
||||
|
||||
if (!inl && !((u->proto == URLFTP) && !this_url_ftp))
|
||||
if (!inl && !((u->scheme == SCHEME_FTP) && !this_url_ftp))
|
||||
if (!opt.spanhost && this_url && !same_host (this_url, constr))
|
||||
{
|
||||
DEBUGP (("This is not the same hostname as the parent's.\n"));
|
||||
@ -377,7 +371,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
}
|
||||
}
|
||||
/* What about robots.txt? */
|
||||
if (!inl && opt.use_robots && u->proto == URLHTTP)
|
||||
if (!inl && opt.use_robots && u->scheme == SCHEME_FTP)
|
||||
{
|
||||
struct robot_specs *specs = res_get_specs (u->host, u->port);
|
||||
if (!specs)
|
||||
@ -418,7 +412,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
string_set_add (undesirable_urls, constr);
|
||||
/* Automatically followed FTPs will *not* be downloaded
|
||||
recursively. */
|
||||
if (u->proto == URLFTP)
|
||||
if (u->scheme == SCHEME_FTP)
|
||||
{
|
||||
/* Don't you adore side-effects? */
|
||||
opt.recursive = 0;
|
||||
@ -428,7 +422,7 @@ recursive_retrieve (const char *file, const char *this_url)
|
||||
/* Retrieve it. */
|
||||
retrieve_url (constr, &filename, &newloc,
|
||||
canon_this_url ? canon_this_url : this_url, &dt);
|
||||
if (u->proto == URLFTP)
|
||||
if (u->scheme == SCHEME_FTP)
|
||||
{
|
||||
/* Restore... */
|
||||
opt.recursive = 1;
|
||||
|
21
src/retr.c
21
src/retr.c
@ -300,7 +300,7 @@ rate (long bytes, long msecs, int pad)
|
||||
return res;
|
||||
}
|
||||
|
||||
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->proto) \
|
||||
#define USE_PROXY_P(u) (opt.use_proxy && getproxy((u)->scheme) \
|
||||
&& no_proxy_match((u)->host, \
|
||||
(const char **)opt.no_proxy))
|
||||
|
||||
@ -366,8 +366,8 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
memset (u, 0, sizeof (*u));
|
||||
u->proxy = pu;
|
||||
/* Get the appropriate proxy server, appropriate for the
|
||||
current protocol. */
|
||||
proxy = getproxy (pu->proto);
|
||||
current scheme. */
|
||||
proxy = getproxy (pu->scheme);
|
||||
if (!proxy)
|
||||
{
|
||||
logputs (LOG_NOTQUIET, _("Could not find proxy host.\n"));
|
||||
@ -379,9 +379,9 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
}
|
||||
/* Parse the proxy URL. */
|
||||
result = parseurl (proxy, u, 0);
|
||||
if (result != URLOK || u->proto != URLHTTP)
|
||||
if (result != URLOK || u->scheme != SCHEME_HTTP)
|
||||
{
|
||||
if (u->proto == URLHTTP)
|
||||
if (u->scheme == SCHEME_HTTP)
|
||||
logprintf (LOG_NOTQUIET, "Proxy %s: %s.\n", proxy, uerrmsg(result));
|
||||
else
|
||||
logprintf (LOG_NOTQUIET, _("Proxy %s: Must be HTTP.\n"), proxy);
|
||||
@ -391,19 +391,18 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
xfree (url);
|
||||
return PROXERR;
|
||||
}
|
||||
u->proto = URLHTTP;
|
||||
u->scheme = SCHEME_HTTP;
|
||||
}
|
||||
|
||||
assert (u->proto != URLFILE); /* #### Implement me! */
|
||||
mynewloc = NULL;
|
||||
|
||||
if (u->proto == URLHTTP
|
||||
if (u->scheme == SCHEME_HTTP
|
||||
#ifdef HAVE_SSL
|
||||
|| u->proto == URLHTTPS
|
||||
|| u->scheme == SCHEME_HTTPS
|
||||
#endif
|
||||
)
|
||||
result = http_loop (u, &mynewloc, dt);
|
||||
else if (u->proto == URLFTP)
|
||||
else if (u->scheme == SCHEME_FTP)
|
||||
{
|
||||
/* If this is a redirection, we must not allow recursive FTP
|
||||
retrieval, so we save recursion to oldrec, and restore it
|
||||
@ -420,7 +419,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
||||
|
||||
#### All of this is, of course, crap. These types should be
|
||||
determined through mailcap. */
|
||||
if (redirections && u->local && (u->proto == URLFTP ))
|
||||
if (redirections && u->local && (u->scheme == SCHEME_FTP))
|
||||
{
|
||||
char *suf = suffix (u->local);
|
||||
if (suf && (!strcasecmp (suf, "html") || !strcasecmp (suf, "htm")))
|
||||
|
202
src/url.c
202
src/url.c
@ -49,21 +49,21 @@ extern int errno;
|
||||
|
||||
static int urlpath_length PARAMS ((const char *));
|
||||
|
||||
struct proto
|
||||
struct scheme_data
|
||||
{
|
||||
char *name;
|
||||
uerr_t ind;
|
||||
unsigned short port;
|
||||
enum url_scheme scheme;
|
||||
char *leading_string;
|
||||
int default_port;
|
||||
};
|
||||
|
||||
/* Supported protocols: */
|
||||
static struct proto sup_protos[] =
|
||||
/* Supported schemes: */
|
||||
static struct scheme_data supported_schemes[] =
|
||||
{
|
||||
{ "http://", URLHTTP, DEFAULT_HTTP_PORT },
|
||||
{ SCHEME_HTTP, "http://", DEFAULT_HTTP_PORT },
|
||||
#ifdef HAVE_SSL
|
||||
{ "https://",URLHTTPS, DEFAULT_HTTPS_PORT},
|
||||
{ SCHEME_HTTPS, "https://", DEFAULT_HTTPS_PORT },
|
||||
#endif
|
||||
{ "ftp://", URLFTP, DEFAULT_FTP_PORT }
|
||||
{ SCHEME_FTP, "ftp://", DEFAULT_FTP_PORT }
|
||||
};
|
||||
|
||||
static void parse_dir PARAMS ((const char *, char **, char **));
|
||||
@ -229,39 +229,28 @@ encode_string (const char *s)
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* Returns the protocol type if URL's protocol is supported, or
|
||||
URLUNKNOWN if not. */
|
||||
uerr_t
|
||||
urlproto (const char *url)
|
||||
/* Returns the scheme type if the scheme is supported, or
|
||||
SCHEME_INVALID if not. */
|
||||
enum url_scheme
|
||||
url_scheme (const char *url)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
|
||||
if (!strncasecmp (url, sup_protos[i].name, strlen (sup_protos[i].name)))
|
||||
return sup_protos[i].ind;
|
||||
for (i = 0; url[i] && url[i] != ':' && url[i] != '/'; i++);
|
||||
if (url[i] == ':')
|
||||
{
|
||||
for (++i; url[i] && url[i] != '/'; i++)
|
||||
if (!ISDIGIT (url[i]))
|
||||
return URLBADPORT;
|
||||
if (url[i - 1] == ':')
|
||||
return URLFTP;
|
||||
else
|
||||
return URLHTTP;
|
||||
}
|
||||
else
|
||||
return URLHTTP;
|
||||
for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||
if (!strncasecmp (url, supported_schemes[i].leading_string,
|
||||
strlen (supported_schemes[i].leading_string)))
|
||||
return supported_schemes[i].scheme;
|
||||
return SCHEME_INVALID;
|
||||
}
|
||||
|
||||
/* Skip the protocol part of the URL, e.g. `http://'. If no protocol
|
||||
part is found, returns 0. */
|
||||
/* Return the number of characters needed to skip the scheme part of
|
||||
the URL, e.g. `http://'. If no scheme is found, returns 0. */
|
||||
int
|
||||
skip_proto (const char *url)
|
||||
url_skip_scheme (const char *url)
|
||||
{
|
||||
const char *p = url;
|
||||
|
||||
/* Skip protocol name. We allow `-' and `+' because of `whois++',
|
||||
/* Skip the scheme name. We allow `-' and `+' because of `whois++',
|
||||
etc. */
|
||||
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
||||
++p;
|
||||
@ -277,10 +266,10 @@ skip_proto (const char *url)
|
||||
return p - url;
|
||||
}
|
||||
|
||||
/* Returns 1 if the URL begins with a protocol (supported or
|
||||
/* Returns 1 if the URL begins with a scheme (supported or
|
||||
unsupported), 0 otherwise. */
|
||||
int
|
||||
has_proto (const char *url)
|
||||
url_has_scheme (const char *url)
|
||||
{
|
||||
const char *p = url;
|
||||
while (ISALNUM (*p) || *p == '-' || *p == '+')
|
||||
@ -290,11 +279,11 @@ has_proto (const char *url)
|
||||
|
||||
/* Skip the username and password, if present here. The function
|
||||
should be called *not* with the complete URL, but with the part
|
||||
right after the protocol.
|
||||
right after the scheme.
|
||||
|
||||
If no username and password are found, return 0. */
|
||||
int
|
||||
skip_uname (const char *url)
|
||||
url_skip_uname (const char *url)
|
||||
{
|
||||
const char *p;
|
||||
const char *q = NULL;
|
||||
@ -317,7 +306,7 @@ newurl (void)
|
||||
|
||||
u = (struct urlinfo *)xmalloc (sizeof (struct urlinfo));
|
||||
memset (u, 0, sizeof (*u));
|
||||
u->proto = URLUNKNOWN;
|
||||
u->scheme = SCHEME_INVALID;
|
||||
return u;
|
||||
}
|
||||
|
||||
@ -344,10 +333,14 @@ freeurl (struct urlinfo *u, int complete)
|
||||
return;
|
||||
}
|
||||
|
||||
enum url_parse_error {
|
||||
PE_UNRECOGNIZED_SCHEME, PE_BAD_PORT
|
||||
};
|
||||
|
||||
/* Extract the given URL of the form
|
||||
(http:|ftp:)// (user (:password)?@)?hostname (:port)? (/path)?
|
||||
1. hostname (terminated with `/' or `:')
|
||||
2. port number (terminated with `/'), or chosen for the protocol
|
||||
2. port number (terminated with `/'), or chosen for the scheme
|
||||
3. dirname (everything after hostname)
|
||||
Most errors are handled. No allocation is done, you must supply
|
||||
pointers to allocated memory.
|
||||
@ -367,36 +360,36 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
||||
{
|
||||
int i, l, abs_ftp;
|
||||
int recognizable; /* Recognizable URL is the one where
|
||||
the protocol name was explicitly
|
||||
named, i.e. it wasn't deduced from
|
||||
the URL format. */
|
||||
the scheme was explicitly named,
|
||||
i.e. it wasn't deduced from the URL
|
||||
format. */
|
||||
uerr_t type;
|
||||
|
||||
DEBUGP (("parseurl (\"%s\") -> ", url));
|
||||
recognizable = has_proto (url);
|
||||
recognizable = url_has_scheme (url);
|
||||
if (strict && !recognizable)
|
||||
return URLUNKNOWN;
|
||||
for (i = 0, l = 0; i < ARRAY_SIZE (sup_protos); i++)
|
||||
for (i = 0, l = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||
{
|
||||
l = strlen (sup_protos[i].name);
|
||||
if (!strncasecmp (sup_protos[i].name, url, l))
|
||||
l = strlen (supported_schemes[i].leading_string);
|
||||
if (!strncasecmp (supported_schemes[i].leading_string, url, l))
|
||||
break;
|
||||
}
|
||||
/* If protocol is recognizable, but unsupported, bail out, else
|
||||
/* If scheme is recognizable, but unsupported, bail out, else
|
||||
suppose unknown. */
|
||||
if (recognizable && i == ARRAY_SIZE (sup_protos))
|
||||
if (recognizable && i == ARRAY_SIZE (supported_schemes))
|
||||
return URLUNKNOWN;
|
||||
else if (i == ARRAY_SIZE (sup_protos))
|
||||
else if (i == ARRAY_SIZE (supported_schemes))
|
||||
type = URLUNKNOWN;
|
||||
else
|
||||
u->proto = type = sup_protos[i].ind;
|
||||
u->scheme = type = supported_schemes[i].scheme;
|
||||
|
||||
if (type == URLUNKNOWN)
|
||||
l = 0;
|
||||
/* Allow a username and password to be specified (i.e. just skip
|
||||
them for now). */
|
||||
if (recognizable)
|
||||
l += skip_uname (url + l);
|
||||
l += url_skip_uname (url + l);
|
||||
for (i = l; url[i] && url[i] != ':' && url[i] != '/'; i++);
|
||||
if (i == l)
|
||||
return URLBADHOST;
|
||||
@ -413,7 +406,10 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
||||
if (ISDIGIT (url[++i])) /* A port number */
|
||||
{
|
||||
if (type == URLUNKNOWN)
|
||||
u->proto = type = URLHTTP;
|
||||
{
|
||||
type = URLHTTP;
|
||||
u->scheme = SCHEME_HTTP;
|
||||
}
|
||||
for (; url[i] && url[i] != '/'; i++)
|
||||
if (ISDIGIT (url[i]))
|
||||
u->port = 10 * u->port + (url[i] - '0');
|
||||
@ -424,21 +420,27 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
||||
DEBUGP (("port %hu -> ", u->port));
|
||||
}
|
||||
else if (type == URLUNKNOWN) /* or a directory */
|
||||
u->proto = type = URLFTP;
|
||||
{
|
||||
type = URLFTP;
|
||||
u->scheme = SCHEME_FTP;
|
||||
}
|
||||
else /* or just a misformed port number */
|
||||
return URLBADPORT;
|
||||
}
|
||||
else if (type == URLUNKNOWN)
|
||||
u->proto = type = URLHTTP;
|
||||
{
|
||||
type = URLHTTP;
|
||||
u->scheme = SCHEME_HTTP;
|
||||
}
|
||||
if (!u->port)
|
||||
{
|
||||
int ind;
|
||||
for (ind = 0; ind < ARRAY_SIZE (sup_protos); ind++)
|
||||
if (sup_protos[ind].ind == type)
|
||||
for (ind = 0; ind < ARRAY_SIZE (supported_schemes); ind++)
|
||||
if (supported_schemes[ind].scheme == u->scheme)
|
||||
break;
|
||||
if (ind == ARRAY_SIZE (sup_protos))
|
||||
if (ind == ARRAY_SIZE (supported_schemes))
|
||||
return URLUNKNOWN;
|
||||
u->port = sup_protos[ind].port;
|
||||
u->port = supported_schemes[ind].default_port;
|
||||
}
|
||||
/* Some delimiter troubles... */
|
||||
if (url[i] == '/' && url[i - 1] != ':')
|
||||
@ -480,7 +482,7 @@ parseurl (const char *url, struct urlinfo *u, int strict)
|
||||
if (l > 1 && u->dir[l - 1] == '/')
|
||||
u->dir[l - 1] = '\0';
|
||||
/* Re-create the path: */
|
||||
abs_ftp = (u->proto == URLFTP && *u->dir == '/');
|
||||
abs_ftp = (u->scheme == SCHEME_FTP && *u->dir == '/');
|
||||
/* sprintf (u->path, "%s%s%s%s", abs_ftp ? "%2F": "/",
|
||||
abs_ftp ? (u->dir + 1) : u->dir, *u->dir ? "/" : "", u->file); */
|
||||
strcpy (u->path, abs_ftp ? "%2F" : "/");
|
||||
@ -574,11 +576,10 @@ parse_uname (const char *url, char **user, char **passwd)
|
||||
*user = NULL;
|
||||
*passwd = NULL;
|
||||
|
||||
/* Look for the end of the protocol string. */
|
||||
l = skip_proto (url);
|
||||
/* Look for the end of the scheme identifier. */
|
||||
l = url_skip_scheme (url);
|
||||
if (!l)
|
||||
return URLUNKNOWN;
|
||||
/* Add protocol offset. */
|
||||
url += l;
|
||||
/* Is there an `@' character? */
|
||||
for (p = url; *p && *p != '/'; p++)
|
||||
@ -623,26 +624,27 @@ process_ftp_type (char *path)
|
||||
return '\0';
|
||||
}
|
||||
|
||||
/* Return the URL as fine-formed string, with a proper protocol, optional port
|
||||
number, directory and optional user/password. If `hide' is non-zero (as it
|
||||
is when we're calling this on a URL we plan to print, but not when calling it
|
||||
to canonicalize a URL for use within the program), password will be hidden.
|
||||
The forbidden characters in the URL will be cleansed. */
|
||||
/* Recreate the URL string from the data in urlinfo. This can be used
|
||||
to create a "canonical" representation of the URL. If `hide' is
|
||||
non-zero (as it is when we're calling this on a URL we plan to
|
||||
print, but not when calling it to canonicalize a URL for use within
|
||||
the program), password will be hidden. The forbidden characters in
|
||||
the URL will be cleansed. */
|
||||
char *
|
||||
str_url (const struct urlinfo *u, int hide)
|
||||
{
|
||||
char *res, *host, *user, *passwd, *proto_name, *dir, *file;
|
||||
char *res, *host, *user, *passwd, *scheme_name, *dir, *file;
|
||||
int i, l, ln, lu, lh, lp, lf, ld;
|
||||
unsigned short proto_default_port;
|
||||
unsigned short default_port;
|
||||
|
||||
/* Look for the protocol name. */
|
||||
for (i = 0; i < ARRAY_SIZE (sup_protos); i++)
|
||||
if (sup_protos[i].ind == u->proto)
|
||||
/* Look for the scheme. */
|
||||
for (i = 0; i < ARRAY_SIZE (supported_schemes); i++)
|
||||
if (supported_schemes[i].scheme == u->scheme)
|
||||
break;
|
||||
if (i == ARRAY_SIZE (sup_protos))
|
||||
if (i == ARRAY_SIZE (supported_schemes))
|
||||
return NULL;
|
||||
proto_name = sup_protos[i].name;
|
||||
proto_default_port = sup_protos[i].port;
|
||||
scheme_name = supported_schemes[i].leading_string;
|
||||
default_port = supported_schemes[i].default_port;
|
||||
host = encode_string (u->host);
|
||||
dir = encode_string (u->dir);
|
||||
file = encode_string (u->file);
|
||||
@ -660,7 +662,7 @@ str_url (const struct urlinfo *u, int hide)
|
||||
else
|
||||
passwd = encode_string (u->passwd);
|
||||
}
|
||||
if (u->proto == URLFTP && *dir == '/')
|
||||
if (u->scheme == SCHEME_FTP && *dir == '/')
|
||||
{
|
||||
char *tmp = (char *)xmalloc (strlen (dir) + 3);
|
||||
/*sprintf (tmp, "%%2F%s", dir + 1);*/
|
||||
@ -672,19 +674,19 @@ str_url (const struct urlinfo *u, int hide)
|
||||
dir = tmp;
|
||||
}
|
||||
|
||||
ln = strlen (proto_name);
|
||||
ln = strlen (scheme_name);
|
||||
lu = user ? strlen (user) : 0;
|
||||
lp = passwd ? strlen (passwd) : 0;
|
||||
lh = strlen (host);
|
||||
ld = strlen (dir);
|
||||
lf = strlen (file);
|
||||
res = (char *)xmalloc (ln + lu + lp + lh + ld + lf + 20); /* safe sex */
|
||||
/* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", proto_name,
|
||||
/* sprintf (res, "%s%s%s%s%s%s:%d/%s%s%s", scheme_name,
|
||||
(user ? user : ""), (passwd ? ":" : ""),
|
||||
(passwd ? passwd : ""), (user ? "@" : ""),
|
||||
host, u->port, dir, *dir ? "/" : "", file); */
|
||||
l = 0;
|
||||
memcpy (res, proto_name, ln);
|
||||
memcpy (res, scheme_name, ln);
|
||||
l += ln;
|
||||
if (user)
|
||||
{
|
||||
@ -700,7 +702,7 @@ str_url (const struct urlinfo *u, int hide)
|
||||
}
|
||||
memcpy (res + l, host, lh);
|
||||
l += lh;
|
||||
if (u->port != proto_default_port)
|
||||
if (u->port != default_port)
|
||||
{
|
||||
res[l++] = ':';
|
||||
long_to_string (res + l, (long)u->port);
|
||||
@ -1123,7 +1125,7 @@ find_last_char (const char *b, const char *e, char c)
|
||||
Either of the URIs may be absolute or relative, complete with the
|
||||
host name, or path only. This tries to behave "reasonably" in all
|
||||
foreseeable cases. It employs little specific knowledge about
|
||||
protocols or URL-specific stuff -- it just works on strings.
|
||||
schemes or URL-specific stuff -- it just works on strings.
|
||||
|
||||
The parameters LINKLENGTH is useful if LINK is not zero-terminated.
|
||||
See uri_merge for a gentler interface to this functionality.
|
||||
@ -1131,11 +1133,11 @@ find_last_char (const char *b, const char *e, char c)
|
||||
#### This function should handle `./' and `../' so that the evil
|
||||
path_simplify can go. */
|
||||
static char *
|
||||
uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
||||
uri_merge_1 (const char *base, const char *link, int linklength, int no_scheme)
|
||||
{
|
||||
char *constr;
|
||||
|
||||
if (no_proto)
|
||||
if (no_scheme)
|
||||
{
|
||||
const char *end = base + urlpath_length (base);
|
||||
|
||||
@ -1252,7 +1254,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
||||
constr[span + linklength] = '\0';
|
||||
}
|
||||
}
|
||||
else /* !no_proto */
|
||||
else /* !no_scheme */
|
||||
{
|
||||
constr = strdupdelim (link, link + linklength);
|
||||
}
|
||||
@ -1265,7 +1267,7 @@ uri_merge_1 (const char *base, const char *link, int linklength, int no_proto)
|
||||
char *
|
||||
uri_merge (const char *base, const char *link)
|
||||
{
|
||||
return uri_merge_1 (base, link, strlen (link), !has_proto (link));
|
||||
return uri_merge_1 (base, link, strlen (link), !url_has_scheme (link));
|
||||
}
|
||||
|
||||
/* Optimize URL by host, destructively replacing u->host with realhost
|
||||
@ -1283,22 +1285,28 @@ opt_url (struct urlinfo *u)
|
||||
u->url = str_url (u, 0);
|
||||
}
|
||||
|
||||
/* Returns proxy host address, in accordance with PROTO. */
|
||||
/* Returns proxy host address, in accordance with SCHEME. */
|
||||
char *
|
||||
getproxy (uerr_t proto)
|
||||
getproxy (enum url_scheme scheme)
|
||||
{
|
||||
char *proxy;
|
||||
char *proxy = NULL;
|
||||
|
||||
if (proto == URLHTTP)
|
||||
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
|
||||
else if (proto == URLFTP)
|
||||
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
|
||||
switch (scheme)
|
||||
{
|
||||
case SCHEME_HTTP:
|
||||
proxy = opt.http_proxy ? opt.http_proxy : getenv ("http_proxy");
|
||||
break;
|
||||
#ifdef HAVE_SSL
|
||||
else if (proto == URLHTTPS)
|
||||
proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
|
||||
#endif /* HAVE_SSL */
|
||||
else
|
||||
proxy = NULL;
|
||||
case SCHEME_HTTPS:
|
||||
proxy = opt.https_proxy ? opt.https_proxy : getenv ("https_proxy");
|
||||
break;
|
||||
#endif
|
||||
case SCHEME_FTP:
|
||||
proxy = opt.ftp_proxy ? opt.ftp_proxy : getenv ("ftp_proxy");
|
||||
break;
|
||||
case SCHEME_INVALID:
|
||||
break;
|
||||
}
|
||||
if (!proxy || !*proxy)
|
||||
return NULL;
|
||||
return proxy;
|
||||
|
19
src/url.h
19
src/url.h
@ -25,12 +25,21 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
#define DEFAULT_FTP_PORT 21
|
||||
#define DEFAULT_HTTPS_PORT 443
|
||||
|
||||
enum url_scheme {
|
||||
SCHEME_HTTP,
|
||||
#ifdef HAVE_SSL
|
||||
SCHEME_HTTPS,
|
||||
#endif
|
||||
SCHEME_FTP,
|
||||
SCHEME_INVALID
|
||||
};
|
||||
|
||||
/* Structure containing info on a URL. */
|
||||
struct urlinfo
|
||||
{
|
||||
char *url; /* Unchanged URL */
|
||||
uerr_t proto; /* URL protocol */
|
||||
enum url_scheme scheme; /* URL scheme */
|
||||
|
||||
char *host; /* Extracted hostname */
|
||||
unsigned short port;
|
||||
char ftp_type;
|
||||
@ -97,10 +106,10 @@ char *encode_string PARAMS ((const char *));
|
||||
|
||||
struct urlinfo *newurl PARAMS ((void));
|
||||
void freeurl PARAMS ((struct urlinfo *, int));
|
||||
uerr_t urlproto PARAMS ((const char *));
|
||||
int skip_proto PARAMS ((const char *));
|
||||
int has_proto PARAMS ((const char *));
|
||||
int skip_uname PARAMS ((const char *));
|
||||
enum url_scheme url_detect_scheme PARAMS ((const char *));
|
||||
int url_skip_scheme PARAMS ((const char *));
|
||||
int url_has_scheme PARAMS ((const char *));
|
||||
int url_skip_uname PARAMS ((const char *));
|
||||
|
||||
uerr_t parseurl PARAMS ((const char *, struct urlinfo *, int));
|
||||
char *str_url PARAMS ((const struct urlinfo *, int));
|
||||
|
Loading…
Reference in New Issue
Block a user