1
0
mirror of https://github.com/moparisthebest/wget synced 2024-07-03 16:38:41 -04:00

[svn] Don't list all the "known" (but unsupported) protocols. Instead, just
skip the characters until the first ':'.
Published in <sxsitc8a848.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-11-17 22:49:09 -08:00
parent ee99e8998a
commit 303f406997
2 changed files with 25 additions and 67 deletions

View File

@@ -1,3 +1,9 @@
2001-05-13 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Get rid of `protostrings'.
(skip_proto): Don't use protostrings.
(has_proto): Ditto.
2001-11-18 Hrvoje Niksic <hniksic@arsdigita.com>
* Makefile.in: Conditionally compile getopt.o.

View File

@@ -49,54 +49,6 @@ extern int errno;
static int urlpath_length PARAMS ((const char *)); static int urlpath_length PARAMS ((const char *));
/* A NULL-terminated list of strings to be recognized as protocol
types (URL schemes). Note that recognized doesn't mean supported
-- only HTTP, HTTPS and FTP are currently supported.
However, a string that does not match anything in the list will be
considered a relative URL. Thus it's important that this list has
anything anyone could think of being legal.
#### This is probably broken. Wget should use other means to
distinguish between absolute and relative URIs in HTML links.
Take a look at <http://www.w3.org/pub/WWW/Addressing/schemes.html>
for more. */
static char *protostrings[] =
{
"cid:",
"clsid:",
"file:",
"finger:",
"ftp:",
"gopher:",
"hdl:",
"http:",
"https:",
"ilu:",
"ior:",
"irc:",
"java:",
"javascript:",
"lifn:",
"mailto:",
"mid:",
"news:",
"nntp:",
"path:",
"prospero:",
"rlogin:",
"service:",
"shttp:",
"snews:",
"stanf:",
"telnet:",
"tn3270:",
"wais:",
"whois++:",
NULL
};
struct proto struct proto
{ {
char *name; char *name;
@@ -104,7 +56,7 @@ struct proto
unsigned short port; unsigned short port;
}; };
/* Similar to former, but for supported protocols: */ /* Supported protocols: */
static struct proto sup_protos[] = static struct proto sup_protos[] =
{ {
{ "http://", URLHTTP, DEFAULT_HTTP_PORT }, { "http://", URLHTTP, DEFAULT_HTTP_PORT },
@@ -307,20 +259,22 @@ urlproto (const char *url)
int int
skip_proto (const char *url) skip_proto (const char *url)
{ {
char **s; const char *p = url;
int l;
for (s = protostrings; *s; s++) /* Skip protocol name. We allow `-' and `+' because of `whois++',
if (!strncasecmp (*s, url, strlen (*s))) etc. */
break; while (ISALNUM (*p) || *p == '-' || *p == '+')
if (!*s) ++p;
if (*p != ':')
return 0; return 0;
l = strlen (*s); /* Skip ':'. */
/* HTTP and FTP protocols are expected to yield exact host names ++p;
(i.e. the `//' part must be skipped, too). */
if (!strcmp (*s, "http:") || !strcmp (*s, "ftp:")) /* Skip "//" if found. */
l += 2; if (*p == '/' && *(p + 1) == '/')
return l; p += 2;
return p - url;
} }
/* Returns 1 if the URL begins with a protocol (supported or /* Returns 1 if the URL begins with a protocol (supported or
@@ -328,12 +282,10 @@ skip_proto (const char *url)
int int
has_proto (const char *url) has_proto (const char *url)
{ {
char **s; const char *p = url;
while (ISALNUM (*p) || *p == '-' || *p == '+')
for (s = protostrings; *s; s++) ++p;
if (strncasecmp (url, *s, strlen (*s)) == 0) return *p == ':';
return 1;
return 0;
} }
/* Skip the username and password, if present here. The function /* Skip the username and password, if present here. The function