mirror of
https://github.com/moparisthebest/wget
synced 2024-07-03 16:38:41 -04:00
[svn] Rewrite shorthand URLs in a step separate from parsing.
Published in <sxspu6f7ecz.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
f178e6c613
commit
e8e8797873
@ -1,3 +1,9 @@
|
|||||||
|
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* main.c (main): Use it.
|
||||||
|
|
||||||
|
* url.c (rewrite_url_maybe): New function.
|
||||||
|
|
||||||
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-11-19 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* url.c: Clean up handling of URL schemes.
|
* url.c: Clean up handling of URL schemes.
|
||||||
|
14
src/main.c
14
src/main.c
@ -50,6 +50,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
#include "recur.h"
|
#include "recur.h"
|
||||||
#include "host.h"
|
#include "host.h"
|
||||||
#include "cookies.h"
|
#include "cookies.h"
|
||||||
|
#include "url.h"
|
||||||
|
|
||||||
/* On GNU system this will include system-wide getopt.h. */
|
/* On GNU system this will include system-wide getopt.h. */
|
||||||
#include "getopt.h"
|
#include "getopt.h"
|
||||||
@ -739,9 +740,14 @@ Can't timestamp and not clobber old files at the same time.\n"));
|
|||||||
/* Fill in the arguments. */
|
/* Fill in the arguments. */
|
||||||
for (i = 0; i < nurl; i++, optind++)
|
for (i = 0; i < nurl; i++, optind++)
|
||||||
{
|
{
|
||||||
char *irix4_cc_needs_this;
|
char *rewritten = rewrite_url_maybe (argv[optind]);
|
||||||
STRDUP_ALLOCA (irix4_cc_needs_this, argv[optind]);
|
if (rewritten)
|
||||||
url[i] = irix4_cc_needs_this;
|
{
|
||||||
|
printf ("Converted %s to %s\n", argv[optind], rewritten);
|
||||||
|
url[i] = rewritten;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
url[i] = xstrdup (argv[optind]);
|
||||||
}
|
}
|
||||||
url[i] = NULL;
|
url[i] = NULL;
|
||||||
|
|
||||||
@ -853,6 +859,8 @@ Can't timestamp and not clobber old files at the same time.\n"));
|
|||||||
convert_all_links ();
|
convert_all_links ();
|
||||||
}
|
}
|
||||||
log_close ();
|
log_close ();
|
||||||
|
for (i = 0; i < nurl; i++)
|
||||||
|
free (url[i]);
|
||||||
cleanup ();
|
cleanup ();
|
||||||
#ifdef DEBUG_MALLOC
|
#ifdef DEBUG_MALLOC
|
||||||
print_malloc_debug_stats ();
|
print_malloc_debug_stats ();
|
||||||
|
60
src/url.c
60
src/url.c
@ -296,6 +296,66 @@ url_skip_uname (const char *url)
|
|||||||
else
|
else
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Used by main.c: detect URLs written using the "shorthand" URL forms
   popularized by Netscape and NcFTP.  HTTP shorthands look like this:

   www.foo.com[:port]/dir/file   -> http://www.foo.com[:port]/dir/file
   www.foo.com[:port]            -> http://www.foo.com[:port]

   FTP shorthands look like this:

   foo.bar.com:dir/file          -> ftp://foo.bar.com/dir/file
   foo.bar.com:/absdir/file      -> ftp://foo.bar.com//absdir/file

   If the URL need not or cannot be rewritten, return NULL.  Otherwise
   return the rewritten URL in storage obtained from xmalloc; the
   caller owns the result and is responsible for freeing it.  */
char *
rewrite_url_maybe (const char *url)
{
  const char *p;
  char *res;

  /* A URL that already carries a scheme is left alone.  */
  if (url_has_scheme (url))
    return NULL;

  /* Look for a ':' or '/'.  The former signifies NcFTP syntax, the
     latter Netscape.  */
  for (p = url; *p && *p != ':' && *p != '/'; p++)
    ;

  /* Nothing precedes the separator (or the string is empty), so
     there is no host part to build a URL from.  */
  if (p == url)
    return NULL;

  if (*p == ':')
    {
      /* If the characters after the colon and before the next slash
         or end of string are all digits, the colon introduces a port
         number and the URL is an HTTP shorthand; that case falls
         through to the "http://" code below.  */
      const char *pp = p + 1;
      while (ISDIGIT (*pp))
        ++pp;

      if (pp == p + 1 || (*pp != '/' && *pp != '\0'))
        {
          /* Prepend "ftp://" to the entire URL... */
          res = xmalloc (6 + strlen (url) + 1);
          sprintf (res, "ftp://%s", url);
          /* ...and replace ':' with '/'.  The colon sits at the same
             offset as in URL, shifted by the 6-character prefix.  */
          res[6 + (p - url)] = '/';
          return res;
        }
    }

  /* Just prepend "http://" to what we have. */
  res = xmalloc (7 + strlen (url) + 1);
  sprintf (res, "http://%s", url);
  return res;
}
|
||||||
|
|
||||||
/* Allocate a new urlinfo structure, fill it with default values and
|
/* Allocate a new urlinfo structure, fill it with default values and
|
||||||
return a pointer to it. */
|
return a pointer to it. */
|
||||||
|
@ -137,4 +137,6 @@ urlpos *add_url PARAMS ((urlpos *, const char *, const char *));
|
|||||||
|
|
||||||
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
|
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
|
||||||
|
|
||||||
|
char *rewrite_url_maybe PARAMS ((const char *));
|
||||||
|
|
||||||
#endif /* URL_H */
|
#endif /* URL_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user