1999-12-02 02:42:23 -05:00
|
|
|
/* Declarations for url.c.
   Copyright (C) 1995, 1996, 1997 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
|
|
|
|
|
|
|
|
#ifndef URL_H
|
|
|
|
#define URL_H
|
|
|
|
|
2001-03-08 18:11:03 -05:00
|
|
|
/* Default port definitions: one port number each for HTTP, FTP and
   HTTPS.  */
#define DEFAULT_HTTP_PORT 80
#define DEFAULT_FTP_PORT 21
#define DEFAULT_HTTPS_PORT 443
|
|
|
|
|
2001-11-21 19:24:28 -05:00
|
|
|
/* Note: the ordering here is related to the order of elements in
   `supported_schemes' in url.c.

   SCHEME_HTTPS exists only when HAVE_SSL is defined, so the numeric
   values of SCHEME_FTP and SCHEME_INVALID depend on that macro; refer
   to the enumerators by name rather than by value.  */
enum url_scheme {
  SCHEME_HTTP,
#ifdef HAVE_SSL
  SCHEME_HTTPS,
#endif
  SCHEME_FTP,
  SCHEME_INVALID
};
|
2001-03-08 18:11:03 -05:00
|
|
|
|
1999-12-02 02:42:23 -05:00
|
|
|
/* Structure containing info on a URL. */
|
2001-11-21 19:24:28 -05:00
|
|
|
struct url
|
1999-12-02 02:42:23 -05:00
|
|
|
{
|
2001-11-21 19:24:28 -05:00
|
|
|
char *url; /* Original URL */
|
2001-11-18 19:12:05 -05:00
|
|
|
enum url_scheme scheme; /* URL scheme */
|
|
|
|
|
1999-12-02 02:42:23 -05:00
|
|
|
char *host; /* Extracted hostname */
|
2001-11-21 19:24:28 -05:00
|
|
|
int port; /* Port number */
|
|
|
|
|
|
|
|
/* URL components (URL-quoted). */
|
|
|
|
char *path;
|
|
|
|
char *params;
|
|
|
|
char *query;
|
|
|
|
char *fragment;
|
|
|
|
|
|
|
|
/* Extracted path info (unquoted). */
|
|
|
|
char *dir;
|
|
|
|
char *file;
|
|
|
|
|
|
|
|
/* Username and password (unquoted). */
|
|
|
|
char *user;
|
|
|
|
char *passwd;
|
1999-12-02 02:42:23 -05:00
|
|
|
};
|
|
|
|
|
2000-11-20 21:06:36 -05:00
|
|
|
/* Ways in which a URL found in a document may be rewritten.  Used as
   the type of the `convert' member of struct urlpos.  */
enum convert_options {
  CO_NOCONVERT = 0,             /* don't convert this URL */
  CO_CONVERT_TO_RELATIVE,       /* convert to relative, e.g. to
                                   "../../otherdir/foo.gif" */
  CO_CONVERT_TO_COMPLETE,       /* convert to absolute, e.g. to
                                   "http://orighost/somedir/bar.jpg". */
  CO_NULLIFY_BASE               /* change to empty string. */
};
|
|
|
|
|
|
|
|
/* A structure that defines the whereabouts of a URL, i.e. its
|
|
|
|
position in an HTML document, etc. */
|
2000-11-20 21:06:36 -05:00
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
struct urlpos {
|
|
|
|
struct url *url; /* the URL of the link, after it has
|
|
|
|
been merged with the base */
|
|
|
|
char *local_name; /* local file to which it was saved
|
|
|
|
(used by convert_links) */
|
2000-11-20 21:06:36 -05:00
|
|
|
|
2001-11-30 16:17:53 -05:00
|
|
|
/* reserved for special links such as <base href="..."> which are
|
|
|
|
used when converting links, but ignored when downloading. */
|
|
|
|
unsigned int ignore_when_downloading :1;
|
2001-11-25 13:40:55 -05:00
|
|
|
|
2000-11-20 21:06:36 -05:00
|
|
|
/* Information about the original link: */
|
2001-11-30 16:17:53 -05:00
|
|
|
|
|
|
|
unsigned int link_relative_p :1; /* was the link relative? */
|
|
|
|
unsigned int link_complete_p :1; /* was the link complete (with the
|
|
|
|
host name, etc.) */
|
|
|
|
unsigned int link_base_p :1; /* was the link <base href=...> */
|
|
|
|
unsigned int link_inline_p :1; /* needed to render the page. */
|
2000-11-20 21:06:36 -05:00
|
|
|
|
2001-11-30 23:18:51 -05:00
|
|
|
unsigned int link_refresh_p :1; /* link was received from
|
|
|
|
<meta http-equiv=refresh content=...> */
|
|
|
|
int refresh_timeout; /* for reconstructing the refresh. */
|
|
|
|
|
2000-11-20 21:06:36 -05:00
|
|
|
/* Conversion requirements: */
|
|
|
|
enum convert_options convert; /* is conversion required? */
|
|
|
|
|
|
|
|
/* URL's position in the buffer. */
|
|
|
|
int pos, size;
|
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
struct urlpos *next; /* next list element */
|
|
|
|
};
|
1999-12-02 02:42:23 -05:00
|
|
|
|
2000-10-20 01:55:46 -04:00
|
|
|
/* downloaded_file() takes a parameter of this type and returns this
   type.  The enumerators are grouped by whether they may appear as a
   return value, as a parameter, or as both.  */
typedef enum
{
  /* Return enumerators: */
  FILE_NOT_ALREADY_DOWNLOADED = 0,

  /* Return / parameter enumerators: */
  FILE_DOWNLOADED_NORMALLY,
  FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED,

  /* Parameter enumerators: */
  CHECK_FOR_FILE
} downloaded_file_t;
|
1999-12-02 02:42:23 -05:00
|
|
|
|
|
|
|
/* Function declarations */
|
|
|
|
|
|
|
|
char *encode_string PARAMS ((const char *));
|
|
|
|
|
2001-11-21 19:24:28 -05:00
|
|
|
struct url *url_parse PARAMS ((const char *, int *));
|
|
|
|
const char *url_error PARAMS ((int));
|
|
|
|
char *url_full_path PARAMS ((const struct url *));
|
|
|
|
void url_set_dir PARAMS ((struct url *, const char *));
|
|
|
|
void url_set_file PARAMS ((struct url *, const char *));
|
|
|
|
void url_free PARAMS ((struct url *));
|
|
|
|
|
|
|
|
/* Scheme-related helpers.  url_scheme shares its name with the enum
   tag; that is valid C because tags and ordinary identifiers live in
   separate name spaces.  */
enum url_scheme url_scheme PARAMS ((const char *));
int url_skip_scheme PARAMS ((const char *));
int url_has_scheme PARAMS ((const char *));
int scheme_default_port PARAMS ((enum url_scheme));

int url_skip_uname PARAMS ((const char *));

/* NOTE(review): the meaning of the `int' argument is not visible in
   this header -- confirm against url.c.  */
char *url_string PARAMS ((const struct url *, int));
|
1999-12-02 02:42:23 -05:00
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
/* Functions that return or release `struct urlpos' lists.  */
struct urlpos *get_urls_file PARAMS ((const char *));
struct urlpos *get_urls_html PARAMS ((const char *, const char *, int *));
void free_urlpos PARAMS ((struct urlpos *));
|
1999-12-02 02:42:23 -05:00
|
|
|
|
2001-04-14 00:11:35 -04:00
|
|
|
char *uri_merge PARAMS ((const char *, const char *));
|
2000-10-31 14:25:32 -05:00
|
|
|
|
1999-12-02 02:42:23 -05:00
|
|
|
void rotate_backups PARAMS ((const char *));
|
|
|
|
int mkalldirs PARAMS ((const char *));
|
2001-11-21 19:24:28 -05:00
|
|
|
char *url_filename PARAMS ((const struct url *));
|
1999-12-02 02:42:23 -05:00
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
char *getproxy PARAMS ((enum url_scheme));
|
1999-12-02 02:42:23 -05:00
|
|
|
int no_proxy_match PARAMS ((const char *, const char **));
|
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
void convert_links PARAMS ((const char *, struct urlpos *));
|
1999-12-02 02:42:23 -05:00
|
|
|
|
2000-10-20 01:55:46 -04:00
|
|
|
downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
|
2000-03-02 01:33:48 -05:00
|
|
|
|
2001-11-21 19:24:28 -05:00
|
|
|
char *rewrite_shorthand_url PARAMS ((const char *));
|
2001-11-18 20:14:14 -05:00
|
|
|
|
1999-12-02 02:42:23 -05:00
|
|
|
#endif /* URL_H */
|