/* Handling of recursive HTTP retrieving.
   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation,
   Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

Additional permission under GNU GPL version 3 section 7

If you modify this program, or any covered work, by linking or
combining it with the OpenSSL project's OpenSSL library (or a
modified version of that library), containing parts covered by the
terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
grants you additional permission to convey the resulting work.
Corresponding Source for a non-source form of such a combination
shall include the source code for the parts of OpenSSL used as well
as that of the covered work.  */

#include "wget.h"

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>

#include "url.h"
#include "recur.h"
#include "utils.h"
#include "retr.h"
#include "ftp.h"
#include "host.h"
#include "hash.h"
#include "res.h"
#include "convert.h"
#include "html-url.h"
#include "css-url.h"
#include "spider.h"

/* Functions for maintaining the URL queue.  */

struct queue_element {
  const char *url;              /* the URL to download */
  const char *referer;          /* the referring document */
  int depth;                    /* the depth */
  bool html_allowed;            /* whether the document is allowed to
                                   be treated as HTML. */
  struct iri *iri;              /* IRI/encoding information for this URL */
  bool css_allowed;             /* whether the document is allowed to
                                   be treated as CSS. */
  struct queue_element *next;   /* next element in queue */
};

struct url_queue {
  struct queue_element *head;
  struct queue_element *tail;
  int count, maxcount;
};

/* Create a URL queue. */

static struct url_queue *
url_queue_new (void)
{
  struct url_queue *queue = xnew0 (struct url_queue);
  return queue;
}

/* Delete a URL queue. */

static void
url_queue_delete (struct url_queue *queue)
{
  xfree (queue);
}

/* Enqueue a URL in the queue.  The queue is FIFO: the items will be
   retrieved ("dequeued") from the queue in the order they were placed
   into it.  */

static void
url_enqueue (struct url_queue *queue, struct iri *i,
             const char *url, const char *referer, int depth,
             bool html_allowed, bool css_allowed)
{
  struct queue_element *qel = xnew (struct queue_element);
  qel->iri = i;
  qel->url = url;
  qel->referer = referer;
  qel->depth = depth;
  qel->html_allowed = html_allowed;
  qel->css_allowed = css_allowed;
  qel->next = NULL;

  ++queue->count;
  if (queue->count > queue->maxcount)
    queue->maxcount = queue->count;

  DEBUGP (("Enqueuing %s at depth %d\n",
           quotearg_n_style (0, escape_quoting_style, url), depth));
  DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));

  if (i)
    DEBUGP (("[IRI Enqueuing %s with %s\n", quote_n (0, url),
             i->uri_encoding ? quote_n (1, i->uri_encoding) : "None"));

  if (queue->tail)
    queue->tail->next = qel;
  queue->tail = qel;

  if (!queue->head)
    queue->head = queue->tail;
}

/* Take a URL out of the queue.  Return true if this operation
   succeeded, or false if the queue is empty.  */

static bool
url_dequeue (struct url_queue *queue, struct iri **i,
             const char **url, const char **referer, int *depth,
             bool *html_allowed, bool *css_allowed)
{
  struct queue_element *qel = queue->head;

  if (!qel)
    return false;

  queue->head = queue->head->next;
  if (!queue->head)
    queue->tail = NULL;

  *i = qel->iri;
  *url = qel->url;
  *referer = qel->referer;
  *depth = qel->depth;
  *html_allowed = qel->html_allowed;
  *css_allowed = qel->css_allowed;

  --queue->count;

  DEBUGP (("Dequeuing %s at depth %d\n",
           quotearg_n_style (0, escape_quoting_style, qel->url), qel->depth));
  DEBUGP (("Queue count %d, maxcount %d.\n", queue->count, queue->maxcount));

  xfree (qel);
  return true;
}
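
/* A minimal usage sketch of the queue helpers above (illustrative only;
   the variable names and the literal URL are hypothetical, not part of
   Wget itself):

     struct url_queue *q = url_queue_new ();
     url_enqueue (q, iri_new (), xstrdup ("http://example.com/"),
                  NULL, 0, true, false);
     ...
     struct iri *i;
     const char *u, *ref;
     int depth;
     bool html_ok, css_ok;
     while (url_dequeue (q, &i, &u, &ref, &depth, &html_ok, &css_ok))
       {
         ... process u, then free the strings the caller now owns ...
       }
     url_queue_delete (q);

   Enqueue and dequeue are both O(1) thanks to the head/tail pointers.  */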

static void blacklist_add (struct hash_table *blacklist, const char *url)
{
  char *url_unescaped = xstrdup (url);

  url_unescape (url_unescaped);
  string_set_add (blacklist, url_unescaped);
  xfree (url_unescaped);
}

static int blacklist_contains (struct hash_table *blacklist, const char *url)
{
  char *url_unescaped = xstrdup (url);
  int ret;

  url_unescape (url_unescaped);
  ret = string_set_contains (blacklist, url_unescaped);
  xfree (url_unescaped);

  return ret;
}
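
/* Both helpers above canonicalize the URL by undoing percent-escaping
   before hashing, so an escaped and an unescaped spelling of the same
   URL land on the same blacklist entry.  */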

static bool download_child_p (const struct urlpos *, struct url *, int,
                              struct url *, struct hash_table *, struct iri *);
static bool descend_redirect_p (const char *, struct url *, int,
                                struct url *, struct hash_table *, struct iri *);


/* Retrieve a part of the web beginning with START_URL.  This used to
   be called "recursive retrieval", because the old function was
   recursive and implemented depth-first search.  retrieve_tree on the
   other hand implements breadth-first traversal of the tree, which
   results in much nicer ordering of downloads.

   The algorithm this function uses is simple:

   1. put START_URL in the queue.
   2. while there are URLs in the queue:

     3. get next URL from the queue.
     4. download it.
     5. if the URL is HTML and its depth does not exceed maximum depth,
        get the list of URLs embedded therein.
     6. for each of those URLs do the following:

       7. if the URL is not one of those downloaded before, and if it
          satisfies the criteria specified by the various command-line
          options, add it to the queue. */
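
/* A condensed, purely illustrative sketch of the loop below (not the
   exact calls; names and arguments are simplified):

     enqueue (queue, start_url, depth = 0);
     while (dequeue (queue, &url, &depth, ...))
       {
         file = retrieve (url);
         if (file is HTML or CSS and the depth limits permit descending)
           for (each link found in file)
             if (download_child_p (link, ...))
               enqueue (queue, link, depth + 1);
       }

   The real loop additionally handles redirects, --spider, --delete-after,
   the download quota, and IRI bookkeeping.  */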

uerr_t
retrieve_tree (struct url *start_url_parsed, struct iri *pi)
{
  uerr_t status = RETROK;

  /* The queue of URLs we need to load. */
  struct url_queue *queue;

  /* The URLs we do not wish to enqueue, because they are already in
     the queue, but haven't been downloaded yet. */
  struct hash_table *blacklist;

  struct iri *i = iri_new ();

#define COPYSTR(x)  ((x) ? xstrdup (x) : NULL)
  /* Duplicate pi struct if not NULL */
  if (pi)
    {
      i->uri_encoding = COPYSTR (pi->uri_encoding);
      i->content_encoding = COPYSTR (pi->content_encoding);
      i->utf8_encode = pi->utf8_encode;
    }
  else
    set_uri_encoding (i, opt.locale, true);
#undef COPYSTR

  queue = url_queue_new ();
  blacklist = make_string_hash_table (0);

  /* Enqueue the starting URL.  Use start_url_parsed->url rather than
     just URL so we enqueue the canonical form of the URL. */
  url_enqueue (queue, i, xstrdup (start_url_parsed->url), NULL, 0, true,
               false);
  blacklist_add (blacklist, start_url_parsed->url);

  while (1)
    {
      bool descend = false;
      char *url, *referer, *file = NULL;
      int depth;
      bool html_allowed, css_allowed;
      bool is_css = false;
      bool dash_p_leaf_HTML = false;

      if (opt.quota && total_downloaded_bytes > opt.quota)
        break;
      if (status == FWRITEERR)
        break;

      /* Get the next URL from the queue... */

      if (!url_dequeue (queue, (struct iri **) &i,
                        (const char **)&url, (const char **)&referer,
                        &depth, &html_allowed, &css_allowed))
        break;

      /* ...and download it.  Note that this download is in most cases
         unconditional, as download_child_p already makes sure a file
         doesn't get enqueued twice -- and yet this check is here, and
         not in download_child_p.  This is so that if you run `wget -r
         URL1 URL2', and a random URL is encountered once under URL1
         and again under URL2, but at a different (possibly smaller)
         depth, we want the URL's children to be taken into account
         the second time. */
      if (dl_url_file_map && hash_table_contains (dl_url_file_map, url))
        {
          bool is_css_bool;

          file = xstrdup (hash_table_get (dl_url_file_map, url));

          DEBUGP (("Already downloaded \"%s\", reusing it from \"%s\".\n",
                   url, file));

          if ((is_css_bool = (css_allowed
                              && downloaded_css_set
                              && string_set_contains (downloaded_css_set, file)))
              || (html_allowed
                  && downloaded_html_set
                  && string_set_contains (downloaded_html_set, file)))
            {
              descend = true;
              is_css = is_css_bool;
            }
        }
      else
        {
          int dt = 0, url_err;
          char *redirected = NULL;
          struct url *url_parsed = url_parse (url, &url_err, i, true);

          status = retrieve_url (url_parsed, url, &file, &redirected, referer,
                                 &dt, false, i, true);

          if (html_allowed && file && status == RETROK
              && (dt & RETROKF) && (dt & TEXTHTML))
            {
              descend = true;
              is_css = false;
            }

          /* Slightly different for CSS: css_allowed can override the
             content type, since many web servers serve CSS with an
             incorrect content type.  */
          if (file && status == RETROK
              && (dt & RETROKF)
              && ((dt & TEXTCSS) || css_allowed))
            {
              descend = true;
              is_css = true;
            }

          if (redirected)
            {
              /* We have been redirected, possibly to another host, or
                 different path, or wherever.  Check whether we really
                 want to follow it. */
              if (descend)
                {
                  if (!descend_redirect_p (redirected, url_parsed, depth,
                                           start_url_parsed, blacklist, i))
                    descend = false;
                  else
                    /* Make sure that the old pre-redirect form gets
                       blacklisted. */
                    blacklist_add (blacklist, url);
                }

              xfree (url);
              url = redirected;
            }
          else
            {
              xfree (url);
              url = xstrdup (url_parsed->url);
            }
          url_free (url_parsed);
        }
|
2001-11-24 22:10:34 -05:00
|
|
|
|
2006-08-24 11:27:57 -04:00
|
|
|
if (opt.spider)
|
2007-08-02 23:38:21 -04:00
|
|
|
{
|
2006-08-24 11:27:57 -04:00
|
|
|
visited_url (url, referer);
|
2007-08-02 23:38:21 -04:00
|
|
|
}
|
2006-08-24 11:27:57 -04:00
|
|
|
|
2001-11-24 22:10:34 -05:00
|
|
|
if (descend
|
2007-08-02 23:38:21 -04:00
|
|
|
&& depth >= opt.reclevel && opt.reclevel != INFINITE_RECURSION)
|
|
|
|
{
|
|
|
|
if (opt.page_requisites
|
|
|
|
&& (depth == opt.reclevel || depth == opt.reclevel + 1))
|
|
|
|
{
|
|
|
|
/* When -p is specified, we are allowed to exceed the
|
|
|
|
maximum depth, but only for the "inline" links,
|
|
|
|
i.e. those that are needed to display the page.
|
|
|
|
Originally this could exceed the depth at most by
|
|
|
|
one, but we allow one more level so that the leaf
|
|
|
|
pages that contain frames can be loaded
|
|
|
|
correctly. */
|
|
|
|
dash_p_leaf_HTML = true;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
/* Either -p wasn't specified or it was and we've
|
|
|
|
already spent the two extra (pseudo-)levels that it
|
|
|
|
affords us, so we need to bail out. */
|
|
|
|
DEBUGP (("Not descending further; at depth %d, max. %d.\n",
|
|
|
|
depth, opt.reclevel));
|
|
|
|
descend = false;
|
|
|
|
}
|
|
|
|
}
|
2001-11-24 22:10:34 -05:00
|
|
|
|
2008-04-22 03:15:48 -04:00
|
|
|
/* If the downloaded document was HTML or CSS, parse it and enqueue the
|
2007-08-02 23:38:21 -04:00
|
|
|
links it contains. */
|
2001-11-24 22:10:34 -05:00
|
|
|
|
|
|
|
if (descend)
|
2007-08-02 23:38:21 -04:00
|
|
|
{
|
|
|
|
bool meta_disallow_follow = false;
|
|
|
|
struct urlpos *children
|
2008-04-22 04:28:15 -04:00
|
|
|
= is_css ? get_urls_css_file (file, url) :
|
2008-07-23 18:56:29 -04:00
|
|
|
get_urls_html (file, url, &meta_disallow_follow, i);
|
2007-08-02 23:38:21 -04:00
|
|
|
|
|
|
|
if (opt.use_robots && meta_disallow_follow)
|
|
|
|
{
|
|
|
|
free_urlpos (children);
|
|
|
|
children = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (children)
|
|
|
|
{
|
|
|
|
struct urlpos *child = children;
|
2009-06-28 23:55:01 -04:00
|
|
|
struct url *url_parsed = url_parse (url, NULL, i, true);
|
2008-07-23 18:56:29 -04:00
|
|
|
struct iri *ci;
|
2007-07-29 21:22:34 -04:00
|
|
|
char *referer_url = url;
|
2007-08-27 17:28:22 -04:00
|
|
|
bool strip_auth = (url_parsed != NULL
|
|
|
|
&& url_parsed->user != NULL);
|
2007-08-02 23:38:21 -04:00
|
|
|
assert (url_parsed != NULL);
|
2001-11-24 22:10:34 -05:00
|
|
|
|
2007-07-29 21:22:34 -04:00
|
|
|
/* Strip auth info if present */
|
|
|
|
if (strip_auth)
|
|
|
|
referer_url = url_string (url_parsed, URL_AUTH_HIDE);
|
|
|
|
|
2007-08-02 23:38:21 -04:00
|
|
|
for (; child; child = child->next)
|
|
|
|
{
|
|
|
|
if (child->ignore_when_downloading)
|
|
|
|
continue;
|
|
|
|
if (dash_p_leaf_HTML && !child->link_inline_p)
|
|
|
|
continue;
|
|
|
|
if (download_child_p (child, url_parsed, depth, start_url_parsed,
|
2008-07-23 18:56:29 -04:00
|
|
|
blacklist, i))
|
2007-08-02 23:38:21 -04:00
|
|
|
{
|
2008-07-23 18:56:29 -04:00
|
|
|
ci = iri_new ();
|
2008-07-30 04:15:55 -04:00
|
|
|
set_uri_encoding (ci, i->content_encoding, false);
|
2008-07-23 18:56:29 -04:00
|
|
|
url_enqueue (queue, ci, xstrdup (child->url->url),
|
2007-08-02 23:38:21 -04:00
|
|
|
xstrdup (referer_url), depth + 1,
|
2008-04-22 04:28:15 -04:00
|
|
|
child->link_expect_html,
|
|
|
|
child->link_expect_css);
|
2007-08-02 23:38:21 -04:00
|
|
|
/* We blacklist the URL we have enqueued, because we
|
|
|
|
don't want to enqueue (and hence download) the
|
|
|
|
same URL twice. */
|
2014-11-26 06:39:47 -05:00
|
|
|
blacklist_add (blacklist, child->url->url);
|
2007-08-02 23:38:21 -04:00
|
|
|
}
|
|
|
|
}
|
2001-11-24 22:10:34 -05:00
|
|
|
|
2007-07-29 21:22:34 -04:00
|
|
|
if (strip_auth)
|
|
|
|
xfree (referer_url);
|
2007-08-02 23:38:21 -04:00
|
|
|
url_free (url_parsed);
|
|
|
|
free_urlpos (children);
|
|
|
|
}
|
|
|
|
}

      if (file
          && (opt.delete_after
              || opt.spider /* opt.recursive is implicitly true */
              || !acceptable (file)))
        {
          /* Either --delete-after was specified, or we loaded this
             (otherwise unneeded because of --spider or rejected by -R)
             HTML file just to harvest its hyperlinks -- in either case,
             delete the local file. */
          DEBUGP (("Removing file due to %s in recursive_retrieve():\n",
                   opt.delete_after ? "--delete-after" :
                   (opt.spider ? "--spider" :
                    "recursive rejection criteria")));
          logprintf (LOG_VERBOSE,
                     (opt.delete_after || opt.spider
                      ? _("Removing %s.\n")
                      : _("Removing %s since it should be rejected.\n")),
                     file);
          if (unlink (file))
            logprintf (LOG_NOTQUIET, "unlink: %s\n", strerror (errno));
          logputs (LOG_VERBOSE, "\n");
          register_delete_file (file);
        }

      xfree (url);
      xfree (referer);
      xfree (file);
      iri_free (i);
    }

  /* If anything is left of the queue due to a premature exit, free it
     now. */
  {
    char *d1, *d2;
    int d3;
    bool d4, d5;
    struct iri *d6;
    while (url_dequeue (queue, (struct iri **)&d6,
                        (const char **)&d1, (const char **)&d2, &d3, &d4, &d5))
      {
        iri_free (d6);
        xfree (d1);
        xfree (d2);
      }
  }
  url_queue_delete (queue);

  string_set_free (blacklist);

  if (opt.quota && total_downloaded_bytes > opt.quota)
    return QUOTEXC;
  else if (status == FWRITEERR)
    return FWRITEERR;
  else
    return RETROK;
}

/* Based on the context provided by retrieve_tree, decide whether a
   URL is to be descended to.  This is only ever called from
   retrieve_tree, but is in a separate function for clarity.

   The most expensive checks (such as those for robots) are memoized
   by storing these URLs to BLACKLIST.  This may or may not help.  It
   will help if those URLs are encountered many times. */

static bool
download_child_p (const struct urlpos *upos, struct url *parent, int depth,
                  struct url *start_url_parsed, struct hash_table *blacklist,
                  struct iri *iri)
{
  struct url *u = upos->url;
  const char *url = u->url;
  bool u_scheme_like_http;

  DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));

  if (blacklist_contains (blacklist, url))
    {
      if (opt.spider)
        {
          char *referrer = url_string (parent, URL_AUTH_HIDE_PASSWD);
          DEBUGP (("download_child_p: parent->url is: %s\n", quote (parent->url)));
          visited_url (url, referrer);
          xfree (referrer);
        }
      DEBUGP (("Already on the black list.\n"));
      goto out;
    }

  /* Several things to check for:
     1. if scheme is not https and https_only requested
     2. if scheme is not http, and we don't load it
     3. check for relative links (if relative_only is set)
     4. check for domain
     5. check for no-parent
     6. check for excludes && includes
     7. check for suffix
     8. check for same host (if spanhost is unset), with possible
        gethostbyname baggage
     9. check for robots.txt

     Addendum: If the URL is FTP, and it is to be loaded, only the
     domain and suffix settings are "stronger".

     Note that .html files will get loaded regardless of suffix rules
     (but that is remedied later with unlink) unless the depth equals
     the maximum depth.

     More time- and memory-consuming tests should be put later on
     the list. */
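
  /* As an illustrative (hypothetical) example of check 5: with
     --no-parent and a start URL of http://example.com/a/b/index.html,
     a link to http://example.com/a/b/c/page.html stays inside the start
     directory and passes, while http://example.com/a/other.html escapes
     it and is rejected by the subdir_p test below.  */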

#ifdef HAVE_SSL
  /* 1. Refuse non-HTTPS links when --https-only was requested. */
  if (opt.https_only && u->scheme != SCHEME_HTTPS)
    {
      DEBUGP (("Not following non-HTTPS links.\n"));
      goto out;
    }
#endif

  /* Determine whether URL under consideration has a HTTP-like scheme. */
  u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);

  /* 2. Schemes other than HTTP are normally not recursed into. */
  if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
    {
      DEBUGP (("Not following non-HTTP schemes.\n"));
      goto out;
    }

  /* 3. If it is an absolute link and they are not followed, throw it
     out. */
  if (u_scheme_like_http)
    if (opt.relative_only && !upos->link_relative_p)
      {
        DEBUGP (("It doesn't really look like a relative link.\n"));
        goto out;
      }

  /* 4. If its domain is not to be accepted/looked-up, chuck it
     out. */
  if (!accept_domain (u))
    {
      DEBUGP (("The domain was not accepted.\n"));
      goto out;
    }

  /* 5. Check for parent directory.

     If we descended to a different host or changed the scheme, ignore
     opt.no_parent.  Also ignore it for documents needed to display
     the parent page when in -p mode. */
  if (opt.no_parent
      && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
      && 0 == strcasecmp (u->host, start_url_parsed->host)
      && (u->scheme != start_url_parsed->scheme
          || u->port == start_url_parsed->port)
      && !(opt.page_requisites && upos->link_inline_p))
    {
      if (!subdir_p (start_url_parsed->dir, u->dir))
        {
          DEBUGP (("Going to \"%s\" would escape \"%s\" with no_parent on.\n",
                   u->dir, start_url_parsed->dir));
          goto out;
        }
    }

  /* 6. If the file does not match the acceptance list, or is on the
     rejection list, chuck it out.  The same goes for the directory
     exclusion and inclusion lists.  */
  if (opt.includes || opt.excludes)
    {
      if (!accdir (u->dir))
        {
          DEBUGP (("%s (%s) is excluded/not-included.\n", url, u->dir));
          goto out;
        }
    }
  if (!accept_url (url))
    {
      DEBUGP (("%s is excluded/not-included through regex.\n", url));
      goto out;
    }

  /* 7. Check for acceptance/rejection rules.  We ignore these rules
     for directories (no file name to match) and for non-leaf HTMLs,
     which can lead to other files that do need to be downloaded.  (-p
     automatically implies non-leaf because with -p we can, if
     necessary, overstep the maximum depth to get the page requisites.) */
  if (u->file[0] != '\0'
      && !(has_html_suffix_p (u->file)
           /* The exception only applies to non-leaf HTMLs (but -p
              always implies non-leaf because we can overstep the
              maximum depth to get the requisites): */
           && (/* non-leaf */
               opt.reclevel == INFINITE_RECURSION
               /* also non-leaf */
               || depth < opt.reclevel - 1
               /* -p, which implies non-leaf (see above) */
               || opt.page_requisites)))
    {
      if (!acceptable (u->file))
        {
          DEBUGP (("%s (%s) does not match acc/rej rules.\n",
                   url, u->file));
          goto out;
        }
    }

  /* 8. Check for same host, unless spanning hosts is allowed. */
  if (schemes_are_similar_p (u->scheme, parent->scheme))
    if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
      {
        DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
                 u->host, parent->host));
        goto out;
      }

  /* 9. Check whether robots.txt allows this URL. */
  if (opt.use_robots && u_scheme_like_http)
    {
      struct robot_specs *specs = res_get_specs (u->host, u->port);
      if (!specs)
        {
          char *rfile;
          if (res_retrieve_file (url, &rfile, iri))
            {
              specs = res_parse_from_file (rfile);

              /* Delete the robots.txt file if we chose to either delete the
                 files after downloading or we're just running a spider. */
              if (opt.delete_after || opt.spider)
                {
                  logprintf (LOG_VERBOSE, _("Removing %s.\n"), rfile);
                  if (unlink (rfile))
                    logprintf (LOG_NOTQUIET, "unlink: %s\n",
                               strerror (errno));
                }

              xfree (rfile);
            }
          else
            {
              /* If we cannot get real specs, at least produce
                 dummy ones so that we can register them and stop
                 trying to retrieve them. */
              specs = res_parse ("", 0);
            }
          res_register_specs (u->host, u->port, specs);
        }

      /* Now that we have (or don't have) robots.txt specs, we can
         check what they say. */
      if (!res_match_path (specs, u->path))
        {
          DEBUGP (("Not following %s because robots.txt forbids it.\n", url));
          blacklist_add (blacklist, url);
          goto out;
        }
    }

  /* The URL has passed all the tests.  It can be placed in the
     download queue. */
  DEBUGP (("Decided to load it.\n"));

  return true;

 out:
  DEBUGP (("Decided NOT to load it.\n"));

  return false;
}

/* This function determines whether we will consider downloading the
   children of a URL whose download resulted in a redirection,
   possibly to another host, etc.  It is needed very rarely, and thus
   it is merely a simple-minded wrapper around download_child_p. */

static bool
descend_redirect_p (const char *redirected, struct url *orig_parsed, int depth,
                    struct url *start_url_parsed, struct hash_table *blacklist,
                    struct iri *iri)
{
  struct url *new_parsed;
  struct urlpos *upos;
  bool success;

  assert (orig_parsed != NULL);

  new_parsed = url_parse (redirected, NULL, NULL, false);
  assert (new_parsed != NULL);

  upos = xnew0 (struct urlpos);
  upos->url = new_parsed;

  success = download_child_p (upos, orig_parsed, depth,
                              start_url_parsed, blacklist, iri);

  if (success)
    blacklist_add (blacklist, upos->url->url);
  else
    DEBUGP (("Redirection \"%s\" failed the test.\n", redirected));

  url_free (new_parsed);
  xfree (upos);

  return success;
}

/* vim:set sts=2 sw=2 cino+={s: */