/* Keep track of visited URLs in spider mode.
   Copyright (C) 2006, 2007 Free Software Foundation, Inc.

This file is part of GNU Wget.

GNU Wget is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.

GNU Wget is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with Wget.  If not, see <http://www.gnu.org/licenses/>.

In addition, as a special exception, the Free Software Foundation
gives permission to link the code of its release of Wget with the
OpenSSL project's "OpenSSL" library (or with modified versions of it
that use the same license as the "OpenSSL" library), and distribute
the linked executables.  You must obey the GNU General Public License
in all respects for all of the code used other than "OpenSSL".  If you
modify this file, you may extend this exception to your version of the
file, but you are not obligated to do so.  If you do not wish to do
so, delete this exception statement from your version.  */

#include "wget.h"

#include <stdio.h>
#include <errno.h>
#include <assert.h>

#include "spider.h"
#include "url.h"
#include "utils.h"
#include "hash.h"
#include "res.h"


static struct hash_table *nonexisting_urls_set;
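
/* Explanatory note added for readability (not upstream commentary):
   nonexisting_urls_set is wget's generic hash table used purely as a
   set of URL strings through the string_set_* helpers.  A minimal
   sketch of that idiom, using only calls that appear in this file:

     struct hash_table *set = make_string_hash_table (0);
     string_set_add (set, "http://example.com/missing");
     int n = hash_table_count (set);
     string_set_free (set);

   The example URL is a placeholder; that string_set_add stores its own
   copy of the key is an assumption based on how this file hands it
   strings owned by the caller.  */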

/* Cleanup the data structures associated with this file.  */

void
spider_cleanup (void)
{
  if (nonexisting_urls_set)
    string_set_free (nonexisting_urls_set);
}

/* Remembers broken links.  */
void
nonexisting_url (const char *url)
{
  /* Ignore robots.txt URLs */
  if (is_robots_txt_url (url))
    return;
  if (!nonexisting_urls_set)
    nonexisting_urls_set = make_string_hash_table (0);
  string_set_add (nonexisting_urls_set, url);
}
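
/* Illustrative call pattern (a sketch, not code from this file):
   callers in the retrieval path are expected to report a URL here once
   a fetch has definitively failed during a --spider run, roughly:

     if (opt.spider && document_is_missing)
       nonexisting_url (url_string);

   `opt.spider' is assumed to be the run-time flag for spider mode;
   `document_is_missing' and `url_string' are placeholder names.  The
   real call sites and their exact conditions live outside this file.  */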

/* Print the URLs recorded as broken during the spider run.  */
void
print_broken_links (void)
{
  hash_table_iterator iter;
  int num_elems;

  if (!nonexisting_urls_set)
    {
      logprintf (LOG_NOTQUIET, _("Found no broken links.\n\n"));
      return;
    }

  num_elems = hash_table_count (nonexisting_urls_set);
  assert (num_elems > 0);

  logprintf (LOG_NOTQUIET, ngettext ("Found %d broken link.\n\n",
                                     "Found %d broken links.\n\n",
                                     num_elems),
             num_elems);

  for (hash_table_iterate (nonexisting_urls_set, &iter);
       hash_table_iter_next (&iter); )
    {
      const char *url = (const char *) iter.key;

      logprintf (LOG_NOTQUIET, _("%s\n"), url);
    }
  logputs (LOG_NOTQUIET, "\n");
}
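
/* Usage note added for readability (not upstream commentary):
   print_broken_links is meant to run once, at the end of a spider-mode
   session, to report everything recorded by nonexisting_url.  A
   plausible shutdown order, shown only as a sketch:

     print_broken_links ();   -- report the collected URLs
     spider_cleanup ();       -- then release the set

   The actual call sites are elsewhere in wget and are not shown here.  */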

/*
 * vim: et ts=2 sw=2
 */