mirror of https://github.com/moparisthebest/wget (synced 2024-07-03 16:38:41 -04:00)

[svn] Fixes for recursive spider mode.

This commit is contained in:
parent 79f66dfd15
commit 60c88ee992
src/ChangeLog
@@ -1,3 +1,22 @@
2006-06-28 Mauro Tortonesi <mauro@ferrara.linux.it>

* res.c: Implemented is_robots_txt_url function for detection of
robots.txt URLs and related test routine.

* res.h: Ditto.

* url.c: Implemented are_urls_equal function for URL comparison and
related testing routine.

* url.h: Ditto.

* convert.c: Fixes for recursive spider mode: don't consider
non-existing robots.txt as a broken link, and use are_urls_equal
instead of strcasecmp for referrer URLs comparison.

* test.c: Call tests routines for are_urls_equal and
is_robots_txt_url.

2006-06-26 Hrvoje Niksic <hniksic@xemacs.org>

* wget.h (wgint): Typedef to any 64-bit (or larger) type we can
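The are_urls_equal routine that the entries above refer to lives in src/url.c and is not part of this excerpt; only its callers and test registration appear below. As a rough orientation, a comparison of the kind the ChangeLog describes (URL equality that tolerates case and percent-escape differences, which plain strcasecmp does not) might be sketched as follows. This is an illustrative assumption, not the actual code added to url.c; decode_one and urls_equal_sketch are hypothetical names.

#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>

/* Sketch only: decode one character at P; a %XX escape consumes three
   input characters, anything else consumes one.  The number of
   consumed characters is stored in *LEN. */
static int
decode_one (const char *p, int *len)
{
  if (p[0] == '%' && isxdigit ((unsigned char) p[1])
      && isxdigit ((unsigned char) p[2]))
    {
      char hex[3] = { p[1], p[2], '\0' };
      *len = 3;
      return (int) strtol (hex, NULL, 16);
    }
  *len = 1;
  return (unsigned char) p[0];
}

/* Sketch only: case- and escape-insensitive URL equality, e.g.
   "http://host/%7Euser/" compares equal to "http://host/~user/". */
static bool
urls_equal_sketch (const char *u1, const char *u2)
{
  while (*u1 && *u2)
    {
      int l1, l2;
      int c1 = decode_one (u1, &l1);
      int c2 = decode_one (u2, &l2);
      if (tolower (c1) != tolower (c2))
        return false;
      u1 += l1;
      u2 += l2;
    }
  return *u1 == '\0' && *u2 == '\0';
}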
src/convert.c (462 changed lines)
@@ -45,6 +45,7 @@ so, delete this exception statement from your version. */
#include "utils.h"
#include "hash.h"
#include "ptimer.h"
#include "res.h"

static struct hash_table *dl_file_url_map;
struct hash_table *dl_url_file_map;

@@ -99,13 +100,13 @@ convert_all_links (void)
char *file = file_array[i];

/* Determine the URL of the HTML file. get_urls_html will need
it. */
url = hash_table_get (dl_file_url_map, file);
if (!url)
{
DEBUGP (("Apparently %s has been removed.\n", file));
continue;
}

DEBUGP (("Scanning %s (from %s)\n", file, url));

@@ -117,48 +118,48 @@ convert_all_links (void)
links that have been followed from other files. */

for (cur_url = urls; cur_url; cur_url = cur_url->next)
{
char *local_name;
struct url *u = cur_url->url;

if (cur_url->link_base_p)
{
/* Base references have been resolved by our parser, so
we turn the base URL into an empty string. (Perhaps
we should remove the tag entirely?) */
cur_url->convert = CO_NULLIFY_BASE;
continue;
}

/* We decide the direction of conversion according to whether
a URL was downloaded. Downloaded URLs will be converted
ABS2REL, whereas non-downloaded will be converted REL2ABS. */
local_name = hash_table_get (dl_url_file_map, u->url);

/* Decide on the conversion type. */
if (local_name)
{
/* We've downloaded this URL. Convert it to relative
form. We do this even if the URL already is in
relative form, because our directory structure may
not be identical to that on the server (think `-nd',
`--cut-dirs', etc.) */
cur_url->convert = CO_CONVERT_TO_RELATIVE;
cur_url->local_name = xstrdup (local_name);
DEBUGP (("will convert url %s to local %s\n", u->url, local_name));
}
else
{
/* We haven't downloaded this URL. If it's not already
complete (including a full host name), convert it to
that form, so it can be reached while browsing this
HTML locally. */
if (!cur_url->link_complete_p)
cur_url->convert = CO_CONVERT_TO_COMPLETE;
cur_url->local_name = NULL;
DEBUGP (("will convert url %s to complete\n", u->url));
}
}

/* Convert the links in the file. */
convert_links (file, urls);

@@ -171,13 +172,13 @@ convert_all_links (void)
secs = ptimer_measure (timer);
ptimer_destroy (timer);
logprintf (LOG_VERBOSE, _("Converted %d files in %s seconds.\n"),
file_count, print_decimal (secs));
}

static void write_backup_file (const char *, downloaded_file_t);
static const char *replace_attr (const char *, int, FILE *, const char *);
static const char *replace_attr_refresh_hack (const char *, int, FILE *,
const char *, int);
static char *local_quote_string (const char *);
static char *construct_relative (const char *, const char *);

@@ -205,11 +206,11 @@ convert_links (const char *file, struct urlpos *links)
struct urlpos *dry;
for (dry = links; dry; dry = dry->next)
if (dry->convert != CO_NOCONVERT)
++dry_count;
if (!dry_count)
{
logputs (LOG_VERBOSE, _("nothing to do.\n"));
return;
}

@@ -217,7 +218,7 @@ convert_links (const char *file, struct urlpos *links)
if (!fm)
{
logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
file, strerror (errno));
return;
}

@@ -232,7 +233,7 @@ convert_links (const char *file, struct urlpos *links)
if (unlink (file) < 0 && errno != ENOENT)
{
logprintf (LOG_NOTQUIET, _("Unable to delete `%s': %s\n"),
file, strerror (errno));
read_file_free (fm);
return;
}

@@ -241,7 +242,7 @@ convert_links (const char *file, struct urlpos *links)
if (!fp)
{
logprintf (LOG_NOTQUIET, _("Cannot convert links in %s: %s\n"),
file, strerror (errno));
read_file_free (fm);
return;
}

@@ -254,16 +255,16 @@ convert_links (const char *file, struct urlpos *links)
char *url_start = fm->content + link->pos;

if (link->pos >= fm->length)
{
DEBUGP (("Something strange is going on. Please investigate."));
break;
}
/* If the URL is not to be converted, skip it. */
if (link->convert == CO_NOCONVERT)
{
DEBUGP (("Skipping %s at position %d.\n", link->url->url, link->pos));
continue;
}

/* Echo the file contents, up to the offending URL's opening
quote, to the outfile. */

@@ -271,52 +272,52 @@ convert_links (const char *file, struct urlpos *links)
p = url_start;

switch (link->convert)
{
case CO_CONVERT_TO_RELATIVE:
/* Convert absolute URL to relative. */
{
char *newname = construct_relative (file, link->local_name);
char *quoted_newname = local_quote_string (newname);

if (!link->link_refresh_p)
p = replace_attr (p, link->size, fp, quoted_newname);
else
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newname,
link->refresh_timeout);

DEBUGP (("TO_RELATIVE: %s to %s at position %d in %s.\n",
link->url->url, newname, link->pos, file));
xfree (newname);
xfree (quoted_newname);
++to_file_count;
break;
}
case CO_CONVERT_TO_COMPLETE:
/* Convert the link to absolute URL. */
{
char *newlink = link->url->url;
char *quoted_newlink = html_quote_string (newlink);

if (!link->link_refresh_p)
p = replace_attr (p, link->size, fp, quoted_newlink);
else
p = replace_attr_refresh_hack (p, link->size, fp, quoted_newlink,
link->refresh_timeout);

DEBUGP (("TO_COMPLETE: <something> to %s at position %d in %s.\n",
newlink, link->pos, file));
xfree (quoted_newlink);
++to_url_count;
break;
}
case CO_NULLIFY_BASE:
/* Change the base href to "". */
p = replace_attr (p, link->size, fp, "");
break;
case CO_NOCONVERT:
abort ();
break;
}
}

/* Output the rest of the file. */

@@ -359,7 +360,7 @@ construct_relative (const char *basefile, const char *linkfile)
for (b = basefile, l = linkfile; *b == *l && *b != '\0'; ++b, ++l)
{
if (*b == '/')
start = (b - basefile) + 1;
}
basefile += start;
linkfile += start;

@@ -380,7 +381,7 @@ construct_relative (const char *basefile, const char *linkfile)
for (b = basefile; *b; b++)
{
if (*b == '/')
++basedirs;
}

/* Construct LINK as explained above. */

@@ -410,12 +411,12 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
if (downloaded_file_return == FILE_DOWNLOADED_AND_HTML_EXTENSION_ADDED)
{
/* Just write "orig" over "html". We need to do it this way
because when we're checking to see if we've downloaded the
file before (to see if we can skip downloading it), we don't
know if it's a text/html file. Therefore we don't know yet
at that stage that -E is going to cause us to tack on
".html", so we need to compare vs. the original URL plus
".orig", not the original URL plus ".html.orig". */
filename_plus_orig_suffix = alloca (filename_len + 1);
strcpy (filename_plus_orig_suffix, file);
strcpy ((filename_plus_orig_suffix + filename_len) - 4, "orig");

@@ -440,25 +441,25 @@ write_backup_file (const char *file, downloaded_file_t downloaded_file_return)
{
/* Rename <file> to <file>.orig before former gets written over. */
if (rename (file, filename_plus_orig_suffix) != 0)
logprintf (LOG_NOTQUIET, _("Cannot back up %s as %s: %s\n"),
file, filename_plus_orig_suffix, strerror (errno));

/* Remember that we've already written a .orig backup for this file.
Note that we never free this memory since we need it till the
convert_all_links() call, which is one of the last things the
program does before terminating. BTW, I'm not sure if it would be
safe to just set 'converted_file_ptr->string' to 'file' below,
rather than making a copy of the string... Another note is that I
thought I could just add a field to the urlpos structure saying
that we'd written a .orig file for this URL, but that didn't work,
so I had to make this separate list.
-- Dan Harkless <wget@harkless.org>

This [adding a field to the urlpos structure] didn't work
because convert_file() is called from convert_all_links at
the end of the retrieval with a freshly built new urlpos
list.
-- Hrvoje Niksic <hniksic@xemacs.org>
*/
string_set_add (converted_files, file);
}

@@ -472,9 +473,9 @@ static const char *
replace_attr (const char *p, int size, FILE *fp, const char *new_text)
{
bool quote_flag = false;
char quote_char = '\"'; /* use "..." for quoting, unless the
original value is quoted, in which
case reuse its quoting char. */
const char *frag_beg, *frag_end;

/* Structure of our string is:

@@ -489,7 +490,7 @@ replace_attr (const char *p, int size, FILE *fp, const char *new_text)
quote_char = *p;
quote_flag = true;
++p;
size -= 2; /* disregard opening and closing quote */
}
putc (quote_char, fp);
fputs (new_text, fp);

@@ -511,13 +512,13 @@ replace_attr (const char *p, int size, FILE *fp, const char *new_text)

static const char *
replace_attr_refresh_hack (const char *p, int size, FILE *fp,
const char *new_text, int timeout)
{
/* "0; URL=..." */
char *new_with_timeout = (char *)alloca (numdigit (timeout)
+ 6 /* "; URL=" */
+ strlen (new_text)
+ 1);
sprintf (new_with_timeout, "%d; URL=%s", timeout, new_text);

return replace_attr (p, size, fp, new_with_timeout);

@@ -538,21 +539,21 @@ find_fragment (const char *beg, int size, const char **bp, const char **ep)
for (; beg < end; beg++)
{
switch (*beg)
{
case '&':
saw_amp = true;
break;
case '#':
if (!saw_amp)
{
*bp = beg;
*ep = end;
return true;
}
/* fallthrough */
default:
saw_amp = false;
}
}
return false;
}

@@ -588,26 +589,26 @@ local_quote_string (const char *file)
switch (*from)
{
case '%':
*to++ = '%';
*to++ = '2';
*to++ = '5';
break;
case '#':
*to++ = '%';
*to++ = '2';
*to++ = '3';
break;
case '?':
if (opt.html_extension)
{
*to++ = '%';
*to++ = '3';
*to++ = 'F';
break;
}
/* fallthrough */
default:
*to++ = *from;
}
*to = '\0';

@@ -618,11 +619,11 @@ local_quote_string (const char *file)
downloaded_html_list, and downloaded_html_set. Other code calls
these functions to let us know that a file has been downloaded. */

#define ENSURE_TABLES_EXIST do { \
if (!dl_file_url_map) \
dl_file_url_map = make_string_hash_table (0); \
if (!dl_url_file_map) \
dl_url_file_map = make_string_hash_table (0); \
} while (0)

/* Return true if S1 and S2 are the same, except for "/index.html".

@@ -704,7 +705,7 @@ dissociate_urls_from_file (const char *file)
{
/* Can't use hash_table_iter_* because the table mutates while mapping. */
hash_table_for_each (dl_url_file_map, dissociate_urls_from_file_mapper,
(char *) file);
}

/* Register that URL has been successfully downloaded to FILE. This

@@ -727,29 +728,29 @@ register_download (const char *url, const char *file)
if (hash_table_get_pair (dl_file_url_map, file, &old_file, &old_url))
{
if (0 == strcmp (url, old_url))
/* We have somehow managed to download the same URL twice.
Nothing to do. */
return;

if (match_except_index (url, old_url)
&& !hash_table_contains (dl_url_file_map, url))
/* The two URLs differ only in the "index.html" ending. For
example, one is "http://www.server.com/", and the other is
"http://www.server.com/index.html". Don't remove the old
one, just add the new one as a non-canonical entry. */
goto url_only;

hash_table_remove (dl_file_url_map, file);
xfree (old_file);
xfree (old_url);

/* Remove all the URLs that point to this file. Yes, there can
be more than one such URL, because we store redirections as
multiple entries in dl_url_file_map. For example, if URL1
redirects to URL2 which gets downloaded to FILE, we map both
URL1 and URL2 to FILE in dl_url_file_map. (dl_file_url_map
only points to URL2.) When another URL gets loaded to FILE,
we want both URL1 and URL2 dissociated from it.

This is a relatively expensive operation because it performs
a linear search of the whole hash table, but it should be

@@ -922,10 +923,10 @@ downloaded_file (downloaded_file_t mode, const char *file)
if (mode == CHECK_FOR_FILE)
{
if (!downloaded_files_hash)
return FILE_NOT_ALREADY_DOWNLOADED;
ptr = hash_table_get (downloaded_files_hash, file);
if (!ptr)
return FILE_NOT_ALREADY_DOWNLOADED;
return *ptr;
}

@@ -949,9 +950,9 @@ downloaded_files_free (void)
{
hash_table_iterator iter;
for (hash_table_iterate (downloaded_files_hash, &iter);
hash_table_iter_next (&iter);
)
xfree (iter.key);
hash_table_destroy (downloaded_files_hash);
downloaded_files_hash = NULL;
}

@@ -972,8 +973,8 @@ in_list (const struct broken_urls_list *list, const char *url)
for (ptr = list; ptr; ptr = ptr->next)
{
/* TODO: strcasecmp may not be appropriate to compare URLs */
if (strcasecmp (url, ptr->url) == 0) return true;
/* str[case]cmp is inadequate for URL comparison */
if (are_urls_equal (url, ptr->url) == 0) return true;
}

return false;

@@ -983,6 +984,10 @@ void
nonexisting_url (const char *url, const char *referrer)
{
struct broken_urls_list *list;

/* Ignore robots.txt URLs */
if (is_robots_txt_url (url))
return;

if (!nonexisting_urls_hash)
nonexisting_urls_hash = make_string_hash_table (0);
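In recursive spider mode wget probes robots.txt on every host it visits, so a 404 on that probe is expected and should not be reported as a broken link; the new is_robots_txt_url guard above gives exactly that behaviour. A minimal caller-side illustration, using only the functions visible in this diff (the example URLs are made up):

/* Illustration only: how the new guard changes broken-link bookkeeping.
   nonexisting_url() is the function patched above; the URLs are
   hypothetical. */
static void
spider_bookkeeping_example (void)
{
  /* Expected miss: a robots.txt probe is now silently ignored,
     because is_robots_txt_url() returns true for this URL. */
  nonexisting_url ("http://www.example.com/robots.txt",
                   "http://www.example.com/");

  /* Genuine broken link: still recorded and later reported by
     print_broken_links(). */
  nonexisting_url ("http://www.example.com/missing.html",
                   "http://www.example.com/index.html");
}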
@@ -1014,12 +1019,12 @@ nonexisting_urls_free (void)
{
hash_table_iterator iter;
for (hash_table_iterate (nonexisting_urls_hash, &iter);
hash_table_iter_next (&iter);
)
{
xfree (iter.key);
xfree (iter.value);
}
hash_table_destroy (nonexisting_urls_hash);
nonexisting_urls_hash = NULL;
}

@@ -1055,12 +1060,12 @@ print_broken_links (void)
)
{
struct broken_urls_list *list;

logprintf (LOG_NOTQUIET, _("%s referred by:\n"), (const char *)iter.key);

for (list = (struct broken_urls_list *) iter.value;
list;
list = list->next)
{
logprintf (LOG_NOTQUIET, _("  %s\n"), list->url);
}

@@ -1091,52 +1096,57 @@ html_quote_string (const char *s)
for (i = 0; *s; s++, i++)
{
if (*s == '&')
i += 4; /* `amp;' */
else if (*s == '<' || *s == '>')
i += 3; /* `lt;' and `gt;' */
else if (*s == '\"')
i += 5; /* `quot;' */
else if (*s == ' ')
i += 4; /* #32; */
}
res = xmalloc (i + 1);
s = b;
for (p = res; *s; s++)
{
switch (*s)
{
case '&':
*p++ = '&';
*p++ = 'a';
*p++ = 'm';
*p++ = 'p';
*p++ = ';';
break;
case '<': case '>':
*p++ = '&';
*p++ = (*s == '<' ? 'l' : 'g');
*p++ = 't';
*p++ = ';';
break;
case '\"':
*p++ = '&';
*p++ = 'q';
*p++ = 'u';
*p++ = 'o';
*p++ = 't';
*p++ = ';';
break;
case ' ':
*p++ = '&';
*p++ = '#';
*p++ = '3';
*p++ = '2';
*p++ = ';';
break;
default:
*p++ = *s;
}
}
*p = '\0';
return res;
}

/*
 * vim: et ts=2 sw=2
 */

src/res.c (272 changed lines)
@@ -84,6 +84,10 @@ so, delete this exception statement from your version. */
#include "retr.h"
#include "res.h"

#ifdef TESTING
#include "test.h"
#endif

struct path_info {
char *path;
bool allowedp;

@@ -104,7 +108,7 @@ struct robot_specs {

static void
match_user_agent (const char *agent, int length,
bool *matches, bool *exact_match)
{
if (length == 1 && *agent == '*')
{

@@ -128,7 +132,7 @@ match_user_agent (const char *agent, int length,

static void
add_path (struct robot_specs *specs, const char *path_b, const char *path_e,
bool allowedp, bool exactp)
{
struct path_info pp;
if (path_b < path_e && *path_b == '/')

@@ -142,11 +146,11 @@ add_path (struct robot_specs *specs, const char *path_b, const char *path_e,
if (specs->count > specs->size)
{
if (specs->size == 0)
specs->size = 1;
else
specs->size <<= 1;
specs->paths = xrealloc (specs->paths,
specs->size * sizeof (struct path_info));
}
specs->paths[specs->count - 1] = pp;
}

@@ -176,12 +180,12 @@ prune_non_exact (struct robot_specs *specs)

#define EOL(p) ((p) >= lineend)

#define SKIP_SPACE(p) do { \
while (!EOL (p) && ISSPACE (*p)) \
++p; \
} while (0)

#define FIELD_IS(string_literal) \
BOUNDED_EQUAL_NO_CASE (field_b, field_e, string_literal)

/* Parse textual RES specs beginning with SOURCE of length LENGTH.

@@ -245,113 +249,113 @@ res_parse (const char *source, int length)
const char *value_b, *value_e;

if (p == end)
break;
lineend_real = memchr (p, '\n', end - p);
if (lineend_real)
++lineend_real;
else
lineend_real = end;
lineend = lineend_real;

/* Before doing anything else, check whether the line is empty
or comment-only. */
SKIP_SPACE (p);
if (EOL (p) || *p == '#')
goto next;

/* Make sure the end-of-line comments are respected by setting
lineend to a location preceding the first comment. Real line
ending remains in lineend_real. */
for (lineend = p; lineend < lineend_real; lineend++)
if ((lineend == p || ISSPACE (*(lineend - 1)))
&& *lineend == '#')
break;

/* Ignore trailing whitespace in the same way. */
while (lineend > p && ISSPACE (*(lineend - 1)))
--lineend;

assert (!EOL (p));

field_b = p;
while (!EOL (p) && (ISALNUM (*p) || *p == '-'))
++p;
field_e = p;

SKIP_SPACE (p);
if (field_b == field_e || EOL (p) || *p != ':')
{
DEBUGP (("Ignoring malformed line %d", line_count));
goto next;
}
++p; /* skip ':' */
SKIP_SPACE (p);

value_b = p;
while (!EOL (p))
++p;
value_e = p;

/* Finally, we have a syntactically valid line. */
if (FIELD_IS ("user-agent"))
{
/* We have to support several cases:

--previous records--

User-Agent: foo
User-Agent: Wget
User-Agent: bar
... matching record ...

User-Agent: baz
User-Agent: qux
... non-matching record ...

User-Agent: *
... matching record, but will be pruned later ...

We have to respect `User-Agent' at the beginning of each
new record simply because we don't know if we're going to
encounter "Wget" among the agents or not. Hence,
match_user_agent is called when record_count != 0.

But if record_count is 0, we have to keep calling it
until it matches, and if that happens, we must not call
it any more, until the next record. Hence the other part
of the condition. */
if (record_count != 0 || user_agent_applies == false)
match_user_agent (value_b, value_e - value_b,
&user_agent_applies, &user_agent_exact);
if (user_agent_exact)
found_exact = true;
record_count = 0;
}
else if (FIELD_IS ("allow"))
{
if (user_agent_applies)
{
add_path (specs, value_b, value_e, true, user_agent_exact);
}
++record_count;
}
else if (FIELD_IS ("disallow"))
{
if (user_agent_applies)
{
bool allowed = false;
if (value_b == value_e)
/* Empty "disallow" line means everything is *allowed*! */
allowed = true;
add_path (specs, value_b, value_e, allowed, user_agent_exact);
}
++record_count;
}
else
{
DEBUGP (("Ignoring unknown field at line %d", line_count));
goto next;
}

next:
p = lineend_real;

@@ -361,15 +365,15 @@ res_parse (const char *source, int length)
if (found_exact)
{
/* We've encountered an exactly matching user-agent. Throw out
all the stuff with user-agent: *. */
prune_non_exact (specs);
}
else if (specs->size > specs->count)
{
/* add_path normally over-allocates specs->paths. Reallocate it
to the correct size in order to conserve some memory. */
specs->paths = xrealloc (specs->paths,
specs->count * sizeof (struct path_info));
specs->size = specs->count;
}

@@ -387,7 +391,7 @@ res_parse_from_file (const char *filename)
if (!fm)
{
logprintf (LOG_NOTQUIET, _("Cannot open %s: %s"),
filename, strerror (errno));
return NULL;
}
specs = res_parse (fm->content, fm->length);

@@ -411,16 +415,16 @@ free_specs (struct robot_specs *specs)
that number is not a numerical representation of '/', decode C and
advance the pointer. */

#define DECODE_MAYBE(c, ptr) do { \
if (c == '%' && ISXDIGIT (ptr[1]) && ISXDIGIT (ptr[2])) \
{ \
char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]); \
if (decoded != '/') \
{ \
c = decoded; \
ptr += 2; \
} \
} \
} while (0)

/* The inner matching engine: return true if RECORD_PATH matches

@@ -438,13 +442,13 @@ matches (const char *record_path, const char *url_path)
char rc = *rp;
char uc = *up;
if (!rc)
return true;
if (!uc)
return false;
DECODE_MAYBE(rc, rp);
DECODE_MAYBE(uc, up);
if (rc != uc)
return false;
}
}

@@ -461,11 +465,11 @@ res_match_path (const struct robot_specs *specs, const char *path)
for (i = 0; i < specs->count; i++)
if (matches (specs->paths[i].path, path))
{
bool allowedp = specs->paths[i].allowedp;
DEBUGP (("%s path %s because of rule `%s'.\n",
allowedp ? "Allowing" : "Rejecting",
path, specs->paths[i].path));
return allowedp;
}
return true;
}

@@ -475,12 +479,12 @@ res_match_path (const struct robot_specs *specs, const char *path)
static struct hash_table *registered_specs;

/* Stolen from cookies.c. */
#define SET_HOSTPORT(host, port, result) do { \
int HP_len = strlen (host); \
result = alloca (HP_len + 1 + numdigit (port) + 1); \
memcpy (result, host, HP_len); \
result[HP_len] = ':'; \
number_to_string (result + HP_len + 1, port); \
} while (0)

/* Register RES specs that below to server on HOST:PORT. They will

@@ -499,7 +503,7 @@ res_register_specs (const char *host, int port, struct robot_specs *specs)
if (hash_table_get_pair (registered_specs, hp, &hp_old, &old))
{
if (old)
free_specs (old);
hash_table_put (registered_specs, hp_old, specs);
}
else

@@ -544,14 +548,25 @@ res_retrieve_file (const char *url, char **file)
if (err != RETROK && *file != NULL)
{
/* If the file is not retrieved correctly, but retrieve_url
allocated the file name, deallocate is here so that the
caller doesn't have to worry about it. */
xfree (*file);
*file = NULL;
}
return err == RETROK;
}

bool
is_robots_txt_url (const char *url)
{
char *robots_url = uri_merge (url, RES_SPECS_LOCATION);
bool ret = are_urls_equal (url, robots_url);

xfree (robots_url);

return ret;
}

void
res_cleanup (void)
{

@@ -559,13 +574,44 @@ res_cleanup (void)
{
hash_table_iterator iter;
for (hash_table_iterate (registered_specs, &iter);
hash_table_iter_next (&iter);
)
{
xfree (iter.key);
free_specs (iter.value);
}
hash_table_destroy (registered_specs);
registered_specs = NULL;
}
}

#ifdef TESTING

const char *
test_is_robots_txt_url()
{
int i;
struct {
char *url;
bool expected_result;
} test_array[] = {
{ "http://www.yoyodyne.com/robots.txt", true },
{ "http://www.yoyodyne.com/somepath/", false },
{ "http://www.yoyodyne.com/somepath/robots.txt", false },
};

for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
{
mu_assert ("test_is_robots_txt_url: wrong result",
is_robots_txt_url (test_array[i].url) == test_array[i].expected_result);
}

return NULL;
}

#endif /* TESTING */

/*
 * vim: et ts=2 sw=2
 */

src/res.h
@@ -42,6 +42,8 @@ struct robot_specs *res_get_specs (const char *, int);

bool res_retrieve_file (const char *, char **);

bool is_robots_txt_url (const char *);

void res_cleanup (void);

#endif /* RES_H */

src/test.c
@@ -40,6 +40,8 @@ const char *test_subdir_p();
const char *test_dir_matches_p();
const char *test_cmd_spec_restrict_file_names();
const char *test_append_uri_pathel();
const char *test_are_urls_equal();
const char *test_is_robots_txt_url();

int tests_run;

@@ -51,6 +53,8 @@ all_tests()
mu_run_test (test_dir_matches_p);
mu_run_test (test_cmd_spec_restrict_file_names);
mu_run_test (test_append_uri_pathel);
mu_run_test (test_are_urls_equal);
mu_run_test (test_is_robots_txt_url);

return NULL;
}
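The body of test_are_urls_equal, registered above, lives in src/url.c and is not included in this excerpt. Following the pattern of test_is_robots_txt_url shown earlier, it could plausibly look like the sketch below; the individual URL pairs are illustrative assumptions, not the actual cases from url.c.

/* Sketch only -- the real test_are_urls_equal is in src/url.c. */
const char *
test_are_urls_equal()
{
  int i;
  struct {
    char *url1;
    char *url2;
    bool expected_result;
  } test_array[] = {
    { "http://www.adomain.com/apath/", "http://www.adomain.com/apath/", true },
    { "http://www.adomain.com/apath/", "http://www.adomain.com/anotherpath/", false },
    { "http://www.adomain.com/~me/", "http://www.adomain.com/%7Eme/", true },
  };

  for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
    {
      mu_assert ("test_are_urls_equal: wrong result",
                 are_urls_equal (test_array[i].url1, test_array[i].url2)
                 == test_array[i].expected_result);
    }

  return NULL;
}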