Added -i to allow ignore-patterns to get added

This commit is contained in:
Daniel Stenberg 2001-01-03 08:35:16 +00:00
parent 880208c5b2
commit 0d12c56738
1 changed files with 32 additions and 10 deletions

View File

@ -9,10 +9,14 @@
# Written to use 'curl' for URL checking.
#
# Author: Daniel Stenberg <daniel@haxx.se>
# Version: 0.2 Dec 19, 2000
# Version: 0.3 Jan 3, 2001
#
# HISTORY
#
# 0.3 - The new -i option adds regexes; a link whose full URL matches any of
# them is not followed. This can be used to prevent this script from
# following '.*\.cgi', specific pages or whatever.
#
# 0.2 - Made it only HEAD non html files (i.e skip the GET). Makes it a lot
# faster to skip large non HTML files such as pdfs or big RFCs! ;-)
# Added a -c option that allows me to pass options to curl.
@ -32,6 +36,8 @@ my $help;
my $external;
my $curlopts;
# Regex patterns collected from -i options; a working link whose URL matches
# any of them is not added to %rooturls.
my @ignorelist;
argv:
if($ARGV[0] eq "-v" ) {
$verbose++;
@ -44,6 +50,12 @@ elsif($ARGV[0] eq "-c" ) {
shift @ARGV;
goto argv;
}
elsif($ARGV[0] eq "-i" ) {
# -i <regex>: remember the pattern that follows the option. It is used
# as a regex against each working link before that link is queued.
push @ignorelist, $ARGV[1];
# Drop both the option and its argument, then re-scan the remaining
# arguments from the top of the option chain.
shift @ARGV;
shift @ARGV;
goto argv;
}
elsif($ARGV[0] eq "-l" ) {
$linenumber = 1;
shift @ARGV;
@ -72,10 +84,12 @@ $rooturls{$ARGV[0]}=1;
# Nothing to fetch ($geturl is empty) or $help is set: print the usage text
# and exit without checking anything.
if(($geturl eq "") || $help) {
print "Usage: $0 [-hilvx] <full URL>\n",
" Use a traling slash for directory URLs!\n",
" -h This help text\n",
" -l Line number report for BAD links\n",
" -v Verbose mode\n",
" -x Check non-local (external?) links only\n";
" -c [data] Pass [data] as argument to every curl invoke\n",
" -h This help text\n",
" -i [regex] Ignore root links that match this pattern\n",
" -l Line number report for BAD links\n",
" -v Verbose mode\n",
" -x Check non-local (external?) links only\n";
exit;
}
@ -303,9 +317,6 @@ while(1) {
if($geturl == -1) {
last;
}
if($verbose) {
print "ROOT: $geturl\n";
}
#
# Splits the URL in its different parts
@ -332,6 +343,8 @@ while(1) {
next;
}
print " ==== $geturl ====\n";
if($verbose == 2) {
printf("Error code $error, Content-Type: $ctype, got %d bytes\n",
length($in));
@ -405,8 +418,17 @@ while(1) {
}
}
else {
# the link works, add it!
$rooturls{$link}++; # check this if not checked already
# The link works: add it to %rooturls unless its URL matches one of the
# patterns in @ignorelist (each pattern is applied as an unanchored regex,
# so it may match anywhere in the link).
my $ignore = 0;
for my $pattern (@ignorelist) {
    if($link =~ /$pattern/) {
        $ignore = 1;
        last; # one matching pattern is enough, skip the remaining ones
    }
}
if(!$ignore) {
    # not ignored, add
    $rooturls{$link}++; # check this if not checked already
}
}
}