mirror of
https://github.com/moparisthebest/curl
synced 2024-12-21 23:58:49 -05:00
Added -i to allow ignore-patterns to get added
This commit is contained in:
parent
880208c5b2
commit
0d12c56738
@ -9,10 +9,14 @@
|
|||||||
# Written to use 'curl' for URL checking.
|
# Written to use 'curl' for URL checking.
|
||||||
#
|
#
|
||||||
# Author: Daniel Stenberg <daniel@haxx.se>
|
# Author: Daniel Stenberg <daniel@haxx.se>
|
||||||
# Version: 0.2 Dec 19, 2000
|
# Version: 0.3 Jan 3, 2001
|
||||||
#
|
#
|
||||||
# HISTORY
|
# HISTORY
|
||||||
#
|
#
|
||||||
|
# 0.3 - The -i option now adds regexes; if a full URL link matches one of them,
|
||||||
|
# it is not followed. This can then be used to prevent this script from
|
||||||
|
# following '.*\.cgi', specific pages or whatever.
|
||||||
|
#
|
||||||
# 0.2 - Made it only HEAD non html files (i.e skip the GET). Makes it a lot
|
# 0.2 - Made it only HEAD non html files (i.e skip the GET). Makes it a lot
|
||||||
# faster to skip large non HTML files such as pdfs or big RFCs! ;-)
|
# faster to skip large non HTML files such as pdfs or big RFCs! ;-)
|
||||||
# Added a -c option that allows me to pass options to curl.
|
# Added a -c option that allows me to pass options to curl.
|
||||||
@ -32,6 +36,8 @@ my $help;
|
|||||||
my $external;
|
my $external;
|
||||||
my $curlopts;
|
my $curlopts;
|
||||||
|
|
||||||
|
my @ignorelist;
|
||||||
|
|
||||||
argv:
|
argv:
|
||||||
if($ARGV[0] eq "-v" ) {
|
if($ARGV[0] eq "-v" ) {
|
||||||
$verbose++;
|
$verbose++;
|
||||||
@ -44,6 +50,12 @@ elsif($ARGV[0] eq "-c" ) {
|
|||||||
shift @ARGV;
|
shift @ARGV;
|
||||||
goto argv;
|
goto argv;
|
||||||
}
|
}
|
||||||
|
elsif($ARGV[0] eq "-i" ) {
|
||||||
|
push @ignorelist, $ARGV[1];
|
||||||
|
shift @ARGV;
|
||||||
|
shift @ARGV;
|
||||||
|
goto argv;
|
||||||
|
}
|
||||||
elsif($ARGV[0] eq "-l" ) {
|
elsif($ARGV[0] eq "-l" ) {
|
||||||
$linenumber = 1;
|
$linenumber = 1;
|
||||||
shift @ARGV;
|
shift @ARGV;
|
||||||
@ -72,10 +84,12 @@ $rooturls{$ARGV[0]}=1;
|
|||||||
if(($geturl eq "") || $help) {
|
if(($geturl eq "") || $help) {
|
||||||
print "Usage: $0 [-hilvx] <full URL>\n",
|
print "Usage: $0 [-hilvx] <full URL>\n",
|
||||||
" Use a traling slash for directory URLs!\n",
|
" Use a traling slash for directory URLs!\n",
|
||||||
" -h This help text\n",
|
" -c [data] Pass [data] as argument to every curl invoke\n",
|
||||||
" -l Line number report for BAD links\n",
|
" -h This help text\n",
|
||||||
" -v Verbose mode\n",
|
" -i [regex] Ignore root links that match this pattern\n",
|
||||||
" -x Check non-local (external?) links only\n";
|
" -l Line number report for BAD links\n",
|
||||||
|
" -v Verbose mode\n",
|
||||||
|
" -x Check non-local (external?) links only\n";
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -303,9 +317,6 @@ while(1) {
|
|||||||
if($geturl == -1) {
|
if($geturl == -1) {
|
||||||
last;
|
last;
|
||||||
}
|
}
|
||||||
if($verbose) {
|
|
||||||
print "ROOT: $geturl\n";
|
|
||||||
}
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Splits the URL in its different parts
|
# Splits the URL in its different parts
|
||||||
@ -332,6 +343,8 @@ while(1) {
|
|||||||
next;
|
next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
print " ==== $geturl ====\n";
|
||||||
|
|
||||||
if($verbose == 2) {
|
if($verbose == 2) {
|
||||||
printf("Error code $error, Content-Type: $ctype, got %d bytes\n",
|
printf("Error code $error, Content-Type: $ctype, got %d bytes\n",
|
||||||
length($in));
|
length($in));
|
||||||
@ -405,8 +418,17 @@ while(1) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
# the link works, add it!
|
# the link works, add it if it isn't in the ignore list
|
||||||
$rooturls{$link}++; # check this if not checked already
|
my $ignore=0;
|
||||||
|
for(@ignorelist) {
|
||||||
|
if($link =~ /$_/) {
|
||||||
|
$ignore=1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(!$ignore) {
|
||||||
|
# not ignored, add
|
||||||
|
$rooturls{$link}++; # check this if not checked already
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user