Some improvements to dead link checking tool.

git-svn-id: file:///home/ksmith/gitmigration/svn/xmpp/trunk@3486 4b5297f7-1745-476d-ba37-a9c6900126ab
This commit is contained in:
Unknown User 2009-10-02 21:10:04 +00:00
parent b4bee07387
commit 6891c87367
1 changed files with 61 additions and 18 deletions

View File

@ -6,7 +6,7 @@
# Last Modified: 2009-04-06 # Last Modified: 2009-04-06
# Author: Tobias Markmann (tm@ayena.de) # Author: Tobias Markmann (tm@ayena.de)
# License: public domain # License: public domain
# HowTo: ./checkdeadlinks.py xepnum # HowTo: ./checkdeadlinks.py --xep=xepnum
## LICENSE ## ## LICENSE ##
# #
@ -36,31 +36,74 @@ import glob
import os import os
from select import select from select import select
import socket import socket
import getopt
from string import split,strip,join,find from string import split,strip,join,find
import sys import sys
import time import time
import re import re
import urllib import urllib
import urllib2
from xml.dom.minidom import parse,parseString,Document from xml.dom.minidom import parse,parseString,Document
def usage():
    """Print the command-line help for checkdeadlinks.py to standard output."""
    # One entry per output line; the empty string produces the blank
    # separator line after the program name.
    help_lines = (
        "checkdeadlinks.py",
        "",
        "-h, --help Print this help message",
        "-x, --xep [number] Defines the number of the XEP to check",
        "-v, --verbose Enables more verbosity",
    )
    for line in help_lines:
        # Single-argument print(...) behaves the same as the print statement
        # under Python 2, which the rest of this script targets.
        print(line)
def main(argv):
    """Check the http/https links of one XEP and report the dead ones.

    argv is the argument list without the program name (sys.argv[1:]).
    Recognised options:
      -h, --help         print the help text and exit with status 0
      -x, --xep NUMBER   number of the XEP to check (required)
      -v, --verbose      print one status line per checked link

    The file 'xep-<NUMBER>.xml' is parsed, and every <link> element whose
    "url" attribute starts with http/https is requested. A link counts as
    dead when opening or reading it raises an exception.

    Returns the number of dead links found. Exits with status 2 when the
    options cannot be parsed or no XEP number was given.
    """
    # Both stay module globals, as before, so they remain readable from
    # module scope after main() has run.
    global verbose, xepnum
    verbose = 0
    xepnum = None

    try:
        # "x:" means -x takes a value; -h and -v are plain flags.
        opts, args = getopt.gnu_getopt(argv, "hvx:", ["help", "verbose", "xep="])
    except getopt.GetoptError:
        usage()
        sys.exit(2)

    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-x", "--xep"):
            xepnum = arg
        elif opt in ("-v", "--verbose"):
            verbose = 1

    # Without a XEP number there is no file to check; explain instead of
    # failing later with a NameError/TypeError.
    if not xepnum:
        usage()
        sys.exit(2)

    xepfile = 'xep-' + xepnum + '.xml'
    thexep = parse(xepfile)
    links = thexep.getElementsByTagName("link")
    deadlinks = 0

    if verbose:
        print('Checking XEP-' + xepnum + ':')

    for link in links:
        url = link.getAttribute("url")
        if not re.match("^(http|https)", url):
            # Only web links are checked.
            continue

        if verbose:
            # No newline: the OK/DEAD verdict is appended to the same line.
            sys.stdout.write(url + ' : ')

        try:
            request = urllib2.Request(url)
            # Send a browser-style User-Agent header with the request.
            request.add_header('User-Agent', "Mozilla/5.001 (windows; U; NT4.0; en-US; rv:1.0) Gecko/25250101")
            response = urllib2.build_opener().open(request)
            try:
                # Read the body so a failure while downloading also counts.
                response.read()
            finally:
                response.close()
        except Exception as e:
            # Deliberately broad: any failure to fetch the URL means the
            # link is dead, and the reason is reported rather than hidden.
            reason = str(e)
            if verbose:
                print("DEAD")
            else:
                print("XEP-" + xepnum + " - DEAD: " + url + " [" + reason + "]")
            deadlinks = deadlinks + 1
        else:
            if verbose:
                print('OK')

    return deadlinks
if __name__ == "__main__":
    # Run the checker only when executed as a script; the program name is
    # stripped so main() receives just the options.
    main(sys.argv[1:])