Now blocks over 140,000 ad domains

In addition to the extra domains, there are numerous performance
improvements so the script runs faster.  There were also some
extraneous domains that were either blank or had some special
characters that needed to be removed.

Since the ad lists are not under my control, the end of the script will
now tell you how many domains will be blocked.  In case the sources it
pulls from ever update, you can run this script again and see the
difference.
This commit is contained in:
jacobsalmela 2015-02-17 13:52:54 -06:00
parent fe4e579005
commit b7573a533a
1 changed files with 26 additions and 23 deletions

View File

@ -1,36 +1,39 @@
#!/bin/bash #!/bin/bash
# The Pi-hole now blocks over 140,000 ad domains
# Address to send ads to (the RPi) # Address to send ads to (the RPi)
piholeIP="127.0.0.1" piholeIP="127.0.0.1"
# Optionally, uncomment to automatically detect the address. Thanks Gregg
#piholeIP=$(ifconfig eth0 | awk '/inet addr/{print substr($2,6)}')
# Config file to hold URL rules # Config file to hold URL rules
eventHorizion="/etc/dnsmasq.d/adList.conf" eventHorizion="/etc/dnsmasq.d/adList.conf"
# Download the original URL to a text file for easier parsing echo "Getting yoyo ad list..." # Approximately 2452 domains at the time of writing
echo "Getting yoyo ad list..." curl -s -d mimetype=plaintext -d hostformat=unixhosts http://pgl.yoyo.org/adservers/serverlist.php? | sort > /tmp/matter.txt
curl -o /tmp/yoyo.txt -s http://pgl.yoyo.org/adservers/serverlist.php?hostformat=unixhosts&mimetype=plaintext echo "Getting winhelp2002 ad list..." # 12985 domains
sleep 10
if [ -f /tmp/yoyo.txt ];then
cat /tmp/yoyo.txt | grep -v "<" | sed '/^$/d' | sed 's/\ /\\ /g' | sort > /tmp/matter.txt
else
echo "Unable to get yoyo ad list"
fi
# Download and append other ad URLs from different sources
echo "Getting winhelp2002 ad list..."
curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s http://winhelp2002.mvps.org/hosts.txt | grep -v "#" | grep -v "127.0.0.1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt
echo "Getting adaway ad list..." echo "Getting adaway ad list..." # 445 domains
curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s https://adaway.org/hosts.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt
echo "Getting hosts-file ad list..." echo "Getting hosts-file ad list..." # 28050 domains
curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s http://hosts-file.net/.%5Cad_servers.txt | grep -v "#" | grep -v "::1" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt
echo "Getting malwaredomainlist ad list..." echo "Getting malwaredomainlist ad list..." # 1352 domains
curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s http://www.malwaredomainlist.com/hostslist/hosts.txt | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $3}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt
echo "Getting adblock.gjtech ad list..." echo "Getting adblock.gjtech ad list..." # 696 domains
curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | sort >> /tmp/matter.txt curl -s http://adblock.gjtech.net/?format=unix-hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt
echo "Getting someone who cares ad list..." echo "Getting someone who cares ad list..." # 10600
curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | awk '{print $2}' >> /tmp/matter.txt curl -s http://someonewhocares.org/hosts/hosts | grep -v "#" | sed '/^$/d' | sed 's/\ /\\ /g' | grep -v '^\\' | grep -v '\\$' | awk '{print $2}' | grep -v '^\\' | grep -v '\\$' | sort >> /tmp/matter.txt
echo "Getting Mother of All Ad Blocks list..." # 102168 domains!! Thanks Kacy
curl -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ http://adblock.mahakala.is/ | grep -v "#" | awk '{print $2}' | sort >> /tmp/matter.txt
# Sort the aggregated results and remove any duplicates # Sort the aggregated results and remove any duplicates
echo "removing duplicates and formatting to address=/<ad domain>/"$piholeIP echo "Removing duplicates and formatting to address=/<ad domain>/"$piholeIP
cat /tmp/matter.txt | sort | uniq | sed '/^$/d' | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt cat /tmp/matter.txt | sort | uniq | sed '/^$/d' | awk -v "IP=$piholeIP" '{sub(/\r$/,""); print "address=/"$0"/"IP}' > /tmp/andLight.txt
# Count how many domains were added so it can be displayed to the user
numberOfAdsBlocked=$(cat /tmp/andLight.txt | wc -l | sed 's/^[ \t]*//')
echo "$numberOfAdsBlocked ad domains added to the blacklist"
# Turn the file into a dnsmasq config file
mv /tmp/andLight.txt $eventHorizion mv /tmp/andLight.txt $eventHorizion
# Restart DNS # Restart DNS