1
0
mirror of https://github.com/moparisthebest/pi-hole synced 2024-11-25 02:32:15 -05:00

Merge pull request #1 from dschaper/hawson-reduce-memory

Upstream Branch pseudo-rebase
This commit is contained in:
Dan Schaper 2015-11-05 18:11:43 -08:00
commit deb6e76e08

View File

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
# http://pi-hole.net # http://pi-hole.net
# Compiles a list of ad-serving domains by downloading them from multiple sources # Compiles a list of ad-serving domains by downloading them from multiple sources
# This script should only be run after you have a static IP address set on the Pi # This script should only be run after you have a static IP address set on the Pi
piholeIP=$(hostname -I) piholeIP=$(hostname -I)
@ -8,7 +8,7 @@ piholeIP=$(hostname -I)
# Ad-list sources--one per line in single quotes # Ad-list sources--one per line in single quotes
sources=('https://adaway.org/hosts.txt' sources=('https://adaway.org/hosts.txt'
'http://adblock.gjtech.net/?format=unix-hosts' 'http://adblock.gjtech.net/?format=unix-hosts'
#'http://adblock.mahakala.is/' 'http://adblock.mahakala.is/'
'http://hosts-file.net/.%5Cad_servers.txt' 'http://hosts-file.net/.%5Cad_servers.txt'
'http://www.malwaredomainlist.com/hostslist/hosts.txt' 'http://www.malwaredomainlist.com/hostslist/hosts.txt'
'http://pgl.yoyo.org/adservers/serverlist.php?' 'http://pgl.yoyo.org/adservers/serverlist.php?'
@ -19,6 +19,9 @@ sources=('https://adaway.org/hosts.txt'
adList=/etc/pihole/gravity.list adList=/etc/pihole/gravity.list
origin=/etc/pihole origin=/etc/pihole
piholeDir=/etc/pihole piholeDir=/etc/pihole
if [[ -f $piholeDir/pihole.conf ]];then
. $piholeDir/pihole.conf
fi
justDomainsExtension=domains justDomainsExtension=domains
matter=pihole.0.matter.txt matter=pihole.0.matter.txt
andLight=pihole.1.andLight.txt andLight=pihole.1.andLight.txt
@ -27,16 +30,10 @@ eventHorizon=pihole.3.eventHorizon.txt
accretionDisc=pihole.4.accretionDisc.txt accretionDisc=pihole.4.accretionDisc.txt
eyeOfTheNeedle=pihole.5.wormhole.txt eyeOfTheNeedle=pihole.5.wormhole.txt
blacklist=$piholeDir/blacklist.txt blacklist=$piholeDir/blacklist.txt
latentBlacklist=$origin/latentBlacklist.txt
whitelist=$piholeDir/whitelist.txt whitelist=$piholeDir/whitelist.txt
latentWhitelist=$origin/latentWhitelist.txt latentWhitelist=$origin/latentWhitelist.txt
# After setting defaults, check if there's local overrides
if [[ -r $piholeDir/pihole.conf ]];then
echo "** Local calibration requested..."
. $piholeDir/pihole.conf
fi
echo "** Neutrino emissions detected..." echo "** Neutrino emissions detected..."
# Create the pihole resource directory if it doesn't exist. Future files will be stored here # Create the pihole resource directory if it doesn't exist. Future files will be stored here
@ -57,12 +54,8 @@ function createSwapFile()
sudo dphys-swapfile setup sudo dphys-swapfile setup
sudo dphys-swapfile swapon sudo dphys-swapfile swapon
} }
if [[ -f /etc/dphys-swapfile ]];then
if [[ -n "$noSwap" ]]; then
# if $noSwap is set, don't do anything
:
elif [[ -f /etc/dphys-swapfile ]];then
swapSize=$(cat /etc/dphys-swapfile | grep -m1 CONF_SWAPSIZE | cut -d'=' -f2) swapSize=$(cat /etc/dphys-swapfile | grep -m1 CONF_SWAPSIZE | cut -d'=' -f2)
if [[ $swapSize != 500 ]];then if [[ $swapSize != 500 ]];then
mv /etc/dphys-swapfile /etc/dphys-swapfile.orig mv /etc/dphys-swapfile /etc/dphys-swapfile.orig
@ -82,31 +75,55 @@ do
url=${sources[$i]} url=${sources[$i]}
# Get just the domain from the URL # Get just the domain from the URL
domain=$(echo "$url" | cut -d'/' -f3) domain=$(echo "$url" | cut -d'/' -f3)
# Save the file as list.#.domain # Save the file as list.#.domain
saveLocation=$origin/list.$i.$domain.$justDomainsExtension saveLocation=$origin/list.$i.$domain.$justDomainsExtension
echo -n "Getting $domain list... " agent="Mozilla/10.0"
# Use a case statement to download lists that need special cURL commands to complete properly
echo -n "Getting $domain list... "
# Use a case statement to download lists that need special cURL commands
# to complete properly and reset the user agent when required
case "$domain" in case "$domain" in
"adblock.mahakala.is") data=$(curl -s -A 'Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0' -e http://forum.xda-developers.com/ -z $saveLocation $url);; "adblock.mahakala.is")
agent='Mozilla/5.0 (X11; Linux x86_64; rv:30.0) Gecko/20100101 Firefox/30.0'
cmd="curl -e http://forum.xda-developers.com/"
;;
"pgl.yoyo.org")
cmd="curl -d mimetype=plaintext -d hostformat=hosts"
;;
"pgl.yoyo.org") data=$(curl -s -d mimetype=plaintext -d hostformat=hosts -z $saveLocation $url);; # Default is a simple curl request
*) cmd="curl"
*) data=$(curl -s -z $saveLocation -A "Mozilla/10.0" $url);;
esac esac
if [[ -n "$data" ]];then # tmp file, so we don't have to store the (long!) lists in RAM
tmpfile=`mktemp`
timeCheck=""
if [ -r $saveLocation ]; then
timeCheck="-z $saveLocation"
fi
CMD="$cmd -s $timeCheck -A '$agent' $url > $tmpfile"
echo "running [$CMD]"
$cmd -s $timeCheck -A "$agent" $url > $tmpfile
if [[ -s "$tmpfile" ]];then
# Remove comments and print only the domain name # Remove comments and print only the domain name
# Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous # Most of the lists downloaded are already in hosts file format but the spacing/formatting is not contiguous
# This helps with that and makes it easier to read # This helps with that and makes it easier to read
# It also helps with debugging so each stage of the script can be researched more in depth # It also helps with debugging so each stage of the script can be researched more in depth
echo "$data" | awk 'NF {if ($1 !~ "#") { if (NF>1) {print $2} else {print $1}}}' | \ awk '($1 !~ /^#/) { if (NF>1) {print $2} else {print $1}}' $tmpfile | \
sed -e 's/^[. \t]*//' -e 's/\.\.\+/./g' -e 's/[. \t]*$//' | grep "\." > $saveLocation sed -nr -e 's/\.{2,}/./g' -e '/\./p' > $saveLocation
echo "Done." echo "Done."
else else
echo "Skipping list because it does not have any new entries." echo "Skipping list because it does not have any new entries."
fi fi
# cleanup
rm -f $tmpfile
done done
# Find all files with the .domains extension and compile them into one file and remove CRs # Find all files with the .domains extension and compile them into one file and remove CRs
@ -114,39 +131,43 @@ echo "** Aggregating list of domains..."
find $origin/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\r' > $origin/$matter find $origin/ -type f -name "*.$justDomainsExtension" -exec cat {} \; | tr -d '\r' > $origin/$matter
# Append blacklist entries if they exist # Append blacklist entries if they exist
if [[ -f $blacklist ]];then if [[ -r $blacklist ]];then
numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l) numberOf=$(cat $blacklist | sed '/^\s*$/d' | wc -l)
echo "** Blacklisting $numberOf domain(s)..." echo "** Blacklisting $numberOf domain(s)..."
cat $blacklist >> $origin/$matter cat $blacklist >> $origin/$matter
else
:
fi fi
function gravity_advanced()
########################### ###########################
{ function gravity_advanced() {
numberOf=$(cat $origin/$andLight | sed '/^\s*$/d' | wc -l)
echo "** $numberOf domains being pulled in by gravity..." numberOf=$(wc -l $origin/$andLight)
echo "** $numberOf domains being pulled in by gravity..."
# Remove carriage returns and preceding whitespace # Remove carriage returns and preceding whitespace
cat $origin/$andLight | sed $'s/\r$//' | sed '/^\s*$/d' > $origin/$supernova # not really needed anymore?
cp $origin/$andLight $origin/$supernova
# Sort and remove duplicates # Sort and remove duplicates
cat $origin/$supernova | sort | uniq > $origin/$eventHorizon sort -u $origin/$supernova > $origin/$eventHorizon
numberOf=$(cat $origin/$eventHorizon | sed '/^\s*$/d' | wc -l) numberOf=$(wc -l $origin/$eventHorizon)
echo "** $numberOf unique domains trapped in the event horizon." echo "** $numberOf unique domains trapped in the event horizon."
# Format domain list as "192.168.x.x domain.com" # Format domain list as "192.168.x.x domain.com"
echo "** Formatting domains into a HOSTS file..." echo "** Formatting domains into a HOSTS file..."
cat $origin/$eventHorizon | awk '{sub(/\r$/,""); print "'"$piholeIP"' " $0}' > $origin/$accretionDisc awk '{print "'"$piholeIP"'" $1}' $origin/$eventHorizon > $origin/$accretionDisc
# Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it # Copy the file over as /etc/pihole/gravity.list so dnsmasq can use it
sudo cp $origin/$accretionDisc $adList sudo cp $origin/$accretionDisc $adList
kill -HUP $(pidof dnsmasq) kill -HUP $(pidof dnsmasq)
} }
# Whitelist (if applicable) then remove duplicates and format for dnsmasq # Whitelist (if applicable) then remove duplicates and format for dnsmasq
if [[ -f $whitelist ]];then if [[ -r $whitelist ]];then
# Remove whitelist entries # Remove whitelist entries
numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l) numberOf=$(cat $whitelist | sed '/^\s*$/d' | wc -l)
plural=; [[ "$numberOf" != "1" ]] && plural=s plural=; [[ "$numberOf" != "1" ]] && plural=s
echo "** Whitelisting $numberOf domain${plural}..." echo "** Whitelisting $numberOf domain${plural}..."
# Append a "$" to the end, prepend a "^" to the beginning, and # Append a "$" to the end, prepend a "^" to the beginning, and
# replace "." with "\." of each line to turn each entry into a # replace "." with "\." of each line to turn each entry into a
# regexp so it can be parsed out with grep -x # regexp so it can be parsed out with grep -x
@ -163,6 +184,7 @@ do
echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist echo "$url" | awk -F '/' '{print "^"$3"$"}' | sed 's/\./\\./g' >> $latentWhitelist
done done
# Remove whitelist entries from deduped list
grep -vxf $latentWhitelist $origin/$matter > $origin/$andLight grep -vxf $latentWhitelist $origin/$matter > $origin/$andLight
gravity_advanced gravity_advanced