Created a context manager wrapper for BeautifulSoup4 so that we can clean up/clear tags/context on exit via with statements.
Fixed issues with torrent providers returning no results.
This commit is contained in:
parent 77feb5a74c
commit a317ff61c2

sickbeard/bs4_parser.py (new file, 13 lines)
@@ -0,0 +1,13 @@
+import sickbeard
+from bs4 import BeautifulSoup
+
+class BS4Parser:
+    def __init__(self, *args, **kwargs):
+        self.soup = BeautifulSoup(*args, **kwargs)
+
+    def __enter__(self):
+        return self.soup
+
+    def __exit__(self, exc_ty, exc_val, tb):
+        self.soup.clear(True)
+        self.soup = None
sickbeard/helpers.py

@@ -31,7 +31,6 @@ import httplib
 import urlparse
 import uuid
 import base64
-import string
 import zipfile
 
 from lib import requests
@@ -1241,7 +1240,7 @@ def mapIndexersToShow(showObj):
     return mapped
 
 
-def touchFile(self, fname, atime=None):
+def touchFile(fname, atime=None):
     if None != atime:
         try:
             with file(fname, 'a'):
sickbeard/providers/bitsoup.py

@@ -22,7 +22,7 @@ import datetime
 import urlparse
 import sickbeard
 import generic
-from sickbeard.common import Quality, cpu_presets
+from sickbeard.common import Quality
 from sickbeard import logger
 from sickbeard import tvcache
 from sickbeard import db
@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -168,48 +168,45 @@ class BitSoupProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, "html.parser")
+                    with BS4Parser(data, "html.parser") as html:
                         torrent_table = html.find('table', attrs={'class': 'koptekst'})
                         torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    html.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrent_rows) < 2:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         for result in torrent_rows[1:]:
                             cells = result.find_all('td')
 
                             link = cells[1].find('a')
                             download_url = self.urls['download'] % cells[3].find('a')['href']
 
                             id = link['href']
                             id = id.replace('details.php?id=','')
                             id = id.replace('&hit=1', '')
 
                             try:
                                 title = link.getText()
                                 id = int(id)
                                 seeders = int(cells[9].getText())
                                 leechers = int(cells[10].getText())
                             except (AttributeError, TypeError):
                                 continue
 
                             #Filter unseeded torrent
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if not title or not download_url:
                                 continue
 
                             item = title, download_url, id, seeders, leechers
                             logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
 
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
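Every provider change below is the same mechanical transformation; schematically (abbreviated sketch, not literal code from any one file):

    # Before: manual cleanup that each code path had to remember
    html = BeautifulSoup(data, "html.parser")
    torrent_rows = html.find_all('tr')
    html.clear(True)

    # After: the parsing body moves under a with-block, and cleanup runs
    # in BS4Parser.__exit__ even if a find() call raises or a 'continue'
    # leaves the block early
    with BS4Parser(data, "html.parser") as html:
        torrent_rows = html.find_all('tr')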
sickbeard/providers/freshontv.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -175,7 +175,6 @@ class FreshOnTVProvider(generic.TorrentProvider):
         if not self._doLogin():
             return []
 
-
         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -193,55 +192,52 @@ class FreshOnTVProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                         torrent_table = html.find('table', attrs={'class': 'frame'})
                         torrent_rows = torrent_table.findChildren('tr') if torrent_table else []
 
-                    html.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrent_rows) < 2:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         # skip colheader
                         for result in torrent_rows[1:]:
                             cells = result.findChildren('td')
 
                             link = cells[1].find('a', attrs = {'class': 'torrent_name_link'})
                             #skip if torrent has been nuked due to poor quality
                             if cells[1].find('img', alt='Nuked') != None:
                                 continue
 
                             torrent_id = link['href'].replace('/details.php?id=', '')
 
                             try:
                                 if link.has_key('title'):
                                     title = cells[1].find('a', {'class': 'torrent_name_link'})['title']
                                 else:
                                     title = link.contents[0]
                                 download_url = self.urls['download'] % (torrent_id)
                                 id = int(torrent_id)
 
                                 seeders = int(cells[8].find('a', {'class': 'link'}).span.contents[0].strip())
                                 leechers = int(cells[9].find('a', {'class': 'link'}).contents[0].strip())
                             except (AttributeError, TypeError):
                                 continue
 
                             #Filter unseeded torrent
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if not title or not download_url:
                                 continue
 
                             item = title, download_url, id, seeders, leechers
                             logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
 
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
sickbeard/providers/hdtorrents.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -196,64 +196,22 @@ class HDTorrentsProvider(generic.TorrentProvider):
                     data = split_data[2]
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                         #Get first entry in table
                         entries = html.find_all('td', attrs={'align': 'center'})
 
-                    html.clear(True)
-
                         if not entries:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         try:
                             title = entries[22].find('a')['title'].strip('History - ').replace('Blu-ray', 'bd50')
                             url = self.urls['home'] % entries[15].find('a')['href']
                             download_url = self.urls['home'] % entries[15].find('a')['href']
                             id = entries[23].find('div')['id']
                             seeders = int(entries[20].get_text())
                             leechers = int(entries[21].get_text())
                         except (AttributeError, TypeError):
                             continue
@@ -268,6 +226,45 @@ class HDTorrentsProvider(generic.TorrentProvider):
 
                         items[mode].append(item)
 
+                        #Now attempt to get any others
+                        result_table = html.find('table', attrs={'class': 'mainblockcontenttt'})
+
+                        if not result_table:
+                            continue
+
+                        entries = result_table.find_all('td', attrs={'align': 'center', 'class': 'listas'})
+
+                        if not entries:
+                            continue
+
+                        for result in entries:
+                            block2 = result.find_parent('tr').find_next_sibling('tr')
+                            if not block2:
+                                continue
+                            cells = block2.find_all('td')
+
+                            try:
+                                title = cells[1].find('b').get_text().strip('\t ').replace('Blu-ray', 'bd50')
+                                url = self.urls['home'] % cells[4].find('a')['href']
+                                download_url = self.urls['home'] % cells[4].find('a')['href']
+                                detail = cells[1].find('a')['href']
+                                id = detail.replace('details.php?id=', '')
+                                seeders = int(cells[9].get_text())
+                                leechers = int(cells[10].get_text())
+                            except (AttributeError, TypeError):
+                                continue
+
+                            if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
+                                continue
+
+                            if not title or not download_url:
+                                continue
+
+                            item = title, download_url, id, seeders, leechers
+                            logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
+
+                            items[mode].append(item)
+
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
sickbeard/providers/iptorrents.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
 from sickbeard.show_name_helpers import allPossibleShowNames
@@ -167,51 +167,48 @@ class IPTorrentsProvider(generic.TorrentProvider):
                         continue
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                         if not html:
                             logger.log(u"Invalid HTML data: " + str(data), logger.DEBUG)
                             continue
 
                         if html.find(text='No Torrents Found!'):
                             logger.log(u"No results found for: " + search_string + " (" + searchURL + ")", logger.DEBUG)
                             continue
 
                         torrent_table = html.find('table', attrs={'class': 'torrents'})
                         torrents = torrent_table.find_all('tr') if torrent_table else []
 
-                    html.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrents) < 2:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.WARNING)
                             continue
 
                         for result in torrents[1:]:
 
                             try:
                                 torrent = result.find_all('td')[1].find('a')
                                 torrent_name = torrent.string
                                 torrent_download_url = self.urls['base_url'] + (result.find_all('td')[3].find('a'))['href']
                                 torrent_details_url = self.urls['base_url'] + torrent['href']
                                 torrent_seeders = int(result.find('td', attrs={'class': 'ac t_seeders'}).string)
                                 ## Not used, perhaps in the future ##
                                 #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
                                 #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
                             except (AttributeError, TypeError):
                                 continue
 
                             # Filter unseeded torrent and torrents with no name/url
                             if mode != 'RSS' and torrent_seeders == 0:
                                 continue
 
                             if not torrent_name or not torrent_download_url:
                                 continue
 
                             item = torrent_name, torrent_download_url
                             logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")", logger.DEBUG)
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
sickbeard/providers/kat.py

@@ -40,11 +40,9 @@ from sickbeard.show_name_helpers import allPossibleShowNames, sanitizeSceneName
 from sickbeard.exceptions import ex
 from sickbeard import encodingKludge as ek
 from sickbeard import clients
-from sickbeard import tv
+from sickbeard.bs4_parser import BS4Parser
 
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
 from lib.unidecode import unidecode
 
 
@@ -119,55 +117,52 @@ class KATProvider(generic.TorrentProvider):
                 return None
 
         try:
-            soup = BeautifulSoup(data, features=["html5lib", "permissive"])
-            file_table = soup.find('table', attrs={'class': 'torrentFileList'})
-
-            # cleanup memory
-            soup.clear(True)
+            with BS4Parser(data, features=["html5lib", "permissive"]) as soup:
+                file_table = soup.find('table', attrs={'class': 'torrentFileList'})
 
                 if not file_table:
                     return None
 
                 files = [x.text for x in file_table.find_all('td', attrs={'class': 'torFileName'})]
                 videoFiles = filter(lambda x: x.rpartition(".")[2].lower() in mediaExtensions, files)
 
                 #Filtering SingleEpisode/MultiSeason Torrent
                 if len(videoFiles) < ep_number or len(videoFiles) > float(ep_number * 1.1):
                     logger.log(u"Result " + title + " have " + str(
                         ep_number) + " episode and episodes retrived in torrent are " + str(len(videoFiles)), logger.DEBUG)
                     logger.log(
                         u"Result " + title + " Seem to be a Single Episode or MultiSeason torrent, skipping result...",
                         logger.DEBUG)
                     return None
 
                 if Quality.sceneQuality(title) != Quality.UNKNOWN:
                     return title
 
                 for fileName in videoFiles:
                     quality = Quality.sceneQuality(os.path.basename(fileName))
                     if quality != Quality.UNKNOWN: break
 
                 if fileName is not None and quality == Quality.UNKNOWN:
                     quality = Quality.assumeQuality(os.path.basename(fileName))
 
                 if quality == Quality.UNKNOWN:
                     logger.log(u"Unable to obtain a Season Quality for " + title, logger.DEBUG)
                     return None
 
                 try:
                     myParser = NameParser(showObj=self.show)
                     parse_result = myParser.parse(fileName)
                 except (InvalidNameException, InvalidShowException):
                     return None
 
                 logger.log(u"Season quality for " + title + " is " + Quality.qualityStrings[quality], logger.DEBUG)
 
                 if parse_result.series_name and parse_result.season_number:
                     title = parse_result.series_name + ' S%02d' % int(
                         parse_result.season_number) + ' ' + self._reverseQuality(quality)
 
                 return title
 
         except Exception, e:
             logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
@@ -230,6 +225,7 @@ class KATProvider(generic.TorrentProvider):
         results = []
         items = {'Season': [], 'Episode': [], 'RSS': []}
 
+        soup = None
         for mode in search_params.keys():
             for search_string in search_params[mode]:
@@ -250,54 +246,51 @@ class KATProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    soup = BeautifulSoup(html, features=["html5lib", "permissive"])
+                    with BS4Parser(html, features=["html5lib", "permissive"]) as soup:
                         torrent_table = soup.find('table', attrs={'class': 'data'})
                         torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    soup.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrent_rows) < 2:
                             logger.log(u"The data returned from " + self.name + " does not contain any torrents",
                                        logger.WARNING)
                             continue
 
                         for tr in torrent_rows[1:]:
                             try:
                                 link = urlparse.urljoin(self.url,
                                                         (tr.find('div', {'class': 'torrentname'}).find_all('a')[1])['href'])
                                 id = tr.get('id')[-7:]
                                 title = (tr.find('div', {'class': 'torrentname'}).find_all('a')[1]).text \
                                         or (tr.find('div', {'class': 'torrentname'}).find_all('a')[2]).text
                                 url = tr.find('a', 'imagnet')['href']
                                 verified = True if tr.find('a', 'iverify') else False
                                 trusted = True if tr.find('img', {'alt': 'verified'}) else False
                                 seeders = int(tr.find_all('td')[-2].text)
                                 leechers = int(tr.find_all('td')[-1].text)
                             except (AttributeError, TypeError):
                                 continue
 
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if self.confirmed and not verified:
                                 logger.log(
                                     u"KAT Provider found result " + title + " but that doesn't seem like a verified result so I'm ignoring it",
                                     logger.DEBUG)
                                 continue
 
                             #Check number video files = episode in season and find the real Quality for full season torrent analyzing files in torrent
                             if mode == 'Season' and search_mode == 'sponly':
                                 ep_number = int(epcount / len(set(allPossibleShowNames(self.show))))
                                 title = self._find_season_quality(title, link, ep_number)
 
                             if not title or not url:
                                 continue
 
                             item = title, url, id, seeders, leechers
 
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),
sickbeard/providers/nextgen.py

@@ -37,7 +37,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from sickbeard.helpers import sanitizeSceneName
@@ -118,16 +118,16 @@ class NextGenProvider(generic.TorrentProvider):
                 self.session.headers.update(
                     {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:24.0) Gecko/20130519 Firefox/24.0)'})
                 data = self.session.get(self.urls['login_page'], verify=False)
-                bs = BeautifulSoup(data.content.decode('iso-8859-1'))
+                with BS4Parser(data.content.decode('iso-8859-1')) as bs:
                     csrfraw = bs.find('form', attrs={'id': 'login'})['action']
                     output = self.session.post(self.urls['base_url'] + csrfraw, data=login_params)
 
                     if self.loginSuccess(output):
                         self.last_login_check = now
                         self.login_opener = self.session
                         return True
 
                     error = 'unknown'
             except:
                 error = traceback.format_exc()
                 self.login_opener = None
@@ -204,59 +204,58 @@ class NextGenProvider(generic.TorrentProvider):
             if data:
 
                 try:
-                    html = BeautifulSoup(data.decode('iso-8859-1'), features=["html5lib", "permissive"])
+                    with BS4Parser(data.decode('iso-8859-1'), features=["html5lib", "permissive"]) as html:
                         resultsTable = html.find('div', attrs={'id': 'torrent-table-wrapper'})
 
                         if not resultsTable:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         # Collecting entries
                         entries_std = html.find_all('div', attrs={'id': 'torrent-std'})
                         entries_sticky = html.find_all('div', attrs={'id': 'torrent-sticky'})
 
                         entries = entries_std + entries_sticky
 
                         #Xirg STANDARD TORRENTS
                         #Continue only if one Release is found
                         if len(entries) > 0:
 
                             for result in entries:
 
                                 try:
                                     torrentName = \
                                         ((result.find('div', attrs={'id': 'torrent-udgivelse2-users'})).find('a'))['title']
                                     torrentId = (
                                         ((result.find('div', attrs={'id': 'torrent-download'})).find('a'))['href']).replace(
                                         'download.php?id=', '')
                                     torrent_name = str(torrentName)
                                     torrent_download_url = (self.urls['download'] % torrentId).encode('utf8')
                                     torrent_details_url = (self.urls['detail'] % torrentId).encode('utf8')
                                     #torrent_seeders = int(result.find('div', attrs = {'id' : 'torrent-seeders'}).find('a')['class'][0])
                                     ## Not used, perhaps in the future ##
                                     #torrent_id = int(torrent['href'].replace('/details.php?id=', ''))
                                     #torrent_leechers = int(result.find('td', attrs = {'class' : 'ac t_leechers'}).string)
                                 except (AttributeError, TypeError):
                                     continue
 
                                 # Filter unseeded torrent and torrents with no name/url
                                 #if mode != 'RSS' and torrent_seeders == 0:
                                 #    continue
 
                                 if not torrent_name or not torrent_download_url:
                                     continue
 
                                 item = torrent_name, torrent_download_url
                                 logger.log(u"Found result: " + torrent_name + " (" + torrent_details_url + ")",
                                            logger.DEBUG)
                                 items[mode].append(item)
 
                         else:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.WARNING)
                             continue
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(),
sickbeard/providers/publichd.py

@@ -40,7 +40,7 @@ from sickbeard import clients
 
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 
 
@@ -150,39 +150,36 @@ class PublicHDProvider(generic.TorrentProvider):
             html = os.linesep.join([s for s in html.splitlines() if not optreg.search(s)])
 
             try:
-                html = BeautifulSoup(html, features=["html5lib", "permissive"])
+                with BS4Parser(html, features=["html5lib", "permissive"]) as html:
                     torrent_table = html.find('table', attrs={'id': 'torrbg'})
                     torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                html.clear(True)
-
                     #Continue only if one Release is found
                     if len(torrent_rows) < 2:
                         logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                    logger.DEBUG)
                         continue
 
                     for tr in torrent_rows[1:]:
 
                         try:
                             link = self.url + tr.find(href=re.compile('page=torrent-details'))['href']
                             title = tr.find(lambda x: x.has_attr('title')).text.replace('_', '.')
                             url = tr.find(href=re.compile('magnet+'))['href']
                             seeders = int(tr.find_all('td', {'class': 'header'})[4].text)
                             leechers = int(tr.find_all('td', {'class': 'header'})[5].text)
                         except (AttributeError, TypeError):
                             continue
 
                         if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                             continue
 
                         if not title or not url:
                             continue
 
                         item = title, url, link, seeders, leechers
 
                         items[mode].append(item)
 
             except Exception, e:
                 logger.log(u"Failed to parsing " + self.name + " Traceback: " + traceback.format_exc(),
sickbeard/providers/scc.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -196,62 +196,58 @@ class SCCProvider(generic.TorrentProvider):
 
             try:
                 for dataItem in data:
-                    html = BeautifulSoup(dataItem, features=["html5lib", "permissive"])
+                    with BS4Parser(dataItem, features=["html5lib", "permissive"]) as html:
                         torrent_table = html.find('table', attrs={'id': 'torrents-table'})
                         torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    html.clear(True)
-
                         #Continue only if at least one Release is found
                         if len(torrent_rows) < 2:
                             if html.title:
                                 source = self.name + " (" + html.title.string + ")"
                             else:
                                 source = self.name
                             logger.log(u"The Data returned from " + source + " does not contain any torrent", logger.DEBUG)
                             continue
 
                         for result in torrent_table.find_all('tr')[1:]:
 
                             try:
                                 link = result.find('td', attrs={'class': 'ttr_name'}).find('a')
                                 all_urls = result.find('td', attrs={'class': 'td_dl'}).find_all('a', limit=2)
                                 # Foreign section contain two links, the others one
                                 if self._isSection('Foreign', dataItem):
                                     url = all_urls[1]
                                 else:
                                     url = all_urls[0]
 
                                 title = link.string
                                 if re.search('\.\.\.', title):
-                                    details_html = BeautifulSoup(self.getURL(self.url + "/" + link['href']))
-                                    title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
-                                    details_html.clear(True)
+                                    with BS4Parser(self.getURL(self.url + "/" + link['href'])) as details_html:
+                                        title = re.search('(?<=").+(?<!")', details_html.title.string).group(0)
 
                                 download_url = self.urls['download'] % url['href']
                                 id = int(link['href'].replace('details?id=', ''))
                                 seeders = int(result.find('td', attrs={'class': 'ttr_seeders'}).string)
                                 leechers = int(result.find('td', attrs={'class': 'ttr_leechers'}).string)
                             except (AttributeError, TypeError):
                                 continue
 
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if not title or not download_url:
                                 continue
 
                             item = title, download_url, id, seeders, leechers
 
                             if self._isSection('Non-Scene', dataItem):
                                 logger.log(u"Found result: " + title + "(" + nonsceneSearchURL + ")", logger.DEBUG)
                             elif self._isSection('Foreign', dataItem):
                                 logger.log(u"Found result: " + title + "(" + foreignSearchURL + ")", logger.DEBUG)
                             else:
                                 logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
 
                             items[mode].append(item)
 
             except Exception, e:
                 logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
sickbeard/providers/torrentbytes.py

@@ -33,7 +33,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -168,51 +168,47 @@ class TorrentBytesProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data)
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                         torrent_table = html.find('table', attrs={'border': '1'})
                         torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    # cleanup memory
-                    html.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrent_rows) < 2:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         for result in torrent_rows[1:]:
                             cells = result.find_all('td')
 
                             link = cells[1].find('a', attrs={'class': 'index'})
 
                             full_id = link['href'].replace('details.php?id=', '')
                             torrent_id = full_id[:6]
 
                             try:
                                 if link.has_key('title'):
                                     title = cells[1].find('a', {'class': 'index'})['title']
                                 else:
                                     title = link.contents[0]
                                 download_url = self.urls['download'] % (torrent_id, link.contents[0])
                                 id = int(torrent_id)
                                 seeders = int(cells[8].find('span').contents[0])
                                 leechers = int(cells[9].find('span').contents[0])
                             except (AttributeError, TypeError):
                                 continue
 
                             #Filter unseeded torrent
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if not title or not download_url:
                                 continue
 
                             item = title, download_url, id, seeders, leechers
                             logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
 
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)
sickbeard/providers/torrentleech.py

@@ -34,7 +34,7 @@ from sickbeard.exceptions import ex
 from sickbeard import clients
 from lib import requests
 from lib.requests import exceptions
-from bs4 import BeautifulSoup
+from sickbeard.bs4_parser import BS4Parser
 from lib.unidecode import unidecode
 from sickbeard.helpers import sanitizeSceneName
@@ -172,44 +172,40 @@ class TorrentLeechProvider(generic.TorrentProvider):
                     continue
 
                 try:
-                    html = BeautifulSoup(data, features=["html5lib", "permissive"])
+                    with BS4Parser(data, features=["html5lib", "permissive"]) as html:
                         torrent_table = html.find('table', attrs={'id': 'torrenttable'})
                         torrent_rows = torrent_table.find_all('tr') if torrent_table else []
 
-                    # cleanup memory
-                    html.clear(True)
-
                         #Continue only if one Release is found
                         if len(torrent_rows) < 2:
                             logger.log(u"The Data returned from " + self.name + " do not contains any torrent",
                                        logger.DEBUG)
                             continue
 
                         for result in torrent_table.find_all('tr')[1:]:
 
                             try:
                                 link = result.find('td', attrs={'class': 'name'}).find('a')
                                 url = result.find('td', attrs={'class': 'quickdownload'}).find('a')
                                 title = link.string
                                 download_url = self.urls['download'] % url['href']
                                 id = int(link['href'].replace('/torrent/', ''))
                                 seeders = int(result.find('td', attrs={'class': 'seeders'}).string)
                                 leechers = int(result.find('td', attrs={'class': 'leechers'}).string)
                             except (AttributeError, TypeError):
                                 continue
 
                             #Filter unseeded torrent
                             if mode != 'RSS' and (seeders < self.minseed or leechers < self.minleech):
                                 continue
 
                             if not title or not download_url:
                                 continue
 
                             item = title, download_url, id, seeders, leechers
                             logger.log(u"Found result: " + title + "(" + searchURL + ")", logger.DEBUG)
 
                             items[mode].append(item)
 
                 except Exception, e:
                     logger.log(u"Failed parsing " + self.name + " Traceback: " + traceback.format_exc(), logger.ERROR)