mirror of
https://github.com/moparisthebest/SickRage
synced 2024-11-16 06:15:09 -05:00
Merge branch 'release/v3.2.0'
This commit is contained in:
commit
c86aadca1e
16
SickBeard.py
16
SickBeard.py
@ -25,6 +25,7 @@ import signal
|
||||
import sys
|
||||
import shutil
|
||||
import subprocess
|
||||
import traceback
|
||||
|
||||
if sys.version_info < (2, 6):
|
||||
print "Sorry, requires Python 2.6 or 2.7."
|
||||
@ -68,6 +69,7 @@ throwaway = datetime.datetime.strptime('20110101', '%Y%m%d')
|
||||
signal.signal(signal.SIGINT, sickbeard.sig_handler)
|
||||
signal.signal(signal.SIGTERM, sickbeard.sig_handler)
|
||||
|
||||
|
||||
class SickRage(object):
|
||||
def __init__(self):
|
||||
# system event callback for shutdown/restart
|
||||
@ -127,9 +129,6 @@ class SickRage(object):
|
||||
|
||||
try:
|
||||
locale.setlocale(locale.LC_ALL, "")
|
||||
except (locale.Error, IOError):
|
||||
pass
|
||||
try:
|
||||
sickbeard.SYS_ENCODING = locale.getpreferredencoding()
|
||||
except (locale.Error, IOError):
|
||||
pass
|
||||
@ -146,9 +145,8 @@ class SickRage(object):
|
||||
# On non-unicode builds this will raise an AttributeError, if encoding type is not valid it throws a LookupError
|
||||
sys.setdefaultencoding(sickbeard.SYS_ENCODING)
|
||||
except:
|
||||
print 'Sorry, you MUST add the SickRage folder to the PYTHONPATH environment variable'
|
||||
print 'or find another way to force Python to use ' + sickbeard.SYS_ENCODING + ' for string encoding.'
|
||||
sys.exit(1)
|
||||
sys.exit("Sorry, you MUST add the SickRage folder to the PYTHONPATH environment variable\n" +
|
||||
"or find another way to force Python to use " + sickbeard.SYS_ENCODING + " for string encoding.")
|
||||
|
||||
# Need console logging for SickBeard.py and SickBeard-console.exe
|
||||
self.consoleLogging = (not hasattr(sys, "frozen")) or (sickbeard.MY_NAME.lower().find('-console') > 0)
|
||||
@ -456,9 +454,9 @@ class SickRage(object):
|
||||
sickbeard.showList.append(curShow)
|
||||
except Exception, e:
|
||||
logger.log(
|
||||
u"There was an error creating the show in " + sqlShow["location"] + ": " + str(e).decode('utf-8',
|
||||
'replace'),
|
||||
u"There was an error creating the show in " + sqlShow["location"] + ": " + str(e).decode('utf-8'),
|
||||
logger.ERROR)
|
||||
logger.log(traceback.format_exc(), logger.DEBUG)
|
||||
|
||||
def restore(self, srcDir, dstDir):
|
||||
try:
|
||||
@ -508,7 +506,7 @@ class SickRage(object):
|
||||
popen_list = [os.path.join(sickbeard.PROG_DIR, 'updater.exe'), str(sickbeard.PID),
|
||||
sys.executable]
|
||||
else:
|
||||
logger.log(u"Unknown SB launch method, please file a bug report about this", logger.ERROR)
|
||||
logger.log(u"Unknown SR launch method, please file a bug report about this", logger.ERROR)
|
||||
popen_list = [sys.executable, os.path.join(sickbeard.PROG_DIR, 'updater.py'),
|
||||
str(sickbeard.PID),
|
||||
sys.executable,
|
||||
|
@ -38,7 +38,9 @@ addOption("Command", "SickBeard.GetRootDirs", "?cmd=sb.getrootdirs", "", "", "ac
|
||||
addList("Command", "SickBeard.PauseBacklog", "?cmd=sb.pausebacklog", "sb.pausebacklog", "", "", "action");
|
||||
addOption("Command", "SickBeard.Ping", "?cmd=sb.ping", "", "", "action");
|
||||
addOption("Command", "SickBeard.Restart", "?cmd=sb.restart", "", "", "action");
|
||||
addList("Command", "SickBeard.SearchTVDB", "?cmd=sb.searchtvdb", "sb.searchtvdb", "", "", "action");
|
||||
addList("Command", "SickBeard.SearchAllIndexers", "?cmd=sb.searchindexers", "sb.searchindexers", "", "", "action");
|
||||
addList("Command", "SickBeard.SearchTVDB", "?cmd=sb.searchtvdb&indexer=1", "sb.searchindexers", "", "", "action");
|
||||
addList("Command", "SickBeard.SearchTVRage", "?cmd=sb.searchtvrage&indexer=2", "sb.searchindexers", "", "", "action");
|
||||
addList("Command", "SickBeard.SetDefaults", "?cmd=sb.setdefaults", "sb.setdefaults", "", "", "action");
|
||||
addOption("Command", "SickBeard.Shutdown", "?cmd=sb.shutdown", "", "", "action");
|
||||
addList("Command", "Coming Episodes", "?cmd=future", "future");
|
||||
@ -140,44 +142,44 @@ addOption("show.addnew-opt", "Optional Param", "", 1);
|
||||
addList("show.addnew-opt", "No Season Folder", "&season_folder=0", "quality");
|
||||
addList("show.addnew-opt", "Use Season Folder", "&season_folder=1", "quality");
|
||||
|
||||
addOptGroup("sb.searchtvdb", "Search by Name");
|
||||
addList("sb.searchtvdb", "Lost", "&name=Lost", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "office", "&name=office", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "OffiCE", "&name=OffiCE", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "Leno", "&name=leno", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "Top Gear", "&name=Top Gear", "sb.searchtvdb-lang");
|
||||
endOptGroup("sb.searchtvdb");
|
||||
addOptGroup("sb.searchtvdb", "Search by indexerid");
|
||||
addList("sb.searchtvdb", "73739", "&indexerid=73739", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "74608", "&indexerid=74608", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "199051", "&indexerid=199051", "sb.searchtvdb-lang");
|
||||
addList("sb.searchtvdb", "123456 (invalid show)", "&indexerid=123456", "sb.searchtvdb-lang");
|
||||
endOptGroup("sb.searchtvdb");
|
||||
addOptGroup("sb.searchindexers", "Search by Name");
|
||||
addList("sb.searchindexers", "Lost", "&name=Lost", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "office", "&name=office", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "OffiCE", "&name=OffiCE", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "Leno", "&name=leno", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "Top Gear", "&name=Top Gear", "sb.searchindexers-lang");
|
||||
endOptGroup("sb.searchindexers");
|
||||
addOptGroup("sb.searchindexers", "Search by indexerid");
|
||||
addList("sb.searchindexers", "73739", "&indexerid=73739", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "74608", "&indexerid=74608", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "199051", "&indexerid=199051", "sb.searchindexers-lang");
|
||||
addList("sb.searchindexers", "123456 (invalid show)", "&indexerid=123456", "sb.searchindexers-lang");
|
||||
endOptGroup("sb.searchindexers");
|
||||
|
||||
addOption("sb.searchtvdb-lang", "Optional Param", "", 1);
|
||||
addOption("sb.searchtvdb-lang", "Chinese", "&lang=zh"); // 27
|
||||
addOption("sb.searchtvdb-lang", "Croatian", "&lang=hr"); // 31
|
||||
addOption("sb.searchtvdb-lang", "Czech", "&lang=cs"); // 28
|
||||
addOption("sb.searchtvdb-lang", "Danish", "&lang=da"); // 10
|
||||
addOption("sb.searchtvdb-lang", "Dutch", "&lang=nl"); // 13
|
||||
addOption("sb.searchtvdb-lang", "English", "&lang=en"); // 7
|
||||
addOption("sb.searchtvdb-lang", "Finnish", "&lang=fi"); // 11 -- Suomeksi
|
||||
addOption("sb.searchtvdb-lang", "French", "&lang=fr"); // 17
|
||||
addOption("sb.searchtvdb-lang", "German", "&lang=de"); // 14
|
||||
addOption("sb.searchtvdb-lang", "Greek", "&lang=el"); // 20
|
||||
addOption("sb.searchtvdb-lang", "Hebrew", "&lang=he"); // 24
|
||||
addOption("sb.searchtvdb-lang", "Hungarian", "&lang=hu"); // 19 -- Magyar
|
||||
addOption("sb.searchtvdb-lang", "Italian", "&lang=it"); // 15
|
||||
addOption("sb.searchtvdb-lang", "Japanese", "&lang=ja"); // 25
|
||||
addOption("sb.searchtvdb-lang", "Korean", "&lang=ko"); // 32
|
||||
addOption("sb.searchtvdb-lang", "Norwegian", "&lang=no"); // 9
|
||||
addOption("sb.searchtvdb-lang", "Polish", "&lang=pl"); // 18
|
||||
addOption("sb.searchtvdb-lang", "Portuguese", "&lang=pt");// 26
|
||||
addOption("sb.searchtvdb-lang", "Russian", "&lang=ru"); // 22
|
||||
addOption("sb.searchtvdb-lang", "Slovenian", "&lang=sl"); // 30
|
||||
addOption("sb.searchtvdb-lang", "Spanish", "&lang=es"); // 16
|
||||
addOption("sb.searchtvdb-lang", "Swedish", "&lang=sv"); // 8
|
||||
addOption("sb.searchtvdb-lang", "Turkish", "&lang=tr"); // 21
|
||||
addOption("sb.searchindexers-lang", "Optional Param", "", 1);
|
||||
addOption("sb.searchindexers-lang", "Chinese", "&lang=zh"); // 27
|
||||
addOption("sb.searchindexers-lang", "Croatian", "&lang=hr"); // 31
|
||||
addOption("sb.searchindexers-lang", "Czech", "&lang=cs"); // 28
|
||||
addOption("sb.searchindexers-lang", "Danish", "&lang=da"); // 10
|
||||
addOption("sb.searchindexers-lang", "Dutch", "&lang=nl"); // 13
|
||||
addOption("sb.searchindexers-lang", "English", "&lang=en"); // 7
|
||||
addOption("sb.searchindexers-lang", "Finnish", "&lang=fi"); // 11 -- Suomeksi
|
||||
addOption("sb.searchindexers-lang", "French", "&lang=fr"); // 17
|
||||
addOption("sb.searchindexers-lang", "German", "&lang=de"); // 14
|
||||
addOption("sb.searchindexers-lang", "Greek", "&lang=el"); // 20
|
||||
addOption("sb.searchindexers-lang", "Hebrew", "&lang=he"); // 24
|
||||
addOption("sb.searchindexers-lang", "Hungarian", "&lang=hu"); // 19 -- Magyar
|
||||
addOption("sb.searchindexers-lang", "Italian", "&lang=it"); // 15
|
||||
addOption("sb.searchindexers-lang", "Japanese", "&lang=ja"); // 25
|
||||
addOption("sb.searchindexers-lang", "Korean", "&lang=ko"); // 32
|
||||
addOption("sb.searchindexers-lang", "Norwegian", "&lang=no"); // 9
|
||||
addOption("sb.searchindexers-lang", "Polish", "&lang=pl"); // 18
|
||||
addOption("sb.searchindexers-lang", "Portuguese", "&lang=pt");// 26
|
||||
addOption("sb.searchindexers-lang", "Russian", "&lang=ru"); // 22
|
||||
addOption("sb.searchindexers-lang", "Slovenian", "&lang=sl"); // 30
|
||||
addOption("sb.searchindexers-lang", "Spanish", "&lang=es"); // 16
|
||||
addOption("sb.searchindexers-lang", "Swedish", "&lang=sv"); // 8
|
||||
addOption("sb.searchindexers-lang", "Turkish", "&lang=tr"); // 21
|
||||
|
||||
#for $curShow in $sortedShowList:
|
||||
addList("seasons", "$curShow.name", "&indexerid=$curShow.indexerid", "seasons-$curShow.indexerid");
|
||||
|
@ -198,19 +198,6 @@
|
||||
|
||||
<div id="summary">
|
||||
<table class="summaryTable pull-left">
|
||||
#if $show.network and $show.airs:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""# on $show.network</td></tr>
|
||||
#else if $show.network:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>$show.network</td></tr>
|
||||
#else if $show.airs:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""#</td></tr>
|
||||
#end if
|
||||
<tr><td class="showLegend">Status: </td><td>$show.status</td></tr>
|
||||
#if $showLoc[1]:
|
||||
<tr><td class="showLegend">Location: </td><td>$showLoc[0]</td></tr>
|
||||
#else:
|
||||
<tr><td class="showLegend"><span style="color: red;">Location: </span></td><td><span style="color: red;">$showLoc[0]</span> (dir is missing)</td></tr>
|
||||
#end if
|
||||
#set $anyQualities, $bestQualities = $Quality.splitQuality(int($show.quality))
|
||||
<tr><td class="showLegend">Quality: </td><td>
|
||||
#if $show.quality in $qualityPresets:
|
||||
@ -222,6 +209,21 @@
|
||||
#if $bestQualities:
|
||||
<i>Replace with:</i> <%=", ".join([Quality.qualityStrings[x] for x in sorted(bestQualities)])%>
|
||||
#end if
|
||||
#end if
|
||||
|
||||
#if $show.network and $show.airs:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""# on $show.network</td></tr>
|
||||
#else if $show.network:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>$show.network</td></tr>
|
||||
#else if $show.airs:
|
||||
<tr><td class="showLegend">Originally Airs: </td><td>>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""#</td></tr>
|
||||
#end if
|
||||
<tr><td class="showLegend">Show Status: </td><td>$show.status</td></tr>
|
||||
<tr><td class="showLegend">Default EP Status: </td><td>$statusStrings[$show.default_ep_status]</td></tr>
|
||||
#if $showLoc[1]:
|
||||
<tr><td class="showLegend">Location: </td><td>$showLoc[0]</td></tr>
|
||||
#else:
|
||||
<tr><td class="showLegend"><span style="color: red;">Location: </span></td><td><span style="color: red;">$showLoc[0]</span> (dir is missing)</td></tr>
|
||||
#end if
|
||||
<tr><td class="showLegend">Scene Name:</td><td>#if $show.exceptions then $exceptions_string else $show.name#</td></tr>
|
||||
|
||||
@ -263,14 +265,13 @@
|
||||
#end if
|
||||
</table>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="clearfix"></div>
|
||||
|
||||
<div class="pull-left" style="padding-bottom: 10px;">
|
||||
Change selected episodes to
|
||||
<div class="pull-left" >
|
||||
Change selected episodes to:</br>
|
||||
<select id="statusSelect" class="form-control form-control-inline input-sm">
|
||||
#for $curStatus in [$WANTED, $SKIPPED, $ARCHIVED, $IGNORED, $FAILED] + sorted($Quality.DOWNLOADED):
|
||||
#if $curStatus == $DOWNLOADED:
|
||||
@ -284,6 +285,8 @@
|
||||
<input class="btn btn-inline" type="button" id="changeStatus" value="Go" />
|
||||
</div>
|
||||
|
||||
</br>
|
||||
|
||||
<div class="pull-right clearfix" id="checkboxControls">
|
||||
<div style="padding-bottom: 5px;">
|
||||
<label for="wanted"><span class="wanted"><input type="checkbox" id="wanted" checked="checked" /> Wanted: <b>$epCounts[$Overview.WANTED]</b></span></label>
|
||||
@ -298,7 +301,6 @@
|
||||
<button class="btn btn-xs clearAll">Clear All</button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<br />
|
||||
|
||||
<table class="sickbeardTable display_show" cellspacing="0" border="0" cellpadding="0">
|
||||
|
@ -1,6 +1,7 @@
|
||||
#import sickbeard
|
||||
#import lib.adba as adba
|
||||
#from sickbeard import common
|
||||
#from sickbeard.common import *
|
||||
#from sickbeard import exceptions
|
||||
#from sickbeard import scene_exceptions
|
||||
#from sickbeard.blackandwhitelist import *
|
||||
@ -63,29 +64,15 @@
|
||||
|
||||
<form action="editShow" method="post">
|
||||
<input type="hidden" name="show" value="$show.indexerid" />
|
||||
<b>Location:</b> <input type="text" name="location" id="location" value="$show._location" class="form-control form-control-inline input-sm input350" /><br />
|
||||
<br />
|
||||
<b>Quality:</b>
|
||||
#set $qualities = $common.Quality.splitQuality(int($show.quality))
|
||||
#set global $anyQualities = $qualities[0]
|
||||
#set global $bestQualities = $qualities[1]
|
||||
#include $os.path.join($sickbeard.PROG_DIR, "gui/slick/interfaces/default/inc_qualityChooser.tmpl")
|
||||
<b>Location:</b></br>
|
||||
<input type="text" name="location" id="location" value="$show._location" class="form-control form-control-inline input-sm input350" /><br />
|
||||
<br />
|
||||
|
||||
#if $anyQualities + $bestQualities
|
||||
<b>Archive on first match: </b>
|
||||
<input type="checkbox" name="archive_firstmatch" #if $show.archive_firstmatch == 1 then "checked=\"checked\"" else ""# /><br>
|
||||
(check this to have the episode archived after the first best match is found from your archive quality list)
|
||||
<br />
|
||||
<br />
|
||||
#end if
|
||||
|
||||
<b>Scene Exception:</b>
|
||||
<b>Scene Exception:</b><br />
|
||||
<input type="text" id="SceneName" class="form-control form-control-inline input-sm input200">
|
||||
<input class="btn btn-inline" type="button" value="Add" id="addSceneName"><br />
|
||||
|
||||
<div id="SceneException" >
|
||||
|
||||
<div>
|
||||
<p>This will <b>affect the episode show search</b> on nzb and torrent provider.<br />
|
||||
This list overrides the original name, it doesn't append to it.<br />
|
||||
@ -108,45 +95,75 @@
|
||||
<div class="clearfix"></div>
|
||||
<br />
|
||||
|
||||
<b>Info Language:</b> <select name="indexerLang" id="indexerLangSelect" class="form-control form-control-inline input-sm"></select><br />
|
||||
Note: This will only affect the language of the retrieved metadata file contents and episode filenames.<br />
|
||||
This <b>DOES NOT</b> allow SickRage to download non-english TV episodes!<br />
|
||||
<b>Quality:</b><br />
|
||||
#set $qualities = $common.Quality.splitQuality(int($show.quality))
|
||||
#set global $anyQualities = $qualities[0]
|
||||
#set global $bestQualities = $qualities[1]
|
||||
#include $os.path.join($sickbeard.PROG_DIR, "gui/slick/interfaces/default/inc_qualityChooser.tmpl")
|
||||
<br />
|
||||
|
||||
<b>Flatten files (no folders):</b> <input type="checkbox" name="flatten_folders" #if $show.flatten_folders == 1 and not $sickbeard.NAMING_FORCE_FOLDERS then "checked=\"checked\"" else ""# #if $sickbeard.NAMING_FORCE_FOLDERS then "disabled=\"disabled\"" else ""#/><br /><br />
|
||||
<b>Paused:</b> <input type="checkbox" name="paused" #if $show.paused == 1 then "checked=\"checked\"" else ""# /><br /><br />
|
||||
<b>Subtitles:</b> <input type="checkbox" name="subtitles"#if $show.subtitles == 1 and $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""##if not $sickbeard.USE_SUBTITLES then " disabled=\"disabled\"" else ""#/><br /><br />
|
||||
<b>Default Episode Status:</b><br />
|
||||
(this will set a default status to be applied to any newly added episodes)<br />
|
||||
<select name="defaultEpStatus" id="defaultEpStatusSelect" class="form-control form-control-inline input-sm">
|
||||
#for $curStatus in [$WANTED, $SKIPPED, $ARCHIVED, $IGNORED]:
|
||||
<option value="$curStatus">$statusStrings[$curStatus]</option>
|
||||
#end for
|
||||
</select><br />
|
||||
<br />
|
||||
|
||||
<b>Info Language:</b><br />
|
||||
(this will only affect the language of the retrieved metadata file contents and episode filenames)<br />
|
||||
<select name="indexerLang" id="indexerLangSelect" class="form-control form-control-inline input-sm"></select><br />
|
||||
<br />
|
||||
|
||||
<b>Flatten files (no folders):</b> <input type="checkbox" name="flatten_folders" #if $show.flatten_folders == 1 and not $sickbeard.NAMING_FORCE_FOLDERS then "checked=\"checked\"" else ""# #if $sickbeard.NAMING_FORCE_FOLDERS then "disabled=\"disabled\"" else ""#/><br />
|
||||
<b>Paused:</b> <input type="checkbox" name="paused" #if $show.paused == 1 then "checked=\"checked\"" else ""# /><br />
|
||||
<b>Subtitles:</b> <input type="checkbox" name="subtitles"#if $show.subtitles == 1 and $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""##if not $sickbeard.USE_SUBTITLES then " disabled=\"disabled\"" else ""#/><br />
|
||||
<br/>
|
||||
|
||||
<b>Scene Numbering: </b>
|
||||
<input type="checkbox" name="scene" #if $show.scene == 1 then "checked=\"checked\"" else ""# /><br/>
|
||||
(check this if you wish to search by scene numbering, uncheck to search by indexer numbering)
|
||||
<br/><br/>
|
||||
(check this if you wish to search by scene numbering, uncheck to search by indexer numbering)<br/>
|
||||
<br/>
|
||||
|
||||
<b>Air by date: </b>
|
||||
<input type="checkbox" name="air_by_date" #if $show.air_by_date == 1 then "checked=\"checked\"" else ""# /><br />
|
||||
(check this if the show is released as Show.03.02.2010 rather than Show.S02E03)
|
||||
<br /><br />
|
||||
(check this if the show is released as Show.03.02.2010 rather than Show.S02E03)<br />
|
||||
<br />
|
||||
|
||||
<b>Sports: </b>
|
||||
<input type="checkbox" name="sports" #if $show.sports == 1 then "checked=\"checked\"" else ""# /><br />
|
||||
(check this if the show is a sporting or MMA event)
|
||||
<br /><br />
|
||||
(check this if the show is a sporting or MMA event)<br />
|
||||
<br />
|
||||
|
||||
<b>Anime: </b>
|
||||
<input type="checkbox" name="anime" #if $show.is_anime then "CHECKED" else ""#><br />
|
||||
(check this if the show is released as Show.265 rather than Show.S02E03, this show is an anime)
|
||||
<br /><br />
|
||||
(check this if the show is released as Show.265 rather than Show.S02E03, this show is an anime)<br />
|
||||
<br />
|
||||
|
||||
<b>DVD Order: </b>
|
||||
<input type="checkbox" name="dvdorder" #if $show.dvdorder == 1 then "checked=\"checked\"" else ""# /><br/>
|
||||
(check this if you wish to use the DVD order instead of the Airing order)
|
||||
<br/><br/>
|
||||
|
||||
<b>Ignored Words:</b> <input type="text" name="rls_ignore_words" id="rls_ignore_words" value="$show.rls_ignore_words" class="form-control form-control-inline input-sm input350" /><br />
|
||||
Results with any of these words in the title will be filtered out <br />
|
||||
Separate words with a comma, e.g. "word1,word2,word3"
|
||||
<br /><br />
|
||||
#if $anyQualities + $bestQualities
|
||||
<b>Archive on first match:</b>
|
||||
<input type="checkbox" name="archive_firstmatch" #if $show.archive_firstmatch == 1 then "checked=\"checked\"" else ""# /><br>
|
||||
(check this to have the episode archived after the first best match is found from your archive quality list)</br>
|
||||
<br />
|
||||
#end if
|
||||
|
||||
<b>Required Words:</b> <input type="text" name="rls_require_words" id="rls_require_words" value="$show.rls_require_words" class="form-control form-control-inline input-sm input350" /><br />
|
||||
<b>Ignored Words:</b></br>
|
||||
<input type="text" name="rls_ignore_words" id="rls_ignore_words" value="$show.rls_ignore_words" class="form-control form-control-inline input-sm input350" /><br />
|
||||
Results with any of these words in the title will be filtered out<br />
|
||||
Separate words with a comma, e.g. "word1,word2,word3"<br />
|
||||
<br />
|
||||
|
||||
<b>Required Words:</b></br>
|
||||
<input type="text" name="rls_require_words" id="rls_require_words" value="$show.rls_require_words" class="form-control form-control-inline input-sm input350" /><br />
|
||||
Results without one of these words in the title will be filtered out <br />
|
||||
Separate words with a comma, e.g. "word1,word2,word3"
|
||||
<br /><br />
|
||||
Separate words with a comma, e.g. "word1,word2,word3"<br />
|
||||
<br />
|
||||
|
||||
#if $show.is_anime:
|
||||
#from sickbeard.blackandwhitelist import *
|
||||
|
@ -280,10 +280,13 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name))
|
||||
#set $cur_downloaded = 0
|
||||
#set $cur_total = 0
|
||||
#set $download_stat_tip = ''
|
||||
#if None is not $curShow.status and re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
|
||||
#set $display_status = 'Continuing'
|
||||
#else
|
||||
#set $display_status = $curShow.status
|
||||
#set $display_status = $curShow.status
|
||||
#if None is not $display_status
|
||||
#if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
|
||||
#set $display_status = 'Continuing'
|
||||
#else if re.search(r'(?i)(?:nded)', $curShow.status)
|
||||
#set $display_status = 'Ended'
|
||||
#end if
|
||||
#end if
|
||||
|
||||
#if $curShow.indexerid in $show_stat:
|
||||
@ -604,11 +607,17 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name))
|
||||
</td>
|
||||
|
||||
<td align="center">
|
||||
#if None is not $curShow.status and re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
|
||||
Continuing
|
||||
#else:
|
||||
$curShow.status
|
||||
#set $display_status = $curShow.status
|
||||
#if None is not $display_status
|
||||
#if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
|
||||
#set $display_status = 'Continuing'
|
||||
#else if re.search(r'(?i)(?:nded)', $curShow.status)
|
||||
#set $display_status = 'Ended'
|
||||
#end if
|
||||
#end if
|
||||
|
||||
$display_status
|
||||
|
||||
</td>
|
||||
|
||||
</tr>
|
||||
|
@ -43,16 +43,6 @@
|
||||
<link rel="stylesheet" type="text/css" href="$sbRoot/css/${sickbeard.THEME_NAME}.css?$sbPID" />
|
||||
|
||||
|
||||
<style type="text/css">
|
||||
<!--
|
||||
|
||||
#if $sickbeard.NEWEST_VERSION_STRING:
|
||||
.ui-pnotify { top: 30px !important; }
|
||||
#end if
|
||||
|
||||
//-->
|
||||
</style>
|
||||
|
||||
<script type="text/javascript" src="$sbRoot/js/lib/jquery-1.8.3.min.js?$sbPID"></script>
|
||||
<script type="text/javascript" src="$sbRoot/js/lib/bootstrap.min.js?$sbPID"></script>
|
||||
<script type="text/javascript" src="$sbRoot/js/lib/bootstrap-hover-dropdown.min.js?$sbPID"></script>
|
||||
|
351
lib/ftfy/__init__.py
Normal file
351
lib/ftfy/__init__.py
Normal file
@ -0,0 +1,351 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
ftfy: fixes text for you
|
||||
|
||||
This is a module for making text less broken. See the `fix_text` function
|
||||
for more information.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
|
||||
# See the docstring for ftfy.bad_codecs to see what we're doing here.
|
||||
import ftfy.bad_codecs
|
||||
ftfy.bad_codecs.ok()
|
||||
|
||||
from ftfy import fixes
|
||||
from ftfy.fixes import fix_text_encoding
|
||||
from ftfy.compatibility import PYTHON34_OR_LATER, is_printable
|
||||
import unicodedata
|
||||
import warnings
|
||||
|
||||
|
||||
def fix_text(text,
|
||||
remove_unsafe_private_use=(not PYTHON34_OR_LATER),
|
||||
fix_entities='auto',
|
||||
remove_terminal_escapes=True,
|
||||
fix_encoding=True,
|
||||
normalization='NFKC',
|
||||
uncurl_quotes=True,
|
||||
fix_line_breaks=True,
|
||||
remove_control_chars=True,
|
||||
remove_bom=True,
|
||||
max_decode_length=2**16):
|
||||
r"""
|
||||
Given Unicode text as input, make its representation consistent and
|
||||
possibly less broken.
|
||||
|
||||
Let's start with some examples:
|
||||
|
||||
>>> print(fix_text('ünicode'))
|
||||
ünicode
|
||||
|
||||
>>> print(fix_text('Broken text… it’s flubberific!'))
|
||||
Broken text... it's flubberific!
|
||||
|
||||
>>> print(fix_text('HTML entities <3'))
|
||||
HTML entities <3
|
||||
|
||||
>>> print(fix_text('<em>HTML entities <3</em>'))
|
||||
<em>HTML entities <3</em>
|
||||
|
||||
>>> print(fix_text('\001\033[36;44mI’m blue, da ba dee da ba '
|
||||
... 'doo…\033[0m'))
|
||||
I'm blue, da ba dee da ba doo...
|
||||
|
||||
>>> # This example string starts with a byte-order mark, even if
|
||||
>>> # you can't see it on the Web.
|
||||
>>> print(fix_text('\ufeffParty like\nit’s 1999!'))
|
||||
Party like
|
||||
it's 1999!
|
||||
|
||||
>>> len(fix_text('fi' * 100000))
|
||||
200000
|
||||
|
||||
>>> len(fix_text(''))
|
||||
0
|
||||
|
||||
Based on the options you provide, ftfy applies these steps in order:
|
||||
|
||||
- If `remove_unsafe_private_use` is True, it removes a range of private-use
|
||||
characters that could trigger a Python bug. The bug is fixed in
|
||||
the most recent versions of Python, so this will default to False
|
||||
starting on Python 3.4.
|
||||
- If `fix_entities` is True, replace HTML entities with their equivalent
|
||||
characters. If it's "auto" (the default), then consider replacing HTML
|
||||
entities, but don't do so in text where you have seen a pair of actual
|
||||
angle brackets (that's probably actually HTML and you shouldn't mess
|
||||
with the entities).
|
||||
- If `remove_terminal_escapes` is True, remove sequences of bytes that are
|
||||
instructions for Unix terminals, such as the codes that make text appear
|
||||
in different colors.
|
||||
- If `fix_encoding` is True, look for common mistakes that come from
|
||||
encoding or decoding Unicode text incorrectly, and fix them if they are
|
||||
reasonably fixable. See `fix_text_encoding` for details.
|
||||
- If `normalization` is not None, apply the specified form of Unicode
|
||||
normalization, which can be one of 'NFC', 'NFKC', 'NFD', and 'NFKD'.
|
||||
The default, 'NFKC', applies the following relevant transformations:
|
||||
|
||||
- C: Combine characters and diacritics that are written using separate
|
||||
code points, such as converting "e" plus an acute accent modifier
|
||||
into "é", or converting "ka" (か) plus a dakuten into the
|
||||
single character "ga" (が).
|
||||
- K: Replace characters that are functionally equivalent with the most
|
||||
common form. For example, half-width katakana will be replaced with
|
||||
full-width versions, full-width Roman characters will be replaced with
|
||||
ASCII characters, ellipsis characters will be replaced with three
|
||||
periods, and the ligature 'fl' will be replaced with 'fl'.
|
||||
|
||||
- If `uncurl_quotes` is True, replace various curly quotation marks with
|
||||
plain-ASCII straight quotes.
|
||||
- If `fix_line_breaks` is true, convert all line breaks to Unix style
|
||||
(CRLF and CR line breaks become LF line breaks).
|
||||
- If `fix_control_characters` is true, remove all C0 control characters
|
||||
except the common useful ones: TAB, CR, LF, and FF. (CR characters
|
||||
may have already been removed by the `fix_line_breaks` step.)
|
||||
- If `remove_bom` is True, remove the Byte-Order Mark if it exists.
|
||||
- If anything was changed, repeat all the steps, so that the function is
|
||||
idempotent. "&amp;" will become "&", for example, not "&".
|
||||
|
||||
`fix_text` will work one line at a time, with the possibility that some
|
||||
lines are in different encodings. When it encounters lines longer than
|
||||
`max_decode_length`, it will not run the `fix_encoding` step, to avoid
|
||||
unbounded slowdowns.
|
||||
|
||||
If you are certain your entire text is in the same encoding (though that
|
||||
encoding is possibly flawed), and do not mind performing operations on
|
||||
the whole text at once, use `fix_text_segment`.
|
||||
"""
|
||||
if isinstance(text, bytes):
|
||||
raise UnicodeError(fixes.BYTES_ERROR_TEXT)
|
||||
|
||||
out = []
|
||||
pos = 0
|
||||
while pos < len(text):
|
||||
textbreak = text.find('\n', pos) + 1
|
||||
fix_encoding_this_time = fix_encoding
|
||||
if textbreak == 0:
|
||||
textbreak = len(text)
|
||||
if (textbreak - pos) > max_decode_length:
|
||||
fix_encoding_this_time = False
|
||||
|
||||
substring = text[pos:textbreak]
|
||||
|
||||
if fix_entities == 'auto' and '<' in substring and '>' in substring:
|
||||
# we see angle brackets together; this could be HTML
|
||||
fix_entities = False
|
||||
|
||||
out.append(
|
||||
fix_text_segment(
|
||||
substring,
|
||||
remove_unsafe_private_use=remove_unsafe_private_use,
|
||||
fix_entities=fix_entities,
|
||||
remove_terminal_escapes=remove_terminal_escapes,
|
||||
fix_encoding=fix_encoding_this_time,
|
||||
normalization=normalization,
|
||||
uncurl_quotes=uncurl_quotes,
|
||||
fix_line_breaks=fix_line_breaks,
|
||||
remove_control_chars=remove_control_chars,
|
||||
remove_bom=remove_bom
|
||||
)
|
||||
)
|
||||
pos = textbreak
|
||||
|
||||
return ''.join(out)
|
||||
|
||||
ftfy = fix_text
|
||||
|
||||
|
||||
def fix_file(input_file,
|
||||
remove_unsafe_private_use=True,
|
||||
fix_entities='auto',
|
||||
remove_terminal_escapes=True,
|
||||
fix_encoding=True,
|
||||
normalization='NFKC',
|
||||
uncurl_quotes=True,
|
||||
fix_line_breaks=True,
|
||||
remove_control_chars=True,
|
||||
remove_bom=True):
|
||||
"""
|
||||
Fix text that is found in a file.
|
||||
|
||||
If the file is being read as Unicode text, use that. If it's being read as
|
||||
bytes, then unfortunately, we have to guess what encoding it is. We'll try
|
||||
a few common encodings, but we make no promises. See the `guess_bytes`
|
||||
function for how this is done.
|
||||
|
||||
The output is a stream of fixed lines of text.
|
||||
"""
|
||||
entities = fix_entities
|
||||
for line in input_file:
|
||||
if isinstance(line, bytes):
|
||||
line, encoding = guess_bytes(line)
|
||||
if fix_entities == 'auto' and '<' in line and '>' in line:
|
||||
entities = False
|
||||
yield fix_text_segment(
|
||||
line,
|
||||
remove_unsafe_private_use=remove_unsafe_private_use,
|
||||
fix_entities=entities,
|
||||
remove_terminal_escapes=remove_terminal_escapes,
|
||||
fix_encoding=fix_encoding,
|
||||
normalization=normalization,
|
||||
uncurl_quotes=uncurl_quotes,
|
||||
fix_line_breaks=fix_line_breaks,
|
||||
remove_control_chars=remove_control_chars,
|
||||
remove_bom=remove_bom
|
||||
)
|
||||
|
||||
|
||||
def fix_text_segment(text,
                     remove_unsafe_private_use=True,
                     fix_entities='auto',
                     remove_terminal_escapes=True,
                     fix_encoding=True,
                     normalization='NFKC',
                     uncurl_quotes=True,
                     fix_line_breaks=True,
                     remove_control_chars=True,
                     remove_bom=True):
    """
    Apply the configured fixes to a single chunk of text: either one line
    within a larger run of `fix_text`, or a bigger string you are certain
    is all in the same encoding.

    Raises `UnicodeError` if given bytes instead of text. See `fix_text`
    for what each parameter means.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    # In 'auto' mode, the presence of angle brackets suggests HTML, where
    # literal ampersand sequences should be left alone.
    if fix_entities == 'auto' and '<' in text and '>' in text:
        fix_entities = False

    # The individual fixes, in the order they must run, each paired with
    # the flag that enables it.
    pipeline = [
        (remove_unsafe_private_use, fixes.remove_unsafe_private_use),
        (fix_entities, fixes.unescape_html),
        (remove_terminal_escapes, fixes.remove_terminal_escapes),
        (fix_encoding, fixes.fix_text_encoding),
        (normalization is not None,
         lambda chunk: unicodedata.normalize(normalization, chunk)),
        (uncurl_quotes, fixes.uncurl_quotes),
        (fix_line_breaks, fixes.fix_line_breaks),
        (remove_control_chars, fixes.remove_control_chars),
        (remove_bom, fixes.remove_bom),
    ]

    # Re-run the enabled fixes until the text stops changing (a fixed
    # point), since one fix can expose work for another.
    while True:
        previous = text
        for enabled, fix in pipeline:
            if enabled:
                text = fix(text)
        if text == previous:
            return text
|
||||
|
||||
|
||||
def guess_bytes(bstring):
    """
    Decode bytes of unknown encoding using a reasonable strategy: try a
    few common encodings that can be distinguished from each other.

    Returns a ``(text, encoding)`` pair. This is not a magic bullet -- it
    won't recognize arbitrary legacy charsets, and it currently doesn't
    try East Asian encodings at all. The candidates, in order:

    - UTF-16 when a byte order mark is present (a UTF-16 BOM looks like
      nothing else)
    - UTF-8, the global de facto standard
    - 'utf-8-variants', what people actually implement when they think
      they're doing UTF-8 (CESU-8, Java modified UTF-8)
    - MacRoman, recognizable by its bare-CR line breaks (no line breaks,
      no luck)
    - 'sloppy-windows-1252', the most common single-byte encoding
    """
    # A UTF-16 byte order mark, in either byte order, is unambiguous.
    if bstring[:2] in (b'\xfe\xff', b'\xff\xfe'):
        return bstring.decode('utf-16'), 'utf-16'

    # Unpacking a bytes literal yields ints on Py3, 1-char strs on Py2,
    # matching what iterating `bstring` yields in each version.
    byte_ed, byte_c0, byte_cr, byte_lf = b'\xed\xc0\r\n'
    present = set(bytes(bstring))

    try:
        if byte_ed not in present and byte_c0 not in present:
            return bstring.decode('utf-8'), 'utf-8'
        # 0xed introduces the UTF-16-surrogate sequences of CESU-8, which
        # real UTF-8 never needs (a few Korean characters legitimately use
        # 0xed too, but decoding them as a 'variant' is harmless). 0xc0 is
        # impossible in strict UTF-8 -- it would be an over-long encoding
        # of a codepoint below U+0040 -- but Java uses 0xc0 0x80 as a
        # non-standard encoding of U+0000 so that no 0x00 byte appears.
        # The 'utf-8-variants' codec handles both cases, plus standard
        # UTF-8, at a small cost in speed.
        return bstring.decode('utf-8-variants'), 'utf-8-variants'
    except UnicodeDecodeError:
        pass

    # Classic MacRoman text uses bare CR line breaks; anything else falls
    # through to the permissive Windows-1252 superset.
    if byte_cr in bstring and byte_lf not in bstring:
        return bstring.decode('macroman'), 'macroman'
    return bstring.decode('sloppy-windows-1252'), 'sloppy-windows-1252'
|
||||
|
||||
|
||||
def explain_unicode(text):
    """
    Print a breakdown of `text`, one line per codepoint, as a debugging
    aid for mysterious Unicode.

    Each line shows the codepoint number in hexadecimal, its glyph (or a
    backslash escape when it isn't printable), its category in the
    Unicode standard, and its name in the Unicode standard (or
    '<unknown>' when it has none).
    """
    for char in text:
        shown = (char if is_printable(char)
                 else char.encode('unicode-escape').decode('ascii'))
        print('U+{code:04X} {display:<7} [{category}] {name}'.format(
            code=ord(char),
            display=shown,
            category=unicodedata.category(char),
            name=unicodedata.name(char, '<unknown>')
        ))
|
||||
|
||||
|
||||
def fix_bad_encoding(text):
    """
    Deprecated alias, kept so code written against earlier versions of
    ftfy keeps working; use `fix_text_encoding` instead.
    """
    message = 'fix_bad_encoding is now known as fix_text_encoding'
    warnings.warn(message, DeprecationWarning)
    return fix_text_encoding(text)
|
94
lib/ftfy/bad_codecs/__init__.py
Normal file
94
lib/ftfy/bad_codecs/__init__.py
Normal file
@ -0,0 +1,94 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Give Python the ability to decode some common, flawed encodings.
|
||||
|
||||
Python does not want you to be sloppy with your text. Its encoders and decoders
|
||||
("codecs") follow the relevant standards whenever possible, which means that
|
||||
when you get text that *doesn't* follow those standards, you'll probably fail
|
||||
to decode it. Or you might succeed at decoding it for implementation-specific
|
||||
reasons, which is perhaps worse.
|
||||
|
||||
There are some encodings out there that Python wishes didn't exist, which are
|
||||
widely used outside of Python:
|
||||
|
||||
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
|
||||
ever-popular CESU-8 and "Java modified UTF-8".
|
||||
- "Sloppy" versions of character map encodings, where bytes that don't map to
|
||||
anything will instead map to the Unicode character with the same number.
|
||||
|
||||
Simply importing this module, or in fact any part of the `ftfy` package, will
|
||||
make these new "bad codecs" available to Python through the standard Codecs
|
||||
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
|
||||
|
||||
However, if you want to call something because your code checker insists on it,
|
||||
you can call ``ftfy.bad_codecs.ok()``.
|
||||
|
||||
A quick example of decoding text that's encoded in CESU-8:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
|
||||
😍
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
from encodings import normalize_encoding
|
||||
import codecs
|
||||
|
||||
# Cache of resolved codecs, keyed by the encoding name exactly as it was
# requested (before normalization), so repeated lookups skip the imports
# in `search_function`.
_CACHE = {}

# Define some aliases for 'utf-8-variants'. All hyphens get turned into
# underscores, because of `normalize_encoding`.
UTF8_VAR_NAMES = (
    'utf_8_variants', 'utf8_variants',
    'utf_8_variant', 'utf8_variant',
    'utf_8_var', 'utf8_var',
    'cesu_8', 'cesu8',
    'java_utf_8', 'java_utf8'
)
|
||||
|
||||
|
||||
def search_function(encoding):
    """
    A codec search function, suitable for `codecs.register`, that looks
    up our "bad codecs" by name.

    Given an encoding name, return a codec when we recognize one of:

    - names of the form 'sloppy-windows-NNNN' or 'sloppy-iso-8859-N',
      the sloppy versions of encodings that leave some bytes unmapped
    - 'utf-8-variants' under any of the aliases in `UTF8_VAR_NAMES`

    and None otherwise, which tells Python to keep searching.
    """
    try:
        return _CACHE[encoding]
    except KeyError:
        pass

    normalized = normalize_encoding(encoding)
    found = None
    if normalized in UTF8_VAR_NAMES:
        # Imported lazily so the codec module loads only when needed.
        from ftfy.bad_codecs.utf8_variants import CODEC_INFO
        found = CODEC_INFO
    elif normalized.startswith('sloppy_'):
        from ftfy.bad_codecs.sloppy import CODECS
        found = CODECS.get(normalized)

    # Only successful lookups are cached; misses stay cheap to re-check.
    if found is not None:
        _CACHE[encoding] = found
    return found
|
||||
|
||||
|
||||
def ok():
    """
    Do nothing, successfully.

    Importing this package is all it takes to register the codecs, but
    tools like pyflakes complain about imports that appear unused. They
    can't tell that `unicode.encode` and `bytes.decode` rely on the
    registration -- calling this no-op gives them something to see.
    """
|
||||
|
||||
|
||||
# Hook our search function into the standard codecs machinery at import
# time, so importing this package is enough to enable the bad codecs.
codecs.register(search_function)
|
156
lib/ftfy/bad_codecs/sloppy.py
Normal file
156
lib/ftfy/bad_codecs/sloppy.py
Normal file
@ -0,0 +1,156 @@
|
||||
# coding: utf-8
|
||||
r"""
|
||||
Decodes single-byte encodings, filling their "holes" in the same messy way that
|
||||
everyone else does.
|
||||
|
||||
A single-byte encoding maps each byte to a Unicode character, except that some
|
||||
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
|
||||
example, bytes 0x81 and 0x8D, among others, have no meaning.
|
||||
|
||||
Python, wanting to preserve some sense of decorum, will handle these bytes
|
||||
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
|
||||
different from each other. It just hasn't defined what they are in terms of
|
||||
Unicode.
|
||||
|
||||
Software that has to interoperate with Windows-1252 and Unicode -- such as all
|
||||
the common Web browsers -- will pick some Unicode characters for them to map
|
||||
to, and the characters they pick are the Unicode characters with the same
|
||||
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
|
||||
resulting characters tend to fall into a range of Unicode that's set aside for
|
||||
obselete Latin-1 control characters anyway.
|
||||
|
||||
These sloppy codecs let Python do the same thing, thus interoperating with
|
||||
other software that works this way. It defines a sloppy version of many
|
||||
single-byte encodings with holes. (There is no need for a sloppy version of
|
||||
an encoding without holes: for example, there is no such thing as
|
||||
sloppy-iso-8859-2 or sloppy-macroman.)
|
||||
|
||||
The following encodings will become defined:
|
||||
|
||||
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
|
||||
- sloppy-windows-1251 (Cyrillic)
|
||||
- sloppy-windows-1252 (Western European, based on Latin-1)
|
||||
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
|
||||
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
|
||||
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
|
||||
- sloppy-windows-1256 (Arabic)
|
||||
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
|
||||
- sloppy-windows-1258 (Vietnamese)
|
||||
- sloppy-cp874 (Thai, based on ISO-8859-11)
|
||||
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
|
||||
- sloppy-iso-8859-6 (different Arabic)
|
||||
- sloppy-iso-8859-7 (Greek)
|
||||
- sloppy-iso-8859-8 (Hebrew)
|
||||
- sloppy-iso-8859-11 (Thai)
|
||||
|
||||
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
|
||||
defined.
|
||||
|
||||
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
|
||||
the rest are rather uncommon.
|
||||
|
||||
Here are some examples, using `ftfy.explain_unicode` to illustrate how
|
||||
sloppy-windows-1252 merges Windows-1252 with Latin-1:
|
||||
|
||||
>>> from ftfy import explain_unicode
|
||||
>>> some_bytes = b'\x80\x81\x82'
|
||||
>>> explain_unicode(some_bytes.decode('latin-1'))
|
||||
U+0080 \x80 [Cc] <unknown>
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+0082 \x82 [Cc] <unknown>
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+FFFD <EFBFBD> [So] REPLACEMENT CHARACTER
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
|
||||
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
|
||||
U+20AC € [Sc] EURO SIGN
|
||||
U+0081 \x81 [Cc] <unknown>
|
||||
U+201A ‚ [Ps] SINGLE LOW-9 QUOTATION MARK
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import codecs
|
||||
from encodings import normalize_encoding
|
||||
|
||||
REPLACEMENT_CHAR = '\ufffd'
|
||||
|
||||
|
||||
def make_sloppy_codec(encoding):
    """
    Given the name of a single-byte codec, build and return a 'sloppy'
    `codecs.CodecInfo` for it: one where the encoding's unassigned bytes
    map to the Unicode characters with the same numbers, the way Latin-1
    (and most other software) handles them.

    Single-byte codecs in the standard library are boilerplate classes
    wrapped around `codecs.charmap_decode` and `codecs.charmap_encode`;
    this function generates the same boilerplate around a patched
    character map.
    """
    # Every possible byte value, 0x00 through 0xff.
    every_byte = bytearray(range(256))

    # Start from the Latin-1 interpretation, which assigns all 256 bytes;
    # these are the fallback characters for the encoding's holes.
    charmap = list(every_byte.decode('latin-1'))

    # Decode the same bytes with the real codec. Bytes it assigns keep
    # their real decoding; unassigned bytes come back as the replacement
    # character and retain their Latin-1 fallback instead.
    for position, decoded in enumerate(every_byte.decode(encoding, 'replace')):
        if decoded != REPLACEMENT_CHAR:
            charmap[position] = decoded

    # Build the lookup tables that the charmap functions consume.
    decoding_table = ''.join(charmap)
    encoding_table = codecs.charmap_build(decoding_table)

    # The class boilerplate below mirrors the standard library's
    # `encodings.cp1252`, reformatted to follow pep8.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            return codecs.charmap_encode(input, self.errors, encoding_table)[0]

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            return codecs.charmap_decode(input, self.errors, decoding_table)[0]

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    return codecs.CodecInfo(
        name='sloppy-' + encoding,
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
|
||||
|
||||
# Define a codec for each incomplete encoding. The resulting CODECS dictionary
# can be used by the main module of ftfy.bad_codecs.
CODECS = {}

# The single-byte encodings that leave some bytes unassigned and therefore
# need a sloppy counterpart. (Complete encodings don't need one.)
INCOMPLETE_ENCODINGS = (
    ['windows-%s' % num for num in range(1250, 1259)] +
    ['iso-8859-%s' % num for num in (3, 6, 7, 8, 11)] +
    ['cp%s' % num for num in range(1250, 1259)] + ['cp874']
)

# Build every sloppy codec once, at import time, keyed by its normalized
# name (hyphens become underscores, per `normalize_encoding`).
for _encoding in INCOMPLETE_ENCODINGS:
    _new_name = normalize_encoding('sloppy-' + _encoding)
    CODECS[_new_name] = make_sloppy_codec(_encoding)
|
281
lib/ftfy/bad_codecs/utf8_variants.py
Normal file
281
lib/ftfy/bad_codecs/utf8_variants.py
Normal file
@ -0,0 +1,281 @@
|
||||
r"""
|
||||
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
|
||||
decode text that's been encoded with a popular non-standard version of UTF-8.
|
||||
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
|
||||
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
|
||||
codepoint 0.
|
||||
|
||||
This is particularly relevant in Python 3, which provides no other way of
|
||||
decoding CESU-8 or Java's encoding. [1]
|
||||
|
||||
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
|
||||
|
||||
>>> import ftfy.bad_codecs
|
||||
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
|
||||
>>> print(repr(result).lstrip('u'))
|
||||
'here comes a null! \x00'
|
||||
|
||||
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
|
||||
Consortium's not-quite-standard describing CESU-8 requires that there is only
|
||||
one possible encoding of any character, so it does not allow mixing of valid
|
||||
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
|
||||
decoder does.
|
||||
|
||||
Characters in the Basic Multilingual Plane still have only one encoding. This
|
||||
codec still enforces the rule, within the BMP, that characters must appear in
|
||||
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
|
||||
instead of just `0x00`, may be used to encode the null character `U+0000`, like
|
||||
in Java.
|
||||
|
||||
If you encode with this codec, you get legitimate UTF-8. Decoding with this
|
||||
codec and then re-encoding is not idempotent, although encoding and then
|
||||
decoding is. So this module won't produce CESU-8 for you. Look for that
|
||||
functionality in the sister module, "Breaks Text For You", coming approximately
|
||||
never.
|
||||
|
||||
[1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec: first
|
||||
decode the bytes (incorrectly), then encode them, then decode them again, using
|
||||
UTF-8 as the codec every time.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
|
||||
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
|
||||
IncrementalEncoder as UTF8IncrementalEncoder)
|
||||
import re
|
||||
import codecs
|
||||
|
||||
NAME = 'utf-8-variants'
|
||||
# This regular expression matches all possible six-byte CESU-8 sequences.
|
||||
CESU8_RE = re.compile(b'\xed[\xa0-\xaf][\x80-\xbf]\xed[\xb0-\xbf][\x80-\xbf]')
|
||||
|
||||
|
||||
class IncrementalDecoder(UTF8IncrementalDecoder):
    """
    An incremental decoder that extends Python's built-in UTF-8 decoder.

    This encoder needs to take in bytes, possibly arriving in a stream, and
    output the correctly decoded text. The general strategy for doing this
    is to fall back on the real UTF-8 decoder whenever possible, because
    the real UTF-8 decoder is way optimized, but to call specialized methods
    we define here for the cases the real encoder isn't expecting.

    (The parameters below are named `input` -- shadowing the builtin --
    because that is the signature the codecs API prescribes.)
    """
    def _buffer_decode(self, input, errors, final):
        """
        Decode bytes that may be arriving in a stream, following the Codecs
        API.

        `input` is the incoming sequence of bytes. `errors` tells us how to
        handle errors, though we delegate all error-handling cases to the real
        UTF-8 decoder to ensure correct behavior. `final` indicates whether
        this is the end of the sequence, in which case we should raise an
        error given incomplete input.

        Returns as much decoded text as possible, and the number of bytes
        consumed.
        """
        # decoded_segments are the pieces of text we have decoded so far,
        # and position is our current position in the byte string. (Bytes
        # before this position have been consumed, and bytes after it have
        # yet to be decoded.)
        decoded_segments = []
        position = 0
        while True:
            # Use _buffer_decode_step to decode a segment of text.
            decoded, consumed = self._buffer_decode_step(
                input[position:],
                errors,
                final
            )
            if consumed == 0:
                # Either there's nothing left to decode, or we need to wait
                # for more input. Either way, we're done for now.
                break

            # Append the decoded text to the list, and update our position.
            decoded_segments.append(decoded)
            position += consumed

        if final:
            # _buffer_decode_step must consume all the bytes when `final` is
            # true.
            assert position == len(input)

        return ''.join(decoded_segments), position

    def _buffer_decode_step(self, input, errors, final):
        """
        There are three possibilities for each decoding step:

        - Decode as much real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        This method figures out which step is appropriate, and does it.
        """
        # Get a reference to the superclass method that we'll be using for
        # most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode

        # Find the next byte position that indicates a variant of UTF-8.
        # CESU-8 sequences always start with 0xed, and Java nulls always
        # start with 0xc0, both of which are conveniently impossible in
        # real UTF-8.
        cutoff1 = input.find(b'\xed')
        cutoff2 = input.find(b'\xc0')

        # Set `cutoff` to whichever cutoff comes first.
        if cutoff1 != -1 and cutoff2 != -1:
            cutoff = min(cutoff1, cutoff2)
        elif cutoff1 != -1:
            cutoff = cutoff1
        elif cutoff2 != -1:
            cutoff = cutoff2
        else:
            # The entire input can be decoded as UTF-8, so just do so.
            return sup(input, errors, final)

        if cutoff1 == 0:
            # Decode a possible six-byte sequence starting with 0xed.
            return self._buffer_decode_surrogates(sup, input, errors, final)
        elif cutoff2 == 0:
            # Decode a possible two-byte sequence, 0xc0 0x80.
            return self._buffer_decode_null(sup, input, errors, final)
        else:
            # Decode the bytes up until the next weird thing as UTF-8.
            # Set final=True because 0xc0 and 0xed don't make sense in the
            # middle of a sequence, in any variant.
            return sup(input[:cutoff], errors, True)

    @staticmethod
    def _buffer_decode_null(sup, input, errors, final):
        """
        Decode the bytes 0xc0 0x80 as U+0000, like Java does.

        `sup` is the superclass `_buffer_decode`, passed in by the caller;
        returns a (text, bytes_consumed) pair like the other steps.
        """
        nextbyte = input[1:2]
        if nextbyte == b'':
            if final:
                # We found 0xc0 at the end of the stream, which is an error.
                # Delegate to the superclass method to handle that error.
                return sup(input, errors, final)
            else:
                # We found 0xc0 and we don't know what comes next, so consume
                # no bytes and wait.
                return '', 0
        elif nextbyte == b'\x80':
            # We found the usual 0xc0 0x80 sequence, so decode it and consume
            # two bytes.
            return '\u0000', 2
        else:
            # We found 0xc0 followed by something else, which is an error.
            # Whatever should happen is equivalent to what happens when the
            # superclass is given just the byte 0xc0, with final=True.
            return sup(b'\xc0', errors, True)

    @staticmethod
    def _buffer_decode_surrogates(sup, input, errors, final):
        """
        When we have improperly encoded surrogates, we can still see the
        bits that they were meant to represent.

        The surrogates were meant to encode a 20-bit number, to which we
        add 0x10000 to get a codepoint. That 20-bit number now appears in
        this form:

            11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst

        The CESU8_RE above matches byte sequences of this form. Then we need
        to extract the bits and assemble a codepoint number from them.
        """
        if len(input) < 6:
            if final:
                # We found 0xed near the end of the stream, and there aren't
                # six bytes to decode. Delegate to the superclass method to
                # handle it as normal UTF-8. It might be a Hangul character
                # or an error.
                if PYTHON2 and len(input) >= 3:
                    # We can't trust Python 2 to raise an error when it's
                    # asked to decode a surrogate, so let's force the issue.
                    input = mangle_surrogates(input)
                return sup(input, errors, final)
            else:
                # We found 0xed, the stream isn't over yet, and we don't know
                # enough of the following bytes to decode anything, so consume
                # zero bytes and wait.
                return '', 0
        else:
            if CESU8_RE.match(input):
                # If this is a CESU-8 sequence, do some math to pull out
                # the intended 20-bit value, and consume six bytes.
                bytenums = bytes_to_ints(input[:6])
                codepoint = (
                    ((bytenums[1] & 0x0f) << 16) +
                    ((bytenums[2] & 0x3f) << 10) +
                    ((bytenums[4] & 0x0f) << 6) +
                    (bytenums[5] & 0x3f) +
                    0x10000
                )
                return unichr(codepoint), 6
            else:
                # This looked like a CESU-8 sequence, but it wasn't one.
                # 0xed indicates the start of a three-byte sequence, so give
                # three bytes to the superclass to decode as usual -- except
                # for working around the Python 2 discrepancy as before.
                if PYTHON2:
                    input = mangle_surrogates(input)
                return sup(input[:3], errors, False)
|
||||
|
||||
|
||||
def mangle_surrogates(bytestring):
    """
    Rewrite UTF-8-encoded surrogate codepoints at the start of
    `bytestring` into the bytes `ff ff ff`, which are certain not to
    decode and become three replacement characters in 'replace' mode.

    Python 3 already treats the UTF-8 encoding of a surrogate as the
    error it is, but Python 2 just emits the surrogate codepoint. This
    substitution keeps the two behaviors consistent and protects
    downstream code from malformed strings. On Python 3 the input is
    returned unchanged.
    """
    if not PYTHON2:
        # Python 3 rejects encoded surrogates on its own; nothing to do.
        return bytestring
    if len(bytestring) >= 3 and bytestring.startswith(b'\xed'):
        candidate = bytestring[:3].decode('utf-8', 'replace')
        if '\ud800' <= candidate <= '\udfff':
            # Replace this surrogate and recurse on whatever follows it.
            return b'\xff\xff\xff' + mangle_surrogates(bytestring[3:])
    return bytestring
|
||||
|
||||
# The encoder is identical to UTF-8: this codec only *decodes* the variant
# forms; anything encoded with it is legitimate, standard UTF-8.
IncrementalEncoder = UTF8IncrementalEncoder
|
||||
|
||||
|
||||
# Everything below here is boilerplate that matches the modules in the
|
||||
# built-in `encodings` package.
|
||||
def encode(input, errors='strict'):
    """Stateless encode function required by the codecs API (plain UTF-8)."""
    encoder = IncrementalEncoder(errors)
    return encoder.encode(input, final=True), len(input)
|
||||
|
||||
|
||||
def decode(input, errors='strict'):
    """Stateless decode function required by the codecs API."""
    decoder = IncrementalDecoder(errors)
    return decoder.decode(input, final=True), len(input)
|
||||
|
||||
|
||||
class StreamWriter(codecs.StreamWriter):
    # Codecs-API boilerplate: a stream writer built on this module's
    # stateless `encode`.
    encode = encode
|
||||
|
||||
|
||||
class StreamReader(codecs.StreamReader):
    # Codecs-API boilerplate: a stream reader built on this module's
    # stateless `decode`.
    decode = decode
|
||||
|
||||
|
||||
# The CodecInfo object that `ftfy.bad_codecs.search_function` hands back
# for 'utf-8-variants' and its aliases.
CODEC_INFO = codecs.CodecInfo(
    name=NAME,
    encode=encode,
    decode=decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
)
|
144
lib/ftfy/badness.py
Normal file
144
lib/ftfy/badness.py
Normal file
@ -0,0 +1,144 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
Heuristics to determine whether re-encoding text is actually making it
|
||||
more reasonable.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ftfy.chardata import chars_to_classes
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
# The following regex uses the mapping of character classes to ASCII
|
||||
# characters defined in chardata.py and build_data.py:
|
||||
#
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# 0 = Math symbol (Sm)
|
||||
# 1 = Currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# = Whitespace
|
||||
# o = Other
|
||||
|
||||
|
||||
def _make_weirdness_regex():
|
||||
"""
|
||||
Creates a list of regexes that match 'weird' character sequences.
|
||||
The more matches there are, the weirder the text is.
|
||||
"""
|
||||
groups = []
|
||||
|
||||
# Match lowercase letters that are followed by non-ASCII uppercase letters
|
||||
groups.append('lA')
|
||||
|
||||
# Match diacritical marks, except when they modify a non-cased letter or
|
||||
# another mark.
|
||||
#
|
||||
# You wouldn't put a diacritical mark on a digit or a space, for example.
|
||||
# You might put it on a Latin letter, but in that case there will almost
|
||||
# always be a pre-composed version, and we normalize to pre-composed
|
||||
# versions first. The cases that can't be pre-composed tend to be in
|
||||
# large scripts without case, which are in class C.
|
||||
groups.append('[^CM]M')
|
||||
|
||||
# Match non-Latin characters adjacent to Latin characters.
|
||||
#
|
||||
# This is a simplification from ftfy version 2, which compared all
|
||||
# adjacent scripts. However, the ambiguities we need to resolve come from
|
||||
# encodings designed to represent Latin characters.
|
||||
groups.append('[Ll][AaC]')
|
||||
groups.append('[AaC][Ll]')
|
||||
|
||||
# Match C1 control characters, which are almost always the result of
|
||||
# decoding Latin-1 that was meant to be Windows-1252.
|
||||
groups.append('X')
|
||||
|
||||
# Match private use and unassigned characters.
|
||||
groups.append('P')
|
||||
groups.append('_')
|
||||
|
||||
# Match adjacent characters from any different pair of these categories:
|
||||
# - Modifier marks (M)
|
||||
# - Letter modifiers (m)
|
||||
# - Miscellaneous numbers (N)
|
||||
# - Symbols (0123)
|
||||
|
||||
exclusive_categories = 'MmN0123'
|
||||
for cat1 in exclusive_categories:
|
||||
others_range = ''.join(c for c in exclusive_categories if c != cat1)
|
||||
groups.append('{cat1}[{others_range}]'.format(
|
||||
cat1=cat1, others_range=others_range
|
||||
))
|
||||
regex = '|'.join('({0})'.format(group) for group in groups)
|
||||
return re.compile(regex)
|
||||
|
||||
# Compiled once at import time; consumed by `sequence_weirdness`.
WEIRDNESS_RE = _make_weirdness_regex()

# A few characters are common ending punctuation that can show up at the end
# of a mojibake sequence. It's plausible that such a character could appear
# after an accented capital letter, for example, so we'll want to add a
# slight preference to leave these characters alone.
#
# The match ends with a + so that we only give the bonus once for a
# consecutive sequence of these characters.
ENDING_PUNCT_RE = re.compile(
    '['
    '\N{HORIZONTAL ELLIPSIS}\N{EM DASH}\N{EN DASH}'
    '\N{RIGHT SINGLE QUOTATION MARK}\N{RIGHT DOUBLE QUOTATION MARK}'
    '\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}'
    '\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}'
    ']+'
)
|
||||
|
||||
def sequence_weirdness(text):
    """
    Count how often a text contains unexpected characters or sequences of
    characters. This metric is used to disambiguate when text should be
    re-decoded or left as is.

    The text is first normalized in NFC form, so that penalties for
    diacritical marks don't apply to characters that combine with them.

    The following things are deemed weird:

    - Lowercase letters followed by non-ASCII uppercase letters
    - Non-Latin characters next to Latin characters
    - Un-combined diacritical marks, unless they're stacking on
      non-alphabetic characters or other marks
    - C1 control characters
    - Adjacent symbols from any different pair of these categories:
      modifier marks, letter modifiers, non-digit numbers, and symbols
      (including math and currency)

    Common ending punctuation (matched by ENDING_PUNct_RE) earns a small
    discount, because it can legitimately follow accented capitals.

    The return value is the number of instances of weirdness.
    """
    normalized = unicodedata.normalize('NFC', text)
    weird_count = len(WEIRDNESS_RE.findall(chars_to_classes(normalized)))
    punct_bonus = len(ENDING_PUNCT_RE.findall(normalized))
    return weird_count * 2 - punct_bonus
|
||||
|
||||
|
||||
def text_cost(text):
    """
    An overall cost function for text, measured as the "weirdness" (see
    :func:`sequence_weirdness`) plus the length.

    Weirder is worse, but all else being equal, shorter strings are better.
    """
    return len(text) + sequence_weirdness(text)
|
111
lib/ftfy/build_data.py
Normal file
111
lib/ftfy/build_data.py
Normal file
@ -0,0 +1,111 @@
|
||||
"""
|
||||
A script to make the char_classes.dat file.
|
||||
|
||||
This never needs to run in normal usage. It needs to be run if the character
|
||||
classes we care about change, or if a new version of Python supports a new
|
||||
Unicode standard and we want it to affect our string decoding.
|
||||
|
||||
The file that we generate is based on Unicode 6.1, as supported by Python 3.3.
|
||||
You can certainly use it in earlier versions. This simply makes sure that we
|
||||
get consistent results from running ftfy on different versions of Python.
|
||||
|
||||
The file will be written to the current directory.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import unicodedata
|
||||
import sys
|
||||
import zlib
|
||||
if sys.hexversion >= 0x03000000:
|
||||
unichr = chr
|
||||
|
||||
# L = Latin capital letter
|
||||
# l = Latin lowercase letter
|
||||
# A = Non-latin capital or title-case letter
|
||||
# a = Non-latin lowercase letter
|
||||
# C = Non-cased letter (Lo)
|
||||
# X = Control character (Cc)
|
||||
# m = Letter modifier (Lm)
|
||||
# M = Mark (Mc, Me, Mn)
|
||||
# N = Miscellaneous numbers (No)
|
||||
# P = Private use (Co)
|
||||
# 0 = Math symbol (Sm)
|
||||
# 1 = Currency symbol (Sc)
|
||||
# 2 = Symbol modifier (Sk)
|
||||
# 3 = Other symbol (So)
|
||||
# S = UTF-16 surrogate
|
||||
# _ = Unassigned character
|
||||
# = Whitespace
|
||||
# o = Other
|
||||
|
||||
|
||||
def make_char_data_file(do_it_anyway=False):
    """
    Build the compressed data file 'char_classes.dat' and write it to the
    current directory.

    If you run this, run it in Python 3.3 or later. It will run in earlier
    versions, but you won't get the current Unicode standard, leading to
    inconsistent behavior. To protect against this, running this in the
    wrong version of Python will raise an error unless you pass
    `do_it_anyway=True`.
    """
    if sys.hexversion < 0x03030000 and not do_it_anyway:
        raise RuntimeError(
            "This function should be run in Python 3.3 or later."
        )

    # One class letter per possible codepoint, U+0000 through U+10FFFF
    # (see the legend above for what each letter means).
    cclasses = [None] * 0x110000
    for codepoint in range(0x0, 0x110000):
        char = unichr(codepoint)
        category = unicodedata.category(char)

        if category.startswith('L'):  # letters
            is_latin = unicodedata.name(char).startswith('LATIN')
            if is_latin and codepoint < 0x200:
                # Basic-range Latin letters get their own classes.
                cclasses[codepoint] = 'L' if category == 'Lu' else 'l'
            else:  # non-Latin letter, or close enough
                if category == 'Lu' or category == 'Lt':
                    cclasses[codepoint] = 'A'
                elif category == 'Ll':
                    cclasses[codepoint] = 'a'
                elif category == 'Lo':
                    cclasses[codepoint] = 'C'
                elif category == 'Lm':
                    cclasses[codepoint] = 'm'
                else:
                    raise ValueError('got some weird kind of letter')
        elif category.startswith('M'):  # marks
            cclasses[codepoint] = 'M'
        elif category == 'No':
            cclasses[codepoint] = 'N'
        elif category == 'Sm':
            cclasses[codepoint] = '0'
        elif category == 'Sc':
            cclasses[codepoint] = '1'
        elif category == 'Sk':
            cclasses[codepoint] = '2'
        elif category == 'So':
            cclasses[codepoint] = '3'
        elif category == 'Cn':
            cclasses[codepoint] = '_'
        elif category == 'Cc':
            cclasses[codepoint] = 'X'
        elif category == 'Cs':
            cclasses[codepoint] = 'S'
        elif category == 'Co':
            cclasses[codepoint] = 'P'
        elif category.startswith('Z'):
            cclasses[codepoint] = ' '
        else:
            cclasses[codepoint] = 'o'

    # The whitespace-ish control characters (TAB, LF, FF, CR) count as
    # whitespace, not controls, so they aren't flagged as weird.
    cclasses[9] = cclasses[10] = cclasses[12] = cclasses[13] = ' '

    # Use a context manager so the file is closed even if writing fails.
    with open('char_classes.dat', 'wb') as out:
        out.write(zlib.compress(''.join(cclasses).encode('ascii')))
|
||||
|
||||
if __name__ == '__main__':
|
||||
make_char_data_file()
|
BIN
lib/ftfy/char_classes.dat
Normal file
BIN
lib/ftfy/char_classes.dat
Normal file
Binary file not shown.
81
lib/ftfy/chardata.py
Normal file
81
lib/ftfy/chardata.py
Normal file
@ -0,0 +1,81 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This gives other modules access to the gritty details about characters and the
|
||||
encodings that use them.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
import re
|
||||
import zlib
|
||||
from pkg_resources import resource_string
|
||||
from ftfy.compatibility import unichr
|
||||
|
||||
# These are the five encodings we will try to fix in ftfy, in the
|
||||
# order that they should be tried.
|
||||
CHARMAP_ENCODINGS = [
|
||||
'latin-1',
|
||||
'sloppy-windows-1252',
|
||||
'macroman',
|
||||
'cp437',
|
||||
'sloppy-windows-1251',
|
||||
]
|
||||
|
||||
|
||||
def _build_regexes():
    """
    ENCODING_REGEXES contain reasonably fast ways to detect if we
    could represent a given string in a given encoding. The simplest one is
    the 'ascii' detector, which of course just determines if all characters
    are between U+0000 and U+007F.
    """
    # Define a regex that matches ASCII text.
    encoding_regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}

    # The Latin-1 characters for bytes 0x80-0xff. Decoding these bytes
    # through each target encoding tells us which characters that encoding
    # assigns to them. (Hoisted out of the loop: it doesn't depend on the
    # encoding being examined.)
    latin1table = ''.join(unichr(i) for i in range(128, 256))

    for encoding in CHARMAP_ENCODINGS:
        charlist = latin1table.encode('latin-1').decode(encoding)

        # Build a regex from the ASCII range, followed by the decodings of
        # bytes 0x80-0xff in this character set. (This uses the fact that all
        # regex special characters are ASCII, and therefore won't appear in
        # the string.)
        regex = '^[\x00-\x7f{0}]*$'.format(charlist)
        encoding_regexes[encoding] = re.compile(regex)
    return encoding_regexes

ENCODING_REGEXES = _build_regexes()
|
||||
|
||||
|
||||
def possible_encoding(text, encoding):
    """
    Given text and a single-byte encoding, check whether that text could
    have been decoded from that single-byte encoding.

    In other words, check whether it can be encoded in that encoding,
    possibly sloppily.
    """
    match = ENCODING_REGEXES[encoding].match(text)
    return match is not None
|
||||
|
||||
|
||||
CHAR_CLASS_STRING = zlib.decompress(
|
||||
resource_string(__name__, 'char_classes.dat')
|
||||
).decode('ascii')
|
||||
|
||||
def chars_to_classes(string):
    """
    Convert each Unicode character to a letter indicating which of many
    classes it's in.

    See build_data.py for where this data comes from and what it means.
    """
    # CHAR_CLASS_STRING acts as a lookup table here: str.translate() indexes
    # it by each character's codepoint, and the character found at that
    # index is the codepoint's class letter.
    return string.translate(CHAR_CLASS_STRING)
|
||||
|
||||
|
||||
# A translate mapping that will strip all C0 control characters except
|
||||
# those that represent whitespace.
|
||||
# Map each unwanted control character to None, which makes str.translate()
# delete it. Covers the C0 controls except the whitespace ones (TAB, LF,
# FF, CR), plus DELETE (U+007F), matching the contract documented on
# ftfy.fixes.remove_control_chars.
CONTROL_CHARS = {
    codepoint: None
    for codepoint in list(range(32)) + [0x7f]
    if chr(codepoint) not in '\t\n\f\r'
}
|
34
lib/ftfy/cli.py
Normal file
34
lib/ftfy/cli.py
Normal file
@ -0,0 +1,34 @@
|
||||
"""
|
||||
A simple command-line utility for fixing text found in a file.
|
||||
|
||||
Because files do not come with their encoding marked, it first runs the file
|
||||
through `ftfy.guess_bytes`, then runs it through `ftfy.fix_text`.
|
||||
"""
|
||||
from ftfy import fix_file
|
||||
|
||||
import sys
|
||||
ENCODE_STDOUT = (sys.hexversion < 0x03000000)
|
||||
|
||||
|
||||
def main():
    """
    Run ftfy as a command-line utility: read the named file, fix its text,
    and write the result to standard output. (Requires Python 2.7 or later,
    or the 'argparse' module.)
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('filename', help='file to transcode')

    args = parser.parse_args()

    # Use a context manager so the input file is closed deterministically,
    # even if fixing raises partway through. ('infile' also avoids shadowing
    # the Python 2 builtin 'file'.)
    with open(args.filename) as infile:
        for line in fix_file(infile):
            if ENCODE_STDOUT:
                # On Python 2, stdout expects bytes, not unicode.
                sys.stdout.write(line.encode('utf-8'))
            else:
                sys.stdout.write(line)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
79
lib/ftfy/compatibility.py
Normal file
79
lib/ftfy/compatibility.py
Normal file
@ -0,0 +1,79 @@
|
||||
"""
|
||||
Makes some function names and behavior consistent between Python 2 and
|
||||
Python 3, and also between narrow and wide builds.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import sys
|
||||
import re
|
||||
import unicodedata
|
||||
|
||||
if sys.hexversion >= 0x03000000:
|
||||
from html import entities
|
||||
unichr = chr
|
||||
xrange = range
|
||||
PYTHON2 = False
|
||||
else:
|
||||
import htmlentitydefs as entities
|
||||
unichr = unichr
|
||||
xrange = xrange
|
||||
PYTHON2 = True
|
||||
htmlentitydefs = entities
|
||||
|
||||
PYTHON34_OR_LATER = (sys.hexversion >= 0x03040000)
|
||||
|
||||
|
||||
def _narrow_unichr_workaround(codepoint):
|
||||
"""
|
||||
A replacement for unichr() on narrow builds of Python. This will get
|
||||
us the narrow representation of an astral character, which will be
|
||||
a string of length two, containing two UTF-16 surrogates.
|
||||
"""
|
||||
escaped = b'\\U%08x' % codepoint
|
||||
return escaped.decode('unicode-escape')
|
||||
|
||||
|
||||
if sys.maxunicode < 0x10000:
|
||||
unichr = _narrow_unichr_workaround
|
||||
# In a narrow build of Python, we can't write a regex involving astral
|
||||
# characters. If we want to write the regex:
|
||||
#
|
||||
# [\U00100000-\U0010ffff]
|
||||
#
|
||||
# The actual string that defines it quietly turns into:
|
||||
#
|
||||
# [\udbc0\udc00-\udbff\udfff]
|
||||
#
|
||||
# And now the range operator only applies to the middle two characters.
|
||||
# It looks like a range that's going backwards from \dc00 to \dbff,
|
||||
# which is an error.
|
||||
#
|
||||
# What we can do instead is rewrite the expression to be _about_ the two
|
||||
# surrogates that make up the astral characters, instead of the characters
|
||||
# themselves. This would be wrong on a wide build, but it works on a
|
||||
# narrow build.
|
||||
UNSAFE_PRIVATE_USE_RE = re.compile('[\udbc0-\udbff][\udc00-\udfff]')
|
||||
else:
|
||||
UNSAFE_PRIVATE_USE_RE = re.compile('[\U00100000-\U0010ffff]')
|
||||
|
||||
|
||||
def bytes_to_ints(bytestring):
    """
    No matter what version of Python this is, make a sequence of integers
    from a bytestring. On Python 3 this is easy, because a 'bytes' object
    already is a sequence of integers; on Python 2 each byte must be
    converted with ord().
    """
    if not PYTHON2:
        return bytestring
    return [ord(byte) for byte in bytestring]
|
||||
|
||||
|
||||
def is_printable(char):
    """
    str.isprintable() is new in Python 3. It's useful in `explain_unicode`,
    so on Python 2 approximate it crudely: treat any character outside the
    Unicode 'C' (control/other) categories as printable.
    """
    if not PYTHON2:
        return char.isprintable()
    category = unicodedata.category(char)
    return not category.startswith('C')
|
473
lib/ftfy/fixes.py
Normal file
473
lib/ftfy/fixes.py
Normal file
@ -0,0 +1,473 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
This module contains the individual fixes that the main fix_text function
|
||||
can perform.
|
||||
"""
|
||||
|
||||
from __future__ import unicode_literals
|
||||
from ftfy.chardata import (possible_encoding,
|
||||
CHARMAP_ENCODINGS, CONTROL_CHARS)
|
||||
from ftfy.badness import text_cost
|
||||
from ftfy.compatibility import htmlentitydefs, unichr, UNSAFE_PRIVATE_USE_RE
|
||||
import re
|
||||
import sys
|
||||
import codecs
|
||||
|
||||
|
||||
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.
|
||||
|
||||
ftfy is designed to fix problems that were introduced by handling Unicode
|
||||
incorrectly. It might be able to fix the bytes you just handed it, but the
|
||||
fact that you just gave a pile of bytes to a function that fixes text means
|
||||
that your code is *also* handling Unicode incorrectly.
|
||||
|
||||
ftfy takes Unicode text as input. You should take these bytes and decode
|
||||
them from the encoding you think they are in. If you're not sure what encoding
|
||||
they're in:
|
||||
|
||||
- First, try to find out. 'utf-8' is a good assumption.
|
||||
- If the encoding is simply unknowable, try running your bytes through
|
||||
ftfy.guess_bytes. As the name implies, this may not always be accurate.
|
||||
|
||||
If you're confused by this, please read the Python Unicode HOWTO:
|
||||
|
||||
http://docs.python.org/%d/howto/unicode.html
|
||||
""" % sys.version_info[0]
|
||||
|
||||
|
||||
def fix_text_encoding(text):
    r"""
    Fix text with incorrectly-decoded garbage ("mojibake") whenever
    possible.

    Real-world text is frequently encoded as UTF-8, decoded in some
    single-byte encoding such as Latin-1 or Windows codepage 1252, and
    encoded as UTF-8 again, leaving nonsense sequences of characters. This
    function looks for the evidence of that having happened and fixes it:
    it decides whether sequences of single-byte characters were really
    meant to be UTF-8 characters and, if so, turns them back into the
    Unicode characters they were meant to represent.

    The input must be Unicode text. If you have bytes, you're not using the
    right tool to solve your problem.

    Text that already looks reasonable is left alone, and there are
    safeguards against mangling valid sequences of letters and punctuation
    (such as "Brontë…"). Windows's conflicting idea of what characters
    128-160 are is allowed for, including the case of text that was meant
    as Windows-1252 all along but read as Latin-1. The best candidate fix
    is chosen using :func:`ftfy.badness.text_cost`.
    """
    fixed_text, _plan = fix_encoding_and_explain(text)
    return fixed_text
|
||||
|
||||
|
||||
def fix_encoding_and_explain(text):
    """
    Re-decode text that has been decoded incorrectly, and also return a
    "plan" indicating all the steps required to fix it.

    To fix similar text in the same way, without having to detect anything,
    you can use the ``apply_plan`` function.
    """
    best_version = text
    best_cost = text_cost(text)
    best_plan = []
    plan_so_far = []

    while True:
        previous_text = text
        text, steps = fix_one_step_and_explain(text)
        plan_so_far.extend(steps)
        cost = text_cost(text)

        # Penalize particularly obsolete encodings, so that they're only
        # used when they can successfully replace multiple characters.
        if (('encode', 'macroman') in plan_so_far
                or ('encode', 'cp437') in plan_so_far):
            cost += 2

        # Decoding from Windows-1251 (Cyrillic) needs pretty solid evidence.
        if ('encode', 'sloppy-windows-1251') in plan_so_far:
            cost += 5

        if cost < best_cost:
            best_cost = cost
            best_version = text
            best_plan = list(plan_so_far)

        # Stop once a step makes no further change.
        if text == previous_text:
            return best_version, best_plan
|
||||
|
||||
|
||||
def fix_one_step_and_explain(text):
    """
    Perform a single step of re-decoding text that's been decoded
    incorrectly.

    Returns the decoded text, plus a "plan" -- a list of
    ``(operation, encoding)`` steps -- for how to reproduce what it did.
    """
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)
    if not text:
        # Empty text needs no fixing.
        return text, []

    # Plain ASCII text is returned unchanged.
    if possible_encoding(text, 'ascii'):
        return text, []

    # Remember single-byte encodings that could represent this text but
    # didn't lead to a successful fix; some are re-examined further down.
    viable_1byte_encodings = []

    # The most likely fixable case: the text was supposed to be UTF-8, but
    # was decoded using a single-byte encoding instead. Re-encode with each
    # candidate encoding and try decoding the bytes as UTF-8.
    for encoding in CHARMAP_ENCODINGS:
        if not possible_encoding(text, encoding):
            continue
        encoded_bytes = text.encode(encoding)

        # Pick the decoder: bytes 0xED or 0xC0 suggest nonstandard UTF-8
        # variants (surrogates / overlong sequences), which the
        # 'utf-8-variants' codec handles.
        decoding = 'utf-8'
        if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
            decoding = 'utf-8-variants'

        try:
            fixed = encoded_bytes.decode(decoding)
        except UnicodeDecodeError:
            # Not UTF-8; remember this encoding for the checks below.
            viable_1byte_encodings.append(encoding)
        else:
            return fixed, [('encode', encoding), ('decode', decoding)]

    # The next most likely case is Latin-1 that was intended to be read as
    # Windows-1252, because those two encodings in particular are easily
    # confused.
    if 'latin-1' in viable_1byte_encodings:
        if 'windows-1252' in viable_1byte_encodings:
            # The text is in the intersection of Latin-1 and Windows-1252,
            # so it's probably legit.
            return text, []

        # Otherwise we have characters that are in Latin-1 but not in
        # Windows-1252: C1 control characters, which nobody wants. Assume
        # they were meant to be Windows-1252. The strict codec is used
        # deliberately -- bytes invalid even in Windows-1252 are a bad
        # sign, so give up on those rather than guessing sloppily.
        try:
            fixed = text.encode('latin-1').decode('windows-1252')
        except UnicodeDecodeError:
            pass
        else:
            if fixed == text:
                return fixed, []
            return fixed, [('encode', 'latin-1'), ('decode', 'windows-1252')]

    # The cases that remain are mixups between two other single-byte
    # encodings. Those are somewhat rare and impossible to solve without
    # false positives; `ftfy.guess_bytes` may help in such situations.
    # Return the text unchanged; the plan is empty.
    return text, []
|
||||
|
||||
|
||||
def apply_plan(text, plan):
    """
    Apply a plan for fixing the encoding of text.

    The plan is a list of ``(operation, encoding)`` tuples, where
    `operation` is either 'encode' or 'decode', and `encoding` is an
    encoding name such as 'utf-8' or 'latin-1'.

    Because only text can be encoded, and only bytes can be decoded, the
    plan should alternate 'encode' and 'decode' steps, or else this
    function will encounter an error.

    Raises ValueError for any operation other than 'encode' or 'decode'.
    """
    result = text
    for operation, encoding in plan:
        if operation == 'encode':
            result = result.encode(encoding)
        elif operation == 'decode':
            result = result.decode(encoding)
        else:
            raise ValueError("Unknown plan step: %s" % operation)
    return result
|
||||
|
||||
|
||||
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")
|
||||
|
||||
|
||||
def unescape_html(text):
    """
    Decode all three types of HTML entities/character references: named
    entities (&amp;), decimal references (&#38;), and hex references
    (&#x26;). Unrecognized or malformed entities are left alone.

    Based on code by Fredrik Lundh of effbot.org, with a slight change by
    Rob Speer for efficiency: entities longer than 8 characters are never
    matched, because there are no valid entities like that.
    """
    def _replace_entity(match):
        """
        Decode one matched HTML entity into the character it represents,
        or return it unchanged if it isn't valid.
        """
        entity = match.group(0)
        if entity[:2] == "&#":
            # Numeric character reference, in hex or decimal.
            try:
                if entity[:3] == "&#x":
                    return unichr(int(entity[3:-1], 16))
                return unichr(int(entity[2:-1]))
            except ValueError:
                return entity
        # Named entity.
        try:
            return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
        except KeyError:
            return entity

    return HTML_ENTITY_RE.sub(_replace_entity, text)
|
||||
|
||||
|
||||
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')


def remove_terminal_escapes(text):
    r"""
    Strip out "ANSI" terminal escape sequences, such as those that produce
    colored text on Unix, e.g.::

        "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"

    becomes ``"I'm blue, da ba dee da ba doo..."``.
    """
    cleaned = ANSI_RE.sub('', text)
    return cleaned
|
||||
|
||||
|
||||
SINGLE_QUOTE_RE = re.compile('[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile('[\u201c-\u201f]')


def uncurl_quotes(text):
    """
    Replace curly quotation marks with their straight ASCII equivalents:
    curly double quotes become '"' and curly single quotes become "'".
    """
    text = DOUBLE_QUOTE_RE.sub('"', text)
    return SINGLE_QUOTE_RE.sub("'", text)
|
||||
|
||||
|
||||
def fix_line_breaks(text):
    r"""
    Convert all line breaks to Unix style.

    This normalizes the following sequences into the standard \n
    line break:

    - CRLF (\r\n), used on Windows and in some communication protocols
    - CR (\r), once used on Mac OS Classic, and now kept alive by
      misguided software such as Microsoft Office for Mac
    - LINE SEPARATOR (\u2028) and PARAGRAPH SEPARATOR (\u2029), defined
      by Unicode
    - NEXT LINE (\x85), a C1 control character that is certainly not
      what you meant

    The NEXT LINE character is a bit of an odd case, because it usually
    won't show up if `fix_encoding` is also being run: \x85 is very
    common mojibake for \u2026, HORIZONTAL ELLIPSIS.
    """
    # CRLF is replaced first, so that a lone-\r replacement can't split
    # a Windows line ending into two newlines.
    for line_break in ('\r\n', '\r', '\u2028', '\u2029', '\u0085'):
        text = text.replace(line_break, '\n')
    return text
|
||||
|
||||
|
||||
def remove_control_chars(text):
    """
    Remove all control characters except for the important ones.

    This removes characters in these ranges:

    - U+0000 to U+0008
    - U+000B
    - U+000E to U+001F
    - U+007F

    It leaves alone these characters that are commonly used for formatting:

    - TAB (U+0009)
    - LF (U+000A)
    - FF (U+000C)
    - CR (U+000D)
    """
    # CONTROL_CHARS (defined in ftfy.chardata) maps the unwanted codepoints
    # to None, which makes str.translate() delete them.
    return text.translate(CONTROL_CHARS)
|
||||
|
||||
|
||||
def remove_bom(text):
    r"""
    Remove a left-over byte-order mark (U+FEFF) from the start of the text.

    >>> print(remove_bom("\ufeffWhere do you want to go today?"))
    Where do you want to go today?
    """
    return text.lstrip('\ufeff')
|
||||
|
||||
|
||||
def remove_unsafe_private_use(text):
    r"""
    Remove all characters from Supplementary Private Use Area B -- the
    codepoints from U+100000 to U+10FFFF.

    Python 3.3's Unicode support isn't perfect: certain string operations
    involving these characters will crash some versions of it with a
    SystemError (http://bugs.python.org/issue18183). The safe option is to
    strip them with a regex that is known not to crash. It's sad to lose an
    entire plane of Unicode, but these characters are unassigned and never
    will be; if you receive one and don't know its purpose, its purpose is
    probably to crash your code.

    If you were using these for actual private use, this might be
    inconvenient. You can turn off this fixer, of course, but consider
    using Supplementary Private Use Area A instead.

    This fixer is off by default in Python 3.4 or later. (The bug is
    actually fixed in 3.3.3 and 2.7.6, but the default behavior shouldn't
    change based on a micro version upgrade of Python.)
    """
    return UNSAFE_PRIVATE_USE_RE.sub('', text)
|
||||
|
||||
|
||||
# Define a regex to match valid escape sequences in Python string literals.
ESCAPE_SEQUENCE_RE = re.compile(r'''
    ( \\U........      # 8-digit hex escapes
    | \\u....          # 4-digit hex escapes
    | \\x..            # 2-digit hex escapes
    | \\[0-7]{1,3}     # Octal escapes
    | \\N\{[^}]+\}     # Unicode characters by name
    | \\[\\'"abfnrtv]  # Single-character escapes
    )''', re.UNICODE | re.VERBOSE)


def decode_escapes(text):
    r"""
    Decode backslashed escape sequences, including \\x, \\u, and \\U
    character references, even in the presence of other Unicode.

    This is what Python's "string-escape" and "unicode-escape" codecs were
    meant to do, but in contrast, this actually works: it decodes the
    string exactly the way the Python interpreter decodes its string
    literals.

    The "unicode-escape" codec alone can't handle literal non-ASCII
    characters mixed with escapes (see
    http://stackoverflow.com/a/24519338/773754). So instead, this function
    searches for just the parts of a string that represent escape
    sequences and decodes each one separately, leaving the rest alone.
    All valid escape sequences are made of ASCII characters, which lets
    "unicode-escape" work correctly on them.

    This fix cannot be automatically applied by the `ftfy.fix_text`
    function, because escaped text is not necessarily a mistake, and there
    is no way to distinguish text that's supposed to be escaped from text
    that isn't.
    """
    def _decode_match(match):
        "Decode the single escape sequence found by ESCAPE_SEQUENCE_RE."
        return codecs.decode(match.group(0), 'unicode-escape')

    return ESCAPE_SEQUENCE_RE.sub(_decode_match, text)
|
39
lib/ftfy/streamtester/__init__.py
Normal file
39
lib/ftfy/streamtester/__init__.py
Normal file
@ -0,0 +1,39 @@
|
||||
"""
|
||||
This file defines a general method for evaluating ftfy using data that arrives
|
||||
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
from ftfy.fixes import fix_text_encoding
|
||||
from ftfy.chardata import possible_encoding
|
||||
|
||||
|
||||
class StreamTester:
    """
    Take in a sequence of texts, and show the ones that will be changed by
    ftfy. This will also periodically show updates, such as the proportion of
    texts that changed.
    """
    def __init__(self):
        # Number of texts that ftfy would change.
        self.num_fixed = 0
        # Total number of texts examined so far.
        self.count = 0

    def check_ftfy(self, text):
        """
        Given a single text input, check whether `ftfy.fix_text_encoding`
        would change it. If so, display the change.
        """
        self.count += 1
        # Pure-ASCII text can never be mojibake, so skip the expensive fix.
        if not possible_encoding(text, 'ascii'):
            fixed = fix_text_encoding(text)
            if text != fixed:
                # possibly filter common bots before printing
                print(u'\nText:\t{text}\nFixed:\t{fixed}\n'.format(
                    text=text, fixed=fixed
                ))
                self.num_fixed += 1

        # Print status updates once in a while
        if self.count % 100 == 0:
            # NOTE(review): print(..., flush=True) requires Python 3.3+ even
            # with print_function imported -- confirm Python 2 support isn't
            # expected for this module.
            print('.', end='', flush=True)
        if self.count % 10000 == 0:
            print('\n%d/%d fixed' % (self.num_fixed, self.count))
|
73
lib/ftfy/streamtester/oauth.py
Normal file
73
lib/ftfy/streamtester/oauth.py
Normal file
@ -0,0 +1,73 @@
|
||||
# coding: utf-8
|
||||
"""
|
||||
Do what is necessary to authenticate this tester as a Twitter "app", using
|
||||
somebody's Twitter account.
|
||||
"""
|
||||
from __future__ import unicode_literals
|
||||
import os
|
||||
|
||||
|
||||
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')
|
||||
|
||||
def get_auth():
|
||||
"""
|
||||
Twitter has some bizarre requirements about how to authorize an "app" to
|
||||
use its API.
|
||||
|
||||
The user of the app has to log in to get a secret token. That's fine. But
|
||||
the app itself has its own "consumer secret" token. The app has to know it,
|
||||
and the user of the app has to not know it.
|
||||
|
||||
This is, of course, impossible. It's equivalent to DRM. Your computer can't
|
||||
*really* make use of secret information while hiding the same information
|
||||
from you.
|
||||
|
||||
The threat appears to be that, if you have this super-sekrit token, you can
|
||||
impersonate the app while doing something different. Well, of course you
|
||||
can do that, because you *have the source code* and you can change it to do
|
||||
what you want. You still have to log in as a particular user who has a
|
||||
token that's actually secret, you know.
|
||||
|
||||
Even developers of closed-source applications that use the Twitter API are
|
||||
unsure what to do, for good reason. These "secrets" are not secret in any
|
||||
cryptographic sense. A bit of Googling shows that the secret tokens for
|
||||
every popular Twitter app are already posted on the Web.
|
||||
|
||||
Twitter wants us to pretend this string can be kept secret, and hide this
|
||||
secret behind a fig leaf like everybody else does. So that's what we've
|
||||
done.
|
||||
"""
|
||||
|
||||
from twitter.oauth import OAuth
|
||||
from twitter import oauth_dance, read_token_file
|
||||
|
||||
def unhide(secret):
|
||||
"""
|
||||
Do something mysterious and exactly as secure as every other Twitter
|
||||
app.
|
||||
"""
|
||||
return ''.join([chr(ord(c) - 0x2800) for c in secret])
|
||||
|
||||
fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
|
||||
consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'
|
||||
|
||||
if os.path.exists(AUTH_TOKEN_PATH):
|
||||
token, token_secret = read_token_file(AUTH_TOKEN_PATH)
|
||||
else:
|
||||
authdir = os.path.dirname(AUTH_TOKEN_PATH)
|
||||
if not os.path.exists(authdir):
|
||||
os.makedirs(authdir)
|
||||
token, token_secret = oauth_dance(
|
||||
app_name='ftfy-tester',
|
||||
consumer_key=consumer_key,
|
||||
consumer_secret=unhide(fig_leaf),
|
||||
token_filename=AUTH_TOKEN_PATH
|
||||
)
|
||||
|
||||
return OAuth(
|
||||
token=token,
|
||||
token_secret=token_secret,
|
||||
consumer_key=consumer_key,
|
||||
consumer_secret=unhide(fig_leaf)
|
||||
)
|
||||
|
89
lib/ftfy/streamtester/twitter_tester.py
Normal file
89
lib/ftfy/streamtester/twitter_tester.py
Normal file
@ -0,0 +1,89 @@
|
||||
"""
|
||||
Implements a StreamTester that runs over Twitter data. See the class
|
||||
docstring.
|
||||
|
||||
This module is written for Python 3 only. The __future__ imports you see here
|
||||
are just to let Python 2 scan the file without crashing with a SyntaxError.
|
||||
"""
|
||||
from __future__ import print_function, unicode_literals
|
||||
import os
|
||||
from collections import defaultdict
|
||||
from ftfy.streamtester import StreamTester
|
||||
|
||||
|
||||
class TwitterTester(StreamTester):
|
||||
"""
|
||||
This class uses the StreamTester code (defined in `__init__.py`) to
|
||||
evaluate ftfy's real-world performance, by feeding it live data from
|
||||
Twitter.
|
||||
|
||||
This is a semi-manual evaluation. It requires a human to look at the
|
||||
results and determine if they are good. The three possible cases we
|
||||
can see here are:
|
||||
|
||||
- Success: the process takes in mojibake and outputs correct text.
|
||||
- False positive: the process takes in correct text, and outputs
|
||||
mojibake. Every false positive should be considered a bug, and
|
||||
reported on GitHub if it isn't already.
|
||||
- Confusion: the process takes in mojibake and outputs different
|
||||
mojibake. Not a great outcome, but not as dire as a false
|
||||
positive.
|
||||
|
||||
This tester cannot reveal false negatives. So far, that can only be
|
||||
done by the unit tests.
|
||||
"""
|
||||
OUTPUT_DIR = './twitterlogs'
|
||||
|
||||
def __init__(self):
|
||||
self.lines_by_lang = defaultdict(list)
|
||||
super().__init__()
|
||||
|
||||
def save_files(self):
|
||||
"""
|
||||
When processing data from live Twitter, save it to log files so that
|
||||
it can be replayed later.
|
||||
"""
|
||||
if not os.path.exists(self.OUTPUT_DIR):
|
||||
os.makedirs(self.OUTPUT_DIR)
|
||||
for lang, lines in self.lines_by_lang.items():
|
||||
filename = 'tweets.{}.txt'.format(lang)
|
||||
fullname = os.path.join(self.OUTPUT_DIR, filename)
|
||||
langfile = open(fullname, 'a')
|
||||
for line in lines:
|
||||
print(line.replace('\n', ' '), file=langfile)
|
||||
langfile.close()
|
||||
self.lines_by_lang = defaultdict(list)
|
||||
|
||||
def run_sample(self):
|
||||
"""
|
||||
Listen to live data from Twitter, and pass on the fully-formed tweets
|
||||
to `check_ftfy`. This requires the `twitter` Python package as a
|
||||
dependency.
|
||||
"""
|
||||
from twitter import TwitterStream
|
||||
from ftfy.streamtester.oauth import get_auth
|
||||
twitter_stream = TwitterStream(auth=get_auth())
|
||||
iterator = twitter_stream.statuses.sample()
|
||||
for tweet in iterator:
|
||||
if 'text' in tweet:
|
||||
self.check_ftfy(tweet['text'])
|
||||
if 'user' in tweet:
|
||||
lang = tweet['user'].get('lang', 'NONE')
|
||||
self.lines_by_lang[lang].append(tweet['text'])
|
||||
if self.count % 10000 == 100:
|
||||
self.save_files()
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
When run from the command line, this script connects to the Twitter stream
|
||||
and runs the TwitterTester on it forever. Or at least until the stream
|
||||
drops.
|
||||
"""
|
||||
tester = TwitterTester()
|
||||
tester.run_sample()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@ -4,10 +4,6 @@ import requests
|
||||
def TraktCall(method, api, username=None, password=None, data={}):
|
||||
base_url = 'http://api.trakt.tv/'
|
||||
|
||||
# if the API isn't given then it failed
|
||||
if not api:
|
||||
return None
|
||||
|
||||
# if username and password given then encode password with sha1
|
||||
auth = None
|
||||
if username and password:
|
||||
|
@ -1,6 +1,6 @@
|
||||
# !/usr/bin/env python2
|
||||
#encoding:utf-8
|
||||
#author:dbr/Ben
|
||||
# encoding:utf-8
|
||||
# author:dbr/Ben
|
||||
#project:tvdb_api
|
||||
#repository:http://github.com/dbr/tvdb_api
|
||||
#license:unlicense (http://unlicense.org/)
|
||||
@ -39,7 +39,7 @@ from lib.dateutil.parser import parse
|
||||
from lib.cachecontrol import CacheControl, caches
|
||||
|
||||
from tvdb_ui import BaseUI, ConsoleUI
|
||||
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
|
||||
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound, tvdb_showincomplete,
|
||||
tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound)
|
||||
|
||||
|
||||
@ -598,9 +598,9 @@ class Tvdb:
|
||||
value = parse(value, fuzzy=True).date()
|
||||
value = value.strftime("%Y-%m-%d")
|
||||
|
||||
#if key == 'airs_time':
|
||||
# value = parse(value).time()
|
||||
# value = value.strftime("%I:%M %p")
|
||||
#if key == 'airs_time':
|
||||
# value = parse(value).time()
|
||||
# value = value.strftime("%I:%M %p")
|
||||
except:
|
||||
pass
|
||||
|
||||
@ -627,10 +627,9 @@ class Tvdb:
|
||||
"""Loads a URL using caching, returns an ElementTree of the source
|
||||
"""
|
||||
try:
|
||||
src = self._loadUrl(url, params=params, language=language).values()[0]
|
||||
return src
|
||||
except:
|
||||
return []
|
||||
return self._loadUrl(url, params=params, language=language).values()[0]
|
||||
except Exception, e:
|
||||
raise tvdb_error(e)
|
||||
|
||||
def _setItem(self, sid, seas, ep, attrib, value):
|
||||
"""Creates a new episode, creating Show(), Season() and
|
||||
@ -681,11 +680,7 @@ class Tvdb:
|
||||
log().debug("Searching for show %s" % series)
|
||||
self.config['params_getSeries']['seriesname'] = series
|
||||
|
||||
try:
|
||||
seriesFound = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
|
||||
return seriesFound
|
||||
except:
|
||||
return []
|
||||
return self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
|
||||
|
||||
def _getSeries(self, series):
|
||||
"""This searches TheTVDB.com for the series name,
|
||||
@ -694,13 +689,13 @@ class Tvdb:
|
||||
BaseUI is used to select the first result.
|
||||
"""
|
||||
allSeries = self.search(series)
|
||||
if not allSeries:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvdb_shownotfound("Show search returned zero results (cannot find show on TVDB)")
|
||||
|
||||
if not isinstance(allSeries, list):
|
||||
allSeries = [allSeries]
|
||||
|
||||
if len(allSeries) == 0:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvdb_shownotfound("Show-name search returned zero results (cannot find show on TVDB)")
|
||||
|
||||
if self.config['custom_ui'] is not None:
|
||||
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
||||
CustomUI = self.config['custom_ui']
|
||||
@ -735,37 +730,38 @@ class Tvdb:
|
||||
"""
|
||||
log().debug('Getting season banners for %s' % (sid))
|
||||
bannersEt = self._getetsrc(self.config['url_seriesBanner'] % (sid))
|
||||
|
||||
if not bannersEt:
|
||||
log().debug('Banners result returned zero')
|
||||
return
|
||||
|
||||
banners = {}
|
||||
for cur_banner in bannersEt['banner']:
|
||||
bid = cur_banner['id']
|
||||
btype = cur_banner['bannertype']
|
||||
btype2 = cur_banner['bannertype2']
|
||||
if btype is None or btype2 is None:
|
||||
continue
|
||||
if not btype in banners:
|
||||
banners[btype] = {}
|
||||
if not btype2 in banners[btype]:
|
||||
banners[btype][btype2] = {}
|
||||
if not bid in banners[btype][btype2]:
|
||||
banners[btype][btype2][bid] = {}
|
||||
|
||||
try:
|
||||
for cur_banner in bannersEt['banner']:
|
||||
bid = cur_banner['id']
|
||||
btype = cur_banner['bannertype']
|
||||
btype2 = cur_banner['bannertype2']
|
||||
if btype is None or btype2 is None:
|
||||
for k, v in cur_banner.items():
|
||||
if k is None or v is None:
|
||||
continue
|
||||
if not btype in banners:
|
||||
banners[btype] = {}
|
||||
if not btype2 in banners[btype]:
|
||||
banners[btype][btype2] = {}
|
||||
if not bid in banners[btype][btype2]:
|
||||
banners[btype][btype2][bid] = {}
|
||||
|
||||
for k, v in cur_banner.items():
|
||||
if k is None or v is None:
|
||||
continue
|
||||
k, v = k.lower(), v.lower()
|
||||
banners[btype][btype2][bid][k] = v
|
||||
|
||||
k, v = k.lower(), v.lower()
|
||||
banners[btype][btype2][bid][k] = v
|
||||
|
||||
for k, v in banners[btype][btype2][bid].items():
|
||||
if k.endswith("path"):
|
||||
new_key = "_%s" % (k)
|
||||
log().debug("Transforming %s to %s" % (k, new_key))
|
||||
new_url = self.config['url_artworkPrefix'] % (v)
|
||||
banners[btype][btype2][bid][new_key] = new_url
|
||||
except:
|
||||
pass
|
||||
for k, v in banners[btype][btype2][bid].items():
|
||||
if k.endswith("path"):
|
||||
new_key = "_%s" % (k)
|
||||
log().debug("Transforming %s to %s" % (k, new_key))
|
||||
new_url = self.config['url_artworkPrefix'] % (v)
|
||||
banners[btype][btype2][bid][new_key] = new_url
|
||||
|
||||
self._setShowData(sid, "_banners", banners)
|
||||
|
||||
@ -796,21 +792,22 @@ class Tvdb:
|
||||
log().debug("Getting actors for %s" % (sid))
|
||||
actorsEt = self._getetsrc(self.config['url_actorsInfo'] % (sid))
|
||||
|
||||
if not actorsEt:
|
||||
log().debug('Actors result returned zero')
|
||||
return
|
||||
|
||||
cur_actors = Actors()
|
||||
try:
|
||||
for curActorItem in actorsEt["actor"]:
|
||||
curActor = Actor()
|
||||
for k, v in curActorItem.items():
|
||||
k = k.lower()
|
||||
if v is not None:
|
||||
if k == "image":
|
||||
v = self.config['url_artworkPrefix'] % (v)
|
||||
else:
|
||||
v = self._cleanData(v)
|
||||
curActor[k] = v
|
||||
cur_actors.append(curActor)
|
||||
except:
|
||||
pass
|
||||
for curActorItem in actorsEt["actor"]:
|
||||
curActor = Actor()
|
||||
for k, v in curActorItem.items():
|
||||
k = k.lower()
|
||||
if v is not None:
|
||||
if k == "image":
|
||||
v = self.config['url_artworkPrefix'] % (v)
|
||||
else:
|
||||
v = self._cleanData(v)
|
||||
curActor[k] = v
|
||||
cur_actors.append(curActor)
|
||||
|
||||
self._setShowData(sid, '_actors', cur_actors)
|
||||
|
||||
@ -840,10 +837,11 @@ class Tvdb:
|
||||
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
|
||||
)
|
||||
|
||||
# check and make sure we have data to process and that it contains a series name
|
||||
if not len(seriesInfoEt) or (isinstance(seriesInfoEt, dict) and 'seriesname' not in seriesInfoEt['series']):
|
||||
return False
|
||||
if not seriesInfoEt:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvdb_shownotfound("Show search returned zero results (cannot find show on TVDB)")
|
||||
|
||||
# get series data
|
||||
for k, v in seriesInfoEt['series'].items():
|
||||
if v is not None:
|
||||
if k in ['banner', 'fanart', 'poster']:
|
||||
@ -853,6 +851,7 @@ class Tvdb:
|
||||
|
||||
self._setShowData(sid, k, v)
|
||||
|
||||
# get episode data
|
||||
if getEpInfo:
|
||||
# Parse banners
|
||||
if self.config['banners_enabled']:
|
||||
@ -864,15 +863,17 @@ class Tvdb:
|
||||
|
||||
# Parse episode data
|
||||
log().debug('Getting all episodes of %s' % (sid))
|
||||
|
||||
if self.config['useZip']:
|
||||
url = self.config['url_epInfo_zip'] % (sid, language)
|
||||
else:
|
||||
url = self.config['url_epInfo'] % (sid, language)
|
||||
|
||||
epsEt = self._getetsrc(url, language=language)
|
||||
|
||||
episodes = epsEt["episode"]
|
||||
if not epsEt:
|
||||
log().debug('Series results incomplete')
|
||||
raise tvdb_showincomplete("Show search returned incomplete results (cannot find complete show on TVDB)")
|
||||
|
||||
episodes = epsEt["episode"]
|
||||
if not isinstance(episodes, list):
|
||||
episodes = [episodes]
|
||||
|
||||
@ -945,14 +946,10 @@ class Tvdb:
|
||||
selected_series = [selected_series]
|
||||
[[self._setShowData(show['id'], k, v) for k, v in show.items()] for show in selected_series]
|
||||
return selected_series
|
||||
#test = self._getSeries(key)
|
||||
#sids = self._nameToSid(key)
|
||||
#return list(self.shows[sid] for sid in sids)
|
||||
|
||||
def __repr__(self):
|
||||
return str(self.shows)
|
||||
|
||||
|
||||
def main():
|
||||
"""Simple example of using tvdb_api - it just
|
||||
grabs an episode name interactively.
|
||||
|
@ -11,7 +11,7 @@
|
||||
__author__ = "dbr/Ben"
|
||||
__version__ = "1.9"
|
||||
|
||||
__all__ = ["tvdb_error", "tvdb_userabort", "tvdb_shownotfound",
|
||||
__all__ = ["tvdb_error", "tvdb_userabort", "tvdb_shownotfound", "tvdb_showincomplete",
|
||||
"tvdb_seasonnotfound", "tvdb_episodenotfound", "tvdb_attributenotfound"]
|
||||
|
||||
class tvdb_exception(Exception):
|
||||
@ -35,6 +35,11 @@ class tvdb_shownotfound(tvdb_exception):
|
||||
"""
|
||||
pass
|
||||
|
||||
class tvdb_showincomplete(tvdb_exception):
|
||||
"""Show found but incomplete on thetvdb.com (incomplete show)
|
||||
"""
|
||||
pass
|
||||
|
||||
class tvdb_seasonnotfound(tvdb_exception):
|
||||
"""Season cannot be found on thetvdb.com
|
||||
"""
|
||||
|
@ -1,7 +1,7 @@
|
||||
# !/usr/bin/env python2
|
||||
# encoding:utf-8
|
||||
#author:echel0n
|
||||
#project:tvrage_api
|
||||
# author:echel0n
|
||||
# project:tvrage_api
|
||||
#repository:http://github.com/echel0n/tvrage_api
|
||||
#license:unlicense (http://unlicense.org/)
|
||||
|
||||
@ -36,7 +36,7 @@ from lib.dateutil.parser import parse
|
||||
from cachecontrol import CacheControl, caches
|
||||
|
||||
from tvrage_ui import BaseUI
|
||||
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
|
||||
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, tvrage_showincomplete,
|
||||
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
|
||||
|
||||
|
||||
@ -465,10 +465,6 @@ class TVRage:
|
||||
elif key == 'firstaired':
|
||||
value = parse(value, fuzzy=True).date()
|
||||
value = value.strftime("%Y-%m-%d")
|
||||
|
||||
#if key == 'airs_time':
|
||||
# value = parse(value).time()
|
||||
# value = value.strftime("%I:%M %p")
|
||||
except:
|
||||
pass
|
||||
|
||||
@ -485,10 +481,9 @@ class TVRage:
|
||||
"""
|
||||
|
||||
try:
|
||||
src = self._loadUrl(url, params).values()[0]
|
||||
return src
|
||||
except:
|
||||
return []
|
||||
return self._loadUrl(url, params).values()[0]
|
||||
except Exception, e:
|
||||
raise tvrage_error(e)
|
||||
|
||||
def _setItem(self, sid, seas, ep, attrib, value):
|
||||
"""Creates a new episode, creating Show(), Season() and
|
||||
@ -518,9 +513,7 @@ class TVRage:
|
||||
"""
|
||||
if sid not in self.shows:
|
||||
self.shows[sid] = Show()
|
||||
|
||||
if not isinstance(key, dict or list) and not isinstance(value, dict or list):
|
||||
self.shows[sid].data[key] = value
|
||||
self.shows[sid].data[key] = value
|
||||
|
||||
def _cleanData(self, data):
|
||||
"""Cleans up strings returned by tvrage.com
|
||||
@ -544,11 +537,7 @@ class TVRage:
|
||||
log().debug("Searching for show %s" % series)
|
||||
self.config['params_getSeries']['show'] = series
|
||||
|
||||
try:
|
||||
seriesFound = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
|
||||
return seriesFound
|
||||
except:
|
||||
return []
|
||||
return self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
|
||||
|
||||
def _getSeries(self, series):
|
||||
"""This searches tvrage.com for the series name,
|
||||
@ -557,13 +546,13 @@ class TVRage:
|
||||
BaseUI is used to select the first result.
|
||||
"""
|
||||
allSeries = self.search(series)
|
||||
if not allSeries:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")
|
||||
|
||||
if not isinstance(allSeries, list):
|
||||
allSeries = [allSeries]
|
||||
|
||||
if len(allSeries) == 0:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvrage_shownotfound("Show-name search returned zero results (cannot find show on TVRAGE)")
|
||||
|
||||
if self.config['custom_ui'] is not None:
|
||||
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
|
||||
CustomUI = self.config['custom_ui']
|
||||
@ -588,50 +577,55 @@ class TVRage:
|
||||
self.config['params_seriesInfo']
|
||||
)
|
||||
|
||||
# check and make sure we have data to process and that it contains a series name
|
||||
if not len(seriesInfoEt) or (isinstance(seriesInfoEt, dict) and 'seriesname' not in seriesInfoEt):
|
||||
return False
|
||||
if not seriesInfoEt:
|
||||
log().debug('Series result returned zero')
|
||||
raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")
|
||||
|
||||
# get series data
|
||||
for k, v in seriesInfoEt.items():
|
||||
if v is not None:
|
||||
v = self._cleanData(v)
|
||||
|
||||
self._setShowData(sid, k, v)
|
||||
|
||||
# series search ends here
|
||||
# get episode data
|
||||
if getEpInfo:
|
||||
# Parse episode data
|
||||
log().debug('Getting all episodes of %s' % (sid))
|
||||
|
||||
self.config['params_epInfo']['sid'] = sid
|
||||
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
|
||||
|
||||
if not epsEt:
|
||||
log().debug('Series results incomplete')
|
||||
raise tvrage_showincomplete(
|
||||
"Show search returned incomplete results (cannot find complete show on TVRAGE)")
|
||||
|
||||
seasons = epsEt['episodelist']['season']
|
||||
if not isinstance(seasons, list):
|
||||
seasons = [seasons]
|
||||
|
||||
for season in seasons:
|
||||
seas_no = int(season['@no'])
|
||||
|
||||
episodes = season['episode']
|
||||
if not isinstance(episodes, list):
|
||||
episodes = [episodes]
|
||||
|
||||
for episode in episodes:
|
||||
ep_no = int(episode['episodenumber'])
|
||||
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
|
||||
|
||||
for k, v in episode.items():
|
||||
try:
|
||||
k = k.lower()
|
||||
if v is not None:
|
||||
if k == 'link':
|
||||
v = v.rsplit('/', 1)[1]
|
||||
k = 'id'
|
||||
k = k.lower()
|
||||
|
||||
if v is not None:
|
||||
if k == 'link':
|
||||
v = v.rsplit('/', 1)[1]
|
||||
k = 'id'
|
||||
else:
|
||||
v = self._cleanData(v)
|
||||
|
||||
self._setItem(sid, seas_no, ep_no, k, v)
|
||||
except:
|
||||
continue
|
||||
self._setItem(sid, seas_no, ep_no, k, v)
|
||||
|
||||
return True
|
||||
|
||||
def _nameToSid(self, name):
|
||||
@ -661,7 +655,7 @@ class TVRage:
|
||||
self._getShowData(key, True)
|
||||
return self.shows[key]
|
||||
|
||||
key = key.lower()
|
||||
key = str(key).lower()
|
||||
self.config['searchterm'] = key
|
||||
selected_series = self._getSeries(key)
|
||||
if isinstance(selected_series, dict):
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python2
|
||||
#encoding:utf-8
|
||||
# encoding:utf-8
|
||||
#author:echel0n
|
||||
#project:tvrage_api
|
||||
#repository:http://github.com/echel0n/tvrage_api
|
||||
@ -10,40 +10,53 @@
|
||||
__author__ = "echel0n"
|
||||
__version__ = "1.0"
|
||||
|
||||
__all__ = ["tvrage_error", "tvrage_userabort", "tvrage_shownotfound",
|
||||
"tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
|
||||
__all__ = ["tvrage_error", "tvrage_userabort", "tvrage_shownotfound", "tvrage_showincomplete",
|
||||
"tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
|
||||
|
||||
|
||||
class tvrage_exception(Exception):
|
||||
"""Any exception generated by tvrage_api
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_error(tvrage_exception):
|
||||
"""An error with tvrage.com (Cannot connect, for example)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_userabort(tvrage_exception):
|
||||
"""User aborted the interactive selection (via
|
||||
the q command, ^c etc)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_shownotfound(tvrage_exception):
|
||||
"""Show cannot be found on tvrage.com (non-existant show)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_showincomplete(tvrage_exception):
|
||||
"""Show found but incomplete on tvrage.com (incomplete show)
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_seasonnotfound(tvrage_exception):
|
||||
"""Season cannot be found on tvrage.com
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_episodenotfound(tvrage_exception):
|
||||
"""Episode cannot be found on tvrage.com
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class tvrage_attributenotfound(tvrage_exception):
|
||||
"""Raised if an episode does not have the requested
|
||||
attribute (such as a episode name)
|
||||
|
12
readme.md
12
readme.md
@ -1,9 +1,12 @@
|
||||
SickRage
|
||||
=====
|
||||
Video File Manager for TV Shows, It watches for new episodes of your favorite shows and when they are posted it does its magic.
|
||||
Video File Manager for TV Shows, It watches for new episodes of your favorite shows and when they are posted it does its magic.
|
||||
|
||||
## Important
|
||||
Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk
|
||||
## Branch Build Status
|
||||
- DEVELOP<br>
|
||||
[![Build Status](https://travis-ci.org/SiCKRAGETV/SickRage.svg?branch=develop)](https://travis-ci.org/SiCKRAGETV/SickRage)
|
||||
- MASTER<br>
|
||||
[![Build Status](https://travis-ci.org/SiCKRAGETV/SickRage.svg?branch=master)](https://travis-ci.org/SiCKRAGETV/SickRage)
|
||||
|
||||
## Features
|
||||
- XBMC library updates, poster/fanart downloads, and NFO/TBN generation
|
||||
@ -36,3 +39,6 @@ With your bug reports, specify:
|
||||
- What happened
|
||||
- What you expected
|
||||
- Link to logfile on http://www.pastebin.com
|
||||
|
||||
## Important
|
||||
Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk
|
||||
|
@ -45,8 +45,8 @@ from sickbeard import naming
|
||||
from sickbeard import dailysearcher
|
||||
from sickbeard import scene_numbering, scene_exceptions, name_cache
|
||||
from indexers.indexer_api import indexerApi
|
||||
from indexers.indexer_exceptions import indexer_shownotfound, indexer_exception, indexer_error, indexer_episodenotfound, \
|
||||
indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts
|
||||
from indexers.indexer_exceptions import indexer_shownotfound, indexer_showincomplete, indexer_exception, indexer_error, \
|
||||
indexer_episodenotfound, indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts
|
||||
from sickbeard.common import SD, SKIPPED, NAMING_REPEAT
|
||||
from sickbeard.databases import mainDB, cache_db, failed_db
|
||||
|
||||
@ -466,10 +466,12 @@ TRAKT_API_KEY = 'abd806c54516240c76e4ebc9c5ccf394'
|
||||
|
||||
__INITIALIZED__ = False
|
||||
|
||||
|
||||
def get_backlog_cycle_time():
|
||||
cycletime = DAILYSEARCH_FREQUENCY * 2 + 7
|
||||
return max([cycletime, 720])
|
||||
|
||||
|
||||
def initialize(consoleLogging=True):
|
||||
with INIT_LOCK:
|
||||
|
||||
@ -548,7 +550,8 @@ def initialize(consoleLogging=True):
|
||||
|
||||
# git_remote
|
||||
GIT_REMOTE = check_setting_str(CFG, 'General', 'git_remote', 'origin')
|
||||
GIT_REMOTE_URL = check_setting_str(CFG, 'General', 'git_remote_url', 'https://github.com/SiCKRAGETV/SickRage.git')
|
||||
GIT_REMOTE_URL = check_setting_str(CFG, 'General', 'git_remote_url',
|
||||
'https://github.com/SiCKRAGETV/SickRage.git')
|
||||
|
||||
# current commit hash
|
||||
CUR_COMMIT_HASH = check_setting_str(CFG, 'General', 'cur_commit_hash', '')
|
||||
@ -659,7 +662,8 @@ def initialize(consoleLogging=True):
|
||||
NAMING_ABD_PATTERN = check_setting_str(CFG, 'General', 'naming_abd_pattern', '%SN - %A.D - %EN')
|
||||
NAMING_CUSTOM_ABD = bool(check_setting_int(CFG, 'General', 'naming_custom_abd', 0))
|
||||
NAMING_SPORTS_PATTERN = check_setting_str(CFG, 'General', 'naming_sports_pattern', '%SN - %A-D - %EN')
|
||||
NAMING_ANIME_PATTERN = check_setting_str(CFG, 'General', 'naming_anime_pattern', 'Season %0S/%SN - S%0SE%0E - %EN')
|
||||
NAMING_ANIME_PATTERN = check_setting_str(CFG, 'General', 'naming_anime_pattern',
|
||||
'Season %0S/%SN - S%0SE%0E - %EN')
|
||||
NAMING_ANIME = check_setting_int(CFG, 'General', 'naming_anime', 3)
|
||||
NAMING_CUSTOM_SPORTS = bool(check_setting_int(CFG, 'General', 'naming_custom_sports', 0))
|
||||
NAMING_CUSTOM_ANIME = bool(check_setting_int(CFG, 'General', 'naming_custom_anime', 0))
|
||||
@ -1466,7 +1470,7 @@ def save_config():
|
||||
new_config['General']['keep_processed_dir'] = int(KEEP_PROCESSED_DIR)
|
||||
new_config['General']['process_method'] = PROCESS_METHOD
|
||||
new_config['General']['move_associated_files'] = int(MOVE_ASSOCIATED_FILES)
|
||||
new_config['General']['postpone_if_sync_files'] = int (POSTPONE_IF_SYNC_FILES)
|
||||
new_config['General']['postpone_if_sync_files'] = int(POSTPONE_IF_SYNC_FILES)
|
||||
new_config['General']['nfo_rename'] = int(NFO_RENAME)
|
||||
new_config['General']['process_automatically'] = int(PROCESS_AUTOMATICALLY)
|
||||
new_config['General']['unpack'] = int(UNPACK)
|
||||
|
@ -27,7 +27,7 @@ from sickbeard import encodingKludge as ek
|
||||
from sickbeard.name_parser.parser import NameParser, InvalidNameException, InvalidShowException
|
||||
|
||||
MIN_DB_VERSION = 9 # oldest db version we support migrating from
|
||||
MAX_DB_VERSION = 40
|
||||
MAX_DB_VERSION = 41
|
||||
|
||||
class MainSanityCheck(db.DBSanityCheck):
|
||||
def check(self):
|
||||
@ -915,3 +915,15 @@ class AddVersionToTvEpisodes(AddIndexerMapping):
|
||||
self.addColumn("history", "version", "NUMERIC", "-1")
|
||||
|
||||
self.incDBVersion()
|
||||
|
||||
class AddDefaultEpStatusToTvShows(AddVersionToTvEpisodes):
|
||||
def test(self):
|
||||
return self.checkDBVersion() >= 41
|
||||
|
||||
def execute(self):
|
||||
backupDatabase(41)
|
||||
|
||||
logger.log(u"Adding column default_ep_status to tv_shows")
|
||||
self.addColumn("tv_shows", "default_ep_status", "TEXT", "")
|
||||
|
||||
self.incDBVersion()
|
||||
|
@ -11,15 +11,18 @@
|
||||
# SickRage is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os
|
||||
|
||||
from sickbeard import logger
|
||||
import sickbeard
|
||||
from sickbeard import logger
|
||||
|
||||
import ftfy
|
||||
import ftfy.bad_codecs
|
||||
|
||||
# This module tries to deal with the apparently random behavior of python when dealing with unicode <-> utf-8
|
||||
# encodings. It tries to just use unicode, but if that fails then it tries forcing it to utf-8. Any functions
|
||||
@ -28,18 +31,19 @@ import sickbeard
|
||||
def fixStupidEncodings(x, silent=False):
|
||||
if type(x) == str:
|
||||
try:
|
||||
return x.decode(sickbeard.SYS_ENCODING)
|
||||
return str(ftfy.fix_text(u'' + x)).decode(sickbeard.SYS_ENCODING)
|
||||
except UnicodeDecodeError:
|
||||
logger.log(u"Unable to decode value: " + repr(x), logger.ERROR)
|
||||
return None
|
||||
return x
|
||||
except UnicodeEncodeError:
|
||||
logger.log(u"Unable to encode value: " + repr(x), logger.ERROR)
|
||||
return x
|
||||
elif type(x) == unicode:
|
||||
return x
|
||||
else:
|
||||
logger.log(
|
||||
u"Unknown value passed in, ignoring it: " + str(type(x)) + " (" + repr(x) + ":" + repr(type(x)) + ")",
|
||||
logger.DEBUG if silent else logger.ERROR)
|
||||
return None
|
||||
|
||||
|
||||
|
||||
def fixListEncodings(x):
|
||||
@ -49,21 +53,13 @@ def fixListEncodings(x):
|
||||
return filter(lambda x: x != None, map(fixStupidEncodings, x))
|
||||
|
||||
|
||||
def callPeopleStupid(x):
|
||||
try:
|
||||
return x.encode(sickbeard.SYS_ENCODING)
|
||||
except UnicodeEncodeError:
|
||||
logger.log(
|
||||
u"YOUR COMPUTER SUCKS! Your data is being corrupted by a bad locale/encoding setting. Report this error on the forums or IRC please: " + repr(
|
||||
x) + ", " + sickbeard.SYS_ENCODING, logger.ERROR)
|
||||
return x.encode(sickbeard.SYS_ENCODING, 'ignore')
|
||||
|
||||
|
||||
def ek(func, *args, **kwargs):
|
||||
if os.name == 'nt':
|
||||
result = func(*args, **kwargs)
|
||||
else:
|
||||
result = func(*[callPeopleStupid(x) if type(x) in (str, unicode) else x for x in args], **kwargs)
|
||||
result = func(
|
||||
*[fixStupidEncodings(x).encode(sickbeard.SYS_ENCODING) if type(x) in (str, unicode) else x for x in args],
|
||||
**kwargs)
|
||||
|
||||
if type(result) in (list, tuple):
|
||||
return fixListEncodings(result)
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
|
||||
def ex(e):
|
||||
"""
|
||||
Returns a unicode string from the exception text if it exists.
|
||||
|
@ -26,7 +26,7 @@ from sickbeard.exceptions import ex, EpisodeNotFoundException
|
||||
from sickbeard.history import dateFormat
|
||||
from sickbeard.common import Quality
|
||||
from sickbeard.common import WANTED, FAILED
|
||||
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
def prepareFailedName(release):
|
||||
"""Standardizes release name for failed DB"""
|
||||
@ -36,9 +36,7 @@ def prepareFailedName(release):
|
||||
fixed = fixed.rpartition(".")[0]
|
||||
|
||||
fixed = re.sub("[\.\-\+\ ]", "_", fixed)
|
||||
|
||||
if not isinstance(fixed, unicode):
|
||||
fixed = unicode(fixed, 'utf-8', 'replace')
|
||||
fixed = fixStupidEncodings(fixed)
|
||||
|
||||
return fixed
|
||||
|
||||
|
@ -213,8 +213,12 @@ def _remove_file_failed(file):
|
||||
def findCertainShow(showList, indexerid):
|
||||
|
||||
results = []
|
||||
if showList and indexerid:
|
||||
results = filter(lambda x: int(x.indexerid) == int(indexerid), showList)
|
||||
|
||||
if not isinstance(indexerid, list):
|
||||
indexerid = [indexerid]
|
||||
|
||||
if showList and len(indexerid):
|
||||
results = filter(lambda x: int(x.indexerid) in indexerid, showList)
|
||||
|
||||
if len(results) == 1:
|
||||
return results[0]
|
||||
|
@ -20,6 +20,7 @@ import db
|
||||
import datetime
|
||||
|
||||
from sickbeard.common import SNATCHED, SUBTITLED, FAILED, Quality
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
|
||||
dateFormat = "%Y%m%d%H%M%S"
|
||||
@ -27,9 +28,7 @@ dateFormat = "%Y%m%d%H%M%S"
|
||||
|
||||
def _logHistoryItem(action, showid, season, episode, quality, resource, provider, version=-1):
|
||||
logDate = datetime.datetime.today().strftime(dateFormat)
|
||||
|
||||
if not isinstance(resource, unicode):
|
||||
resource = unicode(resource, 'utf-8', 'replace')
|
||||
resource = fixStupidEncodings(resource)
|
||||
|
||||
myDB = db.DBConnection()
|
||||
myDB.action(
|
||||
|
@ -37,6 +37,7 @@ indexerConfig[INDEXER_TVRAGE] = {
|
||||
}
|
||||
|
||||
# TVDB Indexer Settings
|
||||
indexerConfig[INDEXER_TVDB]['trakt_id'] = 'tvdb_id'
|
||||
indexerConfig[INDEXER_TVDB]['xem_origin'] = 'tvdb'
|
||||
indexerConfig[INDEXER_TVDB]['icon'] = 'thetvdb16.png'
|
||||
indexerConfig[INDEXER_TVDB]['scene_url'] = 'http://midgetspy.github.io/sb_tvdb_scene_exceptions/exceptions.txt'
|
||||
@ -44,6 +45,7 @@ indexerConfig[INDEXER_TVDB]['show_url'] = 'http://thetvdb.com/?tab=series&id='
|
||||
indexerConfig[INDEXER_TVDB]['base_url'] = 'http://thetvdb.com/api/%(apikey)s/series/' % indexerConfig[INDEXER_TVDB]['api_params']
|
||||
|
||||
# TVRAGE Indexer Settings
|
||||
indexerConfig[INDEXER_TVRAGE]['trakt_id'] = 'tvrage_id'
|
||||
indexerConfig[INDEXER_TVRAGE]['xem_origin'] = 'rage'
|
||||
indexerConfig[INDEXER_TVRAGE]['icon'] = 'tvrage16.png'
|
||||
indexerConfig[INDEXER_TVRAGE]['scene_url'] = 'https://raw.githubusercontent.com/echel0n/sb_tvrage_scene_exceptions/master/exceptions.txt'
|
||||
|
@ -1,5 +1,5 @@
|
||||
#!/usr/bin/env python2
|
||||
#encoding:utf-8
|
||||
# encoding:utf-8
|
||||
#author:echel0n
|
||||
#project:indexer_api
|
||||
#repository:http://github.com/echel0n/Sick-Beard
|
||||
@ -12,19 +12,20 @@ __version__ = "1.0"
|
||||
|
||||
from lib.tvrage_api.tvrage_exceptions import \
|
||||
tvrage_exception, tvrage_attributenotfound, tvrage_episodenotfound, tvrage_error, \
|
||||
tvrage_seasonnotfound, tvrage_shownotfound, tvrage_userabort
|
||||
tvrage_seasonnotfound, tvrage_shownotfound, tvrage_showincomplete, tvrage_userabort
|
||||
|
||||
from lib.tvdb_api.tvdb_exceptions import \
|
||||
tvdb_exception, tvdb_attributenotfound, tvdb_episodenotfound, tvdb_error, \
|
||||
tvdb_seasonnotfound, tvdb_shownotfound, tvdb_userabort
|
||||
tvdb_seasonnotfound, tvdb_shownotfound, tvdb_showincomplete, tvdb_userabort
|
||||
|
||||
indexerExcepts = ["indexer_exception", "indexer_error", "indexer_userabort", "indexer_shownotfound",
|
||||
"indexer_showincomplete",
|
||||
"indexer_seasonnotfound", "indexer_episodenotfound", "indexer_attributenotfound"]
|
||||
|
||||
tvdbExcepts = ["tvdb_exception", "tvdb_error", "tvdb_userabort", "tvdb_shownotfound",
|
||||
tvdbExcepts = ["tvdb_exception", "tvdb_error", "tvdb_userabort", "tvdb_shownotfound", "tvdb_showincomplete",
|
||||
"tvdb_seasonnotfound", "tvdb_episodenotfound", "tvdb_attributenotfound"]
|
||||
|
||||
tvrageExcepts = ["tvdb_exception", "tvrage_error", "tvrage_userabort", "tvrage_shownotfound",
|
||||
tvrageExcepts = ["tvdb_exception", "tvrage_error", "tvrage_userabort", "tvrage_shownotfound", "tvrage_showincomplete",
|
||||
"tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
|
||||
|
||||
# link API exceptions to our exception handler
|
||||
@ -35,3 +36,4 @@ indexer_attributenotfound = tvdb_attributenotfound, tvrage_attributenotfound
|
||||
indexer_episodenotfound = tvdb_episodenotfound, tvrage_episodenotfound
|
||||
indexer_seasonnotfound = tvdb_seasonnotfound, tvrage_seasonnotfound
|
||||
indexer_shownotfound = tvdb_shownotfound, tvrage_shownotfound
|
||||
indexer_showincomplete = tvdb_showincomplete, tvrage_showincomplete
|
@ -26,7 +26,7 @@ import os.path
|
||||
import regexes
|
||||
import sickbeard
|
||||
|
||||
from sickbeard import logger, helpers, scene_numbering, common, exceptions, scene_exceptions, encodingKludge as ek, db
|
||||
from sickbeard import logger, helpers, scene_numbering, common, exceptions as ex, scene_exceptions, encodingKludge as ek, db
|
||||
from dateutil import parser
|
||||
|
||||
|
||||
|
@ -29,6 +29,7 @@ import sickbeard
|
||||
|
||||
from sickbeard import logger, common
|
||||
from sickbeard import db
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
from sickbeard.exceptions import ex
|
||||
|
||||
|
||||
@ -50,7 +51,7 @@ class EmailNotifier:
|
||||
ep_name: The name of the episode that was snatched
|
||||
title: The title of the notification (optional)
|
||||
"""
|
||||
ep_name = ep_name.encode('utf-8', 'replace')
|
||||
ep_name = fixStupidEncodings(ep_name)
|
||||
|
||||
if sickbeard.EMAIL_NOTIFY_ONSNATCH:
|
||||
show = self._parseEp(ep_name)
|
||||
@ -85,7 +86,7 @@ class EmailNotifier:
|
||||
ep_name: The name of the episode that was downloaded
|
||||
title: The title of the notification (optional)
|
||||
"""
|
||||
ep_name = ep_name.encode('utf-8', 'replace')
|
||||
ep_name = fixStupidEncodings(ep_name)
|
||||
|
||||
if sickbeard.EMAIL_NOTIFY_ONDOWNLOAD:
|
||||
show = self._parseEp(ep_name)
|
||||
@ -120,7 +121,7 @@ class EmailNotifier:
|
||||
ep_name: The name of the episode that was downloaded
|
||||
lang: Subtitle language wanted
|
||||
"""
|
||||
ep_name = ep_name.encode('utf-8', 'replace')
|
||||
ep_name = fixStupidEncodings(ep_name)
|
||||
|
||||
if sickbeard.EMAIL_NOTIFY_ONSUBTITLEDOWNLOAD:
|
||||
show = self._parseEp(ep_name)
|
||||
@ -197,7 +198,7 @@ class EmailNotifier:
|
||||
return False
|
||||
|
||||
def _parseEp(self, ep_name):
|
||||
ep_name = ep_name.encode('utf-8', 'replace')
|
||||
ep_name = fixStupidEncodings(ep_name)
|
||||
|
||||
sep = " - "
|
||||
titles = ep_name.split(sep)
|
||||
|
@ -45,11 +45,11 @@ class TraktNotifier:
|
||||
ep_obj: The TVEpisode object to add to trakt
|
||||
"""
|
||||
|
||||
if sickbeard.USE_TRAKT:
|
||||
trakt_id = sickbeard.indexerApi(ep_obj.show.indexer).config['trakt_id']
|
||||
|
||||
if sickbeard.USE_TRAKT:
|
||||
# URL parameters
|
||||
data = {
|
||||
'tvdb_id': ep_obj.show.indexerid,
|
||||
'title': ep_obj.show.name,
|
||||
'year': ep_obj.show.startyear,
|
||||
'episodes': [{
|
||||
@ -58,48 +58,53 @@ class TraktNotifier:
|
||||
}]
|
||||
}
|
||||
|
||||
if data is not None:
|
||||
TraktCall("show/episode/library/%API%", self._api(), self._username(), self._password(), data)
|
||||
if sickbeard.TRAKT_REMOVE_WATCHLIST:
|
||||
TraktCall("show/episode/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
|
||||
if trakt_id == 'tvdb_id':
|
||||
data[trakt_id] = ep_obj.show.indexerid
|
||||
|
||||
if sickbeard.TRAKT_REMOVE_SERIESLIST:
|
||||
data_show = None
|
||||
# update library
|
||||
TraktCall("show/episode/library/%API%", self._api(), self._username(), self._password(), data)
|
||||
|
||||
# URL parameters, should not need to recheck data (done above)
|
||||
data = {
|
||||
'shows': [
|
||||
{
|
||||
'tvdb_id': ep_obj.show.indexerid,
|
||||
'title': ep_obj.show.name,
|
||||
'year': ep_obj.show.startyear
|
||||
}
|
||||
]
|
||||
}
|
||||
# remove from watchlist
|
||||
if sickbeard.TRAKT_REMOVE_WATCHLIST:
|
||||
TraktCall("show/episode/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
|
||||
|
||||
TraktCall("show/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
|
||||
if sickbeard.TRAKT_REMOVE_SERIESLIST:
|
||||
data = {
|
||||
'shows': [
|
||||
{
|
||||
'title': ep_obj.show.name,
|
||||
'year': ep_obj.show.startyear
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Remove all episodes from episode watchlist
|
||||
# Start by getting all episodes in the watchlist
|
||||
watchlist = TraktCall("user/watchlist/episodes.json/%API%/" + sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD)
|
||||
if trakt_id == 'tvdb_id':
|
||||
data['shows'][trakt_id] = ep_obj.show.indexerid
|
||||
|
||||
# Convert watchlist to only contain current show
|
||||
TraktCall("show/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
|
||||
|
||||
# Remove all episodes from episode watchlist
|
||||
# Start by getting all episodes in the watchlist
|
||||
watchlist = TraktCall("user/watchlist/episodes.json/%API%/" + sickbeard.TRAKT_USERNAME,
|
||||
sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD)
|
||||
|
||||
# Convert watchlist to only contain current show
|
||||
if watchlist:
|
||||
for show in watchlist:
|
||||
# Check if tvdb_id exists
|
||||
if 'tvdb_id' in show:
|
||||
if unicode(data['shows'][0]['tvdb_id']) == show['tvdb_id']:
|
||||
data_show = {
|
||||
'title': show['title'],
|
||||
'tvdb_id': show['tvdb_id'],
|
||||
'episodes': []
|
||||
}
|
||||
if show[trakt_id] == ep_obj.show.indexerid:
|
||||
data_show = {
|
||||
'title': show['title'],
|
||||
trakt_id: show[trakt_id],
|
||||
'episodes': []
|
||||
}
|
||||
|
||||
# Add series and episode (number) to the arry
|
||||
for episodes in show['episodes']:
|
||||
ep = {'season': episodes['season'], 'episode': episodes['number']}
|
||||
data_show['episodes'].append(ep)
|
||||
if data_show is not None:
|
||||
TraktCall("show/episode/unwatchlist/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD, data_show)
|
||||
# Add series and episode (number) to the array
|
||||
for episodes in show['episodes']:
|
||||
ep = {'season': episodes['season'], 'episode': episodes['number']}
|
||||
data_show['episodes'].append(ep)
|
||||
|
||||
TraktCall("show/episode/unwatchlist/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME,
|
||||
sickbeard.TRAKT_PASSWORD, data_show)
|
||||
|
||||
def test_notify(self, api, username, password):
|
||||
"""
|
||||
|
@ -23,13 +23,14 @@ import xml.etree.cElementTree as etree
|
||||
import xml.etree
|
||||
import re
|
||||
|
||||
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
|
||||
|
||||
from sickbeard import logger, classes, helpers
|
||||
from sickbeard.common import Quality
|
||||
from sickbeard import encodingKludge as ek
|
||||
from sickbeard.exceptions import ex
|
||||
|
||||
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
|
||||
def getSeasonNZBs(name, urlData, season):
|
||||
try:
|
||||
@ -84,7 +85,7 @@ def createNZBString(fileElements, xmlns):
|
||||
for curFile in fileElements:
|
||||
rootElement.append(stripNS(curFile, xmlns))
|
||||
|
||||
return xml.etree.ElementTree.tostring(rootElement, 'utf-8', 'replace')
|
||||
return xml.etree.ElementTree.tostring(fixStupidEncodings(rootElement))
|
||||
|
||||
|
||||
def saveNZB(nzbName, nzbString):
|
||||
|
@ -20,13 +20,14 @@ import re
|
||||
import time
|
||||
import threading
|
||||
import datetime
|
||||
import sickbeard
|
||||
|
||||
from lib import adba
|
||||
import sickbeard
|
||||
import adba
|
||||
from sickbeard import helpers
|
||||
from sickbeard import name_cache
|
||||
from sickbeard import logger
|
||||
from sickbeard import db
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
exception_dict = {}
|
||||
anidb_exception_dict = {}
|
||||
@ -233,8 +234,7 @@ def retrieve_exceptions():
|
||||
# if this exception isn't already in the DB then add it
|
||||
if cur_exception not in existing_exceptions:
|
||||
|
||||
if not isinstance(cur_exception, unicode):
|
||||
cur_exception = unicode(cur_exception, 'utf-8', 'replace')
|
||||
cur_exception = fixStupidEncodings(cur_exception)
|
||||
|
||||
myDB.action("INSERT INTO scene_exceptions (indexer_id, show_name, season) VALUES (?,?,?)",
|
||||
[cur_indexer_id, cur_exception, curSeason])
|
||||
@ -267,9 +267,7 @@ def update_scene_exceptions(indexer_id, scene_exceptions, season=-1):
|
||||
exceptionsCache[indexer_id][season] = scene_exceptions
|
||||
|
||||
for cur_exception in scene_exceptions:
|
||||
|
||||
if not isinstance(cur_exception, unicode):
|
||||
cur_exception = unicode(cur_exception, 'utf-8', 'replace')
|
||||
cur_exception = fixStupidEncodings(cur_exception)
|
||||
|
||||
myDB.action("INSERT INTO scene_exceptions (indexer_id, show_name, season) VALUES (?,?,?)",
|
||||
[indexer_id, cur_exception, season])
|
||||
|
@ -293,6 +293,10 @@ class QueueItemAdd(ShowQueueItem):
|
||||
self.show.scene = self.scene if self.scene != None else sickbeard.SCENE_DEFAULT
|
||||
self.show.paused = self.paused if self.paused != None else False
|
||||
|
||||
# set up default new/missing episode status
|
||||
self.show.default_ep_status = self.default_status
|
||||
logger.log(u"Setting all episodes to the specified default status: " + str(self.show.default_ep_status))
|
||||
|
||||
# be smartish about this
|
||||
if self.show.genre and "talk show" in self.show.genre.lower():
|
||||
self.show.air_by_date = 1
|
||||
@ -364,17 +368,10 @@ class QueueItemAdd(ShowQueueItem):
|
||||
logger.log(u"Error searching dir for episodes: " + ex(e), logger.ERROR)
|
||||
logger.log(traceback.format_exc(), logger.DEBUG)
|
||||
|
||||
# if they gave a custom status then change all the eps to it
|
||||
if self.default_status != SKIPPED:
|
||||
logger.log(u"Setting all episodes to the specified default status: " + str(self.default_status))
|
||||
myDB = db.DBConnection()
|
||||
myDB.action("UPDATE tv_episodes SET status = ? WHERE status = ? AND showid = ? AND season != 0",
|
||||
[self.default_status, SKIPPED, self.show.indexerid])
|
||||
|
||||
# if they started with WANTED eps then run the backlog
|
||||
if self.default_status == WANTED:
|
||||
# if they set default ep status to WANTED then run the backlog to search for episodes
|
||||
if self.show.default_ep_status == WANTED:
|
||||
logger.log(u"Launching backlog for this show since its episodes are WANTED")
|
||||
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show]) #@UndefinedVariable
|
||||
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show])
|
||||
|
||||
self.show.writeMetadata()
|
||||
self.show.updateMetadata()
|
||||
@ -539,17 +536,21 @@ class QueueItemUpdate(ShowQueueItem):
|
||||
self.show.indexer).name + ", the show info will not be refreshed: " + ex(e), logger.ERROR)
|
||||
IndexerEpList = None
|
||||
|
||||
foundMissingEps = False
|
||||
if IndexerEpList == None:
|
||||
logger.log(u"No data returned from " + sickbeard.indexerApi(
|
||||
self.show.indexer).name + ", unable to update this show", logger.ERROR)
|
||||
else:
|
||||
# for each ep we found on TVDB delete it from the DB list
|
||||
# for each ep we found on the Indexer delete it from the DB list
|
||||
for curSeason in IndexerEpList:
|
||||
for curEpisode in IndexerEpList[curSeason]:
|
||||
logger.log(u"Removing " + str(curSeason) + "x" + str(curEpisode) + " from the DB list",
|
||||
logger.DEBUG)
|
||||
if curSeason in DBEpList and curEpisode in DBEpList[curSeason]:
|
||||
del DBEpList[curSeason][curEpisode]
|
||||
else:
|
||||
# found missing episodes
|
||||
foundMissingEps = True
|
||||
|
||||
# for the remaining episodes in the DB list just delete them from the DB
|
||||
for curSeason in DBEpList:
|
||||
@ -562,8 +563,12 @@ class QueueItemUpdate(ShowQueueItem):
|
||||
except exceptions.EpisodeDeletedException:
|
||||
pass
|
||||
|
||||
sickbeard.showQueueScheduler.action.refreshShow(self.show, self.force)
|
||||
# if they set default ep status to WANTED then run the backlog
|
||||
if foundMissingEps and self.show.default_ep_status == WANTED:
|
||||
logger.log(u"Launching backlog for this show since we found missing episodes")
|
||||
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show])
|
||||
|
||||
sickbeard.showQueueScheduler.action.refreshShow(self.show, self.force)
|
||||
|
||||
class QueueItemForceUpdate(QueueItemUpdate):
|
||||
def __init__(self, show=None):
|
||||
|
@ -97,7 +97,7 @@ class TVShow(object):
|
||||
self._scene = 0
|
||||
self._rls_ignore_words = ""
|
||||
self._rls_require_words = ""
|
||||
|
||||
self._default_ep_status = ""
|
||||
self.dirty = True
|
||||
|
||||
self._location = ""
|
||||
@ -139,6 +139,7 @@ class TVShow(object):
|
||||
scene = property(lambda self: self._scene, dirty_setter("_scene"))
|
||||
rls_ignore_words = property(lambda self: self._rls_ignore_words, dirty_setter("_rls_ignore_words"))
|
||||
rls_require_words = property(lambda self: self._rls_require_words, dirty_setter("_rls_require_words"))
|
||||
default_ep_status = property(lambda self: self._default_ep_status, dirty_setter("_default_ep_status"))
|
||||
|
||||
@property
|
||||
def is_anime(self):
|
||||
@ -577,7 +578,6 @@ class TVShow(object):
|
||||
myDB = db.DBConnection()
|
||||
myDB.mass_action(sql_l)
|
||||
|
||||
|
||||
# Done updating save last update date
|
||||
self.last_update_indexer = datetime.date.today().toordinal()
|
||||
self.saveToDB()
|
||||
@ -770,9 +770,11 @@ class TVShow(object):
|
||||
self.status = sqlResults[0]["status"]
|
||||
if not self.status:
|
||||
self.status = ""
|
||||
|
||||
self.airs = sqlResults[0]["airs"]
|
||||
if not self.airs:
|
||||
self.airs = ""
|
||||
|
||||
self.startyear = sqlResults[0]["startyear"]
|
||||
if not self.startyear:
|
||||
self.startyear = 0
|
||||
@ -825,6 +827,10 @@ class TVShow(object):
|
||||
self.rls_ignore_words = sqlResults[0]["rls_ignore_words"]
|
||||
self.rls_require_words = sqlResults[0]["rls_require_words"]
|
||||
|
||||
self.default_ep_status = sqlResults[0]["default_ep_status"]
|
||||
if not self.default_ep_status:
|
||||
self.default_ep_status = ""
|
||||
|
||||
if not self.imdbid:
|
||||
self.imdbid = sqlResults[0]["imdb_id"]
|
||||
|
||||
@ -1156,7 +1162,8 @@ class TVShow(object):
|
||||
"imdb_id": self.imdbid,
|
||||
"last_update_indexer": self.last_update_indexer,
|
||||
"rls_ignore_words": self.rls_ignore_words,
|
||||
"rls_require_words": self.rls_require_words
|
||||
"rls_require_words": self.rls_require_words,
|
||||
"default_ep_status": self.default_ep_status
|
||||
}
|
||||
|
||||
myDB = db.DBConnection()
|
||||
@ -1741,9 +1748,9 @@ class TVEpisode(object):
|
||||
if self.status == UNAIRED:
|
||||
self.status = WANTED
|
||||
|
||||
# if we somehow are still UNKNOWN then just skip it
|
||||
# if we somehow are still UNKNOWN then just use the shows defined default status
|
||||
elif self.status == UNKNOWN:
|
||||
self.status = SKIPPED
|
||||
self.status = self.show.default_ep_status
|
||||
|
||||
else:
|
||||
logger.log(
|
||||
@ -2024,7 +2031,6 @@ class TVEpisode(object):
|
||||
'%SN S%0SE%E',
|
||||
'%SN S%SE%E',
|
||||
'%SN S%0SE%0E'
|
||||
|
||||
]
|
||||
|
||||
strings = []
|
||||
@ -2062,7 +2068,6 @@ class TVEpisode(object):
|
||||
|
||||
if len(self.relatedEps) == 0:
|
||||
goodName = self.name
|
||||
|
||||
else:
|
||||
goodName = ''
|
||||
|
||||
@ -2494,7 +2499,7 @@ class TVEpisode(object):
|
||||
if airs:
|
||||
hr = int(airs.group(1))
|
||||
hr = (12 + hr, hr)[None is airs.group(3)]
|
||||
hr = (hr, hr - 12)[0 == hr % 12]
|
||||
hr = (hr, hr - 12)[0 == hr % 12 and 0 != hr]
|
||||
min = int((airs.group(2), min)[None is airs.group(2)])
|
||||
airtime = datetime.time(hr, min)
|
||||
|
||||
|
@ -20,19 +20,20 @@ from __future__ import with_statement
|
||||
|
||||
import time
|
||||
import datetime
|
||||
import itertools
|
||||
|
||||
import sickbeard
|
||||
|
||||
from sickbeard import db
|
||||
from sickbeard import logger
|
||||
from sickbeard.common import Quality
|
||||
|
||||
from sickbeard import helpers, show_name_helpers
|
||||
from sickbeard.exceptions import MultipleShowObjectsException
|
||||
from sickbeard.exceptions import AuthException
|
||||
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
|
||||
from sickbeard.rssfeeds import RSSFeeds
|
||||
from sickbeard import clients
|
||||
import itertools
|
||||
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
|
||||
from sickbeard.encodingKludge import fixStupidEncodings
|
||||
|
||||
class CacheDBConnection(db.DBConnection):
|
||||
def __init__(self, providerName):
|
||||
@ -262,8 +263,7 @@ class TVCache():
|
||||
# get quality of release
|
||||
quality = parse_result.quality
|
||||
|
||||
if not isinstance(name, unicode):
|
||||
name = unicode(name, 'utf-8', 'replace')
|
||||
name = fixStupidEncodings(name)
|
||||
|
||||
# get release group
|
||||
release_group = parse_result.release_group
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -64,8 +64,8 @@ from browser import WebFileBrowser
|
||||
from lib.dateutil import tz
|
||||
from lib.unrar2 import RarFile
|
||||
|
||||
from lib import subliminal
|
||||
from trakt import TraktCall
|
||||
from lib import adba, subliminal
|
||||
from lib.trakt import TraktCall
|
||||
|
||||
try:
|
||||
import json
|
||||
@ -77,13 +77,13 @@ try:
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
from lib import adba
|
||||
|
||||
from Cheetah.Template import Template
|
||||
from tornado.web import RequestHandler, HTTPError, asynchronous
|
||||
|
||||
from bug_tracker import BugTracker
|
||||
|
||||
|
||||
def authenticated(handler_class):
|
||||
def wrap_execute(handler_execute):
|
||||
def basicauth(handler, transforms, *args, **kwargs):
|
||||
@ -101,7 +101,7 @@ def authenticated(handler_class):
|
||||
'/api/builder' not in handler.request.uri):
|
||||
return True
|
||||
elif (handler.request.uri.startswith(sickbeard.WEB_ROOT + '/calendar') and
|
||||
sickbeard.CALENDAR_UNPROTECTED):
|
||||
sickbeard.CALENDAR_UNPROTECTED):
|
||||
return True
|
||||
|
||||
auth_hdr = handler.request.headers.get('Authorization')
|
||||
@ -394,8 +394,9 @@ class MainHandler(RequestHandler):
|
||||
|
||||
# add localtime to the dict
|
||||
for index, item in enumerate(sql_results):
|
||||
sql_results[index]['localtime'] = sbdatetime.sbdatetime.convert_to_setting(network_timezones.parse_date_time(item['airdate'],
|
||||
item['airs'], item['network']))
|
||||
sql_results[index]['localtime'] = sbdatetime.sbdatetime.convert_to_setting(
|
||||
network_timezones.parse_date_time(item['airdate'],
|
||||
item['airs'], item['network']))
|
||||
|
||||
sql_results.sort(sorts[sickbeard.COMING_EPS_SORT])
|
||||
|
||||
@ -424,7 +425,7 @@ class MainHandler(RequestHandler):
|
||||
t.sql_results = sql_results
|
||||
|
||||
# Allow local overriding of layout parameter
|
||||
if layout and layout in ('poster', 'banner', 'list','calendar'):
|
||||
if layout and layout in ('poster', 'banner', 'list', 'calendar'):
|
||||
t.layout = layout
|
||||
else:
|
||||
t.layout = sickbeard.COMING_EPS_LAYOUT
|
||||
@ -487,7 +488,8 @@ class MainHandler(RequestHandler):
|
||||
ical = ical + 'DESCRIPTION:' + show['airs'] + ' on ' + show['network'] + '\\n\\n' + \
|
||||
episode['description'].splitlines()[0] + '\r\n'
|
||||
else:
|
||||
ical = ical + 'DESCRIPTION:' + (show['airs'] or '(Unknown airs)') + ' on ' + (show['network'] or 'Unknown network') + '\r\n'
|
||||
ical = ical + 'DESCRIPTION:' + (show['airs'] or '(Unknown airs)') + ' on ' + (
|
||||
show['network'] or 'Unknown network') + '\r\n'
|
||||
|
||||
ical = ical + 'END:VEVENT\r\n'
|
||||
|
||||
@ -1074,7 +1076,8 @@ class Manage(MainHandler):
|
||||
return _munge(t)
|
||||
|
||||
|
||||
def massEditSubmit(self, archive_firstmatch=None, paused=None, anime=None, sports=None, scene=None, flatten_folders=None,
|
||||
def massEditSubmit(self, archive_firstmatch=None, paused=None, anime=None, sports=None, scene=None,
|
||||
flatten_folders=None,
|
||||
quality_preset=False,
|
||||
subtitles=None, air_by_date=None, anyQualities=[], bestQualities=[], toEdit=None, *args,
|
||||
**kwargs):
|
||||
@ -1179,7 +1182,8 @@ class Manage(MainHandler):
|
||||
redirect("/manage/")
|
||||
|
||||
|
||||
def massUpdate(self, toUpdate=None, toRefresh=None, toRename=None, toDelete=None, toRemove=None, toMetadata=None, toSubtitle=None):
|
||||
def massUpdate(self, toUpdate=None, toRefresh=None, toRename=None, toDelete=None, toRemove=None, toMetadata=None,
|
||||
toSubtitle=None):
|
||||
|
||||
if toUpdate is not None:
|
||||
toUpdate = toUpdate.split('|')
|
||||
@ -1515,11 +1519,13 @@ class ConfigGeneral(MainHandler):
|
||||
|
||||
|
||||
def saveGeneral(self, log_dir=None, web_port=None, web_log=None, encryption_version=None, web_ipv6=None,
|
||||
update_shows_on_start=None, trash_remove_show=None, trash_rotate_logs=None, update_frequency=None, launch_browser=None, web_username=None,
|
||||
update_shows_on_start=None, trash_remove_show=None, trash_rotate_logs=None, update_frequency=None,
|
||||
launch_browser=None, web_username=None,
|
||||
use_api=None, api_key=None, indexer_default=None, timezone_display=None, cpu_preset=None,
|
||||
web_password=None, version_notify=None, enable_https=None, https_cert=None, https_key=None,
|
||||
handle_reverse_proxy=None, sort_article=None, auto_update=None, notify_on_update=None,
|
||||
proxy_setting=None, proxy_indexers=None, anon_redirect=None, git_path=None, git_remote=None, calendar_unprotected=None,
|
||||
proxy_setting=None, proxy_indexers=None, anon_redirect=None, git_path=None, git_remote=None,
|
||||
calendar_unprotected=None,
|
||||
fuzzy_dating=None, trim_zero=None, date_preset=None, date_preset_na=None, time_preset=None,
|
||||
indexer_timeout=None, play_videos=None, rootDir=None, theme_name=None):
|
||||
|
||||
@ -1671,7 +1677,8 @@ class ConfigSearch(MainHandler):
|
||||
backlog_startup=None, dailysearch_startup=None,
|
||||
torrent_dir=None, torrent_username=None, torrent_password=None, torrent_host=None,
|
||||
torrent_label=None, torrent_path=None, torrent_verify_cert=None,
|
||||
torrent_seed_time=None, torrent_paused=None, torrent_high_bandwidth=None, ignore_words=None, require_words=None):
|
||||
torrent_seed_time=None, torrent_paused=None, torrent_high_bandwidth=None, ignore_words=None,
|
||||
require_words=None):
|
||||
|
||||
results = []
|
||||
|
||||
@ -1753,7 +1760,8 @@ class ConfigPostProcessing(MainHandler):
|
||||
wdtv_data=None, tivo_data=None, mede8er_data=None,
|
||||
keep_processed_dir=None, process_method=None, process_automatically=None,
|
||||
rename_episodes=None, airdate_episodes=None, unpack=None,
|
||||
move_associated_files=None, postpone_if_sync_files=None, nfo_rename=None, tv_download_dir=None, naming_custom_abd=None,
|
||||
move_associated_files=None, postpone_if_sync_files=None, nfo_rename=None,
|
||||
tv_download_dir=None, naming_custom_abd=None,
|
||||
naming_anime=None,
|
||||
naming_abd_pattern=None, naming_strip_year=None, use_failed_downloads=None,
|
||||
delete_failed=None, extra_scripts=None, skip_removed_files=None,
|
||||
@ -1783,7 +1791,6 @@ class ConfigPostProcessing(MainHandler):
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
if unpack:
|
||||
if self.isRarSupported() != 'not supported':
|
||||
sickbeard.UNPACK = config.checkbox_to_value(unpack)
|
||||
@ -2004,17 +2011,17 @@ class ConfigProviders(MainHandler):
|
||||
error += "\nNo Provider Api key specified"
|
||||
|
||||
if error <> "":
|
||||
return json.dumps({'success' : False, 'error': error})
|
||||
return json.dumps({'success': False, 'error': error})
|
||||
|
||||
#Get list with Newznabproviders
|
||||
#providerDict = dict(zip([x.getID() for x in sickbeard.newznabProviderList], sickbeard.newznabProviderList))
|
||||
# Get list with Newznabproviders
|
||||
# providerDict = dict(zip([x.getID() for x in sickbeard.newznabProviderList], sickbeard.newznabProviderList))
|
||||
|
||||
#Get newznabprovider obj with provided name
|
||||
tempProvider= newznab.NewznabProvider(name, url, key)
|
||||
# Get newznabprovider obj with provided name
|
||||
tempProvider = newznab.NewznabProvider(name, url, key)
|
||||
|
||||
success, tv_categories, error = tempProvider.get_newznab_categories()
|
||||
|
||||
return json.dumps({'success' : success,'tv_categories' : tv_categories, 'error' : error})
|
||||
return json.dumps({'success': success, 'tv_categories': tv_categories, 'error': error})
|
||||
|
||||
def deleteNewznabProvider(self, nnid):
|
||||
|
||||
@ -2310,14 +2317,14 @@ class ConfigProviders(MainHandler):
|
||||
curTorrentProvider.enable_daily = config.checkbox_to_value(
|
||||
kwargs[curTorrentProvider.getID() + '_enable_daily'])
|
||||
except:
|
||||
curTorrentProvider.enable_daily = 0 # these exceptions are actually catching unselected checkboxes
|
||||
curTorrentProvider.enable_daily = 0 # these exceptions are actually catching unselected checkboxes
|
||||
|
||||
if hasattr(curTorrentProvider, 'enable_backlog'):
|
||||
try:
|
||||
curTorrentProvider.enable_backlog = config.checkbox_to_value(
|
||||
kwargs[curTorrentProvider.getID() + '_enable_backlog'])
|
||||
except:
|
||||
curTorrentProvider.enable_backlog = 0 # these exceptions are actually catching unselected checkboxes
|
||||
curTorrentProvider.enable_backlog = 0 # these exceptions are actually catching unselected checkboxes
|
||||
|
||||
for curNzbProvider in [curProvider for curProvider in sickbeard.providers.sortedProviderList() if
|
||||
curProvider.providerType == sickbeard.GenericProvider.NZB]:
|
||||
@ -2876,18 +2883,19 @@ class NewHomeAddShows(MainHandler):
|
||||
|
||||
indexer_id = show_name = indexer = None
|
||||
for cur_provider in sickbeard.metadata_provider_dict.values():
|
||||
(indexer_id, show_name, indexer) = cur_provider.retrieveShowMetadata(cur_path)
|
||||
if not (indexer_id and show_name):
|
||||
(indexer_id, show_name, indexer) = cur_provider.retrieveShowMetadata(cur_path)
|
||||
|
||||
# default to TVDB if indexer was not detected
|
||||
if show_name and not (indexer or indexer_id):
|
||||
(sn, idx, id) = helpers.searchIndexerForShowID(show_name, indexer, indexer_id)
|
||||
# default to TVDB if indexer was not detected
|
||||
if show_name and not (indexer or indexer_id):
|
||||
(sn, idx, id) = helpers.searchIndexerForShowID(show_name, indexer, indexer_id)
|
||||
|
||||
# set indexer and indexer_id from found info
|
||||
if not indexer and idx:
|
||||
indexer = idx
|
||||
# set indexer and indexer_id from found info
|
||||
if not indexer and idx:
|
||||
indexer = idx
|
||||
|
||||
if not indexer_id and id:
|
||||
indexer_id = id
|
||||
if not indexer_id and id:
|
||||
indexer_id = id
|
||||
|
||||
cur_dir['existing_info'] = (indexer_id, show_name, indexer)
|
||||
|
||||
@ -2921,7 +2929,7 @@ class NewHomeAddShows(MainHandler):
|
||||
if not show_dir:
|
||||
t.default_show_name = ''
|
||||
elif not show_name:
|
||||
t.default_show_name = ek.ek(os.path.basename, ek.ek(os.path.normpath, show_dir)).replace('.', ' ')
|
||||
t.default_show_name = re.sub(' \(\d{4}\)','', ek.ek(os.path.basename, ek.ek(os.path.normpath, show_dir)).replace('.', ' '))
|
||||
else:
|
||||
t.default_show_name = show_name
|
||||
|
||||
@ -2959,19 +2967,13 @@ class NewHomeAddShows(MainHandler):
|
||||
recommendedlist = TraktCall("recommendations/shows.json/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME,
|
||||
sickbeard.TRAKT_PASSWORD)
|
||||
|
||||
if recommendedlist == 'NULL':
|
||||
logger.log(u"No shows found in your recommendedlist, aborting recommendedlist update", logger.DEBUG)
|
||||
return
|
||||
|
||||
if recommendedlist is None:
|
||||
logger.log(u"Could not connect to trakt service, aborting recommended list update", logger.ERROR)
|
||||
return
|
||||
|
||||
map(final_results.append,
|
||||
([int(show['tvdb_id'] or 0) if sickbeard.TRAKT_DEFAULT_INDEXER == 1 else int(show['tvdb_id'] or 0),
|
||||
show['url'], show['title'], show['overview'],
|
||||
datetime.date.fromtimestamp(int(show['first_aired']) / 1000.0).strftime('%Y%m%d')] for show in
|
||||
recommendedlist if not helpers.findCertainShow(sickbeard.showList, indexerid=int(show['tvdb_id']))))
|
||||
if recommendedlist:
|
||||
indexers = ['tvdb_id', 'tvrage_id']
|
||||
map(final_results.append, (
|
||||
[int(show[indexers[sickbeard.TRAKT_DEFAULT_INDEXER - 1]]), show['url'], show['title'], show['overview'],
|
||||
datetime.date.fromtimestamp(int(show['first_aired']) / 1000.0).strftime('%Y%m%d')]
|
||||
for show in recommendedlist if not helpers.findCertainShow(sickbeard.showList, [
|
||||
int(show[indexers[sickbeard.TRAKT_DEFAULT_INDEXER - 1]])])))
|
||||
|
||||
return json.dumps({'results': final_results})
|
||||
|
||||
@ -3000,12 +3002,16 @@ class NewHomeAddShows(MainHandler):
|
||||
t = PageTemplate(headers=self.request.headers, file="home_trendingShows.tmpl")
|
||||
t.submenu = HomeMenu()
|
||||
|
||||
t.trending_shows = TraktCall("shows/trending.json/%API%", sickbeard.TRAKT_API_KEY)
|
||||
t.trending_shows = []
|
||||
|
||||
if None is not t.trending_shows:
|
||||
for item in t.trending_shows:
|
||||
if helpers.findCertainShow(sickbeard.showList, int(item['tvdb_id'])):
|
||||
item['tvdb_id'] = u'ExistsInLibrary'
|
||||
trending_shows = TraktCall("shows/trending.json/%API%", sickbeard.TRAKT_API_KEY)
|
||||
if trending_shows:
|
||||
for show in trending_shows:
|
||||
try:
|
||||
if not helpers.findCertainShow(sickbeard.showList, [int(show['tvdb_id']), int(show['tvrage_id'])]):
|
||||
t.trending_shows += [show]
|
||||
except exceptions.MultipleShowObjectsException:
|
||||
continue
|
||||
|
||||
return _munge(t)
|
||||
|
||||
@ -3282,7 +3288,7 @@ class ErrorLogs(MainHandler):
|
||||
|
||||
for x in reversed(data):
|
||||
|
||||
x = x.decode('utf-8', 'replace')
|
||||
x = ek.fixStupidEncodings(x)
|
||||
match = re.match(regex, x)
|
||||
|
||||
if match:
|
||||
@ -3833,7 +3839,7 @@ class Home(MainHandler):
|
||||
flatten_folders=None, paused=None, directCall=False, air_by_date=None, sports=None, dvdorder=None,
|
||||
indexerLang=None, subtitles=None, archive_firstmatch=None, rls_ignore_words=None,
|
||||
rls_require_words=None, anime=None, blackWords=None, whiteWords=None, blacklist=None, whitelist=None,
|
||||
scene=None):
|
||||
scene=None, defaultEpStatus=None):
|
||||
|
||||
if show is None:
|
||||
errString = "Invalid show ID: " + str(show)
|
||||
@ -4007,6 +4013,7 @@ class Home(MainHandler):
|
||||
showObj.dvdorder = dvdorder
|
||||
showObj.rls_ignore_words = rls_ignore_words.strip()
|
||||
showObj.rls_require_words = rls_require_words.strip()
|
||||
showObj.default_ep_status = defaultEpStatus
|
||||
|
||||
# if we change location clear the db of episodes, change it, write to db, and rescan
|
||||
if os.path.normpath(showObj._location) != os.path.normpath(location):
|
||||
@ -4087,8 +4094,8 @@ class Home(MainHandler):
|
||||
|
||||
ui.notifications.message('<b>%s</b> has been %s %s' %
|
||||
(showObj.name,
|
||||
('deleted', 'trashed')[sickbeard.TRASH_REMOVE_SHOW],
|
||||
('(media untouched)', '(with all related media)')[bool(full)]))
|
||||
('deleted', 'trashed')[sickbeard.TRASH_REMOVE_SHOW],
|
||||
('(media untouched)', '(with all related media)')[bool(full)]))
|
||||
redirect("/home/")
|
||||
|
||||
|
||||
@ -4405,10 +4412,9 @@ class Home(MainHandler):
|
||||
|
||||
sickbeard.searchQueueScheduler.action.add_item(ep_queue_item) # @UndefinedVariable
|
||||
|
||||
if ep_queue_item.success:
|
||||
return returnManualSearchResult(ep_queue_item)
|
||||
if not ep_queue_item.started and ep_queue_item.success is None:
|
||||
return json.dumps({'result': 'success'}) #I Actually want to call it queued, because the search hasnt been started yet!
|
||||
return json.dumps(
|
||||
{'result': 'success'}) # I Actually want to call it queued, because the search hasnt been started yet!
|
||||
if ep_queue_item.started and ep_queue_item.success is None:
|
||||
return json.dumps({'result': 'success'})
|
||||
else:
|
||||
@ -4422,16 +4428,17 @@ class Home(MainHandler):
|
||||
episodes = []
|
||||
currentManualSearchThreadsQueued = []
|
||||
currentManualSearchThreadActive = []
|
||||
finishedManualSearchThreadItems= []
|
||||
finishedManualSearchThreadItems = []
|
||||
|
||||
# Queued Searches
|
||||
currentManualSearchThreadsQueued = sickbeard.searchQueueScheduler.action.get_all_ep_from_queue(show)
|
||||
|
||||
# Running Searches
|
||||
if (sickbeard.searchQueueScheduler.action.is_manualsearch_in_progress()):
|
||||
currentManualSearchThreadActive = sickbeard.searchQueueScheduler.action.currentItem
|
||||
|
||||
# Finished Searches
|
||||
finishedManualSearchThreadItems = sickbeard.search_queue.MANUAL_SEARCH_HISTORY
|
||||
finishedManualSearchThreadItems = sickbeard.search_queue.MANUAL_SEARCH_HISTORY
|
||||
|
||||
if currentManualSearchThreadsQueued:
|
||||
for searchThread in currentManualSearchThreadsQueued:
|
||||
@ -4439,18 +4446,18 @@ class Home(MainHandler):
|
||||
if isinstance(searchThread, sickbeard.search_queue.ManualSearchQueueItem):
|
||||
episodes.append({'episode': searchThread.segment.episode,
|
||||
'episodeindexid': searchThread.segment.indexerid,
|
||||
'season' : searchThread.segment.season,
|
||||
'searchstatus' : searchstatus,
|
||||
'status' : statusStrings[searchThread.segment.status],
|
||||
'season': searchThread.segment.season,
|
||||
'searchstatus': searchstatus,
|
||||
'status': statusStrings[searchThread.segment.status],
|
||||
'quality': self.getQualityClass(searchThread.segment)})
|
||||
else:
|
||||
for epObj in searchThread.segment:
|
||||
episodes.append({'episode': epObj.episode,
|
||||
'episodeindexid': epObj.indexerid,
|
||||
'season' : epObj.season,
|
||||
'searchstatus' : searchstatus,
|
||||
'status' : statusStrings[epObj.status],
|
||||
'quality': self.getQualityClass(epObj)})
|
||||
'episodeindexid': epObj.indexerid,
|
||||
'season': epObj.season,
|
||||
'searchstatus': searchstatus,
|
||||
'status': statusStrings[epObj.status],
|
||||
'quality': self.getQualityClass(epObj)})
|
||||
|
||||
if currentManualSearchThreadActive:
|
||||
searchThread = currentManualSearchThreadActive
|
||||
@ -4461,22 +4468,23 @@ class Home(MainHandler):
|
||||
searchstatus = 'searching'
|
||||
episodes.append({'episode': searchThread.segment.episode,
|
||||
'episodeindexid': searchThread.segment.indexerid,
|
||||
'season' : searchThread.segment.season,
|
||||
'searchstatus' : searchstatus,
|
||||
'status' : statusStrings[searchThread.segment.status],
|
||||
'season': searchThread.segment.season,
|
||||
'searchstatus': searchstatus,
|
||||
'status': statusStrings[searchThread.segment.status],
|
||||
'quality': self.getQualityClass(searchThread.segment)})
|
||||
|
||||
if finishedManualSearchThreadItems:
|
||||
for searchThread in finishedManualSearchThreadItems:
|
||||
if isinstance(searchThread, sickbeard.search_queue.ManualSearchQueueItem):
|
||||
if str(searchThread.show.indexerid) == show and not [x for x in episodes if x['episodeindexid'] == searchThread.segment.indexerid]:
|
||||
if str(searchThread.show.indexerid) == show and not [x for x in episodes if x[
|
||||
'episodeindexid'] == searchThread.segment.indexerid]:
|
||||
searchstatus = 'finished'
|
||||
episodes.append({'episode': searchThread.segment.episode,
|
||||
'episodeindexid': searchThread.segment.indexerid,
|
||||
'season' : searchThread.segment.season,
|
||||
'searchstatus' : searchstatus,
|
||||
'status' : statusStrings[searchThread.segment.status],
|
||||
'quality': self.getQualityClass(searchThread.segment)})
|
||||
'season': searchThread.segment.season,
|
||||
'searchstatus': searchstatus,
|
||||
'status': statusStrings[searchThread.segment.status],
|
||||
'quality': self.getQualityClass(searchThread.segment)})
|
||||
else:
|
||||
### These are only Failed Downloads/Retry SearchThreadItems.. lets loop through the segement/episodes
|
||||
if str(searchThread.show.indexerid) == show:
|
||||
@ -4485,14 +4493,12 @@ class Home(MainHandler):
|
||||
searchstatus = 'finished'
|
||||
episodes.append({'episode': epObj.episode,
|
||||
'episodeindexid': epObj.indexerid,
|
||||
'season' : epObj.season,
|
||||
'searchstatus' : searchstatus,
|
||||
'status' : statusStrings[epObj.status],
|
||||
'quality': self.getQualityClass(epObj)})
|
||||
'season': epObj.season,
|
||||
'searchstatus': searchstatus,
|
||||
'status': statusStrings[epObj.status],
|
||||
'quality': self.getQualityClass(epObj)})
|
||||
|
||||
return json.dumps({'show': show, 'episodes' : episodes})
|
||||
|
||||
#return json.dumps()
|
||||
return json.dumps({'show': show, 'episodes': episodes})
|
||||
|
||||
def getQualityClass(self, ep_obj):
|
||||
# return the correct json value
|
||||
@ -4530,7 +4536,8 @@ class Home(MainHandler):
|
||||
status = 'No subtitles downloaded'
|
||||
ui.notifications.message('Subtitles Search', status)
|
||||
return json.dumps({'result': status, 'subtitles': ','.join(sorted([x.alpha2 for x in
|
||||
ep_obj.subtitles.union(previous_subtitles)]))})
|
||||
ep_obj.subtitles.union(
|
||||
previous_subtitles)]))})
|
||||
|
||||
def setSceneNumbering(self, show, indexer, forSeason=None, forEpisode=None, forAbsolute=None, sceneSeason=None,
|
||||
sceneEpisode=None, sceneAbsolute=None):
|
||||
@ -4617,10 +4624,9 @@ class Home(MainHandler):
|
||||
ep_queue_item = search_queue.FailedQueueItem(ep_obj.show, [ep_obj])
|
||||
sickbeard.searchQueueScheduler.action.add_item(ep_queue_item) # @UndefinedVariable
|
||||
|
||||
if ep_queue_item.success:
|
||||
return returnManualSearchResult(ep_queue_item)
|
||||
if not ep_queue_item.started and ep_queue_item.success is None:
|
||||
return json.dumps({'result': 'success'}) #I Actually want to call it queued, because the search hasnt been started yet!
|
||||
return json.dumps(
|
||||
{'result': 'success'}) # I Actually want to call it queued, because the search hasnt been started yet!
|
||||
if ep_queue_item.started and ep_queue_item.success is None:
|
||||
return json.dumps({'result': 'success'})
|
||||
else:
|
||||
|
@ -191,14 +191,25 @@ def tearDown_test_db():
|
||||
although this seams not to work on my system it leaves me with an zero kb file
|
||||
"""
|
||||
# uncomment next line so leave the db intact between test and at the end
|
||||
#return False
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTDBNAME))
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTCACHEDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTCACHEDBNAME))
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTFAILEDDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTFAILEDDBNAME))
|
||||
# return False
|
||||
|
||||
try:
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTDBNAME))
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTCACHEDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTCACHEDBNAME))
|
||||
except:
|
||||
pass
|
||||
|
||||
try:
|
||||
if os.path.exists(os.path.join(TESTDIR, TESTFAILEDDBNAME)):
|
||||
os.remove(os.path.join(TESTDIR, TESTFAILEDDBNAME))
|
||||
except:
|
||||
pass
|
||||
|
||||
def setUp_test_episode_file():
|
||||
if not os.path.exists(FILEDIR):
|
||||
|
@ -36,11 +36,11 @@ class TVShowTests(test.SickbeardTestDBCase):
|
||||
def test_change_indexerid(self):
|
||||
show = TVShow(1, 0001, "en")
|
||||
show.name = "show name"
|
||||
show.tvrname = "show name"
|
||||
show.network = "cbs"
|
||||
show.genre = "crime"
|
||||
show.runtime = 40
|
||||
show.status = "5"
|
||||
show.default_ep_status = "5"
|
||||
show.airs = "monday"
|
||||
show.startyear = 1987
|
||||
|
||||
@ -85,11 +85,11 @@ class TVTests(test.SickbeardTestDBCase):
|
||||
def test_getEpisode(self):
|
||||
show = TVShow(1, 0001, "en")
|
||||
show.name = "show name"
|
||||
show.tvrname = "show name"
|
||||
show.network = "cbs"
|
||||
show.genre = "crime"
|
||||
show.runtime = 40
|
||||
show.status = "5"
|
||||
show.default_ep_status = "5"
|
||||
show.airs = "monday"
|
||||
show.startyear = 1987
|
||||
show.saveToDB()
|
||||
|
Loading…
Reference in New Issue
Block a user