1
0
mirror of https://github.com/moparisthebest/SickRage synced 2024-11-16 06:15:09 -05:00

Merge branch 'release/v3.2.0'

This commit is contained in:
echel0n 2014-11-24 20:02:36 -08:00
commit c86aadca1e
47 changed files with 2939 additions and 699 deletions

View File

@ -25,6 +25,7 @@ import signal
import sys
import shutil
import subprocess
import traceback
if sys.version_info < (2, 6):
print "Sorry, requires Python 2.6 or 2.7."
@ -68,6 +69,7 @@ throwaway = datetime.datetime.strptime('20110101', '%Y%m%d')
signal.signal(signal.SIGINT, sickbeard.sig_handler)
signal.signal(signal.SIGTERM, sickbeard.sig_handler)
class SickRage(object):
def __init__(self):
# system event callback for shutdown/restart
@ -127,9 +129,6 @@ class SickRage(object):
try:
locale.setlocale(locale.LC_ALL, "")
except (locale.Error, IOError):
pass
try:
sickbeard.SYS_ENCODING = locale.getpreferredencoding()
except (locale.Error, IOError):
pass
@ -146,9 +145,8 @@ class SickRage(object):
# On non-unicode builds this will raise an AttributeError, if encoding type is not valid it throws a LookupError
sys.setdefaultencoding(sickbeard.SYS_ENCODING)
except:
print 'Sorry, you MUST add the SickRage folder to the PYTHONPATH environment variable'
print 'or find another way to force Python to use ' + sickbeard.SYS_ENCODING + ' for string encoding.'
sys.exit(1)
sys.exit("Sorry, you MUST add the SickRage folder to the PYTHONPATH environment variable\n" +
"or find another way to force Python to use " + sickbeard.SYS_ENCODING + " for string encoding.")
# Need console logging for SickBeard.py and SickBeard-console.exe
self.consoleLogging = (not hasattr(sys, "frozen")) or (sickbeard.MY_NAME.lower().find('-console') > 0)
@ -456,9 +454,9 @@ class SickRage(object):
sickbeard.showList.append(curShow)
except Exception, e:
logger.log(
u"There was an error creating the show in " + sqlShow["location"] + ": " + str(e).decode('utf-8',
'replace'),
u"There was an error creating the show in " + sqlShow["location"] + ": " + str(e).decode('utf-8'),
logger.ERROR)
logger.log(traceback.format_exc(), logger.DEBUG)
def restore(self, srcDir, dstDir):
try:
@ -508,7 +506,7 @@ class SickRage(object):
popen_list = [os.path.join(sickbeard.PROG_DIR, 'updater.exe'), str(sickbeard.PID),
sys.executable]
else:
logger.log(u"Unknown SB launch method, please file a bug report about this", logger.ERROR)
logger.log(u"Unknown SR launch method, please file a bug report about this", logger.ERROR)
popen_list = [sys.executable, os.path.join(sickbeard.PROG_DIR, 'updater.py'),
str(sickbeard.PID),
sys.executable,

View File

@ -38,7 +38,9 @@ addOption("Command", "SickBeard.GetRootDirs", "?cmd=sb.getrootdirs", "", "", "ac
addList("Command", "SickBeard.PauseBacklog", "?cmd=sb.pausebacklog", "sb.pausebacklog", "", "", "action");
addOption("Command", "SickBeard.Ping", "?cmd=sb.ping", "", "", "action");
addOption("Command", "SickBeard.Restart", "?cmd=sb.restart", "", "", "action");
addList("Command", "SickBeard.SearchTVDB", "?cmd=sb.searchtvdb", "sb.searchtvdb", "", "", "action");
addList("Command", "SickBeard.SearchAllIndexers", "?cmd=sb.searchindexers", "sb.searchindexers", "", "", "action");
addList("Command", "SickBeard.SearchTVDB", "?cmd=sb.searchtvdb&indexer=1", "sb.searchindexers", "", "", "action");
addList("Command", "SickBeard.SearchTVRage", "?cmd=sb.searchtvrage&indexer=2", "sb.searchindexers", "", "", "action");
addList("Command", "SickBeard.SetDefaults", "?cmd=sb.setdefaults", "sb.setdefaults", "", "", "action");
addOption("Command", "SickBeard.Shutdown", "?cmd=sb.shutdown", "", "", "action");
addList("Command", "Coming Episodes", "?cmd=future", "future");
@ -140,44 +142,44 @@ addOption("show.addnew-opt", "Optional Param", "", 1);
addList("show.addnew-opt", "No Season Folder", "&season_folder=0", "quality");
addList("show.addnew-opt", "Use Season Folder", "&season_folder=1", "quality");
addOptGroup("sb.searchtvdb", "Search by Name");
addList("sb.searchtvdb", "Lost", "&name=Lost", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "office", "&name=office", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "OffiCE", "&name=OffiCE", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "Leno", "&name=leno", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "Top Gear", "&name=Top Gear", "sb.searchtvdb-lang");
endOptGroup("sb.searchtvdb");
addOptGroup("sb.searchtvdb", "Search by indexerid");
addList("sb.searchtvdb", "73739", "&indexerid=73739", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "74608", "&indexerid=74608", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "199051", "&indexerid=199051", "sb.searchtvdb-lang");
addList("sb.searchtvdb", "123456 (invalid show)", "&indexerid=123456", "sb.searchtvdb-lang");
endOptGroup("sb.searchtvdb");
addOptGroup("sb.searchindexers", "Search by Name");
addList("sb.searchindexers", "Lost", "&name=Lost", "sb.searchindexers-lang");
addList("sb.searchindexers", "office", "&name=office", "sb.searchindexers-lang");
addList("sb.searchindexers", "OffiCE", "&name=OffiCE", "sb.searchindexers-lang");
addList("sb.searchindexers", "Leno", "&name=leno", "sb.searchindexers-lang");
addList("sb.searchindexers", "Top Gear", "&name=Top Gear", "sb.searchindexers-lang");
endOptGroup("sb.searchindexers");
addOptGroup("sb.searchindexers", "Search by indexerid");
addList("sb.searchindexers", "73739", "&indexerid=73739", "sb.searchindexers-lang");
addList("sb.searchindexers", "74608", "&indexerid=74608", "sb.searchindexers-lang");
addList("sb.searchindexers", "199051", "&indexerid=199051", "sb.searchindexers-lang");
addList("sb.searchindexers", "123456 (invalid show)", "&indexerid=123456", "sb.searchindexers-lang");
endOptGroup("sb.searchindexers");
addOption("sb.searchtvdb-lang", "Optional Param", "", 1);
addOption("sb.searchtvdb-lang", "Chinese", "&lang=zh"); // 27
addOption("sb.searchtvdb-lang", "Croatian", "&lang=hr"); // 31
addOption("sb.searchtvdb-lang", "Czech", "&lang=cs"); // 28
addOption("sb.searchtvdb-lang", "Danish", "&lang=da"); // 10
addOption("sb.searchtvdb-lang", "Dutch", "&lang=nl"); // 13
addOption("sb.searchtvdb-lang", "English", "&lang=en"); // 7
addOption("sb.searchtvdb-lang", "Finnish", "&lang=fi"); // 11 -- Suomeksi
addOption("sb.searchtvdb-lang", "French", "&lang=fr"); // 17
addOption("sb.searchtvdb-lang", "German", "&lang=de"); // 14
addOption("sb.searchtvdb-lang", "Greek", "&lang=el"); // 20
addOption("sb.searchtvdb-lang", "Hebrew", "&lang=he"); // 24
addOption("sb.searchtvdb-lang", "Hungarian", "&lang=hu"); // 19 -- Magyar
addOption("sb.searchtvdb-lang", "Italian", "&lang=it"); // 15
addOption("sb.searchtvdb-lang", "Japanese", "&lang=ja"); // 25
addOption("sb.searchtvdb-lang", "Korean", "&lang=ko"); // 32
addOption("sb.searchtvdb-lang", "Norwegian", "&lang=no"); // 9
addOption("sb.searchtvdb-lang", "Polish", "&lang=pl"); // 18
addOption("sb.searchtvdb-lang", "Portuguese", "&lang=pt");// 26
addOption("sb.searchtvdb-lang", "Russian", "&lang=ru"); // 22
addOption("sb.searchtvdb-lang", "Slovenian", "&lang=sl"); // 30
addOption("sb.searchtvdb-lang", "Spanish", "&lang=es"); // 16
addOption("sb.searchtvdb-lang", "Swedish", "&lang=sv"); // 8
addOption("sb.searchtvdb-lang", "Turkish", "&lang=tr"); // 21
addOption("sb.searchindexers-lang", "Optional Param", "", 1);
addOption("sb.searchindexers-lang", "Chinese", "&lang=zh"); // 27
addOption("sb.searchindexers-lang", "Croatian", "&lang=hr"); // 31
addOption("sb.searchindexers-lang", "Czech", "&lang=cs"); // 28
addOption("sb.searchindexers-lang", "Danish", "&lang=da"); // 10
addOption("sb.searchindexers-lang", "Dutch", "&lang=nl"); // 13
addOption("sb.searchindexers-lang", "English", "&lang=en"); // 7
addOption("sb.searchindexers-lang", "Finnish", "&lang=fi"); // 11 -- Suomeksi
addOption("sb.searchindexers-lang", "French", "&lang=fr"); // 17
addOption("sb.searchindexers-lang", "German", "&lang=de"); // 14
addOption("sb.searchindexers-lang", "Greek", "&lang=el"); // 20
addOption("sb.searchindexers-lang", "Hebrew", "&lang=he"); // 24
addOption("sb.searchindexers-lang", "Hungarian", "&lang=hu"); // 19 -- Magyar
addOption("sb.searchindexers-lang", "Italian", "&lang=it"); // 15
addOption("sb.searchindexers-lang", "Japanese", "&lang=ja"); // 25
addOption("sb.searchindexers-lang", "Korean", "&lang=ko"); // 32
addOption("sb.searchindexers-lang", "Norwegian", "&lang=no"); // 9
addOption("sb.searchindexers-lang", "Polish", "&lang=pl"); // 18
addOption("sb.searchindexers-lang", "Portuguese", "&lang=pt");// 26
addOption("sb.searchindexers-lang", "Russian", "&lang=ru"); // 22
addOption("sb.searchindexers-lang", "Slovenian", "&lang=sl"); // 30
addOption("sb.searchindexers-lang", "Spanish", "&lang=es"); // 16
addOption("sb.searchindexers-lang", "Swedish", "&lang=sv"); // 8
addOption("sb.searchindexers-lang", "Turkish", "&lang=tr"); // 21
#for $curShow in $sortedShowList:
addList("seasons", "$curShow.name", "&indexerid=$curShow.indexerid", "seasons-$curShow.indexerid");

View File

@ -198,19 +198,6 @@
<div id="summary">
<table class="summaryTable pull-left">
#if $show.network and $show.airs:
<tr><td class="showLegend">Originally Airs: </td><td>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""# on $show.network</td></tr>
#else if $show.network:
<tr><td class="showLegend">Originally Airs: </td><td>$show.network</td></tr>
#else if $show.airs:
<tr><td class="showLegend">Originally Airs: </td><td>>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""#</td></tr>
#end if
<tr><td class="showLegend">Status: </td><td>$show.status</td></tr>
#if $showLoc[1]:
<tr><td class="showLegend">Location: </td><td>$showLoc[0]</td></tr>
#else:
<tr><td class="showLegend"><span style="color: red;">Location: </span></td><td><span style="color: red;">$showLoc[0]</span> (dir is missing)</td></tr>
#end if
#set $anyQualities, $bestQualities = $Quality.splitQuality(int($show.quality))
<tr><td class="showLegend">Quality: </td><td>
#if $show.quality in $qualityPresets:
@ -222,6 +209,21 @@
#if $bestQualities:
<i>Replace with:</i> <%=", ".join([Quality.qualityStrings[x] for x in sorted(bestQualities)])%>
#end if
#end if
## "Originally Airs" row: shows the air time and/or the network, and flags an
## air time that network_timezones.test_timeformat() rejects.
#if $show.network and $show.airs:
<tr><td class="showLegend">Originally Airs: </td><td>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""# on $show.network</td></tr>
#else if $show.network:
<tr><td class="showLegend">Originally Airs: </td><td>$show.network</td></tr>
#else if $show.airs:
## Exactly one '>' closes the <td>; a second one would be rendered as text.
<tr><td class="showLegend">Originally Airs: </td><td>$show.airs #if not $network_timezones.test_timeformat($show.airs) then " <font color='#FF0000'><b>(invalid Timeformat)</b></font> " else ""#</td></tr>
#end if
<tr><td class="showLegend">Show Status: </td><td>$show.status</td></tr>
<tr><td class="showLegend">Default EP Status: </td><td>$statusStrings[$show.default_ep_status]</td></tr>
#if $showLoc[1]:
<tr><td class="showLegend">Location: </td><td>$showLoc[0]</td></tr>
#else:
<tr><td class="showLegend"><span style="color: red;">Location: </span></td><td><span style="color: red;">$showLoc[0]</span> (dir is missing)</td></tr>
#end if
<tr><td class="showLegend">Scene Name:</td><td>#if $show.exceptions then $exceptions_string else $show.name#</td></tr>
@ -263,14 +265,13 @@
#end if
</table>
</div>
</div>
</div>
<div class="clearfix"></div>
<div class="pull-left" style="padding-bottom: 10px;">
Change selected episodes to
<div class="pull-left" >
Change selected episodes to:</br>
<select id="statusSelect" class="form-control form-control-inline input-sm">
#for $curStatus in [$WANTED, $SKIPPED, $ARCHIVED, $IGNORED, $FAILED] + sorted($Quality.DOWNLOADED):
#if $curStatus == $DOWNLOADED:
@ -284,6 +285,8 @@
<input class="btn btn-inline" type="button" id="changeStatus" value="Go" />
</div>
</br>
<div class="pull-right clearfix" id="checkboxControls">
<div style="padding-bottom: 5px;">
<label for="wanted"><span class="wanted"><input type="checkbox" id="wanted" checked="checked" /> Wanted: <b>$epCounts[$Overview.WANTED]</b></span></label>
@ -298,7 +301,6 @@
<button class="btn btn-xs clearAll">Clear All</button>
</div>
</div>
<br />
<table class="sickbeardTable display_show" cellspacing="0" border="0" cellpadding="0">

View File

@ -1,6 +1,7 @@
#import sickbeard
#import lib.adba as adba
#from sickbeard import common
#from sickbeard.common import *
#from sickbeard import exceptions
#from sickbeard import scene_exceptions
#from sickbeard.blackandwhitelist import *
@ -63,29 +64,15 @@
<form action="editShow" method="post">
<input type="hidden" name="show" value="$show.indexerid" />
<b>Location:</b> <input type="text" name="location" id="location" value="$show._location" class="form-control form-control-inline input-sm input350" /><br />
<br />
<b>Quality:</b>
#set $qualities = $common.Quality.splitQuality(int($show.quality))
#set global $anyQualities = $qualities[0]
#set global $bestQualities = $qualities[1]
#include $os.path.join($sickbeard.PROG_DIR, "gui/slick/interfaces/default/inc_qualityChooser.tmpl")
<b>Location:</b></br>
<input type="text" name="location" id="location" value="$show._location" class="form-control form-control-inline input-sm input350" /><br />
<br />
#if $anyQualities + $bestQualities
<b>Archive on first match: </b>
<input type="checkbox" name="archive_firstmatch" #if $show.archive_firstmatch == 1 then "checked=\"checked\"" else ""# /><br>
(check this to have the episode archived after the first best match is found from your archive quality list)
<br />
<br />
#end if
<b>Scene Exception:</b>
<b>Scene Exception:</b><br />
<input type="text" id="SceneName" class="form-control form-control-inline input-sm input200">
<input class="btn btn-inline" type="button" value="Add" id="addSceneName"><br />
<div id="SceneException" >
<div>
<p>This will <b>affect the episode show search</b> on nzb and torrent provider.<br />
This list overrides the original name, it doesn't append to it.<br />
@ -108,45 +95,75 @@
<div class="clearfix"></div>
<br />
<b>Info Language:</b> <select name="indexerLang" id="indexerLangSelect" class="form-control form-control-inline input-sm"></select><br />
Note: This will only affect the language of the retrieved metadata file contents and episode filenames.<br />
This <b>DOES NOT</b> allow SickRage to download non-english TV episodes!<br />
<b>Quality:</b><br />
#set $qualities = $common.Quality.splitQuality(int($show.quality))
#set global $anyQualities = $qualities[0]
#set global $bestQualities = $qualities[1]
#include $os.path.join($sickbeard.PROG_DIR, "gui/slick/interfaces/default/inc_qualityChooser.tmpl")
<br />
<b>Flatten files (no folders):</b> <input type="checkbox" name="flatten_folders" #if $show.flatten_folders == 1 and not $sickbeard.NAMING_FORCE_FOLDERS then "checked=\"checked\"" else ""# #if $sickbeard.NAMING_FORCE_FOLDERS then "disabled=\"disabled\"" else ""#/><br /><br />
<b>Paused:</b> <input type="checkbox" name="paused" #if $show.paused == 1 then "checked=\"checked\"" else ""# /><br /><br />
<b>Subtitles:</b> <input type="checkbox" name="subtitles"#if $show.subtitles == 1 and $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""##if not $sickbeard.USE_SUBTITLES then " disabled=\"disabled\"" else ""#/><br /><br />
<b>Default Episode Status:</b><br />
(this will set a default status to be applied to any newly added episodes)<br />
<select name="defaultEpStatus" id="defaultEpStatusSelect" class="form-control form-control-inline input-sm">
#for $curStatus in [$WANTED, $SKIPPED, $ARCHIVED, $IGNORED]:
<option value="$curStatus">$statusStrings[$curStatus]</option>
#end for
</select><br />
<br />
<b>Info Language:</b><br />
(this will only affect the language of the retrieved metadata file contents and episode filenames)<br />
<select name="indexerLang" id="indexerLangSelect" class="form-control form-control-inline input-sm"></select><br />
<br />
<b>Flatten files (no folders):</b> <input type="checkbox" name="flatten_folders" #if $show.flatten_folders == 1 and not $sickbeard.NAMING_FORCE_FOLDERS then "checked=\"checked\"" else ""# #if $sickbeard.NAMING_FORCE_FOLDERS then "disabled=\"disabled\"" else ""#/><br />
<b>Paused:</b> <input type="checkbox" name="paused" #if $show.paused == 1 then "checked=\"checked\"" else ""# /><br />
<b>Subtitles:</b> <input type="checkbox" name="subtitles"#if $show.subtitles == 1 and $sickbeard.USE_SUBTITLES then " checked=\"checked\"" else ""##if not $sickbeard.USE_SUBTITLES then " disabled=\"disabled\"" else ""#/><br />
<br/>
<b>Scene Numbering: </b>
<input type="checkbox" name="scene" #if $show.scene == 1 then "checked=\"checked\"" else ""# /><br/>
(check this if you wish to search by scene numbering, uncheck to search by indexer numbering)
<br/><br/>
(check this if you wish to search by scene numbering, uncheck to search by indexer numbering)<br/>
<br/>
<b>Air by date: </b>
<input type="checkbox" name="air_by_date" #if $show.air_by_date == 1 then "checked=\"checked\"" else ""# /><br />
(check this if the show is released as Show.03.02.2010 rather than Show.S02E03)
<br /><br />
(check this if the show is released as Show.03.02.2010 rather than Show.S02E03)<br />
<br />
<b>Sports: </b>
<input type="checkbox" name="sports" #if $show.sports == 1 then "checked=\"checked\"" else ""# /><br />
(check this if the show is a sporting or MMA event)
<br /><br />
(check this if the show is a sporting or MMA event)<br />
<br />
<b>Anime: </b>
<input type="checkbox" name="anime" #if $show.is_anime then "CHECKED" else ""#><br />
(check this if the show is released as Show.265 rather than Show.S02E03, this show is an anime)
<br /><br />
(check this if the show is released as Show.265 rather than Show.S02E03, this show is an anime)<br />
<br />
<b>DVD Order: </b>
<input type="checkbox" name="dvdorder" #if $show.dvdorder == 1 then "checked=\"checked\"" else ""# /><br/>
(check this if you wish to use the DVD order instead of the Airing order)
<br/><br/>
<b>Ignored Words:</b> <input type="text" name="rls_ignore_words" id="rls_ignore_words" value="$show.rls_ignore_words" class="form-control form-control-inline input-sm input350" /><br />
Results with any of these words in the title will be filtered out <br />
Separate words with a comma, e.g. "word1,word2,word3"
<br /><br />
#if $anyQualities + $bestQualities
<b>Archive on first match:</b>
<input type="checkbox" name="archive_firstmatch" #if $show.archive_firstmatch == 1 then "checked=\"checked\"" else ""# /><br>
(check this to have the episode archived after the first best match is found from your archive quality list)</br>
<br />
#end if
<b>Required Words:</b> <input type="text" name="rls_require_words" id="rls_require_words" value="$show.rls_require_words" class="form-control form-control-inline input-sm input350" /><br />
<b>Ignored Words:</b></br>
<input type="text" name="rls_ignore_words" id="rls_ignore_words" value="$show.rls_ignore_words" class="form-control form-control-inline input-sm input350" /><br />
Results with any of these words in the title will be filtered out<br />
Separate words with a comma, e.g. "word1,word2,word3"<br />
<br />
<b>Required Words:</b></br>
<input type="text" name="rls_require_words" id="rls_require_words" value="$show.rls_require_words" class="form-control form-control-inline input-sm input350" /><br />
Results without one of these words in the title will be filtered out <br />
Separate words with a comma, e.g. "word1,word2,word3"
<br /><br />
Separate words with a comma, e.g. "word1,word2,word3"<br />
<br />
#if $show.is_anime:
#from sickbeard.blackandwhitelist import *

View File

@ -280,10 +280,13 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name))
#set $cur_downloaded = 0
#set $cur_total = 0
#set $download_stat_tip = ''
#if None is not $curShow.status and re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
#set $display_status = 'Continuing'
#else
#set $display_status = $curShow.status
#set $display_status = $curShow.status
#if None is not $display_status
#if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
#set $display_status = 'Continuing'
#else if re.search(r'(?i)(?:nded)', $curShow.status)
#set $display_status = 'Ended'
#end if
#end if
#if $curShow.indexerid in $show_stat:
@ -604,11 +607,17 @@ $myShowList.sort(lambda x, y: cmp(x.name, y.name))
</td>
<td align="center">
#if None is not $curShow.status and re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
Continuing
#else:
$curShow.status
#set $display_status = $curShow.status
#if None is not $display_status
#if re.search(r'(?i)(?:new|returning)\s*series', $curShow.status)
#set $display_status = 'Continuing'
#else if re.search(r'(?i)(?:nded)', $curShow.status)
#set $display_status = 'Ended'
#end if
#end if
$display_status
</td>
</tr>

View File

@ -43,16 +43,6 @@
<link rel="stylesheet" type="text/css" href="$sbRoot/css/${sickbeard.THEME_NAME}.css?$sbPID" />
<style type="text/css">
<!--
#if $sickbeard.NEWEST_VERSION_STRING:
.ui-pnotify { top: 30px !important; }
#end if
//-->
</style>
<script type="text/javascript" src="$sbRoot/js/lib/jquery-1.8.3.min.js?$sbPID"></script>
<script type="text/javascript" src="$sbRoot/js/lib/bootstrap.min.js?$sbPID"></script>
<script type="text/javascript" src="$sbRoot/js/lib/bootstrap-hover-dropdown.min.js?$sbPID"></script>

351
lib/ftfy/__init__.py Normal file
View File

@ -0,0 +1,351 @@
# -*- coding: utf-8 -*-
"""
ftfy: fixes text for you
This is a module for making text less broken. See the `fix_text` function
for more information.
"""
from __future__ import unicode_literals
# See the docstring for ftfy.bad_codecs to see what we're doing here.
import ftfy.bad_codecs
ftfy.bad_codecs.ok()
from ftfy import fixes
from ftfy.fixes import fix_text_encoding
from ftfy.compatibility import PYTHON34_OR_LATER, is_printable
import unicodedata
import warnings
def fix_text(text,
             remove_unsafe_private_use=(not PYTHON34_OR_LATER),
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             normalization='NFKC',
             uncurl_quotes=True,
             fix_line_breaks=True,
             remove_control_chars=True,
             remove_bom=True,
             max_decode_length=2**16):
    r"""
    Take Unicode text and return it in a consistent, possibly less broken,
    representation.

    A few examples:

        >>> print(fix_text('uÌˆnicode'))
        ünicode

        >>> print(fix_text('Broken text&hellip; it&#x2019;s flubberific!'))
        Broken text... it's flubberific!

        >>> print(fix_text('HTML entities &lt;3'))
        HTML entities <3

        >>> print(fix_text('<em>HTML entities &lt;3</em>'))
        <em>HTML entities &lt;3</em>

        >>> print(fix_text('\001\033[36;44mI&#x92;m blue, da ba dee da ba '
        ...                'doo&#133;\033[0m'))
        I'm blue, da ba dee da ba doo...

        >>> # This string begins with a byte-order mark, which is easy to
        >>> # miss because it is invisible.
        >>> print(fix_text('\ufeffParty like\nit&rsquo;s 1999!'))
        Party like
        it's 1999!

        >>> len(fix_text('\ufb01' * 100000))
        200000

        >>> len(fix_text(''))
        0

    The enabled steps are applied in this order:

    - `remove_unsafe_private_use`: strip a range of private-use characters
      that could trigger a Python bug. Recent Pythons have the fix, so the
      default is False from Python 3.4 on.

    - `fix_entities`: when True, replace HTML entities with the characters
      they stand for. When 'auto' (the default), do the same until a line
      containing both '<' and '>' is seen; such text is probably real HTML
      whose entities should be left alone, so entity replacement is switched
      off for that line and every line after it.

    - `remove_terminal_escapes`: drop byte sequences that are instructions
      to Unix terminals, such as color codes.

    - `fix_encoding`: look for the usual damage caused by encoding or
      decoding Unicode with the wrong codec and repair it when that can
      reasonably be done. See `fix_text_encoding` for details.

    - `normalization`: unless None, apply that Unicode normalization form
      ('NFC', 'NFKC', 'NFD' or 'NFKD'). The default, 'NFKC', does two
      relevant things:

      - C: combine characters and diacritics written as separate code
        points, e.g. "e" plus an acute accent modifier becomes "é", and
        "ka" (か) plus a dakuten becomes the single character "ga" (が).

      - K: replace functionally equivalent characters with their most
        common form. Half-width katakana become full-width, full-width
        Roman characters become ASCII, ellipsis characters become three
        periods, and the ligature 'ﬂ' becomes 'fl'.

    - `uncurl_quotes`: replace curly quotation marks with plain ASCII
      straight quotes.

    - `fix_line_breaks`: convert CRLF and CR line breaks to LF.

    - `remove_control_chars`: remove every C0 control character except the
      common useful ones: TAB, CR, LF and FF. (CR may already be gone because
      of `fix_line_breaks`.)

    - `remove_bom`: remove the byte-order mark if there is one.

    - Whenever a pass changed something, all steps are run again, which
      makes the function idempotent: "&amp;amp;" ends up as "&" rather than
      "&amp;".

    The text is handled one line at a time, so individual lines may be in
    different encodings. Lines longer than `max_decode_length` skip the
    `fix_encoding` step to avoid unbounded slowdowns.

    If the whole text is known to share one (possibly flawed) encoding and
    processing it in a single piece is acceptable, use `fix_text_segment`.

    Raises UnicodeError when given bytes instead of Unicode text.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    # Options that are the same for every line; `fix_entities` and
    # `fix_encoding` are decided per line below.
    shared_options = dict(
        remove_unsafe_private_use=remove_unsafe_private_use,
        remove_terminal_escapes=remove_terminal_escapes,
        normalization=normalization,
        uncurl_quotes=uncurl_quotes,
        fix_line_breaks=fix_line_breaks,
        remove_control_chars=remove_control_chars,
        remove_bom=remove_bom
    )

    total = len(text)
    pieces = []
    start = 0
    while start < total:
        newline_at = text.find('\n', start)
        # Each piece keeps its trailing newline; the final piece may lack one.
        end = total if newline_at == -1 else newline_at + 1
        line = text[start:end]

        # Over-long lines skip encoding repair to bound the running time.
        if (end - start) > max_decode_length:
            encoding_for_line = False
        else:
            encoding_for_line = fix_encoding

        if fix_entities == 'auto' and '<' in line and '>' in line:
            # Angle brackets appear together, so this could be HTML. The
            # change is deliberately sticky for the remaining lines.
            fix_entities = False

        pieces.append(
            fix_text_segment(
                line,
                fix_entities=fix_entities,
                fix_encoding=encoding_for_line,
                **shared_options
            )
        )
        start = end

    return ''.join(pieces)


ftfy = fix_text
def fix_file(input_file,
             remove_unsafe_private_use=True,
             fix_entities='auto',
             remove_terminal_escapes=True,
             fix_encoding=True,
             normalization='NFKC',
             uncurl_quotes=True,
             fix_line_breaks=True,
             remove_control_chars=True,
             remove_bom=True):
    """
    Fix the text found in a file, yielding the fixed lines one by one.

    Lines that arrive as Unicode text are used as they are. Lines that arrive
    as bytes have to be decoded by guessing their encoding: a few common
    encodings are tried, with no promises. See `guess_bytes` for how the
    guess is made.

    With `fix_entities='auto'`, entity replacement is turned off from the
    first line that contains both '<' and '>' and stays off for the rest of
    the file. The remaining options are handed to `fix_text_segment`
    unchanged.
    """
    # Options that do not vary from line to line.
    shared_options = dict(
        remove_unsafe_private_use=remove_unsafe_private_use,
        remove_terminal_escapes=remove_terminal_escapes,
        fix_encoding=fix_encoding,
        normalization=normalization,
        uncurl_quotes=uncurl_quotes,
        fix_line_breaks=fix_line_breaks,
        remove_control_chars=remove_control_chars,
        remove_bom=remove_bom
    )

    entities = fix_entities
    for raw_line in input_file:
        if isinstance(raw_line, bytes):
            # Only the decoded text is needed; the guessed codec name is not.
            line = guess_bytes(raw_line)[0]
        else:
            line = raw_line

        if fix_entities == 'auto' and '<' in line and '>' in line:
            entities = False

        yield fix_text_segment(line, fix_entities=entities, **shared_options)
def fix_text_segment(text,
                     remove_unsafe_private_use=True,
                     fix_entities='auto',
                     remove_terminal_escapes=True,
                     fix_encoding=True,
                     normalization='NFKC',
                     uncurl_quotes=True,
                     fix_line_breaks=True,
                     remove_control_chars=True,
                     remove_bom=True):
    """
    Run the enabled fixes over one chunk of text until nothing changes.

    The chunk may be a single line taken from a larger `fix_text` run, or a
    bigger piece of text that is known to be in one encoding throughout.

    The parameters have the same meaning as in `fix_text`. Raises
    UnicodeError when given bytes instead of Unicode text.
    """
    if isinstance(text, bytes):
        raise UnicodeError(fixes.BYTES_ERROR_TEXT)

    # In 'auto' mode, a pair of angle brackets suggests real HTML, whose
    # entities must be left untouched.
    if fix_entities == 'auto' and '<' in text and '>' in text:
        fix_entities = False

    # Assemble the enabled steps once, in the order they have to run.
    steps = []
    if remove_unsafe_private_use:
        steps.append(fixes.remove_unsafe_private_use)
    if fix_entities:
        steps.append(fixes.unescape_html)
    if remove_terminal_escapes:
        steps.append(fixes.remove_terminal_escapes)
    if fix_encoding:
        steps.append(fixes.fix_text_encoding)
    if normalization is not None:
        steps.append(lambda s: unicodedata.normalize(normalization, s))
    if uncurl_quotes:
        steps.append(fixes.uncurl_quotes)
    if fix_line_breaks:
        steps.append(fixes.fix_line_breaks)
    if remove_control_chars:
        steps.append(fixes.remove_control_chars)
    if remove_bom:
        steps.append(fixes.remove_bom)

    # Repeat whole passes until one leaves the text unchanged, so that the
    # result is a fixed point of the pipeline.
    while True:
        before = text
        for step in steps:
            text = step(text)
        if text == before:
            return text
def guess_bytes(bstring):
    """
    Decode bytes of unknown encoding by trying a few common encodings that
    can be told apart from one another.

    Returns a `(text, encoding_name)` pair.

    This is not a magic bullet. Bytes that come from, say, a MySQL database
    whose "character set" is ISO Elbonian will not be figured out, and, more
    relevantly, East Asian encodings are currently not tried at all.

    The candidates are:

    - UTF-16 with a byte order mark, because a UTF-16 byte order mark looks
      like nothing else
    - UTF-8, because it's the global de facto standard
    - "utf-8-variants", because it's what people actually implement when
      they think they're doing UTF-8
    - MacRoman, because Microsoft Office thinks it's still a thing; it is
      recognised by its line breaks, so a string without any line breaks
      cannot be identified as MacRoman
    - "sloppy-windows-1252", the Latin-1-like encoding that is the most
      common single-byte encoding
    """
    if bstring.startswith((b'\xfe\xff', b'\xff\xfe')):
        return bstring.decode('utf-16'), 'utf-16'

    # Unpacking a bytes literal yields the element type that iterating over
    # `bytes(bstring)` yields, so the membership tests below compare like
    # with like.
    byte_ed, byte_c0, byte_CR, byte_LF = b'\xed\xc0\r\n'
    seen = set(bytes(bstring))

    # Byte 0xed can encode a range of codepoints that are UTF-16 surrogates.
    # UTF-8 does not use surrogates, so 0xed most likely means CESU-8, the
    # variant that encodes the surrogates instead of the original characters.
    # Standard UTF-8 occasionally triggers this as well (some Korean
    # characters use 0xed), which is harmless.
    #
    # Byte 0xc0 is impossible in UTF-8: numerically it could only encode
    # characters below U+0040, which already have single-byte forms, and
    # UTF-8 requires the shortest representation. Java, however, hides the
    # null codepoint U+0000 as 0xc0 0x80 so that 0x00 never appears in the
    # encoded bytes.
    #
    # The 'utf-8-variants' decoder copes with both cases and with standard
    # UTF-8, at the cost of a bit of speed.
    if byte_ed in seen or byte_c0 in seen:
        codec = 'utf-8-variants'
    else:
        codec = 'utf-8'

    try:
        return bstring.decode(codec), codec
    except UnicodeDecodeError:
        pass

    # Not UTF-8 of any kind: fall back to a single-byte encoding. CR line
    # breaks with no LF anywhere point to MacRoman.
    if byte_CR in bstring and byte_LF not in bstring:
        fallback = 'macroman'
    else:
        fallback = 'sloppy-windows-1252'
    return bstring.decode(fallback), fallback
def explain_unicode(text):
    """
    Print a line-by-line breakdown of the codepoints in `text`, which is
    handy when debugging mysterious Unicode.

    Each output line shows one codepoint: its number in hexadecimal, its
    glyph (or an ASCII escape sequence when the character is not printable),
    its category in the Unicode standard, and its name in the Unicode
    standard (``<unknown>`` when it has none).

    >>> explain_unicode('(╯°□°)╯︵ ┻━┻')
    U+0028 (       [Ps] LEFT PARENTHESIS
    U+256F ╯       [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
    U+00B0 °       [So] DEGREE SIGN
    U+25A1 □       [So] WHITE SQUARE
    U+00B0 °       [So] DEGREE SIGN
    U+0029 )       [Pe] RIGHT PARENTHESIS
    U+256F ╯       [So] BOX DRAWINGS LIGHT ARC UP AND LEFT
    U+FE35 ︵       [Ps] PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS
    U+0020         [Zs] SPACE
    U+253B ┻       [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
    U+2501 ━       [So] BOX DRAWINGS HEAVY HORIZONTAL
    U+253B ┻       [So] BOX DRAWINGS HEAVY UP AND HORIZONTAL
    """
    row_template = 'U+{code:04X} {display:<7} [{category}] {name}'
    for char in text:
        # Unprintable characters are shown as an ASCII escape sequence so the
        # output stays readable.
        shown = (
            char if is_printable(char)
            else char.encode('unicode-escape').decode('ascii')
        )
        row = row_template.format(
            code=ord(char),
            display=shown,
            category=unicodedata.category(char),
            name=unicodedata.name(char, '<unknown>')
        )
        print(row)
def fix_bad_encoding(text):
    """
    Deprecated alias for `fix_text_encoding`, kept for compatibility with
    previous versions of ftfy.

    Emits a DeprecationWarning and then returns whatever
    `fix_text_encoding(text)` returns.
    """
    warnings.warn(
        'fix_bad_encoding is now known as fix_text_encoding',
        DeprecationWarning,
        # Attribute the warning to the code that called this shim, not to
        # the shim itself, so users can find the call they need to update.
        stacklevel=2
    )
    return fix_text_encoding(text)

View File

@ -0,0 +1,94 @@
# coding: utf-8
r"""
Give Python the ability to decode some common, flawed encodings.
Python does not want you to be sloppy with your text. Its encoders and decoders
("codecs") follow the relevant standards whenever possible, which means that
when you get text that *doesn't* follow those standards, you'll probably fail
to decode it. Or you might succeed at decoding it for implementation-specific
reasons, which is perhaps worse.
There are some encodings out there that Python wishes didn't exist, which are
widely used outside of Python:
- "utf-8-variants", a family of not-quite-UTF-8 encodings, including the
ever-popular CESU-8 and "Java modified UTF-8".
- "Sloppy" versions of character map encodings, where bytes that don't map to
anything will instead map to the Unicode character with the same number.
Simply importing this module, or in fact any part of the `ftfy` package, will
make these new "bad codecs" available to Python through the standard Codecs
API. You never have to actually call any functions inside `ftfy.bad_codecs`.
However, if you want to call something because your code checker insists on it,
you can call ``ftfy.bad_codecs.ok()``.
A quick example of decoding text that's encoded in CESU-8:
>>> import ftfy.bad_codecs
>>> print(b'\xed\xa0\xbd\xed\xb8\x8d'.decode('utf-8-variants'))
😍
"""
from __future__ import unicode_literals
from encodings import normalize_encoding
import codecs
# Codecs that `search_function` has already resolved, keyed by the encoding
# name exactly as it was requested (before normalization).
_CACHE = {}

# Define some aliases for 'utf-8-variants'. All hyphens get turned into
# underscores, because of `normalize_encoding`.
UTF8_VAR_NAMES = (
    'utf_8_variants', 'utf8_variants',
    'utf_8_variant', 'utf8_variant',
    'utf_8_var', 'utf8_var',
    'cesu_8', 'cesu8',
    'java_utf_8', 'java_utf8'
)
def search_function(encoding):
    """
    Codec search function for the "bad codecs", following the contract of
    Python's codecs API: given an encoding name, return a codec for that
    encoding if we know one, or None if we don't.

    The encodings this will match are:

    - Encodings of the form 'sloppy-windows-NNNN' or 'sloppy-iso-8859-N',
      where the non-sloppy version is an encoding that leaves some bytes
      unmapped to characters.
    - The 'utf-8-variants' encoding, which has the several aliases listed in
      UTF8_VAR_NAMES.

    Successful lookups are remembered in _CACHE under the name as requested;
    failed lookups are not cached.
    """
    try:
        return _CACHE[encoding]
    except KeyError:
        pass

    normalized = normalize_encoding(encoding)
    if normalized in UTF8_VAR_NAMES:
        # Imported lazily, only when this codec is actually requested.
        from ftfy.bad_codecs.utf8_variants import CODEC_INFO
        found = CODEC_INFO
    elif normalized.startswith('sloppy_'):
        from ftfy.bad_codecs.sloppy import CODECS
        found = CODECS.get(normalized)
    else:
        found = None

    if found is None:
        return None
    _CACHE[encoding] = found
    return found
def ok():
    """
    Do nothing, successfully.

    This exists so there is something to call after importing this package.
    Pyflakes gets upset when you import a module and appear not to use it;
    it doesn't know that you're using it when you use the
    ``unicode.encode`` and ``bytes.decode`` methods with certain encodings.
    """
    return None
# Import-time side effect: install the search function so that the bad codecs
# can be found through the standard codecs API.
codecs.register(search_function)

View File

@ -0,0 +1,156 @@
# coding: utf-8
r"""
Decodes single-byte encodings, filling their "holes" in the same messy way that
everyone else does.
A single-byte encoding maps each byte to a Unicode character, except that some
bytes are left unmapped. In the commonly-used Windows-1252 encoding, for
example, bytes 0x81 and 0x8D, among others, have no meaning.
Python, wanting to preserve some sense of decorum, will handle these bytes
as errors. But Windows knows that 0x81 and 0x8D are possible bytes and they're
different from each other. It just hasn't defined what they are in terms of
Unicode.
Software that has to interoperate with Windows-1252 and Unicode -- such as all
the common Web browsers -- will pick some Unicode characters for them to map
to, and the characters they pick are the Unicode characters with the same
numbers: U+0081 and U+008D. This is the same as what Latin-1 does, and the
resulting characters tend to fall into a range of Unicode that's set aside for
obsolete Latin-1 control characters anyway.
These sloppy codecs let Python do the same thing, thus interoperating with
other software that works this way. It defines a sloppy version of many
single-byte encodings with holes. (There is no need for a sloppy version of
an encoding without holes: for example, there is no such thing as
sloppy-iso-8859-2 or sloppy-macroman.)
The following encodings will become defined:
- sloppy-windows-1250 (Central European, sort of based on ISO-8859-2)
- sloppy-windows-1251 (Cyrillic)
- sloppy-windows-1252 (Western European, based on Latin-1)
- sloppy-windows-1253 (Greek, sort of based on ISO-8859-7)
- sloppy-windows-1254 (Turkish, based on ISO-8859-9)
- sloppy-windows-1255 (Hebrew, based on ISO-8859-8)
- sloppy-windows-1256 (Arabic)
- sloppy-windows-1257 (Baltic, based on ISO-8859-13)
- sloppy-windows-1258 (Vietnamese)
- sloppy-cp874 (Thai, based on ISO-8859-11)
- sloppy-iso-8859-3 (Maltese and Esperanto, I guess)
- sloppy-iso-8859-6 (different Arabic)
- sloppy-iso-8859-7 (Greek)
- sloppy-iso-8859-8 (Hebrew)
- sloppy-iso-8859-11 (Thai)
Aliases such as "sloppy-cp1252" for "sloppy-windows-1252" will also be
defined.
Only sloppy-windows-1251 and sloppy-windows-1252 are used by the rest of ftfy;
the rest are rather uncommon.
Here are some examples, using `ftfy.explain_unicode` to illustrate how
sloppy-windows-1252 merges Windows-1252 with Latin-1:
>>> from ftfy import explain_unicode
>>> some_bytes = b'\x80\x81\x82'
>>> explain_unicode(some_bytes.decode('latin-1'))
U+0080 \x80 [Cc] <unknown>
U+0081 \x81 [Cc] <unknown>
U+0082 \x82 [Cc] <unknown>
>>> explain_unicode(some_bytes.decode('windows-1252', 'replace'))
U+20AC [Sc] EURO SIGN
U+FFFD <EFBFBD> [So] REPLACEMENT CHARACTER
U+201A [Ps] SINGLE LOW-9 QUOTATION MARK
>>> explain_unicode(some_bytes.decode('sloppy-windows-1252'))
U+20AC [Sc] EURO SIGN
U+0081 \x81 [Cc] <unknown>
U+201A [Ps] SINGLE LOW-9 QUOTATION MARK
"""
from __future__ import unicode_literals
import codecs
from encodings import normalize_encoding
# U+FFFD, the character that decoding with errors='replace' substitutes for
# bytes it cannot decode. make_sloppy_codec uses it to spot unassigned bytes.
REPLACEMENT_CHAR = '\ufffd'
def make_sloppy_codec(encoding):
    """
    Build and return the `codecs.CodecInfo` for the 'sloppy' version of the
    single-byte codec named `encoding`: one that can also encode and decode
    the bytes that `encoding` leaves unassigned.

    Single-byte encodings in the standard library are defined using some
    boilerplate classes surrounding the functions that do the actual work,
    `codecs.charmap_decode` and `charmap_encode`. This function, given an
    encoding name, *defines* those boilerplate classes.
    """
    # All 256 possible byte values, in order.
    every_byte = bytearray(range(256))

    # Start from the Latin-1 reading of every byte, where byte N is simply
    # the character U+00NN.
    table_chars = list(every_byte.decode('latin-1'))

    # Overlay the real decoding wherever the encoding defines one. Unassigned
    # bytes come back as the replacement character; those positions keep
    # their Latin-1 value.
    for index, char in enumerate(every_byte.decode(encoding, 'replace')):
        if char == REPLACEMENT_CHAR:
            continue
        table_chars[index] = char

    # The data structures that tell the charmap functions how to decode and
    # encode in this sloppy encoding.
    decoding_table = ''.join(table_chars)
    encoding_table = codecs.charmap_build(decoding_table)

    # The class boilerplate below mirrors what the standard library's
    # charmap modules (such as `encodings.cp1252`) define.
    class Codec(codecs.Codec):
        def encode(self, input, errors='strict'):
            return codecs.charmap_encode(input, errors, encoding_table)

        def decode(self, input, errors='strict'):
            return codecs.charmap_decode(input, errors, decoding_table)

    class IncrementalEncoder(codecs.IncrementalEncoder):
        def encode(self, input, final=False):
            encoded, _length = codecs.charmap_encode(
                input, self.errors, encoding_table
            )
            return encoded

    class IncrementalDecoder(codecs.IncrementalDecoder):
        def decode(self, input, final=False):
            decoded, _length = codecs.charmap_decode(
                input, self.errors, decoding_table
            )
            return decoded

    class StreamWriter(Codec, codecs.StreamWriter):
        pass

    class StreamReader(Codec, codecs.StreamReader):
        pass

    stateless_encode = Codec().encode
    stateless_decode = Codec().decode
    return codecs.CodecInfo(
        name='sloppy-' + encoding,
        encode=stateless_encode,
        decode=stateless_decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamreader=StreamReader,
        streamwriter=StreamWriter,
    )
# Define a codec for each incomplete encoding. The resulting CODECS dictionary
# can be used by the main module of ftfy.bad_codecs.
CODECS = {}

# Names of the standard encodings that get a sloppy counterpart. The Windows
# code pages are listed under both their 'windows-NNNN' and 'cpNNNN' names so
# that both spellings of the sloppy name resolve.
INCOMPLETE_ENCODINGS = (
    ['windows-%s' % num for num in range(1250, 1259)] +
    ['iso-8859-%s' % num for num in (3, 6, 7, 8, 11)] +
    ['cp%s' % num for num in range(1250, 1259)] + ['cp874']
)

for _encoding in INCOMPLETE_ENCODINGS:
    # Keys are stored in normalized form (e.g. 'sloppy_windows_1252'), which
    # is the form the codec search function looks them up by.
    _new_name = normalize_encoding('sloppy-' + _encoding)
    CODECS[_new_name] = make_sloppy_codec(_encoding)

View File

@ -0,0 +1,281 @@
r"""
This file defines a codec called "utf-8-variants" (or "utf-8-var"), which can
decode text that's been encoded with a popular non-standard version of UTF-8.
This includes CESU-8, the accidental encoding made by layering UTF-8 on top of
UTF-16, as well as Java's twist on CESU-8 that contains a two-byte encoding for
codepoint 0.
This is particularly relevant in Python 3, which provides no other way of
decoding CESU-8 or Java's encoding. [1]
The easiest way to use the codec is to simply import `ftfy.bad_codecs`:
>>> import ftfy.bad_codecs
>>> result = b'here comes a null! \xc0\x80'.decode('utf-8-var')
>>> print(repr(result).lstrip('u'))
'here comes a null! \x00'
The codec does not at all enforce "correct" CESU-8. For example, the Unicode
Consortium's not-quite-standard describing CESU-8 requires that there is only
one possible encoding of any character, so it does not allow mixing of valid
UTF-8 and CESU-8. This codec *does* allow that, just like Python 2's UTF-8
decoder does.
Characters in the Basic Multilingual Plane still have only one encoding. This
codec still enforces the rule, within the BMP, that characters must appear in
their shortest form. There is one exception: the sequence of bytes `0xc0 0x80`,
instead of just `0x00`, may be used to encode the null character `U+0000`, like
in Java.
If you encode with this codec, you get legitimate UTF-8. Decoding with this
codec and then re-encoding is not idempotent, although encoding and then
decoding is. So this module won't produce CESU-8 for you. Look for that
functionality in the sister module, "Breaks Text For You", coming approximately
never.
[1] In a pinch, you can decode CESU-8 in Python 2 using the UTF-8 codec: first
decode the bytes (incorrectly), then encode them, then decode them again, using
UTF-8 as the codec every time.
"""
from __future__ import unicode_literals
from ftfy.compatibility import bytes_to_ints, unichr, PYTHON2
from encodings.utf_8 import (IncrementalDecoder as UTF8IncrementalDecoder,
IncrementalEncoder as UTF8IncrementalEncoder)
import re
import codecs
# The name this codec reports in its CodecInfo.
NAME = 'utf-8-variants'

# This regular expression matches all possible six-byte CESU-8 sequences:
# a three-byte encoding of a high surrogate (0xed, then 0xa0-0xaf, then a
# continuation byte) followed by a three-byte encoding of a low surrogate
# (0xed, then 0xb0-0xbf, then a continuation byte).
CESU8_RE = re.compile(b'\xed[\xa0-\xaf][\x80-\xbf]\xed[\xb0-\xbf][\x80-\xbf]')
class IncrementalDecoder(UTF8IncrementalDecoder):
    """
    An incremental decoder that extends Python's built-in UTF-8 decoder.

    This decoder needs to take in bytes, possibly arriving in a stream, and
    output the correctly decoded text. The general strategy for doing this
    is to fall back on the real UTF-8 decoder whenever possible, because
    the real UTF-8 decoder is way optimized, but to call specialized methods
    we define here for the cases the real decoder isn't expecting.
    """
    def _buffer_decode(self, input, errors, final):
        """
        Decode bytes that may be arriving in a stream, following the Codecs
        API.

        `input` is the incoming sequence of bytes. `errors` tells us how to
        handle errors, though we delegate all error-handling cases to the real
        UTF-8 decoder to ensure correct behavior. `final` indicates whether
        this is the end of the sequence, in which case we should raise an
        error given incomplete input.

        Returns as much decoded text as possible, and the number of bytes
        consumed.
        """
        # decoded_segments are the pieces of text we have decoded so far,
        # and position is our current position in the byte string. (Bytes
        # before this position have been consumed, and bytes after it have
        # yet to be decoded.)
        decoded_segments = []
        position = 0
        while True:
            # Use _buffer_decode_step to decode a segment of text.
            decoded, consumed = self._buffer_decode_step(
                input[position:],
                errors,
                final
            )
            if consumed == 0:
                # Either there's nothing left to decode, or we need to wait
                # for more input. Either way, we're done for now.
                break

            # Append the decoded text to the list, and update our position.
            decoded_segments.append(decoded)
            position += consumed

        if final:
            # _buffer_decode_step must consume all the bytes when `final` is
            # true.
            assert position == len(input)

        return ''.join(decoded_segments), position

    def _buffer_decode_step(self, input, errors, final):
        """
        Decode one segment from the start of `input`, returning the decoded
        text and the number of bytes consumed.

        There are three possibilities for each decoding step:

        - Decode as much real UTF-8 as possible.
        - Decode a six-byte CESU-8 sequence at the current position.
        - Decode a Java-style null at the current position.

        This method figures out which step is appropriate, and does it.
        """
        # Get a reference to the superclass method that we'll be using for
        # most of the real work.
        sup = UTF8IncrementalDecoder._buffer_decode

        # Find the next byte position that might indicate a variant of UTF-8.
        # CESU-8 sequences always start with 0xed, and Java nulls always
        # start with 0xc0. Byte 0xc0 never appears in real UTF-8; byte 0xed
        # can (it also begins some ordinary three-byte characters), which is
        # why the surrogate handler below falls back on the real decoder.
        cutoff1 = input.find(b'\xed')
        cutoff2 = input.find(b'\xc0')

        # Set `cutoff` to whichever cutoff comes first.
        if cutoff1 != -1 and cutoff2 != -1:
            cutoff = min(cutoff1, cutoff2)
        elif cutoff1 != -1:
            cutoff = cutoff1
        elif cutoff2 != -1:
            cutoff = cutoff2
        else:
            # The entire input can be decoded as UTF-8, so just do so.
            return sup(input, errors, final)

        if cutoff1 == 0:
            # Decode a possible six-byte sequence starting with 0xed.
            return self._buffer_decode_surrogates(sup, input, errors, final)
        elif cutoff2 == 0:
            # Decode a possible two-byte sequence, 0xc0 0x80.
            return self._buffer_decode_null(sup, input, errors, final)
        else:
            # Decode the bytes up until the next weird thing as UTF-8.
            # Set final=True because 0xc0 and 0xed don't make sense in the
            # middle of a sequence, in any variant.
            return sup(input[:cutoff], errors, True)

    @staticmethod
    def _buffer_decode_null(sup, input, errors, final):
        """
        Decode the bytes 0xc0 0x80 as U+0000, like Java does.

        `input` starts with 0xc0, and `sup` is the real UTF-8 decoding
        function, which handles every case other than the exact sequence
        0xc0 0x80. Returns the decoded text and the number of bytes consumed.
        """
        nextbyte = input[1:2]
        if nextbyte == b'':
            if final:
                # We found 0xc0 at the end of the stream, which is an error.
                # Delegate to the superclass method to handle that error.
                return sup(input, errors, final)
            else:
                # We found 0xc0 and we don't know what comes next, so consume
                # no bytes and wait.
                return '', 0
        elif nextbyte == b'\x80':
            # We found the usual 0xc0 0x80 sequence, so decode it and consume
            # two bytes.
            return '\u0000', 2
        else:
            # We found 0xc0 followed by something else, which is an error.
            # Whatever should happen is equivalent to what happens when the
            # superclass is given just the byte 0xc0, with final=True.
            return sup(b'\xc0', errors, True)

    @staticmethod
    def _buffer_decode_surrogates(sup, input, errors, final):
        """
        Decode a possible CESU-8 surrogate pair at the start of `input`,
        which begins with byte 0xed. Returns the decoded text and the number
        of bytes consumed.

        When we have improperly encoded surrogates, we can still see the
        bits that they were meant to represent.

        The surrogates were meant to encode a 20-bit number, to which we
        add 0x10000 to get a codepoint. That 20-bit number now appears in
        this form:

          11101101 1010abcd 10efghij 11101101 1011klmn 10opqrst

        The CESU8_RE above matches byte sequences of this form. Then we need
        to extract the bits and assemble a codepoint number from them.
        """
        if len(input) < 6:
            if final:
                # We found 0xed near the end of the stream, and there aren't
                # six bytes to decode. Delegate to the superclass method to
                # handle it as normal UTF-8. It might be a Hangul character
                # or an error.
                if PYTHON2 and len(input) >= 3:
                    # We can't trust Python 2 to raise an error when it's
                    # asked to decode a surrogate, so let's force the issue.
                    input = mangle_surrogates(input)
                return sup(input, errors, final)
            else:
                # We found 0xed, the stream isn't over yet, and we don't know
                # enough of the following bytes to decode anything, so consume
                # zero bytes and wait.
                return '', 0
        else:
            if CESU8_RE.match(input):
                # If this is a CESU-8 sequence, do some math to pull out
                # the intended 20-bit value, and consume six bytes.
                # bytenums[0] and bytenums[3] are the 0xed lead bytes and
                # carry none of the payload bits.
                bytenums = bytes_to_ints(input[:6])
                codepoint = (
                    ((bytenums[1] & 0x0f) << 16) +
                    ((bytenums[2] & 0x3f) << 10) +
                    ((bytenums[4] & 0x0f) << 6) +
                    (bytenums[5] & 0x3f) +
                    0x10000
                )
                return unichr(codepoint), 6
            else:
                # This looked like a CESU-8 sequence, but it wasn't one.
                # 0xed indicates the start of a three-byte sequence, so give
                # three bytes to the superclass to decode as usual -- except
                # for working around the Python 2 discrepancy as before.
                if PYTHON2:
                    input = mangle_surrogates(input)
                return sup(input[:3], errors, False)
def mangle_surrogates(bytestring):
    """
    Replace UTF-8-encoded surrogates at the start of `bytestring` with bytes
    that are guaranteed not to decode.

    When Python 3 sees the UTF-8 encoding of a surrogate codepoint, it treats
    it as an error (which it is). In 'replace' mode, it will decode as three
    replacement characters. But Python 2 will just output the surrogate
    codepoint.

    To ensure consistency between Python 2 and Python 3, and protect downstream
    applications from malformed strings, we turn surrogate sequences at the
    start of the string into the bytes `ff ff ff`, which we're *sure* won't
    decode, and which turn into three replacement characters in 'replace' mode.

    Only the leading run of consecutive surrogate encodings is rewritten; the
    rest of the string is returned untouched. On Python 3 the input is
    returned as is.
    """
    if not PYTHON2:
        # On Python 3, nothing needs to be done.
        return bytestring

    # Walk the leading run of three-byte surrogate encodings with a loop
    # rather than recursion, so that a long run of surrogates can neither
    # exhaust the recursion limit nor copy the tail of the string once per
    # surrogate.
    position = 0
    while (bytestring.startswith(b'\xed', position)
           and len(bytestring) - position >= 3):
        decoded = bytestring[position:position + 3].decode('utf-8', 'replace')
        if not ('\ud800' <= decoded <= '\udfff'):
            break
        position += 3

    if position == 0:
        return bytestring
    return b'\xff\xff\xff' * (position // 3) + bytestring[position:]
# The encoder is identical to UTF-8: encoding with this codec always produces
# standard UTF-8, so the built-in incremental encoder is reused as is.
IncrementalEncoder = UTF8IncrementalEncoder
# Everything below here is boilerplate that matches the modules in the
# built-in `encodings` package.
def encode(input, errors='strict'):
    """
    Stateless encoder, following the Codecs API: returns the encoded bytes
    and the length of the input that was consumed.
    """
    return IncrementalEncoder(errors).encode(input, final=True), len(input)


def decode(input, errors='strict'):
    """
    Stateless decoder, following the Codecs API: returns the decoded text
    and the length of the input that was consumed.
    """
    return IncrementalDecoder(errors).decode(input, final=True), len(input)


class StreamWriter(codecs.StreamWriter):
    # The base class calls `self.encode(object, self.errors)`. A plain Python
    # function stored on the class would be bound as a method and receive
    # `self` as its `input`, so it has to be wrapped in `staticmethod`.
    # (The standard library's codec modules get away without this because
    # they assign built-in C functions, which never bind.)
    encode = staticmethod(encode)


class StreamReader(codecs.StreamReader):
    # Wrapped in `staticmethod` for the same reason as StreamWriter.encode:
    # the base class calls `self.decode(data, self.errors)`.
    decode = staticmethod(decode)
# The codec description that the `ftfy.bad_codecs` search function returns
# for 'utf-8-variants' and its aliases.
CODEC_INFO = codecs.CodecInfo(
    name=NAME,
    encode=encode,
    decode=decode,
    incrementalencoder=IncrementalEncoder,
    incrementaldecoder=IncrementalDecoder,
    streamreader=StreamReader,
    streamwriter=StreamWriter,
)

144
lib/ftfy/badness.py Normal file
View File

@ -0,0 +1,144 @@
# -*- coding: utf-8 -*-
"""
Heuristics to determine whether re-encoding text is actually making it
more reasonable.
"""
from __future__ import unicode_literals
from ftfy.chardata import chars_to_classes
import re
import unicodedata
# The following regex uses the mapping of character classes to ASCII
# characters defined in chardata.py and build_data.py:
#
# L = Latin capital letter
# l = Latin lowercase letter
# A = Non-latin capital or title-case letter
# a = Non-latin lowercase letter
# C = Non-cased letter (Lo)
# X = Control character (Cc)
# m = Letter modifier (Lm)
# M = Mark (Mc, Me, Mn)
# N = Miscellaneous numbers (No)
# P = Private use (Co)
# 0 = Math symbol (Sm)
# 1 = Currency symbol (Sc)
# 2 = Symbol modifier (Sk)
# 3 = Other symbol (So)
# S = UTF-16 surrogate
# _ = Unassigned character
# ' ' = Whitespace (the class letter is a space character)
# o = Other
def _make_weirdness_regex():
"""
Creates a list of regexes that match 'weird' character sequences.
The more matches there are, the weirder the text is.
"""
groups = []
# Match lowercase letters that are followed by non-ASCII uppercase letters
groups.append('lA')
# Match diacritical marks, except when they modify a non-cased letter or
# another mark.
#
# You wouldn't put a diacritical mark on a digit or a space, for example.
# You might put it on a Latin letter, but in that case there will almost
# always be a pre-composed version, and we normalize to pre-composed
# versions first. The cases that can't be pre-composed tend to be in
# large scripts without case, which are in class C.
groups.append('[^CM]M')
# Match non-Latin characters adjacent to Latin characters.
#
# This is a simplification from ftfy version 2, which compared all
# adjacent scripts. However, the ambiguities we need to resolve come from
# encodings designed to represent Latin characters.
groups.append('[Ll][AaC]')
groups.append('[AaC][Ll]')
# Match C1 control characters, which are almost always the result of
# decoding Latin-1 that was meant to be Windows-1252.
groups.append('X')
# Match private use and unassigned characters.
groups.append('P')
groups.append('_')
# Match adjacent characters from any different pair of these categories:
# - Modifier marks (M)
# - Letter modifiers (m)
# - Miscellaneous numbers (N)
# - Symbols (0123)
exclusive_categories = 'MmN0123'
for cat1 in exclusive_categories:
others_range = ''.join(c for c in exclusive_categories if c != cat1)
groups.append('{cat1}[{others_range}]'.format(
cat1=cat1, others_range=others_range
))
regex = '|'.join('({0})'.format(group) for group in groups)
return re.compile(regex)
# Compiled once at import time; applied to character-class strings, not to
# the original text.
WEIRDNESS_RE = _make_weirdness_regex()
# A few characters are common ending punctuation that can show up at the end
# of a mojibake sequence. It's plausible that such a character could appear
# after an accented capital letter, for example, so we'll want to add a
# slight preference to leave these characters alone.
#
# The match ends with a + so that we only give the bonus once for a
# consecutive sequence of these characters.
#
# Unlike WEIRDNESS_RE, this regex is applied to the text itself rather than
# to its string of character classes.
ENDING_PUNCT_RE = re.compile(
    '['
    '\N{HORIZONTAL ELLIPSIS}\N{EM DASH}\N{EN DASH}'
    '\N{RIGHT SINGLE QUOTATION MARK}\N{RIGHT DOUBLE QUOTATION MARK}'
    '\N{SINGLE RIGHT-POINTING ANGLE QUOTATION MARK}'
    '\N{RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK}'
    ']+'
)
def sequence_weirdness(text):
    """
    Score how often a text has unexpected characters or sequences of
    characters. This metric is used to disambiguate when text should be
    re-decoded or left as is.

    The text is first normalized to NFC form, so that penalties for
    diacritical marks don't apply to characters that know what to do with
    them.

    The following things are deemed weird:

    - Lowercase letters followed by non-ASCII uppercase letters
    - Non-Latin characters next to Latin characters
    - Un-combined diacritical marks, unless they're stacking on non-alphabetic
      characters (in languages that do that kind of thing a lot) or other
      marks
    - C1 control characters
    - Private use and unassigned characters
    - Adjacent symbols from any different pair of these categories:

        - Modifier marks
        - Letter modifiers
        - Non-digit numbers
        - Symbols (including math and currency)

    The return value is twice the number of instances of weirdness, minus
    one for each run of common ending punctuation (see ENDING_PUNCT_RE).
    """
    normalized = unicodedata.normalize('NFC', text)
    class_string = chars_to_classes(normalized)
    weird_matches = WEIRDNESS_RE.findall(class_string)
    punct_runs = ENDING_PUNCT_RE.findall(normalized)
    return 2 * len(weird_matches) - len(punct_runs)
def text_cost(text):
    """
    Return the overall cost of a text: its "weirdness" (see
    :func:`sequence_weirdness`) plus its length.

    Weirder is worse, but all else being equal, shorter strings are better.
    """
    weirdness = sequence_weirdness(text)
    return weirdness + len(text)

111
lib/ftfy/build_data.py Normal file
View File

@ -0,0 +1,111 @@
"""
A script to make the char_classes.dat file.
This never needs to run in normal usage. It needs to be run if the character
classes we care about change, or if a new version of Python supports a new
Unicode standard and we want it to affect our string decoding.
The file that we generate is based on Unicode 6.1, as supported by Python 3.3.
You can certainly use it in earlier versions. This simply makes sure that we
get consistent results from running ftfy on different versions of Python.
The file will be written to the current directory.
"""
from __future__ import unicode_literals
import unicodedata
import sys
import zlib
# Python 3 has no `unichr`; its `chr` does the same job. Define the name so
# the code below can use `unichr` on either major version.
if sys.hexversion >= 0x03000000:
    unichr = chr
# L = Latin capital letter
# l = Latin lowercase letter
# A = Non-latin capital or title-case letter
# a = Non-latin lowercase letter
# C = Non-cased letter (Lo)
# X = Control character (Cc)
# m = Letter modifier (Lm)
# M = Mark (Mc, Me, Mn)
# N = Miscellaneous numbers (No)
# P = Private use (Co)
# 0 = Math symbol (Sm)
# 1 = Currency symbol (Sc)
# 2 = Symbol modifier (Sk)
# 3 = Other symbol (So)
# S = UTF-16 surrogate
# _ = Unassigned character
# ' ' = Whitespace (the class letter is a space character)
# o = Other
def make_char_data_file(do_it_anyway=False):
    """
    Build the compressed data file 'char_classes.dat' and write it to the
    current directory.

    The file holds one class letter per codepoint, for every codepoint from
    U+0000 to U+10FFFF, as zlib-compressed ASCII.

    If you run this, run it in Python 3.3 or later. It will run in earlier
    versions, but you won't get the current Unicode standard, leading to
    inconsistent behavior. To protect against this, running this in the
    wrong version of Python will raise a RuntimeError unless you pass
    `do_it_anyway=True`.

    Raises ValueError if a letter turns up in a category this script does
    not know how to classify.
    """
    if sys.hexversion < 0x03030000 and not do_it_anyway:
        raise RuntimeError(
            "This function should be run in Python 3.3 or later."
        )

    # Class letters for letters that don't count as Latin, by category.
    letter_classes = {
        'Lu': 'A', 'Lt': 'A',
        'Ll': 'a',
        'Lo': 'C',
        'Lm': 'm',
    }
    # Class letters for the non-letter categories that map one-to-one.
    other_classes = {
        'No': 'N',
        'Sm': '0', 'Sc': '1', 'Sk': '2', 'So': '3',
        'Cn': '_', 'Cc': 'X', 'Cs': 'S', 'Co': 'P',
    }

    cclasses = [None] * 0x110000
    for codepoint in range(0x0, 0x110000):
        char = unichr(codepoint)
        category = unicodedata.category(char)

        if category.startswith('L'):  # letters
            # Only low codepoints can count as Latin, so check the range
            # first and never look up names we don't need. The default of ''
            # keeps a letter without a name from raising here.
            is_latin = (
                codepoint < 0x200 and
                unicodedata.name(char, '').startswith('LATIN')
            )
            if is_latin:
                cclass = 'L' if category == 'Lu' else 'l'
            elif category in letter_classes:
                # non-Latin letter, or close enough
                cclass = letter_classes[category]
            else:
                raise ValueError('got some weird kind of letter')
        elif category.startswith('M'):  # marks
            cclass = 'M'
        elif category in other_classes:
            cclass = other_classes[category]
        elif category.startswith('Z'):
            cclass = ' '
        else:
            cclass = 'o'
        cclasses[codepoint] = cclass

    # Tab, line feed, form feed and carriage return are control characters
    # by category, but we want to treat them as whitespace.
    cclasses[9] = cclasses[10] = cclasses[12] = cclasses[13] = ' '

    # Use a context manager so the file is closed even if writing fails.
    with open('char_classes.dat', 'wb') as out:
        out.write(zlib.compress(''.join(cclasses).encode('ascii')))
# Running this file as a script regenerates char_classes.dat in the current
# directory.
if __name__ == '__main__':
    make_char_data_file()

BIN
lib/ftfy/char_classes.dat Normal file

Binary file not shown.

81
lib/ftfy/chardata.py Normal file
View File

@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
"""
This gives other modules access to the gritty details about characters and the
encodings that use them.
"""
from __future__ import unicode_literals
import re
import zlib
from pkg_resources import resource_string
from ftfy.compatibility import unichr
# These are the five encodings we will try to fix in ftfy, in the
# order that they should be tried. Each one also gets an entry in
# ENCODING_REGEXES below.
CHARMAP_ENCODINGS = [
    'latin-1',
    'sloppy-windows-1252',
    'macroman',
    'cp437',
    'sloppy-windows-1251',
]
def _build_regexes():
    """
    Build the ENCODING_REGEXES dictionary, which maps an encoding name to a
    compiled regex giving a reasonably fast way to detect whether a given
    string could be represented in that encoding.

    The simplest one is the 'ascii' detector, which of course just determines
    if all characters are between U+0000 and U+007F. There is one more
    detector for each encoding in CHARMAP_ENCODINGS.
    """
    # The bytes 0x80 through 0xff, in order.
    high_bytes = ''.join(unichr(i) for i in range(128, 256)).encode('latin-1')

    # Start with the regex that matches ASCII text.
    regexes = {'ascii': re.compile('^[\x00-\x7f]*$')}

    for encoding in CHARMAP_ENCODINGS:
        # Build a regex from the ASCII range, followed by the decodings of
        # bytes 0x80-0xff in this character set. (This uses the fact that all
        # regex special characters are ASCII, and therefore won't appear in
        # the string.)
        high_chars = high_bytes.decode(encoding)
        regexes[encoding] = re.compile(
            '^[\x00-\x7f{0}]*$'.format(high_chars)
        )
    return regexes
# Built once at import time; keyed by 'ascii' and by each name in
# CHARMAP_ENCODINGS.
ENCODING_REGEXES = _build_regexes()
def possible_encoding(text, encoding):
    """
    Return True if `text` could have been decoded from the single-byte
    encoding named `encoding`, and False otherwise.

    In other words, check whether the text can be encoded in that encoding,
    possibly sloppily. `encoding` must be a key of ENCODING_REGEXES;
    any other name raises KeyError.
    """
    detector = ENCODING_REGEXES[encoding]
    return detector.match(text) is not None
# The character-class table, loaded from the packaged data file that
# build_data.py generates: a string holding one class letter per codepoint,
# indexed by codepoint.
CHAR_CLASS_STRING = zlib.decompress(
    resource_string(__name__, 'char_classes.dat')
).decode('ascii')
def chars_to_classes(string):
    """
    Return a copy of `string` in which each Unicode character has been
    replaced by a letter indicating which of many classes it's in.

    See build_data.py for where this data comes from and what it means.
    """
    class_table = CHAR_CLASS_STRING
    return string.translate(class_table)
# A translate mapping that will strip all C0 control characters except
# those that represent whitespace. Mapping a codepoint to None is how
# `translate` is told to delete that character.
CONTROL_CHARS = {}
for i in range(32):
    CONTROL_CHARS[i] = None

# Map whitespace control characters to themselves. (Removing their entries
# from the mapping means `translate` leaves them untouched.)
for char in '\t\n\f\r':
    del CONTROL_CHARS[ord(char)]

34
lib/ftfy/cli.py Normal file
View File

@ -0,0 +1,34 @@
"""
A simple command-line utility for fixing text found in a file.
Because files do not come with their encoding marked, it first runs the file
through `ftfy.guess_bytes`, then runs it through `ftfy.fix_text`.
"""
from ftfy import fix_file
import sys
# True on Python 2, where `main` encodes each line as UTF-8 before writing it
# to stdout; on Python 3 the text is written as is.
ENCODE_STDOUT = (sys.hexversion < 0x03000000)
def main():
    """
    Run ftfy as a command-line utility. (Requires Python 2.7 or later, or
    the 'argparse' module.)

    Takes one command-line argument, the name of the file to fix, and writes
    the fixed text to standard output.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('filename', help='file to transcode')
    args = parser.parse_args()

    # Use a context manager so the input file is closed when we finish or
    # fail, and avoid shadowing the Python 2 builtin `file`.
    with open(args.filename) as infile:
        for line in fix_file(infile):
            if ENCODE_STDOUT:
                sys.stdout.write(line.encode('utf-8'))
            else:
                sys.stdout.write(line)
# Allow this module to be run directly as a script.
if __name__ == '__main__':
    main()

79
lib/ftfy/compatibility.py Normal file
View File

@ -0,0 +1,79 @@
"""
Makes some function names and behavior consistent between Python 2 and
Python 3, and also between narrow and wide builds.
"""
from __future__ import unicode_literals
import sys
import re
import unicodedata
# Give the names that differ between Python 2 and Python 3 a single spelling
# that other ftfy modules can import from here.
if sys.hexversion >= 0x03000000:
    from html import entities
    unichr = chr
    xrange = range
    PYTHON2 = False
else:
    import htmlentitydefs as entities
    # Rebind the builtins as module-level names so that they can be imported
    # from this module on Python 2 as well.
    unichr = unichr
    xrange = xrange
    PYTHON2 = True

# The HTML entities module, available under its Python 2 name on either
# version.
htmlentitydefs = entities

PYTHON34_OR_LATER = (sys.hexversion >= 0x03040000)
def _narrow_unichr_workaround(codepoint):
"""
A replacement for unichr() on narrow builds of Python. This will get
us the narrow representation of an astral character, which will be
a string of length two, containing two UTF-16 surrogates.
"""
escaped = b'\\U%08x' % codepoint
return escaped.decode('unicode-escape')
# UNSAFE_PRIVATE_USE_RE matches the characters U+100000 through U+10FFFF.
# How it has to be written depends on whether this is a narrow or a wide
# build of Python.
if sys.maxunicode < 0x10000:
    # On a narrow build, replace unichr with the version that can produce
    # astral characters as surrogate pairs.
    unichr = _narrow_unichr_workaround
    # In a narrow build of Python, we can't write a regex involving astral
    # characters. If we want to write the regex:
    #
    #   [\U00100000-\U0010ffff]
    #
    # The actual string that defines it quietly turns into:
    #
    #   [\udbc0\udc00-\udbff\udfff]
    #
    # And now the range operator only applies to the middle two characters.
    # It looks like a range that's going backwards from \dc00 to \dbff,
    # which is an error.
    #
    # What we can do instead is rewrite the expression to be _about_ the two
    # surrogates that make up the astral characters, instead of the characters
    # themselves. This would be wrong on a wide build, but it works on a
    # narrow build.
    UNSAFE_PRIVATE_USE_RE = re.compile('[\udbc0-\udbff][\udc00-\udfff]')
else:
    UNSAFE_PRIVATE_USE_RE = re.compile('[\U00100000-\U0010ffff]')
def bytes_to_ints(bytestring):
    """
    Return the byte values of `bytestring` as a sequence of integers.

    On Python 3 a 'bytes' object already is a sequence of integers, so it is
    handed back untouched. On Python 2 each one-character element is
    converted with ord() and the results are returned as a list.
    """
    if not PYTHON2:
        return bytestring
    return [ord(b) for b in bytestring]
def is_printable(char):
    """
    Report whether `char` is printable.

    Python 3 answers this with str.isprintable(). That method does not exist
    on Python 2, so there we fall back to a crude approximation: a character
    is printable unless its Unicode category starts with 'C'.
    """
    if not PYTHON2:
        return char.isprintable()
    category = unicodedata.category(char)
    return not category.startswith('C')

473
lib/ftfy/fixes.py Normal file
View File

@ -0,0 +1,473 @@
# -*- coding: utf-8 -*-
"""
This module contains the individual fixes that the main fix_text function
can perform.
"""
from __future__ import unicode_literals
from ftfy.chardata import (possible_encoding,
CHARMAP_ENCODINGS, CONTROL_CHARS)
from ftfy.badness import text_cost
from ftfy.compatibility import htmlentitydefs, unichr, UNSAFE_PRIVATE_USE_RE
import re
import sys
import codecs
# Message carried by the UnicodeError that fix_one_step_and_explain raises
# when it is handed bytes instead of Unicode text. The %d placeholder is
# filled with the running interpreter's major version so that the HOWTO link
# points at the matching edition of the Python documentation.
BYTES_ERROR_TEXT = """Hey wait, this isn't Unicode.
ftfy is designed to fix problems that were introduced by handling Unicode
incorrectly. It might be able to fix the bytes you just handed it, but the
fact that you just gave a pile of bytes to a function that fixes text means
that your code is *also* handling Unicode incorrectly.
ftfy takes Unicode text as input. You should take these bytes and decode
them from the encoding you think they are in. If you're not sure what encoding
they're in:
- First, try to find out. 'utf-8' is a good assumption.
- If the encoding is simply unknowable, try running your bytes through
ftfy.guess_bytes. As the name implies, this may not always be accurate.
If you're confused by this, please read the Python Unicode HOWTO:
http://docs.python.org/%d/howto/unicode.html
""" % sys.version_info[0]
def fix_text_encoding(text):
    r"""
    Fix text with incorrectly-decoded garbage ("mojibake") whenever possible.

    Real-world text is often encoded as UTF-8, mistakenly decoded with an
    ugly single-byte format such as Latin-1 or Windows codepage 1252, and
    then encoded as UTF-8 again. Perfectly good Unicode-aware code ends up
    holding garbage because someone else (or maybe "someone else") made a
    mistake.

    This function looks for evidence of that having happened. When it finds
    nonsense runs of single-byte characters that were really meant to be
    UTF-8, it turns them back into the Unicode characters they were meant to
    represent.

    The input must be Unicode. If you don't have Unicode text, you're not
    using the right tool to solve your problem.

    .. note::
        The examples below use unmarked string literals, but they are
        Unicode text: Python 2 has "unicode_literals" switched on in this
        module, and in Python 3 that is always the case.

    Text that looks wrongly decoded is repaired; text that doesn't is left
    alone.

        >>> print(fix_text_encoding('Ãºnico'))
        único

        >>> print(fix_text_encoding('This text is fine already :þ'))
        This text is fine already :þ

    These characters often come from Microsoft products, so besides Unicode
    characters 128-255 we also allow for Windows's conflicting idea of what
    characters 128-160 are.

        >>> print(fix_text_encoding('This â€” should be an em dash'))
        This — should be an em dash

    Windows characters and raw control characters can be mixed together,
    especially around bytes such as 0x81 that have no mapping in Windows.
    Python's standard `.encode` and `.decode` methods cannot correct this
    string:

        >>> print(fix_text_encoding('This text is sad .â\x81”.'))
        This text is sad .⁔.

    There are safeguards against "fixing" sequences of letters and
    punctuation that can occur in valid text:

        >>> print(fix_text_encoding('not such a fan of Charlotte Brontë…”'))
        not such a fan of Charlotte Brontë…”

    Genuinely ambiguous cases can sometimes be settled by finding other
    characters that are not double-encoded and expecting the encoding to be
    consistent:

        >>> print(fix_text_encoding('AHÅ™, the new sofa from IKEA®'))
        AHÅ™, the new sofa from IKEA®

    Finally, text in a single-byte encoding that was meant to be
    Windows-1252 all along but was read as Latin-1 is handled too:

        >>> print(fix_text_encoding('This text was never UTF-8 at all\x85'))
        This text was never UTF-8 at all…

    The best version of the text is found using
    :func:`ftfy.badness.text_cost`.
    """
    # Only the repaired text is wanted here; the plan that explains how it
    # was repaired is discarded.
    return fix_encoding_and_explain(text)[0]
def fix_encoding_and_explain(text):
    """
    Re-decode text that has been decoded incorrectly, and explain how.

    Returns a pair: the best version of the text that was found, and a
    "plan" listing every step that was needed to reach it. The plan can be
    replayed on similar text with ``apply_plan``, without having to detect
    anything again.
    """
    best_version, best_plan = text, []
    best_cost = text_cost(text)
    plan_so_far = []

    while True:
        prevtext = text
        text, plan = fix_one_step_and_explain(text)
        plan_so_far.extend(plan)
        cost = text_cost(text)

        # Particularly obsolete encodings carry a penalty, so they are only
        # chosen when they can successfully replace multiple characters.
        obsolete = ('macroman', 'cp437')
        if any(('encode', name) in plan_so_far for name in obsolete):
            cost += 2

        # Decoding from Windows-1251 (Cyrillic) needs pretty solid evidence.
        if ('encode', 'sloppy-windows-1251') in plan_so_far:
            cost += 5

        if cost < best_cost:
            best_version, best_cost = text, cost
            best_plan = list(plan_so_far)

        # A step that changes nothing means there is nothing left to fix.
        if text == prevtext:
            return best_version, best_plan
def fix_one_step_and_explain(text):
    """
    Perform one step of re-decoding text that was decoded incorrectly.

    Returns the (possibly unchanged) text together with a "plan": a list of
    ('encode' | 'decode', encoding) steps that reproduces what was done. The
    plan is empty when the text is returned unchanged.

    Raises UnicodeError if `text` is a bytes object rather than Unicode.
    """
    if isinstance(text, bytes):
        raise UnicodeError(BYTES_ERROR_TEXT)

    unchanged = (text, [])
    if len(text) == 0:
        return unchanged

    # The first plan is to leave plain ASCII text alone.
    if possible_encoding(text, 'ascii'):
        return unchanged

    # Encodings that could represent the text but did not yield a fix in the
    # loop below. They may be needed afterwards.
    possible_1byte_encodings = []

    # Suppose the text was meant to be UTF-8 but was decoded with a
    # single-byte encoding. When that can be undone it is usually the right
    # thing to do, so try it first.
    for encoding in CHARMAP_ENCODINGS:
        if not possible_encoding(text, encoding):
            continue
        encoded_bytes = text.encode(encoding)

        # These bytes only appear in the looser UTF-8 variants.
        if b'\xed' in encoded_bytes or b'\xc0' in encoded_bytes:
            decoding = 'utf-8-variants'
        else:
            decoding = 'utf-8'

        # If the bytes are UTF-8 (or close enough) we are done; otherwise
        # remember the encoding for later.
        try:
            fixed = encoded_bytes.decode(decoding)
        except UnicodeDecodeError:
            possible_1byte_encodings.append(encoding)
        else:
            return fixed, [('encode', encoding), ('decode', decoding)]

    # What is left are mix-ups between two single-byte encodings. Only the
    # common Latin-1 / Windows-1252 confusion is handled. The others are
    # rare and cannot be solved without false positives; for those, try
    # `ftfy.guess_bytes` instead.
    if 'latin-1' not in possible_1byte_encodings:
        return unchanged

    if 'windows-1252' in possible_1byte_encodings:
        # The text lies in the intersection of Latin-1 and Windows-1252, so
        # it is probably legitimate.
        return unchanged

    # The text has characters that exist in Latin-1 but not in Windows-1252:
    # C1 control characters. Nobody wants those, so assume Windows-1252 was
    # intended. The sloppy codec is deliberately not used, because bad
    # Windows-1252 characters are a bad sign.
    encoded = text.encode('latin-1')
    try:
        fixed = encoded.decode('windows-1252')
    except UnicodeDecodeError:
        # Some characters make no sense even as Windows-1252, so assume
        # nothing and return the text unchanged.
        return unchanged

    steps = []
    if fixed != text:
        steps = [('encode', 'latin-1'), ('decode', 'windows-1252')]
    return fixed, steps
def apply_plan(text, plan):
    """
    Apply a plan for fixing the encoding of text.

    `plan` is a list of (operation, encoding) tuples. `operation` is either
    'encode' or 'decode', and `encoding` names a codec such as 'utf-8' or
    'latin-1'.

    Only text can be encoded and only bytes can be decoded, so the steps of
    a plan should alternate between 'encode' and 'decode'; otherwise this
    function will encounter an error. Any other operation name raises
    ValueError.
    """
    obj = text
    for operation, encoding in plan:
        if operation not in ('encode', 'decode'):
            raise ValueError("Unknown plan step: %s" % operation)
        # The operation name doubles as the name of the method to call.
        obj = getattr(obj, operation)(encoding)
    return obj
HTML_ENTITY_RE = re.compile(r"&#?\w{0,8};")


def unescape_html(text):
    """
    Decode all three types of HTML entities/character references.

    Decimal references, hexadecimal references and named entities are
    replaced by the characters they stand for. Anything that cannot be
    resolved is left exactly as it was.

    Code by Fredrik Lundh of effbot.org. Rob Speer made a slight change to
    it for efficiency: it won't match entities longer than 8 characters,
    because there are no valid entities like that.

        >>> print(unescape_html('&lt;tag&gt;'))
        <tag>
    """
    def fixup(match):
        """
        Return the character for one matched HTML entity, or the entity
        itself if it cannot be resolved.
        """
        entity = match.group(0)

        if entity.startswith("&#"):
            # Numeric character reference, hexadecimal or decimal.
            try:
                if entity.startswith("&#x"):
                    return unichr(int(entity[3:-1], 16))
                return unichr(int(entity[2:-1]))
            except ValueError:
                return entity  # leave as is

        # Named entity.
        try:
            return unichr(htmlentitydefs.name2codepoint[entity[1:-1]])
        except KeyError:
            return entity  # leave as is

    return HTML_ENTITY_RE.sub(fixup, text)
ANSI_RE = re.compile('\033\\[((?:\\d|;)*)([a-zA-Z])')


def remove_terminal_escapes(text):
    r"""
    Strip out "ANSI" terminal escape sequences, such as the ones that
    produce colored text on Unix.

        >>> print(remove_terminal_escapes(
        ...     "\033[36;44mI'm blue, da ba dee da ba doo...\033[0m"
        ... ))
        I'm blue, da ba dee da ba doo...
    """
    return re.sub(ANSI_RE, '', text)
SINGLE_QUOTE_RE = re.compile('[\u2018-\u201b]')
DOUBLE_QUOTE_RE = re.compile('[\u201c-\u201f]')


def uncurl_quotes(text):
    r"""
    Replace curly quotation marks with straight equivalents.

        >>> print(uncurl_quotes('\u201chere\u2019s a test\u201d'))
        "here's a test"
    """
    # Double quotes are straightened first, then single quotes.
    without_double = DOUBLE_QUOTE_RE.sub('"', text)
    return SINGLE_QUOTE_RE.sub("'", without_double)
def fix_line_breaks(text):
    r"""
    Convert all line breaks to Unix style.

    The following sequences are all turned into the standard \\n line
    break:

    - CRLF (\\r\\n), used on Windows and in some communication protocols
    - CR (\\r), once used on Mac OS Classic, and now kept alive by
      misguided software such as Microsoft Office for Mac
    - LINE SEPARATOR (\\u2028) and PARAGRAPH SEPARATOR (\\u2029), defined
      by Unicode and used to sow confusion and discord
    - NEXT LINE (\\x85), a C1 control character that is certainly not what
      you meant

    NEXT LINE is a bit of an odd case, because it usually won't show up if
    `fix_encoding` is also being run: \\x85 is very common mojibake for
    \\u2026, HORIZONTAL ELLIPSIS.

        >>> print(fix_line_breaks(
        ...     "This string is made of two things:\u2029"
        ...     "1. Unicode\u2028"
        ...     "2. Spite"
        ... ))
        This string is made of two things:
        1. Unicode
        2. Spite

    For further testing and examples, let's define a function that shows
    control characters in their escaped form:

        >>> def eprint(text):
        ...     print(text.encode('unicode-escape').decode('ascii'))

        >>> eprint(fix_line_breaks("Content-type: text/plain\r\n\r\nHi."))
        Content-type: text/plain\n\nHi.

        >>> eprint(fix_line_breaks("This is how Microsoft \r trolls Mac users"))
        This is how Microsoft \n trolls Mac users

        >>> eprint(fix_line_breaks("What is this \x85 I don't even"))
        What is this \n I don't even
    """
    # CRLF has to be collapsed first, so that its CR is not turned into a
    # second line break by the loop below.
    unified = text.replace('\r\n', '\n')
    for separator in ('\r', '\u2028', '\u2029', '\u0085'):
        unified = unified.replace(separator, '\n')
    return unified
def remove_control_chars(text):
    """
    Remove all control characters except for the important ones.

    Characters in these ranges are removed:

    - U+0000 to U+0008
    - U+000B
    - U+000E to U+001F
    - U+007F

    These characters, commonly used for formatting, are left alone:

    - TAB (U+0009)
    - LF (U+000A)
    - FF (U+000C)
    - CR (U+000D)
    """
    # CONTROL_CHARS is the translation table imported from ftfy.chardata.
    cleaned = text.translate(CONTROL_CHARS)
    return cleaned
def remove_bom(text):
    r"""
    Remove a left-over byte-order mark from the start of the text.

        >>> print(remove_bom("\ufeffWhere do you want to go today?"))
        Where do you want to go today?
    """
    byte_order_mark = unichr(0xfeff)
    return text.lstrip(byte_order_mark)
def remove_unsafe_private_use(text):
    r"""
    Remove every character from Supplementary Private Use Area B.

    Python 3.3's Unicode support isn't perfect, and certain string
    operations will crash some versions of it with a SystemError:

        http://bugs.python.org/issue18183

    The best solution is to remove the offending characters, U+100000 to
    U+10FFFF, using a regex that is known not to crash when given them.

    It's sad to lose an entire plane of Unicode, but these characters are
    not assigned and never will be. If you get one of them and don't know
    what its purpose is, its purpose is probably to crash your code.

    If you were using these for actual private use, this might be
    inconvenient. You can turn off this fixer, of course, but I kind of
    encourage using Supplementary Private Use Area A instead.

        >>> print(remove_unsafe_private_use('\U0001F4A9\U00100000'))
        💩

    This fixer is off by default in Python 3.4 or later. (The bug is
    actually fixed in 3.3.3 and 2.7.6, but I don't want the default
    behavior to change based on a micro version upgrade of Python.)
    """
    stripped = UNSAFE_PRIVATE_USE_RE.sub('', text)
    return stripped
# Matches the escape sequences that are valid in Python string literals.
ESCAPE_SEQUENCE_RE = re.compile(r'''
( \\U........ # 8-digit hex escapes
| \\u.... # 4-digit hex escapes
| \\x.. # 2-digit hex escapes
| \\[0-7]{1,3} # Octal escapes
| \\N\{[^}]+\} # Unicode characters by name
| \\[\\'"abfnrtv] # Single-character escapes
)''', re.UNICODE | re.VERBOSE)


def decode_escapes(text):
    r"""
    Decode backslashed escape sequences, including \\x, \\u, and \\U
    character references, even in the presence of other Unicode.

    This is what Python's "string-escape" and "unicode-escape" codecs were
    meant to do, but in contrast, this actually works. It will decode the
    string exactly the same way that the Python interpreter decodes its
    string literals.

        >>> factoid = '\\u20a1 is the currency symbol for the colón.'
        >>> print(factoid[1:])
        u20a1 is the currency symbol for the colón.
        >>> print(decode_escapes(factoid))
        ₡ is the currency symbol for the colón.

    Even though Python itself can read string literals with a combination
    of escapes and literal Unicode -- you're looking at one right now --
    the "unicode-escape" codec doesn't work on literal Unicode. (See
    http://stackoverflow.com/a/24519338/773754 for more details.)

    Instead, this function searches for just the parts of a string that
    represent escape sequences, and decodes them, leaving the rest alone.
    All valid escape sequences are made of ASCII characters, and this
    allows "unicode-escape" to work correctly.

    This fix cannot be automatically applied by the `ftfy.fix_text`
    function, because escaped text is not necessarily a mistake, and there
    is no way to distinguish text that's supposed to be escaped from text
    that isn't.
    """
    # Each matched escape sequence is decoded on its own; everything between
    # matches is passed through untouched.
    return ESCAPE_SEQUENCE_RE.sub(
        lambda match: codecs.decode(match.group(0), 'unicode-escape'),
        text
    )

View File

@ -0,0 +1,39 @@
"""
This file defines a general method for evaluating ftfy using data that arrives
in a stream. A concrete implementation of it is found in `twitter_tester.py`.
"""
from __future__ import print_function, unicode_literals
from ftfy.fixes import fix_text_encoding
from ftfy.chardata import possible_encoding
class StreamTester:
    """
    Take in a sequence of texts and show the ones that ftfy would change.

    Progress updates, such as the proportion of texts that changed, are
    printed periodically.
    """
    def __init__(self):
        # Texts that ftfy altered, and texts seen in total.
        self.num_fixed = 0
        self.count = 0

    def check_ftfy(self, text):
        """
        Check whether `ftfy.fix_text_encoding` would change a single text
        input, and display the change if so.
        """
        self.count += 1

        # Pure-ASCII text is never sent through the fixer.
        if not possible_encoding(text, 'ascii'):
            fixed = fix_text_encoding(text)
            if text != fixed:
                # possibly filter common bots before printing
                report = u'\nText:\t{text}\nFixed:\t{fixed}\n'.format(
                    text=text, fixed=fixed
                )
                print(report)
                self.num_fixed += 1

        # Print status updates once in a while: a dot every 100 texts and a
        # summary line every 10000.
        seen = self.count
        if seen % 100 == 0:
            print('.', end='', flush=True)
        if seen % 10000 == 0:
            print('\n%d/%d fixed' % (self.num_fixed, seen))

View File

@ -0,0 +1,73 @@
# coding: utf-8
"""
Do what is necessary to authenticate this tester as a Twitter "app", using
somebody's Twitter account.
"""
from __future__ import unicode_literals
import os
AUTH_TOKEN_PATH = os.path.expanduser('~/.cache/oauth/twitter_ftfy.auth')


def get_auth():
    """
    Build the OAuth credentials that authorize this tester as a Twitter
    "app" acting for somebody's Twitter account.

    The user's token and token secret are read from AUTH_TOKEN_PATH when
    that file exists. Otherwise its directory is created if necessary and
    `oauth_dance` from the `twitter` package is called with that path as
    its `token_filename`.

    Twitter also requires the app to carry its own "consumer secret", which
    the app must know and its user must not. For an open-source program
    that is impossible: anyone holding the source code can read the value
    or change what the program does with it. These "secrets" are not secret
    in any cryptographic sense, and the ones for popular Twitter apps are
    already posted on the Web. Twitter wants us to pretend the string can
    be kept secret and to hide it behind a fig leaf like everybody else
    does, so that is what is done here.
    """
    from twitter.oauth import OAuth
    from twitter import oauth_dance, read_token_file

    def unhide(secret):
        """
        Do something mysterious and exactly as secure as every other Twitter
        app.
        """
        return ''.join(chr(ord(c) - 0x2800) for c in secret)

    fig_leaf = '⠴⡹⠹⡩⠶⠴⡶⡅⡂⡩⡅⠳⡏⡉⡈⠰⠰⡹⡥⡶⡈⡐⡍⡂⡫⡍⡗⡬⡒⡧⡶⡣⡰⡄⡧⡸⡑⡣⠵⡓⠶⠴⡁'
    consumer_key = 'OFhyNd2Zt4Ba6gJGJXfbsw'

    if not os.path.exists(AUTH_TOKEN_PATH):
        # No stored token yet: make sure the directory exists, then run the
        # OAuth flow.
        authdir = os.path.dirname(AUTH_TOKEN_PATH)
        if not os.path.exists(authdir):
            os.makedirs(authdir)
        token, token_secret = oauth_dance(
            app_name='ftfy-tester',
            consumer_key=consumer_key,
            consumer_secret=unhide(fig_leaf),
            token_filename=AUTH_TOKEN_PATH
        )
    else:
        token, token_secret = read_token_file(AUTH_TOKEN_PATH)

    return OAuth(
        token=token,
        token_secret=token_secret,
        consumer_key=consumer_key,
        consumer_secret=unhide(fig_leaf)
    )

View File

@ -0,0 +1,89 @@
"""
Implements a StreamTester that runs over Twitter data. See the class
docstring.
This module is written for Python 3 only. The __future__ imports you see here
are just to let Python 2 scan the file without crashing with a SyntaxError.
"""
from __future__ import print_function, unicode_literals
import os
from collections import defaultdict
from ftfy.streamtester import StreamTester
class TwitterTester(StreamTester):
    """
    This class uses the StreamTester code (defined in `__init__.py`) to
    evaluate ftfy's real-world performance, by feeding it live data from
    Twitter.

    This is a semi-manual evaluation. It requires a human to look at the
    results and determine if they are good. The three possible cases we
    can see here are:

    - Success: the process takes in mojibake and outputs correct text.
    - False positive: the process takes in correct text, and outputs
      mojibake. Every false positive should be considered a bug, and
      reported on GitHub if it isn't already.
    - Confusion: the process takes in mojibake and outputs different
      mojibake. Not a great outcome, but not as dire as a false
      positive.

    This tester cannot reveal false negatives. So far, that can only be
    done by the unit tests.
    """
    # Directory (relative to the working directory) that receives the
    # per-language tweet logs.
    OUTPUT_DIR = './twitterlogs'

    def __init__(self):
        # Tweets collected since the last save, keyed by the language
        # reported for the tweeting user.
        self.lines_by_lang = defaultdict(list)
        super().__init__()

    def save_files(self):
        """
        When processing data from live Twitter, save it to log files so that
        it can be replayed later.

        One file per language, 'tweets.<lang>.txt', is appended to inside
        OUTPUT_DIR (created on demand). Each tweet is written on a single
        line, with embedded newlines replaced by spaces. The in-memory
        buffer is emptied afterwards.
        """
        if not os.path.exists(self.OUTPUT_DIR):
            os.makedirs(self.OUTPUT_DIR)
        for lang, lines in self.lines_by_lang.items():
            filename = 'tweets.{}.txt'.format(lang)
            fullname = os.path.join(self.OUTPUT_DIR, filename)
            # The context manager closes the file even if a write fails, so
            # a bad tweet cannot leak the open handle.
            with open(fullname, 'a') as langfile:
                for line in lines:
                    print(line.replace('\n', ' '), file=langfile)
        self.lines_by_lang = defaultdict(list)

    def run_sample(self):
        """
        Listen to live data from Twitter, and pass on the fully-formed tweets
        to `check_ftfy`. This requires the `twitter` Python package as a
        dependency.
        """
        from twitter import TwitterStream
        from ftfy.streamtester.oauth import get_auth
        twitter_stream = TwitterStream(auth=get_auth())
        iterator = twitter_stream.statuses.sample()
        for tweet in iterator:
            # Only stream items that carry a 'text' field are processed.
            if 'text' in tweet:
                self.check_ftfy(tweet['text'])
                if 'user' in tweet:
                    lang = tweet['user'].get('lang', 'NONE')
                    self.lines_by_lang[lang].append(tweet['text'])
                # Flush the buffered tweets to disk once per 10000 texts.
                if self.count % 10000 == 100:
                    self.save_files()
def main():
    """
    Command-line entry point: connect to the Twitter stream and run the
    TwitterTester on it forever, or at least until the stream drops.
    """
    TwitterTester().run_sample()


if __name__ == '__main__':
    main()

View File

@ -4,10 +4,6 @@ import requests
def TraktCall(method, api, username=None, password=None, data={}):
base_url = 'http://api.trakt.tv/'
# if the API isn't given then it failed
if not api:
return None
# if username and password given then encode password with sha1
auth = None
if username and password:

View File

@ -1,6 +1,6 @@
# !/usr/bin/env python2
#encoding:utf-8
#author:dbr/Ben
# encoding:utf-8
# author:dbr/Ben
#project:tvdb_api
#repository:http://github.com/dbr/tvdb_api
#license:unlicense (http://unlicense.org/)
@ -39,7 +39,7 @@ from lib.dateutil.parser import parse
from lib.cachecontrol import CacheControl, caches
from tvdb_ui import BaseUI, ConsoleUI
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound,
from tvdb_exceptions import (tvdb_error, tvdb_userabort, tvdb_shownotfound, tvdb_showincomplete,
tvdb_seasonnotfound, tvdb_episodenotfound, tvdb_attributenotfound)
@ -598,9 +598,9 @@ class Tvdb:
value = parse(value, fuzzy=True).date()
value = value.strftime("%Y-%m-%d")
#if key == 'airs_time':
# value = parse(value).time()
# value = value.strftime("%I:%M %p")
#if key == 'airs_time':
# value = parse(value).time()
# value = value.strftime("%I:%M %p")
except:
pass
@ -627,10 +627,9 @@ class Tvdb:
"""Loads a URL using caching, returns an ElementTree of the source
"""
try:
src = self._loadUrl(url, params=params, language=language).values()[0]
return src
except:
return []
return self._loadUrl(url, params=params, language=language).values()[0]
except Exception, e:
raise tvdb_error(e)
def _setItem(self, sid, seas, ep, attrib, value):
"""Creates a new episode, creating Show(), Season() and
@ -681,11 +680,7 @@ class Tvdb:
log().debug("Searching for show %s" % series)
self.config['params_getSeries']['seriesname'] = series
try:
seriesFound = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
return seriesFound
except:
return []
return self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
def _getSeries(self, series):
"""This searches TheTVDB.com for the series name,
@ -694,13 +689,13 @@ class Tvdb:
BaseUI is used to select the first result.
"""
allSeries = self.search(series)
if not allSeries:
log().debug('Series result returned zero')
raise tvdb_shownotfound("Show search returned zero results (cannot find show on TVDB)")
if not isinstance(allSeries, list):
allSeries = [allSeries]
if len(allSeries) == 0:
log().debug('Series result returned zero')
raise tvdb_shownotfound("Show-name search returned zero results (cannot find show on TVDB)")
if self.config['custom_ui'] is not None:
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
CustomUI = self.config['custom_ui']
@ -735,37 +730,38 @@ class Tvdb:
"""
log().debug('Getting season banners for %s' % (sid))
bannersEt = self._getetsrc(self.config['url_seriesBanner'] % (sid))
if not bannersEt:
log().debug('Banners result returned zero')
return
banners = {}
for cur_banner in bannersEt['banner']:
bid = cur_banner['id']
btype = cur_banner['bannertype']
btype2 = cur_banner['bannertype2']
if btype is None or btype2 is None:
continue
if not btype in banners:
banners[btype] = {}
if not btype2 in banners[btype]:
banners[btype][btype2] = {}
if not bid in banners[btype][btype2]:
banners[btype][btype2][bid] = {}
try:
for cur_banner in bannersEt['banner']:
bid = cur_banner['id']
btype = cur_banner['bannertype']
btype2 = cur_banner['bannertype2']
if btype is None or btype2 is None:
for k, v in cur_banner.items():
if k is None or v is None:
continue
if not btype in banners:
banners[btype] = {}
if not btype2 in banners[btype]:
banners[btype][btype2] = {}
if not bid in banners[btype][btype2]:
banners[btype][btype2][bid] = {}
for k, v in cur_banner.items():
if k is None or v is None:
continue
k, v = k.lower(), v.lower()
banners[btype][btype2][bid][k] = v
k, v = k.lower(), v.lower()
banners[btype][btype2][bid][k] = v
for k, v in banners[btype][btype2][bid].items():
if k.endswith("path"):
new_key = "_%s" % (k)
log().debug("Transforming %s to %s" % (k, new_key))
new_url = self.config['url_artworkPrefix'] % (v)
banners[btype][btype2][bid][new_key] = new_url
except:
pass
for k, v in banners[btype][btype2][bid].items():
if k.endswith("path"):
new_key = "_%s" % (k)
log().debug("Transforming %s to %s" % (k, new_key))
new_url = self.config['url_artworkPrefix'] % (v)
banners[btype][btype2][bid][new_key] = new_url
self._setShowData(sid, "_banners", banners)
@ -796,21 +792,22 @@ class Tvdb:
log().debug("Getting actors for %s" % (sid))
actorsEt = self._getetsrc(self.config['url_actorsInfo'] % (sid))
if not actorsEt:
log().debug('Actors result returned zero')
return
cur_actors = Actors()
try:
for curActorItem in actorsEt["actor"]:
curActor = Actor()
for k, v in curActorItem.items():
k = k.lower()
if v is not None:
if k == "image":
v = self.config['url_artworkPrefix'] % (v)
else:
v = self._cleanData(v)
curActor[k] = v
cur_actors.append(curActor)
except:
pass
for curActorItem in actorsEt["actor"]:
curActor = Actor()
for k, v in curActorItem.items():
k = k.lower()
if v is not None:
if k == "image":
v = self.config['url_artworkPrefix'] % (v)
else:
v = self._cleanData(v)
curActor[k] = v
cur_actors.append(curActor)
self._setShowData(sid, '_actors', cur_actors)
@ -840,10 +837,11 @@ class Tvdb:
self.config['url_seriesInfo'] % (sid, getShowInLanguage)
)
# check and make sure we have data to process and that it contains a series name
if not len(seriesInfoEt) or (isinstance(seriesInfoEt, dict) and 'seriesname' not in seriesInfoEt['series']):
return False
if not seriesInfoEt:
log().debug('Series result returned zero')
raise tvdb_shownotfound("Show search returned zero results (cannot find show on TVDB)")
# get series data
for k, v in seriesInfoEt['series'].items():
if v is not None:
if k in ['banner', 'fanart', 'poster']:
@ -853,6 +851,7 @@ class Tvdb:
self._setShowData(sid, k, v)
# get episode data
if getEpInfo:
# Parse banners
if self.config['banners_enabled']:
@ -864,15 +863,17 @@ class Tvdb:
# Parse episode data
log().debug('Getting all episodes of %s' % (sid))
if self.config['useZip']:
url = self.config['url_epInfo_zip'] % (sid, language)
else:
url = self.config['url_epInfo'] % (sid, language)
epsEt = self._getetsrc(url, language=language)
episodes = epsEt["episode"]
if not epsEt:
log().debug('Series results incomplete')
raise tvdb_showincomplete("Show search returned incomplete results (cannot find complete show on TVDB)")
episodes = epsEt["episode"]
if not isinstance(episodes, list):
episodes = [episodes]
@ -945,14 +946,10 @@ class Tvdb:
selected_series = [selected_series]
[[self._setShowData(show['id'], k, v) for k, v in show.items()] for show in selected_series]
return selected_series
#test = self._getSeries(key)
#sids = self._nameToSid(key)
#return list(self.shows[sid] for sid in sids)
def __repr__(self):
return str(self.shows)
def main():
"""Simple example of using tvdb_api - it just
grabs an episode name interactively.

View File

@ -11,7 +11,7 @@
__author__ = "dbr/Ben"
__version__ = "1.9"
__all__ = ["tvdb_error", "tvdb_userabort", "tvdb_shownotfound",
__all__ = ["tvdb_error", "tvdb_userabort", "tvdb_shownotfound", "tvdb_showincomplete",
"tvdb_seasonnotfound", "tvdb_episodenotfound", "tvdb_attributenotfound"]
class tvdb_exception(Exception):
@ -35,6 +35,11 @@ class tvdb_shownotfound(tvdb_exception):
"""
pass
class tvdb_showincomplete(tvdb_exception):
    """Show was found on thetvdb.com, but its data there is incomplete."""
class tvdb_seasonnotfound(tvdb_exception):
"""Season cannot be found on thetvdb.com
"""

View File

@ -1,7 +1,7 @@
# !/usr/bin/env python2
# encoding:utf-8
#author:echel0n
#project:tvrage_api
# author:echel0n
# project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
#license:unlicense (http://unlicense.org/)
@ -36,7 +36,7 @@ from lib.dateutil.parser import parse
from cachecontrol import CacheControl, caches
from tvrage_ui import BaseUI
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound,
from tvrage_exceptions import (tvrage_error, tvrage_userabort, tvrage_shownotfound, tvrage_showincomplete,
tvrage_seasonnotfound, tvrage_episodenotfound, tvrage_attributenotfound)
@ -465,10 +465,6 @@ class TVRage:
elif key == 'firstaired':
value = parse(value, fuzzy=True).date()
value = value.strftime("%Y-%m-%d")
#if key == 'airs_time':
# value = parse(value).time()
# value = value.strftime("%I:%M %p")
except:
pass
@ -485,10 +481,9 @@ class TVRage:
"""
try:
src = self._loadUrl(url, params).values()[0]
return src
except:
return []
return self._loadUrl(url, params).values()[0]
except Exception, e:
raise tvrage_error(e)
def _setItem(self, sid, seas, ep, attrib, value):
"""Creates a new episode, creating Show(), Season() and
@ -518,9 +513,7 @@ class TVRage:
"""
if sid not in self.shows:
self.shows[sid] = Show()
if not isinstance(key, dict or list) and not isinstance(value, dict or list):
self.shows[sid].data[key] = value
self.shows[sid].data[key] = value
def _cleanData(self, data):
"""Cleans up strings returned by tvrage.com
@ -544,11 +537,7 @@ class TVRage:
log().debug("Searching for show %s" % series)
self.config['params_getSeries']['show'] = series
try:
seriesFound = self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
return seriesFound
except:
return []
return self._getetsrc(self.config['url_getSeries'], self.config['params_getSeries']).values()[0]
def _getSeries(self, series):
"""This searches tvrage.com for the series name,
@ -557,13 +546,13 @@ class TVRage:
BaseUI is used to select the first result.
"""
allSeries = self.search(series)
if not allSeries:
log().debug('Series result returned zero')
raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")
if not isinstance(allSeries, list):
allSeries = [allSeries]
if len(allSeries) == 0:
log().debug('Series result returned zero')
raise tvrage_shownotfound("Show-name search returned zero results (cannot find show on TVRAGE)")
if self.config['custom_ui'] is not None:
log().debug("Using custom UI %s" % (repr(self.config['custom_ui'])))
CustomUI = self.config['custom_ui']
@ -588,50 +577,55 @@ class TVRage:
self.config['params_seriesInfo']
)
# check and make sure we have data to process and that it contains a series name
if not len(seriesInfoEt) or (isinstance(seriesInfoEt, dict) and 'seriesname' not in seriesInfoEt):
return False
if not seriesInfoEt:
log().debug('Series result returned zero')
raise tvrage_shownotfound("Show search returned zero results (cannot find show on TVRAGE)")
# get series data
for k, v in seriesInfoEt.items():
if v is not None:
v = self._cleanData(v)
self._setShowData(sid, k, v)
# series search ends here
# get episode data
if getEpInfo:
# Parse episode data
log().debug('Getting all episodes of %s' % (sid))
self.config['params_epInfo']['sid'] = sid
epsEt = self._getetsrc(self.config['url_epInfo'], self.config['params_epInfo'])
if not epsEt:
log().debug('Series results incomplete')
raise tvrage_showincomplete(
"Show search returned incomplete results (cannot find complete show on TVRAGE)")
seasons = epsEt['episodelist']['season']
if not isinstance(seasons, list):
seasons = [seasons]
for season in seasons:
seas_no = int(season['@no'])
episodes = season['episode']
if not isinstance(episodes, list):
episodes = [episodes]
for episode in episodes:
ep_no = int(episode['episodenumber'])
self._setItem(sid, seas_no, ep_no, 'seasonnumber', seas_no)
for k, v in episode.items():
try:
k = k.lower()
if v is not None:
if k == 'link':
v = v.rsplit('/', 1)[1]
k = 'id'
k = k.lower()
if v is not None:
if k == 'link':
v = v.rsplit('/', 1)[1]
k = 'id'
else:
v = self._cleanData(v)
self._setItem(sid, seas_no, ep_no, k, v)
except:
continue
self._setItem(sid, seas_no, ep_no, k, v)
return True
def _nameToSid(self, name):
@ -661,7 +655,7 @@ class TVRage:
self._getShowData(key, True)
return self.shows[key]
key = key.lower()
key = str(key).lower()
self.config['searchterm'] = key
selected_series = self._getSeries(key)
if isinstance(selected_series, dict):

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python2
#encoding:utf-8
# encoding:utf-8
#author:echel0n
#project:tvrage_api
#repository:http://github.com/echel0n/tvrage_api
@ -10,40 +10,53 @@
__author__ = "echel0n"
__version__ = "1.0"
__all__ = ["tvrage_error", "tvrage_userabort", "tvrage_shownotfound",
"tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
__all__ = ["tvrage_error", "tvrage_userabort", "tvrage_shownotfound", "tvrage_showincomplete",
"tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
class tvrage_exception(Exception):
    """Base class for every exception raised by tvrage_api.

    Catching this type catches all of the more specific tvrage_* errors.
    """
    pass
class tvrage_error(tvrage_exception):
    """A problem talking to tvrage.com (for example, the site cannot be reached)."""
    pass
class tvrage_userabort(tvrage_exception):
    """The user aborted the interactive selection (via the q command, ^c etc)."""
    pass
class tvrage_shownotfound(tvrage_exception):
    """The show cannot be found on tvrage.com (nonexistent show)."""
    pass
class tvrage_showincomplete(tvrage_exception):
    """The show was found on tvrage.com but its data is incomplete."""
    pass
class tvrage_seasonnotfound(tvrage_exception):
    """The requested season cannot be found on tvrage.com."""
    pass
class tvrage_episodenotfound(tvrage_exception):
    """The requested episode cannot be found on tvrage.com."""
    pass
class tvrage_attributenotfound(tvrage_exception):
"""Raised if an episode does not have the requested
attribute (such as a episode name)

View File

@ -1,9 +1,12 @@
SickRage
=====
Video File Manager for TV Shows. It watches for new episodes of your favorite shows, and when they are posted it does its magic.
Video File Manager for TV Shows. It watches for new episodes of your favorite shows, and when they are posted it does its magic.
## Important
Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files, such as cache.db and failed.db, if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is yours, at your own risk.
## Branch Build Status
- DEVELOP<br>
[![Build Status](https://travis-ci.org/SiCKRAGETV/SickRage.svg?branch=develop)](https://travis-ci.org/SiCKRAGETV/SickRage)
- MASTER<br>
[![Build Status](https://travis-ci.org/SiCKRAGETV/SickRage.svg?branch=master)](https://travis-ci.org/SiCKRAGETV/SickRage)
## Features
- XBMC library updates, poster/fanart downloads, and NFO/TBN generation
@ -36,3 +39,6 @@ With your bug reports, specify:
- What happened
- What you expected
- Link to logfile on http://www.pastebin.com
## Important
Before using this with your existing database (sickbeard.db), please make a backup copy of it and delete any other database files, such as cache.db and failed.db, if present. We HIGHLY recommend starting out with no database files at all to make this a fresh start, but the choice is yours, at your own risk.

View File

@ -45,8 +45,8 @@ from sickbeard import naming
from sickbeard import dailysearcher
from sickbeard import scene_numbering, scene_exceptions, name_cache
from indexers.indexer_api import indexerApi
from indexers.indexer_exceptions import indexer_shownotfound, indexer_exception, indexer_error, indexer_episodenotfound, \
indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts
from indexers.indexer_exceptions import indexer_shownotfound, indexer_showincomplete, indexer_exception, indexer_error, \
indexer_episodenotfound, indexer_attributenotfound, indexer_seasonnotfound, indexer_userabort, indexerExcepts
from sickbeard.common import SD, SKIPPED, NAMING_REPEAT
from sickbeard.databases import mainDB, cache_db, failed_db
@ -466,10 +466,12 @@ TRAKT_API_KEY = 'abd806c54516240c76e4ebc9c5ccf394'
__INITIALIZED__ = False
def get_backlog_cycle_time():
    """Return the backlog search cycle time.

    The value is twice DAILYSEARCH_FREQUENCY plus 7, but never less than 720.
    NOTE(review): the unit is presumably minutes -- confirm against the
    scheduler that consumes this value.
    """
    cycletime = DAILYSEARCH_FREQUENCY * 2 + 7
    # 720 acts as a floor on the computed cycle time.
    return max(cycletime, 720)
def initialize(consoleLogging=True):
with INIT_LOCK:
@ -548,7 +550,8 @@ def initialize(consoleLogging=True):
# git_remote
GIT_REMOTE = check_setting_str(CFG, 'General', 'git_remote', 'origin')
GIT_REMOTE_URL = check_setting_str(CFG, 'General', 'git_remote_url', 'https://github.com/SiCKRAGETV/SickRage.git')
GIT_REMOTE_URL = check_setting_str(CFG, 'General', 'git_remote_url',
'https://github.com/SiCKRAGETV/SickRage.git')
# current commit hash
CUR_COMMIT_HASH = check_setting_str(CFG, 'General', 'cur_commit_hash', '')
@ -659,7 +662,8 @@ def initialize(consoleLogging=True):
NAMING_ABD_PATTERN = check_setting_str(CFG, 'General', 'naming_abd_pattern', '%SN - %A.D - %EN')
NAMING_CUSTOM_ABD = bool(check_setting_int(CFG, 'General', 'naming_custom_abd', 0))
NAMING_SPORTS_PATTERN = check_setting_str(CFG, 'General', 'naming_sports_pattern', '%SN - %A-D - %EN')
NAMING_ANIME_PATTERN = check_setting_str(CFG, 'General', 'naming_anime_pattern', 'Season %0S/%SN - S%0SE%0E - %EN')
NAMING_ANIME_PATTERN = check_setting_str(CFG, 'General', 'naming_anime_pattern',
'Season %0S/%SN - S%0SE%0E - %EN')
NAMING_ANIME = check_setting_int(CFG, 'General', 'naming_anime', 3)
NAMING_CUSTOM_SPORTS = bool(check_setting_int(CFG, 'General', 'naming_custom_sports', 0))
NAMING_CUSTOM_ANIME = bool(check_setting_int(CFG, 'General', 'naming_custom_anime', 0))
@ -1466,7 +1470,7 @@ def save_config():
new_config['General']['keep_processed_dir'] = int(KEEP_PROCESSED_DIR)
new_config['General']['process_method'] = PROCESS_METHOD
new_config['General']['move_associated_files'] = int(MOVE_ASSOCIATED_FILES)
new_config['General']['postpone_if_sync_files'] = int (POSTPONE_IF_SYNC_FILES)
new_config['General']['postpone_if_sync_files'] = int(POSTPONE_IF_SYNC_FILES)
new_config['General']['nfo_rename'] = int(NFO_RENAME)
new_config['General']['process_automatically'] = int(PROCESS_AUTOMATICALLY)
new_config['General']['unpack'] = int(UNPACK)

View File

@ -27,7 +27,7 @@ from sickbeard import encodingKludge as ek
from sickbeard.name_parser.parser import NameParser, InvalidNameException, InvalidShowException
MIN_DB_VERSION = 9 # oldest db version we support migrating from
MAX_DB_VERSION = 40
MAX_DB_VERSION = 41
class MainSanityCheck(db.DBSanityCheck):
def check(self):
@ -915,3 +915,15 @@ class AddVersionToTvEpisodes(AddIndexerMapping):
self.addColumn("history", "version", "NUMERIC", "-1")
self.incDBVersion()
class AddDefaultEpStatusToTvShows(AddVersionToTvEpisodes):
    """Database migration step that adds `default_ep_status` to `tv_shows`.

    The step is treated as already applied once the database reports
    version 41 or newer.
    """

    def test(self):
        """Return True when the database version is already 41 or higher."""
        return self.checkDBVersion() >= 41

    def execute(self):
        """Back up the database, add the new column, then bump the DB version."""
        backupDatabase(41)

        logger.log(u"Adding column default_ep_status to tv_shows")
        # TEXT column; the trailing "" is presumably the column default -- confirm in addColumn.
        self.addColumn("tv_shows", "default_ep_status", "TEXT", "")

        self.incDBVersion()

View File

@ -11,15 +11,18 @@
# SickRage is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.
import os
from sickbeard import logger
import sickbeard
from sickbeard import logger
import ftfy
import ftfy.bad_codecs
# This module tries to deal with the apparently random behavior of python when dealing with unicode <-> utf-8
# encodings. It tries to just use unicode, but if that fails then it tries forcing it to utf-8. Any functions
@ -28,18 +31,19 @@ import sickbeard
def fixStupidEncodings(x, silent=False):
# Coerce a value to unicode: byte strings are decoded with sickbeard.SYS_ENCODING,
# unicode values are returned unchanged, and any other type is logged and yields None.
# `silent` lowers the log level of that last message from ERROR to DEBUG.
# NOTE(review): this block has lost its indentation and shows two versions of some
# statements back to back; confirm which statement of each pair is the intended one.
if type(x) == str:
try:
return x.decode(sickbeard.SYS_ENCODING)
# NOTE(review): directly follows an unconditional return in the same try body,
# so it cannot run as written.
return str(ftfy.fix_text(u'' + x)).decode(sickbeard.SYS_ENCODING)
except UnicodeDecodeError:
logger.log(u"Unable to decode value: " + repr(x), logger.ERROR)
return None
# NOTE(review): second return in the same handler -- unreachable as written.
return x
except UnicodeEncodeError:
logger.log(u"Unable to encode value: " + repr(x), logger.ERROR)
return x
elif type(x) == unicode:
# Already unicode: nothing to fix.
return x
else:
# Neither str nor unicode: report it and give the caller None.
logger.log(
u"Unknown value passed in, ignoring it: " + str(type(x)) + " (" + repr(x) + ":" + repr(type(x)) + ")",
logger.DEBUG if silent else logger.ERROR)
return None
def fixListEncodings(x):
@ -49,21 +53,13 @@ def fixListEncodings(x):
return filter(lambda x: x != None, map(fixStupidEncodings, x))
def callPeopleStupid(x):
    """Encode `x` with sickbeard.SYS_ENCODING, degrading to a lossy encode.

    If the strict encode raises UnicodeEncodeError, the problem is logged at
    ERROR level and `x` is encoded again with the 'ignore' error handler, which
    drops the characters that cannot be represented.
    """
    try:
        return x.encode(sickbeard.SYS_ENCODING)
    except UnicodeEncodeError:
        logger.log(
            u"YOUR COMPUTER SUCKS! Your data is being corrupted by a bad locale/encoding setting. Report this error on the forums or IRC please: " + repr(
                x) + ", " + sickbeard.SYS_ENCODING, logger.ERROR)
        # Best effort: return something usable rather than propagating the error.
        return x.encode(sickbeard.SYS_ENCODING, 'ignore')
def ek(func, *args, **kwargs):
if os.name == 'nt':
result = func(*args, **kwargs)
else:
result = func(*[callPeopleStupid(x) if type(x) in (str, unicode) else x for x in args], **kwargs)
result = func(
*[fixStupidEncodings(x).encode(sickbeard.SYS_ENCODING) if type(x) in (str, unicode) else x for x in args],
**kwargs)
if type(result) in (list, tuple):
return fixListEncodings(result)

View File

@ -18,7 +18,6 @@
from sickbeard.encodingKludge import fixStupidEncodings
def ex(e):
"""
Returns a unicode string from the exception text if it exists.

View File

@ -26,7 +26,7 @@ from sickbeard.exceptions import ex, EpisodeNotFoundException
from sickbeard.history import dateFormat
from sickbeard.common import Quality
from sickbeard.common import WANTED, FAILED
from sickbeard.encodingKludge import fixStupidEncodings
def prepareFailedName(release):
"""Standardizes release name for failed DB"""
@ -36,9 +36,7 @@ def prepareFailedName(release):
fixed = fixed.rpartition(".")[0]
fixed = re.sub("[\.\-\+\ ]", "_", fixed)
if not isinstance(fixed, unicode):
fixed = unicode(fixed, 'utf-8', 'replace')
fixed = fixStupidEncodings(fixed)
return fixed

View File

@ -213,8 +213,12 @@ def _remove_file_failed(file):
def findCertainShow(showList, indexerid):
results = []
if showList and indexerid:
results = filter(lambda x: int(x.indexerid) == int(indexerid), showList)
if not isinstance(indexerid, list):
indexerid = [indexerid]
if showList and len(indexerid):
results = filter(lambda x: int(x.indexerid) in indexerid, showList)
if len(results) == 1:
return results[0]

View File

@ -20,6 +20,7 @@ import db
import datetime
from sickbeard.common import SNATCHED, SUBTITLED, FAILED, Quality
from sickbeard.encodingKludge import fixStupidEncodings
dateFormat = "%Y%m%d%H%M%S"
@ -27,9 +28,7 @@ dateFormat = "%Y%m%d%H%M%S"
def _logHistoryItem(action, showid, season, episode, quality, resource, provider, version=-1):
logDate = datetime.datetime.today().strftime(dateFormat)
if not isinstance(resource, unicode):
resource = unicode(resource, 'utf-8', 'replace')
resource = fixStupidEncodings(resource)
myDB = db.DBConnection()
myDB.action(

View File

@ -37,6 +37,7 @@ indexerConfig[INDEXER_TVRAGE] = {
}
# TVDB Indexer Settings
indexerConfig[INDEXER_TVDB]['trakt_id'] = 'tvdb_id'
indexerConfig[INDEXER_TVDB]['xem_origin'] = 'tvdb'
indexerConfig[INDEXER_TVDB]['icon'] = 'thetvdb16.png'
indexerConfig[INDEXER_TVDB]['scene_url'] = 'http://midgetspy.github.io/sb_tvdb_scene_exceptions/exceptions.txt'
@ -44,6 +45,7 @@ indexerConfig[INDEXER_TVDB]['show_url'] = 'http://thetvdb.com/?tab=series&id='
indexerConfig[INDEXER_TVDB]['base_url'] = 'http://thetvdb.com/api/%(apikey)s/series/' % indexerConfig[INDEXER_TVDB]['api_params']
# TVRAGE Indexer Settings
indexerConfig[INDEXER_TVRAGE]['trakt_id'] = 'tvrage_id'
indexerConfig[INDEXER_TVRAGE]['xem_origin'] = 'rage'
indexerConfig[INDEXER_TVRAGE]['icon'] = 'tvrage16.png'
indexerConfig[INDEXER_TVRAGE]['scene_url'] = 'https://raw.githubusercontent.com/echel0n/sb_tvrage_scene_exceptions/master/exceptions.txt'

View File

@ -1,5 +1,5 @@
#!/usr/bin/env python2
#encoding:utf-8
# encoding:utf-8
#author:echel0n
#project:indexer_api
#repository:http://github.com/echel0n/Sick-Beard
@ -12,19 +12,20 @@ __version__ = "1.0"
from lib.tvrage_api.tvrage_exceptions import \
tvrage_exception, tvrage_attributenotfound, tvrage_episodenotfound, tvrage_error, \
tvrage_seasonnotfound, tvrage_shownotfound, tvrage_userabort
tvrage_seasonnotfound, tvrage_shownotfound, tvrage_showincomplete, tvrage_userabort
from lib.tvdb_api.tvdb_exceptions import \
tvdb_exception, tvdb_attributenotfound, tvdb_episodenotfound, tvdb_error, \
tvdb_seasonnotfound, tvdb_shownotfound, tvdb_userabort
tvdb_seasonnotfound, tvdb_shownotfound, tvdb_showincomplete, tvdb_userabort
indexerExcepts = ["indexer_exception", "indexer_error", "indexer_userabort", "indexer_shownotfound",
"indexer_showincomplete",
"indexer_seasonnotfound", "indexer_episodenotfound", "indexer_attributenotfound"]
tvdbExcepts = ["tvdb_exception", "tvdb_error", "tvdb_userabort", "tvdb_shownotfound",
tvdbExcepts = ["tvdb_exception", "tvdb_error", "tvdb_userabort", "tvdb_shownotfound", "tvdb_showincomplete",
"tvdb_seasonnotfound", "tvdb_episodenotfound", "tvdb_attributenotfound"]
tvrageExcepts = ["tvdb_exception", "tvrage_error", "tvrage_userabort", "tvrage_shownotfound",
# Names of the tvrage_api exception classes; the first entry is the TVRage base exception.
tvrageExcepts = ["tvrage_exception", "tvrage_error", "tvrage_userabort", "tvrage_shownotfound", "tvrage_showincomplete",
                 "tvrage_seasonnotfound", "tvrage_episodenotfound", "tvrage_attributenotfound"]
# link API exceptions to our exception handler
@ -35,3 +36,4 @@ indexer_attributenotfound = tvdb_attributenotfound, tvrage_attributenotfound
indexer_episodenotfound = tvdb_episodenotfound, tvrage_episodenotfound
indexer_seasonnotfound = tvdb_seasonnotfound, tvrage_seasonnotfound
indexer_shownotfound = tvdb_shownotfound, tvrage_shownotfound
indexer_showincomplete = tvdb_showincomplete, tvrage_showincomplete

View File

@ -26,7 +26,7 @@ import os.path
import regexes
import sickbeard
from sickbeard import logger, helpers, scene_numbering, common, exceptions, scene_exceptions, encodingKludge as ek, db
from sickbeard import logger, helpers, scene_numbering, common, exceptions as ex, scene_exceptions, encodingKludge as ek, db
from dateutil import parser

View File

@ -29,6 +29,7 @@ import sickbeard
from sickbeard import logger, common
from sickbeard import db
from sickbeard.encodingKludge import fixStupidEncodings
from sickbeard.exceptions import ex
@ -50,7 +51,7 @@ class EmailNotifier:
ep_name: The name of the episode that was snatched
title: The title of the notification (optional)
"""
ep_name = ep_name.encode('utf-8', 'replace')
ep_name = fixStupidEncodings(ep_name)
if sickbeard.EMAIL_NOTIFY_ONSNATCH:
show = self._parseEp(ep_name)
@ -85,7 +86,7 @@ class EmailNotifier:
ep_name: The name of the episode that was downloaded
title: The title of the notification (optional)
"""
ep_name = ep_name.encode('utf-8', 'replace')
ep_name = fixStupidEncodings(ep_name)
if sickbeard.EMAIL_NOTIFY_ONDOWNLOAD:
show = self._parseEp(ep_name)
@ -120,7 +121,7 @@ class EmailNotifier:
ep_name: The name of the episode that was downloaded
lang: Subtitle language wanted
"""
ep_name = ep_name.encode('utf-8', 'replace')
ep_name = fixStupidEncodings(ep_name)
if sickbeard.EMAIL_NOTIFY_ONSUBTITLEDOWNLOAD:
show = self._parseEp(ep_name)
@ -197,7 +198,7 @@ class EmailNotifier:
return False
def _parseEp(self, ep_name):
ep_name = ep_name.encode('utf-8', 'replace')
ep_name = fixStupidEncodings(ep_name)
sep = " - "
titles = ep_name.split(sep)

View File

@ -45,11 +45,11 @@ class TraktNotifier:
ep_obj: The TVEpisode object to add to trakt
"""
if sickbeard.USE_TRAKT:
trakt_id = sickbeard.indexerApi(ep_obj.show.indexer).config['trakt_id']
if sickbeard.USE_TRAKT:
# URL parameters
data = {
'tvdb_id': ep_obj.show.indexerid,
'title': ep_obj.show.name,
'year': ep_obj.show.startyear,
'episodes': [{
@ -58,48 +58,53 @@ class TraktNotifier:
}]
}
if data is not None:
TraktCall("show/episode/library/%API%", self._api(), self._username(), self._password(), data)
if sickbeard.TRAKT_REMOVE_WATCHLIST:
TraktCall("show/episode/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
if trakt_id == 'tvdb_id':
data[trakt_id] = ep_obj.show.indexerid
if sickbeard.TRAKT_REMOVE_SERIESLIST:
data_show = None
# update library
TraktCall("show/episode/library/%API%", self._api(), self._username(), self._password(), data)
# URL parameters, should not need to recheck data (done above)
data = {
'shows': [
{
'tvdb_id': ep_obj.show.indexerid,
'title': ep_obj.show.name,
'year': ep_obj.show.startyear
}
]
}
# remove from watchlist
if sickbeard.TRAKT_REMOVE_WATCHLIST:
TraktCall("show/episode/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
TraktCall("show/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
if sickbeard.TRAKT_REMOVE_SERIESLIST:
data = {
'shows': [
{
'title': ep_obj.show.name,
'year': ep_obj.show.startyear
}
]
}
# Remove all episodes from episode watchlist
# Start by getting all episodes in the watchlist
watchlist = TraktCall("user/watchlist/episodes.json/%API%/" + sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD)
if trakt_id == 'tvdb_id':
data['shows'][trakt_id] = ep_obj.show.indexerid
# Convert watchlist to only contain current show
TraktCall("show/unwatchlist/%API%", self._api(), self._username(), self._password(), data)
# Remove all episodes from episode watchlist
# Start by getting all episodes in the watchlist
watchlist = TraktCall("user/watchlist/episodes.json/%API%/" + sickbeard.TRAKT_USERNAME,
sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD)
# Convert watchlist to only contain current show
if watchlist:
for show in watchlist:
# Check if tvdb_id exists
if 'tvdb_id' in show:
if unicode(data['shows'][0]['tvdb_id']) == show['tvdb_id']:
data_show = {
'title': show['title'],
'tvdb_id': show['tvdb_id'],
'episodes': []
}
if show[trakt_id] == ep_obj.show.indexerid:
data_show = {
'title': show['title'],
trakt_id: show[trakt_id],
'episodes': []
}
# Add series and episode (number) to the arry
for episodes in show['episodes']:
ep = {'season': episodes['season'], 'episode': episodes['number']}
data_show['episodes'].append(ep)
if data_show is not None:
TraktCall("show/episode/unwatchlist/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME, sickbeard.TRAKT_PASSWORD, data_show)
# Add series and episode (number) to the array
for episodes in show['episodes']:
ep = {'season': episodes['season'], 'episode': episodes['number']}
data_show['episodes'].append(ep)
TraktCall("show/episode/unwatchlist/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME,
sickbeard.TRAKT_PASSWORD, data_show)
def test_notify(self, api, username, password):
"""

View File

@ -23,13 +23,14 @@ import xml.etree.cElementTree as etree
import xml.etree
import re
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
from sickbeard import logger, classes, helpers
from sickbeard.common import Quality
from sickbeard import encodingKludge as ek
from sickbeard.exceptions import ex
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
from sickbeard.encodingKludge import fixStupidEncodings
def getSeasonNZBs(name, urlData, season):
try:
@ -84,7 +85,7 @@ def createNZBString(fileElements, xmlns):
for curFile in fileElements:
rootElement.append(stripNS(curFile, xmlns))
return xml.etree.ElementTree.tostring(rootElement, 'utf-8', 'replace')
return xml.etree.ElementTree.tostring(fixStupidEncodings(rootElement))
def saveNZB(nzbName, nzbString):

View File

@ -20,13 +20,14 @@ import re
import time
import threading
import datetime
import sickbeard
from lib import adba
import sickbeard
import adba
from sickbeard import helpers
from sickbeard import name_cache
from sickbeard import logger
from sickbeard import db
from sickbeard.encodingKludge import fixStupidEncodings
exception_dict = {}
anidb_exception_dict = {}
@ -233,8 +234,7 @@ def retrieve_exceptions():
# if this exception isn't already in the DB then add it
if cur_exception not in existing_exceptions:
if not isinstance(cur_exception, unicode):
cur_exception = unicode(cur_exception, 'utf-8', 'replace')
cur_exception = fixStupidEncodings(cur_exception)
myDB.action("INSERT INTO scene_exceptions (indexer_id, show_name, season) VALUES (?,?,?)",
[cur_indexer_id, cur_exception, curSeason])
@ -267,9 +267,7 @@ def update_scene_exceptions(indexer_id, scene_exceptions, season=-1):
exceptionsCache[indexer_id][season] = scene_exceptions
for cur_exception in scene_exceptions:
if not isinstance(cur_exception, unicode):
cur_exception = unicode(cur_exception, 'utf-8', 'replace')
cur_exception = fixStupidEncodings(cur_exception)
myDB.action("INSERT INTO scene_exceptions (indexer_id, show_name, season) VALUES (?,?,?)",
[indexer_id, cur_exception, season])

View File

@ -293,6 +293,10 @@ class QueueItemAdd(ShowQueueItem):
self.show.scene = self.scene if self.scene != None else sickbeard.SCENE_DEFAULT
self.show.paused = self.paused if self.paused != None else False
# set up default new/missing episode status
self.show.default_ep_status = self.default_status
logger.log(u"Setting all episodes to the specified default status: " + str(self.show.default_ep_status))
# be smartish about this
if self.show.genre and "talk show" in self.show.genre.lower():
self.show.air_by_date = 1
@ -364,17 +368,10 @@ class QueueItemAdd(ShowQueueItem):
logger.log(u"Error searching dir for episodes: " + ex(e), logger.ERROR)
logger.log(traceback.format_exc(), logger.DEBUG)
# if they gave a custom status then change all the eps to it
if self.default_status != SKIPPED:
logger.log(u"Setting all episodes to the specified default status: " + str(self.default_status))
myDB = db.DBConnection()
myDB.action("UPDATE tv_episodes SET status = ? WHERE status = ? AND showid = ? AND season != 0",
[self.default_status, SKIPPED, self.show.indexerid])
# if they started with WANTED eps then run the backlog
if self.default_status == WANTED:
# if they set default ep status to WANTED then run the backlog to search for episodes
if self.show.default_ep_status == WANTED:
logger.log(u"Launching backlog for this show since its episodes are WANTED")
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show]) #@UndefinedVariable
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show])
self.show.writeMetadata()
self.show.updateMetadata()
@ -539,17 +536,21 @@ class QueueItemUpdate(ShowQueueItem):
self.show.indexer).name + ", the show info will not be refreshed: " + ex(e), logger.ERROR)
IndexerEpList = None
foundMissingEps = False
if IndexerEpList == None:
logger.log(u"No data returned from " + sickbeard.indexerApi(
self.show.indexer).name + ", unable to update this show", logger.ERROR)
else:
# for each ep we found on TVDB delete it from the DB list
# for each ep we found on the Indexer delete it from the DB list
for curSeason in IndexerEpList:
for curEpisode in IndexerEpList[curSeason]:
logger.log(u"Removing " + str(curSeason) + "x" + str(curEpisode) + " from the DB list",
logger.DEBUG)
if curSeason in DBEpList and curEpisode in DBEpList[curSeason]:
del DBEpList[curSeason][curEpisode]
else:
# found missing episodes
foundMissingEps = True
# for the remaining episodes in the DB list just delete them from the DB
for curSeason in DBEpList:
@ -562,8 +563,12 @@ class QueueItemUpdate(ShowQueueItem):
except exceptions.EpisodeDeletedException:
pass
sickbeard.showQueueScheduler.action.refreshShow(self.show, self.force)
# if they set default ep status to WANTED then run the backlog
if foundMissingEps and self.show.default_ep_status == WANTED:
logger.log(u"Launching backlog for this show since we found missing episodes")
sickbeard.backlogSearchScheduler.action.searchBacklog([self.show])
sickbeard.showQueueScheduler.action.refreshShow(self.show, self.force)
class QueueItemForceUpdate(QueueItemUpdate):
def __init__(self, show=None):

View File

@ -97,7 +97,7 @@ class TVShow(object):
self._scene = 0
self._rls_ignore_words = ""
self._rls_require_words = ""
self._default_ep_status = ""
self.dirty = True
self._location = ""
@ -139,6 +139,7 @@ class TVShow(object):
scene = property(lambda self: self._scene, dirty_setter("_scene"))
rls_ignore_words = property(lambda self: self._rls_ignore_words, dirty_setter("_rls_ignore_words"))
rls_require_words = property(lambda self: self._rls_require_words, dirty_setter("_rls_require_words"))
default_ep_status = property(lambda self: self._default_ep_status, dirty_setter("_default_ep_status"))
@property
def is_anime(self):
@ -577,7 +578,6 @@ class TVShow(object):
myDB = db.DBConnection()
myDB.mass_action(sql_l)
# Done updating save last update date
self.last_update_indexer = datetime.date.today().toordinal()
self.saveToDB()
@ -770,9 +770,11 @@ class TVShow(object):
self.status = sqlResults[0]["status"]
if not self.status:
self.status = ""
self.airs = sqlResults[0]["airs"]
if not self.airs:
self.airs = ""
self.startyear = sqlResults[0]["startyear"]
if not self.startyear:
self.startyear = 0
@ -825,6 +827,10 @@ class TVShow(object):
self.rls_ignore_words = sqlResults[0]["rls_ignore_words"]
self.rls_require_words = sqlResults[0]["rls_require_words"]
self.default_ep_status = sqlResults[0]["default_ep_status"]
if not self.default_ep_status:
self.default_ep_status = ""
if not self.imdbid:
self.imdbid = sqlResults[0]["imdb_id"]
@ -1156,7 +1162,8 @@ class TVShow(object):
"imdb_id": self.imdbid,
"last_update_indexer": self.last_update_indexer,
"rls_ignore_words": self.rls_ignore_words,
"rls_require_words": self.rls_require_words
"rls_require_words": self.rls_require_words,
"default_ep_status": self.default_ep_status
}
myDB = db.DBConnection()
@ -1741,9 +1748,9 @@ class TVEpisode(object):
if self.status == UNAIRED:
self.status = WANTED
# if we somehow are still UNKNOWN then just skip it
# if we somehow are still UNKNOWN then just use the shows defined default status
elif self.status == UNKNOWN:
self.status = SKIPPED
self.status = self.show.default_ep_status
else:
logger.log(
@ -2024,7 +2031,6 @@ class TVEpisode(object):
'%SN S%0SE%E',
'%SN S%SE%E',
'%SN S%0SE%0E'
]
strings = []
@ -2062,7 +2068,6 @@ class TVEpisode(object):
if len(self.relatedEps) == 0:
goodName = self.name
else:
goodName = ''
@ -2494,7 +2499,7 @@ class TVEpisode(object):
if airs:
hr = int(airs.group(1))
hr = (12 + hr, hr)[None is airs.group(3)]
hr = (hr, hr - 12)[0 == hr % 12]
hr = (hr, hr - 12)[0 == hr % 12 and 0 != hr]
min = int((airs.group(2), min)[None is airs.group(2)])
airtime = datetime.time(hr, min)

View File

@ -20,19 +20,20 @@ from __future__ import with_statement
import time
import datetime
import itertools
import sickbeard
from sickbeard import db
from sickbeard import logger
from sickbeard.common import Quality
from sickbeard import helpers, show_name_helpers
from sickbeard.exceptions import MultipleShowObjectsException
from sickbeard.exceptions import AuthException
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
from sickbeard.rssfeeds import RSSFeeds
from sickbeard import clients
import itertools
from name_parser.parser import NameParser, InvalidNameException, InvalidShowException
from sickbeard.encodingKludge import fixStupidEncodings
class CacheDBConnection(db.DBConnection):
def __init__(self, providerName):
@ -262,8 +263,7 @@ class TVCache():
# get quality of release
quality = parse_result.quality
if not isinstance(name, unicode):
name = unicode(name, 'utf-8', 'replace')
name = fixStupidEncodings(name)
# get release group
release_group = parse_result.release_group

File diff suppressed because it is too large Load Diff

View File

@ -64,8 +64,8 @@ from browser import WebFileBrowser
from lib.dateutil import tz
from lib.unrar2 import RarFile
from lib import subliminal
from trakt import TraktCall
from lib import adba, subliminal
from lib.trakt import TraktCall
try:
import json
@ -77,13 +77,13 @@ try:
except ImportError:
import xml.etree.ElementTree as etree
from lib import adba
from Cheetah.Template import Template
from tornado.web import RequestHandler, HTTPError, asynchronous
from bug_tracker import BugTracker
def authenticated(handler_class):
def wrap_execute(handler_execute):
def basicauth(handler, transforms, *args, **kwargs):
@ -101,7 +101,7 @@ def authenticated(handler_class):
'/api/builder' not in handler.request.uri):
return True
elif (handler.request.uri.startswith(sickbeard.WEB_ROOT + '/calendar') and
sickbeard.CALENDAR_UNPROTECTED):
sickbeard.CALENDAR_UNPROTECTED):
return True
auth_hdr = handler.request.headers.get('Authorization')
@ -394,8 +394,9 @@ class MainHandler(RequestHandler):
# add localtime to the dict
for index, item in enumerate(sql_results):
sql_results[index]['localtime'] = sbdatetime.sbdatetime.convert_to_setting(network_timezones.parse_date_time(item['airdate'],
item['airs'], item['network']))
sql_results[index]['localtime'] = sbdatetime.sbdatetime.convert_to_setting(
network_timezones.parse_date_time(item['airdate'],
item['airs'], item['network']))
sql_results.sort(sorts[sickbeard.COMING_EPS_SORT])
@ -424,7 +425,7 @@ class MainHandler(RequestHandler):
t.sql_results = sql_results
# Allow local overriding of layout parameter
if layout and layout in ('poster', 'banner', 'list','calendar'):
if layout and layout in ('poster', 'banner', 'list', 'calendar'):
t.layout = layout
else:
t.layout = sickbeard.COMING_EPS_LAYOUT
@ -487,7 +488,8 @@ class MainHandler(RequestHandler):
ical = ical + 'DESCRIPTION:' + show['airs'] + ' on ' + show['network'] + '\\n\\n' + \
episode['description'].splitlines()[0] + '\r\n'
else:
ical = ical + 'DESCRIPTION:' + (show['airs'] or '(Unknown airs)') + ' on ' + (show['network'] or 'Unknown network') + '\r\n'
ical = ical + 'DESCRIPTION:' + (show['airs'] or '(Unknown airs)') + ' on ' + (
show['network'] or 'Unknown network') + '\r\n'
ical = ical + 'END:VEVENT\r\n'
@ -1074,7 +1076,8 @@ class Manage(MainHandler):
return _munge(t)
def massEditSubmit(self, archive_firstmatch=None, paused=None, anime=None, sports=None, scene=None, flatten_folders=None,
def massEditSubmit(self, archive_firstmatch=None, paused=None, anime=None, sports=None, scene=None,
flatten_folders=None,
quality_preset=False,
subtitles=None, air_by_date=None, anyQualities=[], bestQualities=[], toEdit=None, *args,
**kwargs):
@ -1179,7 +1182,8 @@ class Manage(MainHandler):
redirect("/manage/")
def massUpdate(self, toUpdate=None, toRefresh=None, toRename=None, toDelete=None, toRemove=None, toMetadata=None, toSubtitle=None):
def massUpdate(self, toUpdate=None, toRefresh=None, toRename=None, toDelete=None, toRemove=None, toMetadata=None,
toSubtitle=None):
if toUpdate is not None:
toUpdate = toUpdate.split('|')
@ -1515,11 +1519,13 @@ class ConfigGeneral(MainHandler):
def saveGeneral(self, log_dir=None, web_port=None, web_log=None, encryption_version=None, web_ipv6=None,
update_shows_on_start=None, trash_remove_show=None, trash_rotate_logs=None, update_frequency=None, launch_browser=None, web_username=None,
update_shows_on_start=None, trash_remove_show=None, trash_rotate_logs=None, update_frequency=None,
launch_browser=None, web_username=None,
use_api=None, api_key=None, indexer_default=None, timezone_display=None, cpu_preset=None,
web_password=None, version_notify=None, enable_https=None, https_cert=None, https_key=None,
handle_reverse_proxy=None, sort_article=None, auto_update=None, notify_on_update=None,
proxy_setting=None, proxy_indexers=None, anon_redirect=None, git_path=None, git_remote=None, calendar_unprotected=None,
proxy_setting=None, proxy_indexers=None, anon_redirect=None, git_path=None, git_remote=None,
calendar_unprotected=None,
fuzzy_dating=None, trim_zero=None, date_preset=None, date_preset_na=None, time_preset=None,
indexer_timeout=None, play_videos=None, rootDir=None, theme_name=None):
@ -1671,7 +1677,8 @@ class ConfigSearch(MainHandler):
backlog_startup=None, dailysearch_startup=None,
torrent_dir=None, torrent_username=None, torrent_password=None, torrent_host=None,
torrent_label=None, torrent_path=None, torrent_verify_cert=None,
torrent_seed_time=None, torrent_paused=None, torrent_high_bandwidth=None, ignore_words=None, require_words=None):
torrent_seed_time=None, torrent_paused=None, torrent_high_bandwidth=None, ignore_words=None,
require_words=None):
results = []
@ -1753,7 +1760,8 @@ class ConfigPostProcessing(MainHandler):
wdtv_data=None, tivo_data=None, mede8er_data=None,
keep_processed_dir=None, process_method=None, process_automatically=None,
rename_episodes=None, airdate_episodes=None, unpack=None,
move_associated_files=None, postpone_if_sync_files=None, nfo_rename=None, tv_download_dir=None, naming_custom_abd=None,
move_associated_files=None, postpone_if_sync_files=None, nfo_rename=None,
tv_download_dir=None, naming_custom_abd=None,
naming_anime=None,
naming_abd_pattern=None, naming_strip_year=None, use_failed_downloads=None,
delete_failed=None, extra_scripts=None, skip_removed_files=None,
@ -1783,7 +1791,6 @@ class ConfigPostProcessing(MainHandler):
except:
pass
if unpack:
if self.isRarSupported() != 'not supported':
sickbeard.UNPACK = config.checkbox_to_value(unpack)
@ -2004,17 +2011,17 @@ class ConfigProviders(MainHandler):
error += "\nNo Provider Api key specified"
if error <> "":
return json.dumps({'success' : False, 'error': error})
return json.dumps({'success': False, 'error': error})
#Get list with Newznabproviders
#providerDict = dict(zip([x.getID() for x in sickbeard.newznabProviderList], sickbeard.newznabProviderList))
# Get list with Newznabproviders
# providerDict = dict(zip([x.getID() for x in sickbeard.newznabProviderList], sickbeard.newznabProviderList))
#Get newznabprovider obj with provided name
tempProvider= newznab.NewznabProvider(name, url, key)
# Get newznabprovider obj with provided name
tempProvider = newznab.NewznabProvider(name, url, key)
success, tv_categories, error = tempProvider.get_newznab_categories()
return json.dumps({'success' : success,'tv_categories' : tv_categories, 'error' : error})
return json.dumps({'success': success, 'tv_categories': tv_categories, 'error': error})
def deleteNewznabProvider(self, nnid):
@ -2310,14 +2317,14 @@ class ConfigProviders(MainHandler):
curTorrentProvider.enable_daily = config.checkbox_to_value(
kwargs[curTorrentProvider.getID() + '_enable_daily'])
except:
curTorrentProvider.enable_daily = 0 # these exceptions are actually catching unselected checkboxes
curTorrentProvider.enable_daily = 0 # these exceptions are actually catching unselected checkboxes
if hasattr(curTorrentProvider, 'enable_backlog'):
try:
curTorrentProvider.enable_backlog = config.checkbox_to_value(
kwargs[curTorrentProvider.getID() + '_enable_backlog'])
except:
curTorrentProvider.enable_backlog = 0 # these exceptions are actually catching unselected checkboxes
curTorrentProvider.enable_backlog = 0 # these exceptions are actually catching unselected checkboxes
for curNzbProvider in [curProvider for curProvider in sickbeard.providers.sortedProviderList() if
curProvider.providerType == sickbeard.GenericProvider.NZB]:
@ -2876,18 +2883,19 @@ class NewHomeAddShows(MainHandler):
indexer_id = show_name = indexer = None
for cur_provider in sickbeard.metadata_provider_dict.values():
(indexer_id, show_name, indexer) = cur_provider.retrieveShowMetadata(cur_path)
if not (indexer_id and show_name):
(indexer_id, show_name, indexer) = cur_provider.retrieveShowMetadata(cur_path)
# default to TVDB if indexer was not detected
if show_name and not (indexer or indexer_id):
(sn, idx, id) = helpers.searchIndexerForShowID(show_name, indexer, indexer_id)
# default to TVDB if indexer was not detected
if show_name and not (indexer or indexer_id):
(sn, idx, id) = helpers.searchIndexerForShowID(show_name, indexer, indexer_id)
# set indexer and indexer_id from found info
if not indexer and idx:
indexer = idx
# set indexer and indexer_id from found info
if not indexer and idx:
indexer = idx
if not indexer_id and id:
indexer_id = id
if not indexer_id and id:
indexer_id = id
cur_dir['existing_info'] = (indexer_id, show_name, indexer)
@ -2921,7 +2929,7 @@ class NewHomeAddShows(MainHandler):
if not show_dir:
t.default_show_name = ''
elif not show_name:
t.default_show_name = ek.ek(os.path.basename, ek.ek(os.path.normpath, show_dir)).replace('.', ' ')
t.default_show_name = re.sub(' \(\d{4}\)','', ek.ek(os.path.basename, ek.ek(os.path.normpath, show_dir)).replace('.', ' '))
else:
t.default_show_name = show_name
@ -2959,19 +2967,13 @@ class NewHomeAddShows(MainHandler):
recommendedlist = TraktCall("recommendations/shows.json/%API%", sickbeard.TRAKT_API, sickbeard.TRAKT_USERNAME,
sickbeard.TRAKT_PASSWORD)
if recommendedlist == 'NULL':
logger.log(u"No shows found in your recommendedlist, aborting recommendedlist update", logger.DEBUG)
return
if recommendedlist is None:
logger.log(u"Could not connect to trakt service, aborting recommended list update", logger.ERROR)
return
map(final_results.append,
([int(show['tvdb_id'] or 0) if sickbeard.TRAKT_DEFAULT_INDEXER == 1 else int(show['tvdb_id'] or 0),
show['url'], show['title'], show['overview'],
datetime.date.fromtimestamp(int(show['first_aired']) / 1000.0).strftime('%Y%m%d')] for show in
recommendedlist if not helpers.findCertainShow(sickbeard.showList, indexerid=int(show['tvdb_id']))))
if recommendedlist:
indexers = ['tvdb_id', 'tvrage_id']
map(final_results.append, (
[int(show[indexers[sickbeard.TRAKT_DEFAULT_INDEXER - 1]]), show['url'], show['title'], show['overview'],
datetime.date.fromtimestamp(int(show['first_aired']) / 1000.0).strftime('%Y%m%d')]
for show in recommendedlist if not helpers.findCertainShow(sickbeard.showList, [
int(show[indexers[sickbeard.TRAKT_DEFAULT_INDEXER - 1]])])))
return json.dumps({'results': final_results})
@ -3000,12 +3002,16 @@ class NewHomeAddShows(MainHandler):
t = PageTemplate(headers=self.request.headers, file="home_trendingShows.tmpl")
t.submenu = HomeMenu()
t.trending_shows = TraktCall("shows/trending.json/%API%", sickbeard.TRAKT_API_KEY)
t.trending_shows = []
if None is not t.trending_shows:
for item in t.trending_shows:
if helpers.findCertainShow(sickbeard.showList, int(item['tvdb_id'])):
item['tvdb_id'] = u'ExistsInLibrary'
trending_shows = TraktCall("shows/trending.json/%API%", sickbeard.TRAKT_API_KEY)
if trending_shows:
for show in trending_shows:
try:
if not helpers.findCertainShow(sickbeard.showList, [int(show['tvdb_id']), int(show['tvrage_id'])]):
t.trending_shows += [show]
except exceptions.MultipleShowObjectsException:
continue
return _munge(t)
@ -3282,7 +3288,7 @@ class ErrorLogs(MainHandler):
for x in reversed(data):
x = x.decode('utf-8', 'replace')
x = ek.fixStupidEncodings(x)
match = re.match(regex, x)
if match:
@ -3833,7 +3839,7 @@ class Home(MainHandler):
flatten_folders=None, paused=None, directCall=False, air_by_date=None, sports=None, dvdorder=None,
indexerLang=None, subtitles=None, archive_firstmatch=None, rls_ignore_words=None,
rls_require_words=None, anime=None, blackWords=None, whiteWords=None, blacklist=None, whitelist=None,
scene=None):
scene=None, defaultEpStatus=None):
if show is None:
errString = "Invalid show ID: " + str(show)
@ -4007,6 +4013,7 @@ class Home(MainHandler):
showObj.dvdorder = dvdorder
showObj.rls_ignore_words = rls_ignore_words.strip()
showObj.rls_require_words = rls_require_words.strip()
showObj.default_ep_status = defaultEpStatus
# if we change location clear the db of episodes, change it, write to db, and rescan
if os.path.normpath(showObj._location) != os.path.normpath(location):
@ -4087,8 +4094,8 @@ class Home(MainHandler):
ui.notifications.message('<b>%s</b> has been %s %s' %
(showObj.name,
('deleted', 'trashed')[sickbeard.TRASH_REMOVE_SHOW],
('(media untouched)', '(with all related media)')[bool(full)]))
('deleted', 'trashed')[sickbeard.TRASH_REMOVE_SHOW],
('(media untouched)', '(with all related media)')[bool(full)]))
redirect("/home/")
@ -4405,10 +4412,9 @@ class Home(MainHandler):
sickbeard.searchQueueScheduler.action.add_item(ep_queue_item) # @UndefinedVariable
if ep_queue_item.success:
return returnManualSearchResult(ep_queue_item)
if not ep_queue_item.started and ep_queue_item.success is None:
return json.dumps({'result': 'success'}) #I Actually want to call it queued, because the search hasnt been started yet!
return json.dumps(
{'result': 'success'}) # I Actually want to call it queued, because the search hasnt been started yet!
if ep_queue_item.started and ep_queue_item.success is None:
return json.dumps({'result': 'success'})
else:
@ -4422,16 +4428,17 @@ class Home(MainHandler):
episodes = []
currentManualSearchThreadsQueued = []
currentManualSearchThreadActive = []
finishedManualSearchThreadItems= []
finishedManualSearchThreadItems = []
# Queued Searches
currentManualSearchThreadsQueued = sickbeard.searchQueueScheduler.action.get_all_ep_from_queue(show)
# Running Searches
if (sickbeard.searchQueueScheduler.action.is_manualsearch_in_progress()):
currentManualSearchThreadActive = sickbeard.searchQueueScheduler.action.currentItem
# Finished Searches
finishedManualSearchThreadItems = sickbeard.search_queue.MANUAL_SEARCH_HISTORY
finishedManualSearchThreadItems = sickbeard.search_queue.MANUAL_SEARCH_HISTORY
if currentManualSearchThreadsQueued:
for searchThread in currentManualSearchThreadsQueued:
@ -4439,18 +4446,18 @@ class Home(MainHandler):
if isinstance(searchThread, sickbeard.search_queue.ManualSearchQueueItem):
episodes.append({'episode': searchThread.segment.episode,
'episodeindexid': searchThread.segment.indexerid,
'season' : searchThread.segment.season,
'searchstatus' : searchstatus,
'status' : statusStrings[searchThread.segment.status],
'season': searchThread.segment.season,
'searchstatus': searchstatus,
'status': statusStrings[searchThread.segment.status],
'quality': self.getQualityClass(searchThread.segment)})
else:
for epObj in searchThread.segment:
episodes.append({'episode': epObj.episode,
'episodeindexid': epObj.indexerid,
'season' : epObj.season,
'searchstatus' : searchstatus,
'status' : statusStrings[epObj.status],
'quality': self.getQualityClass(epObj)})
'episodeindexid': epObj.indexerid,
'season': epObj.season,
'searchstatus': searchstatus,
'status': statusStrings[epObj.status],
'quality': self.getQualityClass(epObj)})
if currentManualSearchThreadActive:
searchThread = currentManualSearchThreadActive
@ -4461,22 +4468,23 @@ class Home(MainHandler):
searchstatus = 'searching'
episodes.append({'episode': searchThread.segment.episode,
'episodeindexid': searchThread.segment.indexerid,
'season' : searchThread.segment.season,
'searchstatus' : searchstatus,
'status' : statusStrings[searchThread.segment.status],
'season': searchThread.segment.season,
'searchstatus': searchstatus,
'status': statusStrings[searchThread.segment.status],
'quality': self.getQualityClass(searchThread.segment)})
if finishedManualSearchThreadItems:
for searchThread in finishedManualSearchThreadItems:
if isinstance(searchThread, sickbeard.search_queue.ManualSearchQueueItem):
if str(searchThread.show.indexerid) == show and not [x for x in episodes if x['episodeindexid'] == searchThread.segment.indexerid]:
if str(searchThread.show.indexerid) == show and not [x for x in episodes if x[
'episodeindexid'] == searchThread.segment.indexerid]:
searchstatus = 'finished'
episodes.append({'episode': searchThread.segment.episode,
'episodeindexid': searchThread.segment.indexerid,
'season' : searchThread.segment.season,
'searchstatus' : searchstatus,
'status' : statusStrings[searchThread.segment.status],
'quality': self.getQualityClass(searchThread.segment)})
'season': searchThread.segment.season,
'searchstatus': searchstatus,
'status': statusStrings[searchThread.segment.status],
'quality': self.getQualityClass(searchThread.segment)})
else:
### These are only Failed Downloads/Retry SearchThreadItems; let's loop through the segment/episodes
if str(searchThread.show.indexerid) == show:
@ -4485,14 +4493,12 @@ class Home(MainHandler):
searchstatus = 'finished'
episodes.append({'episode': epObj.episode,
'episodeindexid': epObj.indexerid,
'season' : epObj.season,
'searchstatus' : searchstatus,
'status' : statusStrings[epObj.status],
'quality': self.getQualityClass(epObj)})
'season': epObj.season,
'searchstatus': searchstatus,
'status': statusStrings[epObj.status],
'quality': self.getQualityClass(epObj)})
return json.dumps({'show': show, 'episodes' : episodes})
#return json.dumps()
return json.dumps({'show': show, 'episodes': episodes})
def getQualityClass(self, ep_obj):
# return the correct json value
@ -4530,7 +4536,8 @@ class Home(MainHandler):
status = 'No subtitles downloaded'
ui.notifications.message('Subtitles Search', status)
return json.dumps({'result': status, 'subtitles': ','.join(sorted([x.alpha2 for x in
ep_obj.subtitles.union(previous_subtitles)]))})
ep_obj.subtitles.union(
previous_subtitles)]))})
def setSceneNumbering(self, show, indexer, forSeason=None, forEpisode=None, forAbsolute=None, sceneSeason=None,
sceneEpisode=None, sceneAbsolute=None):
@ -4617,10 +4624,9 @@ class Home(MainHandler):
ep_queue_item = search_queue.FailedQueueItem(ep_obj.show, [ep_obj])
sickbeard.searchQueueScheduler.action.add_item(ep_queue_item) # @UndefinedVariable
if ep_queue_item.success:
return returnManualSearchResult(ep_queue_item)
if not ep_queue_item.started and ep_queue_item.success is None:
return json.dumps({'result': 'success'}) #I Actually want to call it queued, because the search hasnt been started yet!
return json.dumps(
{'result': 'success'}) # I Actually want to call it queued, because the search hasnt been started yet!
if ep_queue_item.started and ep_queue_item.success is None:
return json.dumps({'result': 'success'})
else:

View File

@ -191,14 +191,25 @@ def tearDown_test_db():
although this seems not to work on my system; it leaves me with a zero kb file
"""
# uncomment next line to leave the db intact between tests and at the end
#return False
if os.path.exists(os.path.join(TESTDIR, TESTDBNAME)):
os.remove(os.path.join(TESTDIR, TESTDBNAME))
if os.path.exists(os.path.join(TESTDIR, TESTCACHEDBNAME)):
os.remove(os.path.join(TESTDIR, TESTCACHEDBNAME))
if os.path.exists(os.path.join(TESTDIR, TESTFAILEDDBNAME)):
os.remove(os.path.join(TESTDIR, TESTFAILEDDBNAME))
# return False
try:
if os.path.exists(os.path.join(TESTDIR, TESTDBNAME)):
os.remove(os.path.join(TESTDIR, TESTDBNAME))
except:
pass
try:
if os.path.exists(os.path.join(TESTDIR, TESTCACHEDBNAME)):
os.remove(os.path.join(TESTDIR, TESTCACHEDBNAME))
except:
pass
try:
if os.path.exists(os.path.join(TESTDIR, TESTFAILEDDBNAME)):
os.remove(os.path.join(TESTDIR, TESTFAILEDDBNAME))
except:
pass
def setUp_test_episode_file():
if not os.path.exists(FILEDIR):

View File

@ -36,11 +36,11 @@ class TVShowTests(test.SickbeardTestDBCase):
def test_change_indexerid(self):
show = TVShow(1, 0001, "en")
show.name = "show name"
show.tvrname = "show name"
show.network = "cbs"
show.genre = "crime"
show.runtime = 40
show.status = "5"
show.default_ep_status = "5"
show.airs = "monday"
show.startyear = 1987
@ -85,11 +85,11 @@ class TVTests(test.SickbeardTestDBCase):
def test_getEpisode(self):
show = TVShow(1, 0001, "en")
show.name = "show name"
show.tvrname = "show name"
show.network = "cbs"
show.genre = "crime"
show.runtime = 40
show.status = "5"
show.default_ep_status = "5"
show.airs = "monday"
show.startyear = 1987
show.saveToDB()