SickRage/lib/hachoir_metadata/misc.py

from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor
from lib.hachoir_metadata.safe import fault_tolerant
from lib.hachoir_parser.container import SwfFile
from lib.hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile
from lib.hachoir_core.field import isString
from lib.hachoir_core.error import warning
from lib.hachoir_parser import guessParser
from lib.hachoir_metadata.setter import normalizeString

class TorrentMetadata(RootMetadata):
    KEY_TO_ATTR = {
        u"announce": "url",
        u"comment": "comment",
        u"creation_date": "creation_date",
    }
    INFO_TO_ATTR = {
        u"length": "file_size",
        u"name": "filename",
    }

    def extract(self, torrent):
        for field in torrent[0]:
            self.processRoot(field)

    @fault_tolerant
    def processRoot(self, field):
        if field.name in self.KEY_TO_ATTR:
            key = self.KEY_TO_ATTR[field.name]
            value = field.value
            setattr(self, key, value)
        elif field.name == "info" and "value" in field:
            for field in field["value"]:
                self.processInfo(field)

    @fault_tolerant
    def processInfo(self, field):
        if field.name in self.INFO_TO_ATTR:
            key = self.INFO_TO_ATTR[field.name]
            value = field.value
            setattr(self, key, value)
        elif field.name == "piece_length":
            self.comment = "Piece length: %s" % field.display

class TTF_Metadata(RootMetadata):
    NAMEID_TO_ATTR = {
        0: "copyright",   # Copyright notice
        3: "title",       # Unique font identifier
        5: "version",     # Version string
        8: "author",      # Manufacturer name
        11: "url",        # URL Vendor
        14: "copyright",  # License info URL
    }

    def extract(self, ttf):
        if "header" in ttf:
            self.extractHeader(ttf["header"])
        if "names" in ttf:
            self.extractNames(ttf["names"])

    @fault_tolerant
    def extractHeader(self, header):
        self.creation_date = header["created"].value
        self.last_modification = header["modified"].value
        self.comment = u"Smallest readable size in pixels: %s pixels" % header["lowest"].value
        self.comment = u"Font direction: %s" % header["font_dir"].display

    @fault_tolerant
    def extractNames(self, names):
        offset = names["offset"].value
        for header in names.array("header"):
            key = header["nameID"].value
            foffset = offset + header["offset"].value
            field = names.getFieldByAddress(foffset*8)
            if not field or not isString(field):
                continue
            value = field.value
            if key not in self.NAMEID_TO_ATTR:
                continue
            key = self.NAMEID_TO_ATTR[key]
            if key == "version" and value.startswith(u"Version "):
                # "Version 1.2" => "1.2"
                value = value[8:]
            setattr(self, key, value)

class OLE2_Metadata(RootMetadata):
    SUMMARY_ID_TO_ATTR = {
         2: "title",     # Title
         3: "title",     # Subject
         4: "author",
         6: "comment",
         8: "author",    # Last saved by
        12: "creation_date",
        13: "last_modification",
        14: "nb_page",
        18: "producer",
    }
    IGNORE_SUMMARY = set((
        1, # Code page
    ))

    DOC_SUMMARY_ID_TO_ATTR = {
         3: "title",     # Subject
        14: "author",    # Manager
    }
    IGNORE_DOC_SUMMARY = set((
        1, # Code page
    ))

    def extract(self, ole2):
        self._extract(ole2)

    def _extract(self, fieldset, main_document=True):
        if main_document:
            # _feedAll() is needed to make sure that we get all root[*] fragments
            fieldset._feedAll()
            if "root[0]" in fieldset:
                self.useRoot(fieldset["root[0]"])
        doc_summary = self.getField(fieldset, main_document, "doc_summary[0]")
        if doc_summary:
            self.useSummary(doc_summary, True)
        word_doc = self.getField(fieldset, main_document, "word_doc[0]")
        if word_doc:
            self.useWordDocument(word_doc)
        summary = self.getField(fieldset, main_document, "summary[0]")
        if summary:
            self.useSummary(summary, False)

    @fault_tolerant
    def useRoot(self, root):
        stream = root.getSubIStream()
        ministream = guessParser(stream)
        if not ministream:
            warning("Unable to create the OLE2 mini stream parser!")
            return
        self._extract(ministream, main_document=False)

    def getField(self, fieldset, main_document, name):
        if name not in fieldset:
            return None
        # _feedAll() is needed to make sure that we get all fragments
        # eg. summary[0], summary[1], ..., summary[n]
        fieldset._feedAll()
        field = fieldset[name]
        if main_document:
            stream = field.getSubIStream()
            field = guessParser(stream)
            if not field:
                warning("Unable to create the OLE2 parser for %s!" % name)
                return None
        return field

    @fault_tolerant
    def useSummary(self, summary, is_doc_summary):
        if "os" in summary:
            self.os = summary["os"].display
        if "section[0]" not in summary:
            return
        summary = summary["section[0]"]
        for property in summary.array("property_index"):
            self.useProperty(summary, property, is_doc_summary)

    @fault_tolerant
    def useWordDocument(self, doc):
        self.comment = "Encrypted: %s" % doc["fEncrypted"].value

    @fault_tolerant
    def useProperty(self, summary, property, is_doc_summary):
        field = summary.getFieldByAddress(property["offset"].value*8)
        if not field \
        or "value" not in field:
            return
        field = field["value"]
        if not field.hasValue():
            return

        # Get value
        value = field.value
        if isinstance(value, (str, unicode)):
            value = normalizeString(value)
            if not value:
                return

        # Get property identifier
        prop_id = property["id"].value
        if is_doc_summary:
            id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR
            ignore = self.IGNORE_DOC_SUMMARY
        else:
            id_to_attr = self.SUMMARY_ID_TO_ATTR
            ignore = self.IGNORE_SUMMARY
        if prop_id in ignore:
            return

        # Get Hachoir metadata key
        try:
            key = id_to_attr[prop_id]
            use_prefix = False
        except LookupError:
            key = "comment"
            use_prefix = True
        if use_prefix:
            prefix = property["id"].display
            if (prefix in ("TotalEditingTime", "LastPrinted")) \
            and (not field):
                # Ignore null time delta
                return
            value = "%s: %s" % (prefix, value)
        else:
            if (key == "last_modification") and (not field):
                # Ignore null timestamp
                return
        setattr(self, key, value)

class PcfMetadata(RootMetadata):
    PROP_TO_KEY = {
        'CHARSET_REGISTRY': 'charset',
        'COPYRIGHT': 'copyright',
        'WEIGHT_NAME': 'font_weight',
        'FOUNDRY': 'author',
        'FONT': 'title',
        '_XMBDFED_INFO': 'producer',
    }

    def extract(self, pcf):
        if "properties" in pcf:
            self.useProperties(pcf["properties"])

    def useProperties(self, properties):
        last = properties["total_str_length"]
        offset0 = last.address + last.size
        for index in properties.array("property"):
            # Search name and value
            value = properties.getFieldByAddress(offset0+index["value_offset"].value*8)
            if not value:
                continue
            value = value.value
            if not value:
                continue
            name = properties.getFieldByAddress(offset0+index["name_offset"].value*8)
            if not name:
                continue
            name = name.value
            if name not in self.PROP_TO_KEY:
                warning("Skip %s=%r" % (name, value))
                continue
            key = self.PROP_TO_KEY[name]
            setattr(self, key, value)

class SwfMetadata(RootMetadata):
    def extract(self, swf):
        self.height = swf["rect/ymax"].value # twips
        self.width = swf["rect/xmax"].value # twips
        self.format_version = "flash version %s" % swf["version"].value
        self.frame_rate = swf["frame_rate"].value
        self.comment = "Frame count: %s" % swf["frame_count"].value

registerExtractor(TorrentFile, TorrentMetadata)
registerExtractor(TrueTypeFontFile, TTF_Metadata)
registerExtractor(OLE2_File, OLE2_Metadata)
registerExtractor(PcfFile, PcfMetadata)
registerExtractor(SwfFile, SwfMetadata)
Welcome to our SickBeard-TVRage Edition ... This version of SickBeard uses both TVDB and TVRage to search and gather it's series data from allowing you to now have access to and download shows that you couldn't before because of being locked into only what TheTVDB had to offer. Also this edition is based off the code we used in our XEM editon so it does come with scene numbering support as well as all the other features our XEM edition has to offer. Please before using this with your existing database (sickbeard.db) please make a backup copy of it and delete any other database files such as cache.db and failed.db if present, we HIGHLY recommend starting out with no database files at all to make this a fresh start but the choice is at your own risk! Enjoy! 2014-03-10 01:18:05 -04:00			`from lib.hachoir_metadata.metadata import RootMetadata, registerExtractor`
			`from lib.hachoir_metadata.safe import fault_tolerant`
			`from lib.hachoir_parser.container import SwfFile`
			`from lib.hachoir_parser.misc import TorrentFile, TrueTypeFontFile, OLE2_File, PcfFile`
			`from lib.hachoir_core.field import isString`
			`from lib.hachoir_core.error import warning`
			`from lib.hachoir_parser import guessParser`
			`from lib.hachoir_metadata.setter import normalizeString`

			`class TorrentMetadata(RootMetadata):`
			`KEY_TO_ATTR = {`
			`u"announce": "url",`
			`u"comment": "comment",`
			`u"creation_date": "creation_date",`
			`}`
			`INFO_TO_ATTR = {`
			`u"length": "file_size",`
			`u"name": "filename",`
			`}`

			`def extract(self, torrent):`
			`for field in torrent[0]:`
			`self.processRoot(field)`

			`@fault_tolerant`
			`def processRoot(self, field):`
			`if field.name in self.KEY_TO_ATTR:`
			`key = self.KEY_TO_ATTR[field.name]`
			`value = field.value`
			`setattr(self, key, value)`
			`elif field.name == "info" and "value" in field:`
			`for field in field["value"]:`
			`self.processInfo(field)`

			`@fault_tolerant`
			`def processInfo(self, field):`
			`if field.name in self.INFO_TO_ATTR:`
			`key = self.INFO_TO_ATTR[field.name]`
			`value = field.value`
			`setattr(self, key, value)`
			`elif field.name == "piece_length":`
			`self.comment = "Piece length: %s" % field.display`

			`class TTF_Metadata(RootMetadata):`
			`NAMEID_TO_ATTR = {`
			`0: "copyright", # Copyright notice`
			`3: "title", # Unique font identifier`
			`5: "version", # Version string`
			`8: "author", # Manufacturer name`
			`11: "url", # URL Vendor`
			`14: "copyright", # License info URL`
			`}`

			`def extract(self, ttf):`
			`if "header" in ttf:`
			`self.extractHeader(ttf["header"])`
			`if "names" in ttf:`
			`self.extractNames(ttf["names"])`

			`@fault_tolerant`
			`def extractHeader(self, header):`
			`self.creation_date = header["created"].value`
			`self.last_modification = header["modified"].value`
			`self.comment = u"Smallest readable size in pixels: %s pixels" % header["lowest"].value`
			`self.comment = u"Font direction: %s" % header["font_dir"].display`

			`@fault_tolerant`
			`def extractNames(self, names):`
			`offset = names["offset"].value`
			`for header in names.array("header"):`
			`key = header["nameID"].value`
			`foffset = offset + header["offset"].value`
			`field = names.getFieldByAddress(foffset*8)`
			`if not field or not isString(field):`
			`continue`
			`value = field.value`
			`if key not in self.NAMEID_TO_ATTR:`
			`continue`
			`key = self.NAMEID_TO_ATTR[key]`
			`if key == "version" and value.startswith(u"Version "):`
			`# "Version 1.2" => "1.2"`
			`value = value[8:]`
			`setattr(self, key, value)`

			`class OLE2_Metadata(RootMetadata):`
			`SUMMARY_ID_TO_ATTR = {`
			`2: "title", # Title`
			`3: "title", # Subject`
			`4: "author",`
			`6: "comment",`
			`8: "author", # Last saved by`
			`12: "creation_date",`
			`13: "last_modification",`
			`14: "nb_page",`
			`18: "producer",`
			`}`
			`IGNORE_SUMMARY = set((`
			`1, # Code page`
			`))`

			`DOC_SUMMARY_ID_TO_ATTR = {`
			`3: "title", # Subject`
			`14: "author", # Manager`
			`}`
			`IGNORE_DOC_SUMMARY = set((`
			`1, # Code page`
			`))`

			`def extract(self, ole2):`
			`self._extract(ole2)`

			`def _extract(self, fieldset, main_document=True):`
			`if main_document:`
			`# _feedAll() is needed to make sure that we get all root[*] fragments`
			`fieldset._feedAll()`
			`if "root[0]" in fieldset:`
			`self.useRoot(fieldset["root[0]"])`
			`doc_summary = self.getField(fieldset, main_document, "doc_summary[0]")`
			`if doc_summary:`
			`self.useSummary(doc_summary, True)`
			`word_doc = self.getField(fieldset, main_document, "word_doc[0]")`
			`if word_doc:`
			`self.useWordDocument(word_doc)`
			`summary = self.getField(fieldset, main_document, "summary[0]")`
			`if summary:`
			`self.useSummary(summary, False)`

			`@fault_tolerant`
			`def useRoot(self, root):`
			`stream = root.getSubIStream()`
			`ministream = guessParser(stream)`
			`if not ministream:`
			`warning("Unable to create the OLE2 mini stream parser!")`
			`return`
			`self._extract(ministream, main_document=False)`

			`def getField(self, fieldset, main_document, name):`
			`if name not in fieldset:`
			`return None`
			`# _feedAll() is needed to make sure that we get all fragments`
			`# eg. summary[0], summary[1], ..., summary[n]`
			`fieldset._feedAll()`
			`field = fieldset[name]`
			`if main_document:`
			`stream = field.getSubIStream()`
			`field = guessParser(stream)`
			`if not field:`
			`warning("Unable to create the OLE2 parser for %s!" % name)`
			`return None`
			`return field`

			`@fault_tolerant`
			`def useSummary(self, summary, is_doc_summary):`
			`if "os" in summary:`
			`self.os = summary["os"].display`
			`if "section[0]" not in summary:`
			`return`
			`summary = summary["section[0]"]`
			`for property in summary.array("property_index"):`
			`self.useProperty(summary, property, is_doc_summary)`

			`@fault_tolerant`
			`def useWordDocument(self, doc):`
			`self.comment = "Encrypted: %s" % doc["fEncrypted"].value`

			`@fault_tolerant`
			`def useProperty(self, summary, property, is_doc_summary):`
			`field = summary.getFieldByAddress(property["offset"].value*8)`
			`if not field \`
			`or "value" not in field:`
			`return`
			`field = field["value"]`
			`if not field.hasValue():`
			`return`

			`# Get value`
			`value = field.value`
			`if isinstance(value, (str, unicode)):`
			`value = normalizeString(value)`
			`if not value:`
			`return`

			`# Get property identifier`
			`prop_id = property["id"].value`
			`if is_doc_summary:`
			`id_to_attr = self.DOC_SUMMARY_ID_TO_ATTR`
			`ignore = self.IGNORE_DOC_SUMMARY`
			`else:`
			`id_to_attr = self.SUMMARY_ID_TO_ATTR`
			`ignore = self.IGNORE_SUMMARY`
			`if prop_id in ignore:`
			`return`

			`# Get Hachoir metadata key`
			`try:`
			`key = id_to_attr[prop_id]`
			`use_prefix = False`
			`except LookupError:`
			`key = "comment"`
			`use_prefix = True`
			`if use_prefix:`
			`prefix = property["id"].display`
			`if (prefix in ("TotalEditingTime", "LastPrinted")) \`
			`and (not field):`
			`# Ignore null time delta`
			`return`
			`value = "%s: %s" % (prefix, value)`
			`else:`
			`if (key == "last_modification") and (not field):`
			`# Ignore null timestamp`
			`return`
			`setattr(self, key, value)`

			`class PcfMetadata(RootMetadata):`
			`PROP_TO_KEY = {`
			`'CHARSET_REGISTRY': 'charset',`
			`'COPYRIGHT': 'copyright',`
			`'WEIGHT_NAME': 'font_weight',`
			`'FOUNDRY': 'author',`
			`'FONT': 'title',`
			`'_XMBDFED_INFO': 'producer',`
			`}`

			`def extract(self, pcf):`
			`if "properties" in pcf:`
			`self.useProperties(pcf["properties"])`

			`def useProperties(self, properties):`
			`last = properties["total_str_length"]`
			`offset0 = last.address + last.size`
			`for index in properties.array("property"):`
			`# Search name and value`
			`value = properties.getFieldByAddress(offset0+index["value_offset"].value*8)`
			`if not value:`
			`continue`
			`value = value.value`
			`if not value:`
			`continue`
			`name = properties.getFieldByAddress(offset0+index["name_offset"].value*8)`
			`if not name:`
			`continue`
			`name = name.value`
			`if name not in self.PROP_TO_KEY:`
			`warning("Skip %s=%r" % (name, value))`
			`continue`
			`key = self.PROP_TO_KEY[name]`
			`setattr(self, key, value)`

			`class SwfMetadata(RootMetadata):`
			`def extract(self, swf):`
			`self.height = swf["rect/ymax"].value # twips`
			`self.width = swf["rect/xmax"].value # twips`
			`self.format_version = "flash version %s" % swf["version"].value`
			`self.frame_rate = swf["frame_rate"].value`
			`self.comment = "Frame count: %s" % swf["frame_count"].value`

			`registerExtractor(TorrentFile, TorrentMetadata)`
			`registerExtractor(TrueTypeFontFile, TTF_Metadata)`
			`registerExtractor(OLE2_File, OLE2_Metadata)`
			`registerExtractor(PcfFile, PcfMetadata)`
			`registerExtractor(SwfFile, SwfMetadata)`