""" Parsers for the different streams and fragments found in an OLE2 file. Documents: - goffice source code Author: Robert Xiao, Victor Stinner Creation: 2006-04-23 """ from lib.hachoir_parser import HachoirParser from lib.hachoir_core.field import FieldSet, RootSeekableFieldSet, RawBytes from lib.hachoir_core.endian import LITTLE_ENDIAN from lib.hachoir_core.stream import StringInputStream from lib.hachoir_parser.misc.msoffice_summary import SummaryFieldSet, CompObj from lib.hachoir_parser.misc.word_doc import WordDocumentFieldSet PROPERTY_NAME = { u"\5DocumentSummaryInformation": "doc_summary", u"\5SummaryInformation": "summary", u"WordDocument": "word_doc", } class OfficeRootEntry(HachoirParser, RootSeekableFieldSet): PARSER_TAGS = { "description": "Microsoft Office document subfragments", } endian = LITTLE_ENDIAN def __init__(self, stream, **args): RootSeekableFieldSet.__init__(self, None, "root", stream, None, stream.askSize(self)) HachoirParser.__init__(self, stream, **args) def validate(self): return True def createFields(self): for index, property in enumerate(self.ole2.properties): if index == 0: continue try: name = PROPERTY_NAME[property["name"].value] except LookupError: name = property.name+"content" for field in self.parseProperty(index, property, name): yield field def parseProperty(self, property_index, property, name_prefix): ole2 = self.ole2 if not property["size"].value: return if property["size"].value >= ole2["header/threshold"].value: return name = "%s[]" % name_prefix first = None previous = None size = 0 start = property["start"].value chain = ole2.getChain(start, True) blocksize = ole2.ss_size desc_format = "Small blocks %s..%s (%s)" while True: try: block = chain.next() contiguous = False if not first: first = block contiguous = True if previous and block == (previous+1): contiguous = True if contiguous: previous = block size += blocksize continue except StopIteration: block = None self.seekSBlock(first) desc = desc_format % (first, previous, previous-first+1) size = min(size, property["size"].value*8) if name_prefix in ("summary", "doc_summary"): yield SummaryFieldSet(self, name, desc, size=size) elif name_prefix == "word_doc": yield WordDocumentFieldSet(self, name, desc, size=size) elif property_index == 1: yield CompObj(self, "comp_obj", desc, size=size) else: yield RawBytes(self, name, size//8, desc) if block is None: break first = block previous = block size = ole2.sector_size def seekSBlock(self, block): self.seekBit(block * self.ole2.ss_size) class FragmentGroup: def __init__(self, parser): self.items = [] self.parser = parser def add(self, item): self.items.append(item) def createInputStream(self): # FIXME: Use lazy stream creation data = [] for item in self.items: data.append( item["rawdata"].value ) data = "".join(data) # FIXME: Use smarter code to send arguments args = {"ole2": self.items[0].root} tags = {"class": self.parser, "args": args} tags = tags.iteritems() return StringInputStream(data, "", tags=tags) class CustomFragment(FieldSet): def __init__(self, parent, name, size, parser, description=None, group=None): FieldSet.__init__(self, parent, name, description, size=size) if not group: group = FragmentGroup(parser) self.group = group self.group.add(self) def createFields(self): yield RawBytes(self, "rawdata", self.size//8) def _createInputStream(self, **args): return self.group.createInputStream()