From 029850e5cf252874891c670c440affbbc55cc088 Mon Sep 17 00:00:00 2001
From: Nick Burch
Date: Tue, 28 Dec 2010 08:52:50 +0000
Subject: [PATCH] Implement a NPOIFS document reader, and add tests which use it

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053279 13f79535-47bb-0310-9956-ffa450edef68
---
 .../poi/poifs/filesystem/DirectoryNode.java   |   3 +-
 .../poifs/filesystem/DocumentInputStream.java |   7 +-
 .../filesystem/NDocumentInputStream.java      | 316 ++++++++++++++++++
 .../poi/poifs/filesystem/NPOIFSDocument.java  |   8 +
 .../poi/poifs/filesystem/NPOIFSMiniStore.java |   9 +-
 .../filesystem/TestNPOIFSFileSystem.java      |  23 +-
 6 files changed, 357 insertions(+), 9 deletions(-)
 create mode 100644 src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java

diff --git a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java
index 92261929a..fe15102b2 100644
--- a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java
+++ b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java
@@ -120,8 +120,7 @@ public class DirectoryNode
                 }
                 else
                 {
-                    childNode = new DocumentNode(( DocumentProperty ) child,
-                                                 this);
+                    childNode = new DocumentNode((DocumentProperty) child, this);
                 }
                 _entries.add(childNode);
                 _byname.put(childNode.getName(), childNode);
diff --git a/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java b/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
index ecd110a32..577f3d93b 100644
--- a/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
+++ b/src/java/org/apache/poi/poifs/filesystem/DocumentInputStream.java
@@ -67,11 +67,16 @@ public final class DocumentInputStream extends InputStream implements LittleEndi
         if (!(document instanceof DocumentNode)) {
             throw new IOException("Cannot open internal document storage");
         }
+        DocumentNode documentNode = (DocumentNode)document;
+        if(documentNode.getDocument() == null) {
+            throw new IOException("Cannot open internal document storage");
+        }
+
         _current_offset = 0;
         _marked_offset = 0;
         _document_size = document.getSize();
         _closed = false;
-        _document = ((DocumentNode) document).getDocument();
+        _document = documentNode.getDocument();
         _currentBlock = getDataInputBlock(0);
     }
 
diff --git a/src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java b/src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java
new file mode 100644
index 000000000..52c06f808
--- /dev/null
+++ b/src/java/org/apache/poi/poifs/filesystem/NDocumentInputStream.java
@@ -0,0 +1,316 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.poifs.filesystem;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.util.Iterator;
+
+import org.apache.poi.poifs.property.DocumentProperty;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.util.LittleEndianInput;
+
+/**
+ * This class provides methods to read a DocumentEntry managed by a
+ * {@link NPOIFSFileSystem} instance.
+ *
+ * @author Marc Johnson (mjohnson at apache dot org)
+ */
+public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
+    /** returned by read operations if we're at end of document */
+    private static final int EOF = -1;
+
+    private static final int SIZE_SHORT = 2;
+    private static final int SIZE_INT = 4;
+    private static final int SIZE_LONG = 8;
+
+    /** current offset into the Document */
+    private int _current_offset;
+    /** current block count */
+    private int _current_block_count;
+
+    /** current marked offset into the Document (used by mark and reset) */
+    private int _marked_offset;
+    /** and the block count for it */
+    private int _marked_offset_count;
+
+    /** the Document's size */
+    private int _document_size;
+
+    /** have we been closed? */
+    private boolean _closed;
+
+    /** the actual Document */
+    private NPOIFSDocument _document;
+
+    private Iterator<ByteBuffer> _data;
+    private ByteBuffer _buffer;
+
+    /**
+     * Create an InputStream from the specified DocumentEntry
+     *
+     * @param document the DocumentEntry to be read
+     *
+     * @exception IOException if the DocumentEntry cannot be opened (like, maybe it has
+     *                been deleted?)
+     */
+    public NDocumentInputStream(DocumentEntry document) throws IOException {
+        if (!(document instanceof DocumentNode)) {
+            throw new IOException("Cannot open internal document storage");
+        }
+        _current_offset = 0;
+        _current_block_count = 0;
+        _marked_offset = 0;
+        _marked_offset_count = 0;
+        _document_size = document.getSize();
+        _closed = false;
+
+        DocumentNode doc = (DocumentNode)document;
+        DocumentProperty property = (DocumentProperty)doc.getProperty();
+        _document = new NPOIFSDocument(
+                property,
+                ((DirectoryNode)doc.getParent()).getNFileSystem()
+        );
+        _data = _document.getBlockIterator();
+    }
+
+    /**
+     * Create an InputStream from the specified Document
+     *
+     * @param document the Document to be read
+     */
+    public NDocumentInputStream(NPOIFSDocument document) {
+        _current_offset = 0;
+        _current_block_count = 0;
+        _marked_offset = 0;
+        _marked_offset_count = 0;
+        _document_size = document.getSize();
+        _closed = false;
+        _document = document;
+        _data = _document.getBlockIterator();
+    }
+
+    public int available() {
+        if (_closed) {
+            throw new IllegalStateException("cannot perform requested operation on a closed stream");
+        }
+        return _document_size - _current_offset;
+    }
+
+    public void close() {
+        _closed = true;
+    }
+
+    public void mark(int ignoredReadlimit) {
+        _marked_offset = _current_offset;
+        _marked_offset_count = Math.max(0, _current_block_count - 1); // blocks wholly before the current one
+    }
+
+    /**
+     * Tests if this input stream supports the mark and reset methods.
+     *
+     * @return true always
+     */
+    public boolean markSupported() {
+        return true;
+    }
+
+    public int read() throws IOException {
+        dieIfClosed();
+        if (atEOD()) {
+            return EOF;
+        }
+        byte[] b = new byte[1];
+        int result = read(b, 0, 1);
+        if(result >= 0) {
+            if(b[0] < 0) {
+                return b[0]+256;
+            }
+            return b[0];
+        }
+        return result;
+    }
+
+    public int read(byte[] b) throws IOException {
+        return read(b, 0, b.length);
+    }
+
+    public int read(byte[] b, int off, int len) throws IOException {
+        dieIfClosed();
+        if (b == null) {
+            throw new IllegalArgumentException("buffer must not be null");
+        }
+        if (off < 0 || len < 0 || b.length < off + len) {
+            throw new IndexOutOfBoundsException("can't read past buffer boundaries");
+        }
+        if (len == 0) {
+            return 0;
+        }
+        if (atEOD()) {
+            return EOF;
+        }
+        int limit = Math.min(available(), len);
+        readFully(b, off, limit);
+        return limit;
+    }
+
+    /**
+     * Repositions this stream to the position at the time the mark() method was
+     * last called on this input stream. If mark() has not been called this
+     * method repositions the stream to its beginning.
+     */
+    public void reset() {
+        // Special case for reset to the start
+        if(_marked_offset == 0 && _marked_offset_count == 0) {
+            _current_block_count = _marked_offset_count;
+            _current_offset = _marked_offset;
+            _data = _document.getBlockIterator();
+            _buffer = null;
+            return;
+        }
+
+        // Start again, then wind on to the required block
+        _data = _document.getBlockIterator();
+        _current_offset = 0;
+        for(int i=0; i<_marked_offset_count; i++) {
+            _buffer = _data.next();
+            _current_offset += _buffer.remaining();
+        }
+
+        _current_block_count = _marked_offset_count;
+
+        // Do we need to position within it?
+        if(_current_offset != _marked_offset) {
+            // Grab the right block
+            _buffer = _data.next();
+            _current_block_count++;
+
+            // Skip to the right place in it, relative to where the block's data starts
+            _buffer.position(_buffer.position() + (_marked_offset - _current_offset));
+        }
+
+        // All done
+        _current_offset = _marked_offset;
+    }
+
+    public long skip(long n) throws IOException {
+        dieIfClosed();
+        if (n < 0) {
+            return 0;
+        }
+        int new_offset = _current_offset + (int) n;
+
+        if (new_offset < _current_offset) {
+            // wrap around in converting a VERY large long to an int
+            new_offset = _document_size;
+        } else if (new_offset > _document_size) {
+            new_offset = _document_size;
+        }
+
+        long rval = new_offset - _current_offset;
+
+        // TODO Do this better
+        byte[] skip = new byte[(int)rval];
+        readFully(skip);
+        return rval;
+    }
+
+    private void dieIfClosed() throws IOException {
+        if (_closed) {
+            throw new IOException("cannot perform requested operation on a closed stream");
+        }
+    }
+
+    private boolean atEOD() {
+        return _current_offset == _document_size;
+    }
+
+    private void checkAvaliable(int requestedSize) {
+        if (_closed) {
+            throw new IllegalStateException("cannot perform requested operation on a closed stream");
+        }
+        if (requestedSize > _document_size - _current_offset) {
+            throw new RuntimeException("Buffer underrun - requested " + requestedSize
+                    + " bytes but " + (_document_size - _current_offset) + " was available");
+        }
+    }
+
+    public byte readByte() {
+        return (byte) readUByte();
+    }
+
+    public double readDouble() {
+        return Double.longBitsToDouble(readLong());
+    }
+
+    public void readFully(byte[] buf) {
+        readFully(buf, 0, buf.length);
+    }
+
+    public short readShort() {
+        return (short) readUShort();
+    }
+
+    public void readFully(byte[] buf, int off, int len) {
+        checkAvaliable(len);
+
+        int read = 0;
+        while(read < len) {
+            if(_buffer == null || _buffer.remaining() == 0) {
+                _current_block_count++;
+                _buffer = _data.next();
+            }
+
+            int limit = Math.min(len-read, _buffer.remaining());
+            _buffer.get(buf, off+read, limit);
+            _current_offset += limit;
+            read += limit;
+        }
+    }
+
+    public long readLong() {
+        checkAvaliable(SIZE_LONG);
+        byte[] data = new byte[SIZE_LONG];
+        readFully(data, 0, SIZE_LONG);
+        return LittleEndian.getLong(data, 0);
+    }
+
+    public int readInt() {
+        checkAvaliable(SIZE_INT);
+        byte[] data = new byte[SIZE_INT];
+        readFully(data, 0, SIZE_INT);
+        return LittleEndian.getInt(data);
+    }
+
+    public int readUShort() {
+        checkAvaliable(SIZE_SHORT);
+        byte[] data = new byte[SIZE_SHORT];
+        readFully(data, 0, SIZE_SHORT);
+        return LittleEndian.getUShort(data); // unsigned: getShort() would go negative above 0x7FFF
+    }
+
+    public int readUByte() {
+        checkAvaliable(1);
+        byte[] data = new byte[1];
+        readFully(data, 0, 1);
+        if(data[0] >= 0)
+            return data[0];
+        return data[0] + 256;
+    }
+}
diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java
index 08c66b677..09536d4ad 100644
--- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java
+++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java
@@ -100,6 +100,14 @@ public final class NPOIFSDocument implements POIFSViewable {
         this._property = new DocumentProperty(name, contents.length);
         _property.setStartBlock(_stream.getStartBlock());
     }
+
+    int getDocumentBlockSize() {
+        return _block_size;
+    }
+
+    Iterator<ByteBuffer> getBlockIterator() {
+        return _stream.getBlockIterator();
+    }
 
     /**
      * @return size of the document
diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSMiniStore.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSMiniStore.java
index 7323045e5..156b73d9a 100644
--- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSMiniStore.java
+++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSMiniStore.java
@@ -70,13 +70,16 @@ public class NPOIFSMiniStore extends BlockStore
        }
        ByteBuffer dataBlock = it.next();
 
-       // Skip forward to the right place
+       // Our blocks are small, so duplicating it is fine
+       byte[] data = new byte[POIFSConstants.SMALL_BLOCK_SIZE];
        dataBlock.position(
              dataBlock.position() + bigBlockOffset
        );
+       dataBlock.get(data, 0, data.length);
 
-       // All done
-       return dataBlock;
+       // Return a ByteBuffer on this
+       ByteBuffer miniBuffer = ByteBuffer.wrap(data);
+       return miniBuffer;
     }
 
     /**
diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java
index 17e2694d0..2848c8be2 100644
--- a/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java
+++ b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java
@@ -23,6 +23,10 @@ import java.util.Iterator;
 import junit.framework.TestCase;
 
 import org.apache.poi.POIDataSamples;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.PropertySetFactory;
+import org.apache.poi.hpsf.SummaryInformation;
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.poifs.property.NPropertyTable;
 import org.apache.poi.poifs.property.Property;
@@ -446,12 +450,25 @@ public final class TestNPOIFSFileSystem extends TestCase {
       NPOIFSFileSystem fsD = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi"));
       for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB,fsC,fsD}) {
          DirectoryEntry root = fs.getRoot();
-         Entry dsi = root.getEntry("\u0005DocumentSummaryInformation");
+         Entry si = root.getEntry("\u0005SummaryInformation");
 
-         assertEquals(true, dsi.isDocumentEntry());
-         DocumentEntry doc = (DocumentEntry)dsi;
+         assertEquals(true, si.isDocumentEntry());
+         DocumentNode doc = (DocumentNode)si;
 
+         // Check we can read it
+         NDocumentInputStream inp = new NDocumentInputStream(doc);
+         byte[] contents = new byte[doc.getSize()];
+         assertEquals(doc.getSize(), inp.read(contents));
 
+         // Now try to build the property set
+         inp = new NDocumentInputStream(doc);
+         PropertySet ps = PropertySetFactory.create(inp);
+         SummaryInformation inf = (SummaryInformation)ps;
+
+         // Check some bits in it
+         assertEquals(null, inf.getApplicationName());
+         assertEquals(null, inf.getAuthor());
+         assertEquals(null, inf.getSubject());
       }
    }
 