Implement a NPOIFS document reader, and add tests which use it
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053279 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5ed63966c4
commit
029850e5cf
@ -120,8 +120,7 @@ public class DirectoryNode
|
||||
}
|
||||
else
|
||||
{
|
||||
childNode = new DocumentNode(( DocumentProperty ) child,
|
||||
this);
|
||||
childNode = new DocumentNode((DocumentProperty) child, this);
|
||||
}
|
||||
_entries.add(childNode);
|
||||
_byname.put(childNode.getName(), childNode);
|
||||
|
@ -67,11 +67,16 @@ public final class DocumentInputStream extends InputStream implements LittleEndi
|
||||
if (!(document instanceof DocumentNode)) {
|
||||
throw new IOException("Cannot open internal document storage");
|
||||
}
|
||||
DocumentNode documentNode = (DocumentNode)document;
|
||||
if(documentNode.getDocument() == null) {
|
||||
throw new IOException("Cannot open internal document storage");
|
||||
}
|
||||
|
||||
_current_offset = 0;
|
||||
_marked_offset = 0;
|
||||
_document_size = document.getSize();
|
||||
_closed = false;
|
||||
_document = ((DocumentNode) document).getDocument();
|
||||
_document = documentNode.getDocument();
|
||||
_currentBlock = getDataInputBlock(0);
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,316 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.poifs.filesystem;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.poifs.property.DocumentProperty;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianInput;
|
||||
|
||||
/**
|
||||
* This class provides methods to read a DocumentEntry managed by a
|
||||
* {@link POIFSFileSystem} instance.
|
||||
*
|
||||
* @author Marc Johnson (mjohnson at apache dot org)
|
||||
*/
|
||||
public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
|
||||
/** returned by read operations if we're at end of document */
|
||||
private static final int EOF = -1;
|
||||
|
||||
private static final int SIZE_SHORT = 2;
|
||||
private static final int SIZE_INT = 4;
|
||||
private static final int SIZE_LONG = 8;
|
||||
|
||||
/** current offset into the Document */
|
||||
private int _current_offset;
|
||||
/** current block count */
|
||||
private int _current_block_count;
|
||||
|
||||
/** current marked offset into the Document (used by mark and reset) */
|
||||
private int _marked_offset;
|
||||
/** and the block count for it */
|
||||
private int _marked_offset_count;
|
||||
|
||||
/** the Document's size */
|
||||
private int _document_size;
|
||||
|
||||
/** have we been closed? */
|
||||
private boolean _closed;
|
||||
|
||||
/** the actual Document */
|
||||
private NPOIFSDocument _document;
|
||||
|
||||
private Iterator<ByteBuffer> _data;
|
||||
private ByteBuffer _buffer;
|
||||
|
||||
/**
|
||||
* Create an InputStream from the specified DocumentEntry
|
||||
*
|
||||
* @param document the DocumentEntry to be read
|
||||
*
|
||||
* @exception IOException if the DocumentEntry cannot be opened (like, maybe it has
|
||||
* been deleted?)
|
||||
*/
|
||||
public NDocumentInputStream(DocumentEntry document) throws IOException {
|
||||
if (!(document instanceof DocumentNode)) {
|
||||
throw new IOException("Cannot open internal document storage");
|
||||
}
|
||||
_current_offset = 0;
|
||||
_current_block_count = 0;
|
||||
_marked_offset = 0;
|
||||
_marked_offset_count = 0;
|
||||
_document_size = document.getSize();
|
||||
_closed = false;
|
||||
|
||||
DocumentNode doc = (DocumentNode)document;
|
||||
DocumentProperty property = (DocumentProperty)doc.getProperty();
|
||||
_document = new NPOIFSDocument(
|
||||
property,
|
||||
((DirectoryNode)doc.getParent()).getNFileSystem()
|
||||
);
|
||||
_data = _document.getBlockIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create an InputStream from the specified Document
|
||||
*
|
||||
* @param document the Document to be read
|
||||
*/
|
||||
public NDocumentInputStream(NPOIFSDocument document) {
|
||||
_current_offset = 0;
|
||||
_current_block_count = 0;
|
||||
_marked_offset = 0;
|
||||
_marked_offset_count = 0;
|
||||
_document_size = document.getSize();
|
||||
_closed = false;
|
||||
_document = document;
|
||||
_data = _document.getBlockIterator();
|
||||
}
|
||||
|
||||
public int available() {
|
||||
if (_closed) {
|
||||
throw new IllegalStateException("cannot perform requested operation on a closed stream");
|
||||
}
|
||||
return _document_size - _current_offset;
|
||||
}
|
||||
|
||||
public void close() {
|
||||
_closed = true;
|
||||
}
|
||||
|
||||
public void mark(int ignoredReadlimit) {
|
||||
_marked_offset = _current_offset;
|
||||
_marked_offset_count = _current_block_count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests if this input stream supports the mark and reset methods.
|
||||
*
|
||||
* @return <code>true</code> always
|
||||
*/
|
||||
public boolean markSupported() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public int read() throws IOException {
|
||||
dieIfClosed();
|
||||
if (atEOD()) {
|
||||
return EOF;
|
||||
}
|
||||
byte[] b = new byte[1];
|
||||
int result = read(b, 0, 1);
|
||||
if(result >= 0) {
|
||||
if(b[0] < 0) {
|
||||
return b[0]+256;
|
||||
}
|
||||
return b[0];
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public int read(byte[] b) throws IOException {
|
||||
return read(b, 0, b.length);
|
||||
}
|
||||
|
||||
public int read(byte[] b, int off, int len) throws IOException {
|
||||
dieIfClosed();
|
||||
if (b == null) {
|
||||
throw new IllegalArgumentException("buffer must not be null");
|
||||
}
|
||||
if (off < 0 || len < 0 || b.length < off + len) {
|
||||
throw new IndexOutOfBoundsException("can't read past buffer boundaries");
|
||||
}
|
||||
if (len == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (atEOD()) {
|
||||
return EOF;
|
||||
}
|
||||
int limit = Math.min(available(), len);
|
||||
readFully(b, off, limit);
|
||||
return limit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Repositions this stream to the position at the time the mark() method was
|
||||
* last called on this input stream. If mark() has not been called this
|
||||
* method repositions the stream to its beginning.
|
||||
*/
|
||||
public void reset() {
|
||||
// Special case for reset to the start
|
||||
if(_marked_offset == 0 && _marked_offset_count == 0) {
|
||||
_current_block_count = _marked_offset_count;
|
||||
_current_offset = _marked_offset;
|
||||
_data = _document.getBlockIterator();
|
||||
_buffer = null;
|
||||
return;
|
||||
}
|
||||
|
||||
// Start again, then wind on to the required block
|
||||
_data = _document.getBlockIterator();
|
||||
_current_offset = 0;
|
||||
for(int i=0; i<_marked_offset_count; i++) {
|
||||
_buffer = _data.next();
|
||||
_current_offset += _buffer.remaining();
|
||||
}
|
||||
|
||||
_current_block_count = _marked_offset_count;
|
||||
|
||||
// Do we need to position within it?
|
||||
if(_current_offset != _marked_offset) {
|
||||
// Grab the right block
|
||||
_buffer = _data.next();
|
||||
_current_block_count++;
|
||||
|
||||
// Skip to the right place in it
|
||||
_buffer.position(_marked_offset - _current_offset);
|
||||
}
|
||||
|
||||
// All done
|
||||
_current_offset = _marked_offset;
|
||||
}
|
||||
|
||||
public long skip(long n) throws IOException {
|
||||
dieIfClosed();
|
||||
if (n < 0) {
|
||||
return 0;
|
||||
}
|
||||
int new_offset = _current_offset + (int) n;
|
||||
|
||||
if (new_offset < _current_offset) {
|
||||
// wrap around in converting a VERY large long to an int
|
||||
new_offset = _document_size;
|
||||
} else if (new_offset > _document_size) {
|
||||
new_offset = _document_size;
|
||||
}
|
||||
|
||||
long rval = new_offset - _current_offset;
|
||||
|
||||
// TODO Do this better
|
||||
byte[] skip = new byte[(int)rval];
|
||||
readFully(skip);
|
||||
return rval;
|
||||
}
|
||||
|
||||
private void dieIfClosed() throws IOException {
|
||||
if (_closed) {
|
||||
throw new IOException("cannot perform requested operation on a closed stream");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean atEOD() {
|
||||
return _current_offset == _document_size;
|
||||
}
|
||||
|
||||
private void checkAvaliable(int requestedSize) {
|
||||
if (_closed) {
|
||||
throw new IllegalStateException("cannot perform requested operation on a closed stream");
|
||||
}
|
||||
if (requestedSize > _document_size - _current_offset) {
|
||||
throw new RuntimeException("Buffer underrun - requested " + requestedSize
|
||||
+ " bytes but " + (_document_size - _current_offset) + " was available");
|
||||
}
|
||||
}
|
||||
|
||||
public byte readByte() {
|
||||
return (byte) readUByte();
|
||||
}
|
||||
|
||||
public double readDouble() {
|
||||
return Double.longBitsToDouble(readLong());
|
||||
}
|
||||
|
||||
public void readFully(byte[] buf) {
|
||||
readFully(buf, 0, buf.length);
|
||||
}
|
||||
|
||||
public short readShort() {
|
||||
return (short) readUShort();
|
||||
}
|
||||
|
||||
public void readFully(byte[] buf, int off, int len) {
|
||||
checkAvaliable(len);
|
||||
|
||||
int read = 0;
|
||||
while(read < len) {
|
||||
if(_buffer == null || _buffer.remaining() == 0) {
|
||||
_current_block_count++;
|
||||
_buffer = _data.next();
|
||||
}
|
||||
|
||||
int limit = Math.min(len-read, _buffer.remaining());
|
||||
_buffer.get(buf, off+read, limit);
|
||||
_current_offset += limit;
|
||||
read += limit;
|
||||
}
|
||||
}
|
||||
|
||||
public long readLong() {
|
||||
checkAvaliable(SIZE_LONG);
|
||||
byte[] data = new byte[SIZE_LONG];
|
||||
readFully(data, 0, SIZE_LONG);
|
||||
return LittleEndian.getLong(data, 0);
|
||||
}
|
||||
|
||||
public int readInt() {
|
||||
checkAvaliable(SIZE_INT);
|
||||
byte[] data = new byte[SIZE_INT];
|
||||
readFully(data, 0, SIZE_INT);
|
||||
return LittleEndian.getInt(data);
|
||||
}
|
||||
|
||||
public int readUShort() {
|
||||
checkAvaliable(SIZE_SHORT);
|
||||
byte[] data = new byte[SIZE_SHORT];
|
||||
readFully(data, 0, SIZE_SHORT);
|
||||
return LittleEndian.getShort(data);
|
||||
}
|
||||
|
||||
public int readUByte() {
|
||||
checkAvaliable(1);
|
||||
byte[] data = new byte[1];
|
||||
readFully(data, 0, 1);
|
||||
if(data[0] >= 0)
|
||||
return data[0];
|
||||
return data[0] + 256;
|
||||
}
|
||||
}
|
@ -101,6 +101,14 @@ public final class NPOIFSDocument implements POIFSViewable {
|
||||
_property.setStartBlock(_stream.getStartBlock());
|
||||
}
|
||||
|
||||
int getDocumentBlockSize() {
|
||||
return _block_size;
|
||||
}
|
||||
|
||||
Iterator<ByteBuffer> getBlockIterator() {
|
||||
return _stream.getBlockIterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return size of the document
|
||||
*/
|
||||
|
@ -70,13 +70,16 @@ public class NPOIFSMiniStore extends BlockStore
|
||||
}
|
||||
ByteBuffer dataBlock = it.next();
|
||||
|
||||
// Skip forward to the right place
|
||||
// Our blocks are small, so duplicating it is fine
|
||||
byte[] data = new byte[POIFSConstants.SMALL_BLOCK_SIZE];
|
||||
dataBlock.position(
|
||||
dataBlock.position() + bigBlockOffset
|
||||
);
|
||||
dataBlock.get(data, 0, data.length);
|
||||
|
||||
// All done
|
||||
return dataBlock;
|
||||
// Return a ByteBuffer on this
|
||||
ByteBuffer miniBuffer = ByteBuffer.wrap(data);
|
||||
return miniBuffer;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -23,6 +23,10 @@ import java.util.Iterator;
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||
import org.apache.poi.hpsf.PropertySet;
|
||||
import org.apache.poi.hpsf.PropertySetFactory;
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.poifs.property.NPropertyTable;
|
||||
import org.apache.poi.poifs.property.Property;
|
||||
@ -446,12 +450,25 @@ public final class TestNPOIFSFileSystem extends TestCase {
|
||||
NPOIFSFileSystem fsD = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi"));
|
||||
for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB,fsC,fsD}) {
|
||||
DirectoryEntry root = fs.getRoot();
|
||||
Entry dsi = root.getEntry("\u0005DocumentSummaryInformation");
|
||||
Entry si = root.getEntry("\u0005SummaryInformation");
|
||||
|
||||
assertEquals(true, dsi.isDocumentEntry());
|
||||
DocumentEntry doc = (DocumentEntry)dsi;
|
||||
assertEquals(true, si.isDocumentEntry());
|
||||
DocumentNode doc = (DocumentNode)si;
|
||||
|
||||
// Check we can read it
|
||||
NDocumentInputStream inp = new NDocumentInputStream(doc);
|
||||
byte[] contents = new byte[doc.getSize()];
|
||||
assertEquals(doc.getSize(), inp.read(contents));
|
||||
|
||||
// Now try to build the property set
|
||||
inp = new NDocumentInputStream(doc);
|
||||
PropertySet ps = PropertySetFactory.create(inp);
|
||||
SummaryInformation inf = (SummaryInformation)ps;
|
||||
|
||||
// Check some bits in it
|
||||
assertEquals(null, inf.getApplicationName());
|
||||
assertEquals(null, inf.getAuthor());
|
||||
assertEquals(null, inf.getSubject());
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user