Implement an NPOIFS document reader, and add tests which use it
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053279 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5ed63966c4
commit
029850e5cf
@ -120,8 +120,7 @@ public class DirectoryNode
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
childNode = new DocumentNode(( DocumentProperty ) child,
|
childNode = new DocumentNode((DocumentProperty) child, this);
|
||||||
this);
|
|
||||||
}
|
}
|
||||||
_entries.add(childNode);
|
_entries.add(childNode);
|
||||||
_byname.put(childNode.getName(), childNode);
|
_byname.put(childNode.getName(), childNode);
|
||||||
|
@ -67,11 +67,16 @@ public final class DocumentInputStream extends InputStream implements LittleEndi
|
|||||||
if (!(document instanceof DocumentNode)) {
|
if (!(document instanceof DocumentNode)) {
|
||||||
throw new IOException("Cannot open internal document storage");
|
throw new IOException("Cannot open internal document storage");
|
||||||
}
|
}
|
||||||
|
DocumentNode documentNode = (DocumentNode)document;
|
||||||
|
if(documentNode.getDocument() == null) {
|
||||||
|
throw new IOException("Cannot open internal document storage");
|
||||||
|
}
|
||||||
|
|
||||||
_current_offset = 0;
|
_current_offset = 0;
|
||||||
_marked_offset = 0;
|
_marked_offset = 0;
|
||||||
_document_size = document.getSize();
|
_document_size = document.getSize();
|
||||||
_closed = false;
|
_closed = false;
|
||||||
_document = ((DocumentNode) document).getDocument();
|
_document = documentNode.getDocument();
|
||||||
_currentBlock = getDataInputBlock(0);
|
_currentBlock = getDataInputBlock(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,316 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.poifs.filesystem;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.poi.poifs.property.DocumentProperty;
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.LittleEndianInput;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class provides methods to read a DocumentEntry managed by a
|
||||||
|
* {@link POIFSFileSystem} instance.
|
||||||
|
*
|
||||||
|
* @author Marc Johnson (mjohnson at apache dot org)
|
||||||
|
*/
|
||||||
|
public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
|
||||||
|
/** returned by read operations if we're at end of document */
|
||||||
|
private static final int EOF = -1;
|
||||||
|
|
||||||
|
private static final int SIZE_SHORT = 2;
|
||||||
|
private static final int SIZE_INT = 4;
|
||||||
|
private static final int SIZE_LONG = 8;
|
||||||
|
|
||||||
|
/** current offset into the Document */
|
||||||
|
private int _current_offset;
|
||||||
|
/** current block count */
|
||||||
|
private int _current_block_count;
|
||||||
|
|
||||||
|
/** current marked offset into the Document (used by mark and reset) */
|
||||||
|
private int _marked_offset;
|
||||||
|
/** and the block count for it */
|
||||||
|
private int _marked_offset_count;
|
||||||
|
|
||||||
|
/** the Document's size */
|
||||||
|
private int _document_size;
|
||||||
|
|
||||||
|
/** have we been closed? */
|
||||||
|
private boolean _closed;
|
||||||
|
|
||||||
|
/** the actual Document */
|
||||||
|
private NPOIFSDocument _document;
|
||||||
|
|
||||||
|
private Iterator<ByteBuffer> _data;
|
||||||
|
private ByteBuffer _buffer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an InputStream from the specified DocumentEntry
|
||||||
|
*
|
||||||
|
* @param document the DocumentEntry to be read
|
||||||
|
*
|
||||||
|
* @exception IOException if the DocumentEntry cannot be opened (like, maybe it has
|
||||||
|
* been deleted?)
|
||||||
|
*/
|
||||||
|
public NDocumentInputStream(DocumentEntry document) throws IOException {
|
||||||
|
if (!(document instanceof DocumentNode)) {
|
||||||
|
throw new IOException("Cannot open internal document storage");
|
||||||
|
}
|
||||||
|
_current_offset = 0;
|
||||||
|
_current_block_count = 0;
|
||||||
|
_marked_offset = 0;
|
||||||
|
_marked_offset_count = 0;
|
||||||
|
_document_size = document.getSize();
|
||||||
|
_closed = false;
|
||||||
|
|
||||||
|
DocumentNode doc = (DocumentNode)document;
|
||||||
|
DocumentProperty property = (DocumentProperty)doc.getProperty();
|
||||||
|
_document = new NPOIFSDocument(
|
||||||
|
property,
|
||||||
|
((DirectoryNode)doc.getParent()).getNFileSystem()
|
||||||
|
);
|
||||||
|
_data = _document.getBlockIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an InputStream from the specified Document
|
||||||
|
*
|
||||||
|
* @param document the Document to be read
|
||||||
|
*/
|
||||||
|
public NDocumentInputStream(NPOIFSDocument document) {
|
||||||
|
_current_offset = 0;
|
||||||
|
_current_block_count = 0;
|
||||||
|
_marked_offset = 0;
|
||||||
|
_marked_offset_count = 0;
|
||||||
|
_document_size = document.getSize();
|
||||||
|
_closed = false;
|
||||||
|
_document = document;
|
||||||
|
_data = _document.getBlockIterator();
|
||||||
|
}
|
||||||
|
|
||||||
|
public int available() {
|
||||||
|
if (_closed) {
|
||||||
|
throw new IllegalStateException("cannot perform requested operation on a closed stream");
|
||||||
|
}
|
||||||
|
return _document_size - _current_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void close() {
|
||||||
|
_closed = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void mark(int ignoredReadlimit) {
|
||||||
|
_marked_offset = _current_offset;
|
||||||
|
_marked_offset_count = _current_block_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests if this input stream supports the mark and reset methods.
|
||||||
|
*
|
||||||
|
* @return <code>true</code> always
|
||||||
|
*/
|
||||||
|
public boolean markSupported() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int read() throws IOException {
|
||||||
|
dieIfClosed();
|
||||||
|
if (atEOD()) {
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
|
byte[] b = new byte[1];
|
||||||
|
int result = read(b, 0, 1);
|
||||||
|
if(result >= 0) {
|
||||||
|
if(b[0] < 0) {
|
||||||
|
return b[0]+256;
|
||||||
|
}
|
||||||
|
return b[0];
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int read(byte[] b) throws IOException {
|
||||||
|
return read(b, 0, b.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int read(byte[] b, int off, int len) throws IOException {
|
||||||
|
dieIfClosed();
|
||||||
|
if (b == null) {
|
||||||
|
throw new IllegalArgumentException("buffer must not be null");
|
||||||
|
}
|
||||||
|
if (off < 0 || len < 0 || b.length < off + len) {
|
||||||
|
throw new IndexOutOfBoundsException("can't read past buffer boundaries");
|
||||||
|
}
|
||||||
|
if (len == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
if (atEOD()) {
|
||||||
|
return EOF;
|
||||||
|
}
|
||||||
|
int limit = Math.min(available(), len);
|
||||||
|
readFully(b, off, limit);
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Repositions this stream to the position at the time the mark() method was
|
||||||
|
* last called on this input stream. If mark() has not been called this
|
||||||
|
* method repositions the stream to its beginning.
|
||||||
|
*/
|
||||||
|
public void reset() {
|
||||||
|
// Special case for reset to the start
|
||||||
|
if(_marked_offset == 0 && _marked_offset_count == 0) {
|
||||||
|
_current_block_count = _marked_offset_count;
|
||||||
|
_current_offset = _marked_offset;
|
||||||
|
_data = _document.getBlockIterator();
|
||||||
|
_buffer = null;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start again, then wind on to the required block
|
||||||
|
_data = _document.getBlockIterator();
|
||||||
|
_current_offset = 0;
|
||||||
|
for(int i=0; i<_marked_offset_count; i++) {
|
||||||
|
_buffer = _data.next();
|
||||||
|
_current_offset += _buffer.remaining();
|
||||||
|
}
|
||||||
|
|
||||||
|
_current_block_count = _marked_offset_count;
|
||||||
|
|
||||||
|
// Do we need to position within it?
|
||||||
|
if(_current_offset != _marked_offset) {
|
||||||
|
// Grab the right block
|
||||||
|
_buffer = _data.next();
|
||||||
|
_current_block_count++;
|
||||||
|
|
||||||
|
// Skip to the right place in it
|
||||||
|
_buffer.position(_marked_offset - _current_offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
// All done
|
||||||
|
_current_offset = _marked_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long skip(long n) throws IOException {
|
||||||
|
dieIfClosed();
|
||||||
|
if (n < 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int new_offset = _current_offset + (int) n;
|
||||||
|
|
||||||
|
if (new_offset < _current_offset) {
|
||||||
|
// wrap around in converting a VERY large long to an int
|
||||||
|
new_offset = _document_size;
|
||||||
|
} else if (new_offset > _document_size) {
|
||||||
|
new_offset = _document_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
long rval = new_offset - _current_offset;
|
||||||
|
|
||||||
|
// TODO Do this better
|
||||||
|
byte[] skip = new byte[(int)rval];
|
||||||
|
readFully(skip);
|
||||||
|
return rval;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void dieIfClosed() throws IOException {
|
||||||
|
if (_closed) {
|
||||||
|
throw new IOException("cannot perform requested operation on a closed stream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean atEOD() {
|
||||||
|
return _current_offset == _document_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkAvaliable(int requestedSize) {
|
||||||
|
if (_closed) {
|
||||||
|
throw new IllegalStateException("cannot perform requested operation on a closed stream");
|
||||||
|
}
|
||||||
|
if (requestedSize > _document_size - _current_offset) {
|
||||||
|
throw new RuntimeException("Buffer underrun - requested " + requestedSize
|
||||||
|
+ " bytes but " + (_document_size - _current_offset) + " was available");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public byte readByte() {
|
||||||
|
return (byte) readUByte();
|
||||||
|
}
|
||||||
|
|
||||||
|
public double readDouble() {
|
||||||
|
return Double.longBitsToDouble(readLong());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void readFully(byte[] buf) {
|
||||||
|
readFully(buf, 0, buf.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
public short readShort() {
|
||||||
|
return (short) readUShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void readFully(byte[] buf, int off, int len) {
|
||||||
|
checkAvaliable(len);
|
||||||
|
|
||||||
|
int read = 0;
|
||||||
|
while(read < len) {
|
||||||
|
if(_buffer == null || _buffer.remaining() == 0) {
|
||||||
|
_current_block_count++;
|
||||||
|
_buffer = _data.next();
|
||||||
|
}
|
||||||
|
|
||||||
|
int limit = Math.min(len-read, _buffer.remaining());
|
||||||
|
_buffer.get(buf, off+read, limit);
|
||||||
|
_current_offset += limit;
|
||||||
|
read += limit;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public long readLong() {
|
||||||
|
checkAvaliable(SIZE_LONG);
|
||||||
|
byte[] data = new byte[SIZE_LONG];
|
||||||
|
readFully(data, 0, SIZE_LONG);
|
||||||
|
return LittleEndian.getLong(data, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int readInt() {
|
||||||
|
checkAvaliable(SIZE_INT);
|
||||||
|
byte[] data = new byte[SIZE_INT];
|
||||||
|
readFully(data, 0, SIZE_INT);
|
||||||
|
return LittleEndian.getInt(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int readUShort() {
|
||||||
|
checkAvaliable(SIZE_SHORT);
|
||||||
|
byte[] data = new byte[SIZE_SHORT];
|
||||||
|
readFully(data, 0, SIZE_SHORT);
|
||||||
|
return LittleEndian.getShort(data);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int readUByte() {
|
||||||
|
checkAvaliable(1);
|
||||||
|
byte[] data = new byte[1];
|
||||||
|
readFully(data, 0, 1);
|
||||||
|
if(data[0] >= 0)
|
||||||
|
return data[0];
|
||||||
|
return data[0] + 256;
|
||||||
|
}
|
||||||
|
}
|
@ -101,6 +101,14 @@ public final class NPOIFSDocument implements POIFSViewable {
|
|||||||
_property.setStartBlock(_stream.getStartBlock());
|
_property.setStartBlock(_stream.getStartBlock());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * @return the current value of this document's <code>_block_size</code> field
 */
int getDocumentBlockSize() {
|
||||||
|
return _block_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * @return the iterator obtained from the underlying stream's
 *         <code>getBlockIterator()</code>
 */
Iterator<ByteBuffer> getBlockIterator() {
|
||||||
|
return _stream.getBlockIterator();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return size of the document
|
* @return size of the document
|
||||||
*/
|
*/
|
||||||
|
@ -70,13 +70,16 @@ public class NPOIFSMiniStore extends BlockStore
|
|||||||
}
|
}
|
||||||
ByteBuffer dataBlock = it.next();
|
ByteBuffer dataBlock = it.next();
|
||||||
|
|
||||||
// Skip forward to the right place
|
// Our blocks are small, so duplicating it is fine
|
||||||
|
byte[] data = new byte[POIFSConstants.SMALL_BLOCK_SIZE];
|
||||||
dataBlock.position(
|
dataBlock.position(
|
||||||
dataBlock.position() + bigBlockOffset
|
dataBlock.position() + bigBlockOffset
|
||||||
);
|
);
|
||||||
|
dataBlock.get(data, 0, data.length);
|
||||||
|
|
||||||
// All done
|
// Return a ByteBuffer on this
|
||||||
return dataBlock;
|
ByteBuffer miniBuffer = ByteBuffer.wrap(data);
|
||||||
|
return miniBuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,6 +23,10 @@ import java.util.Iterator;
|
|||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
|
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||||
|
import org.apache.poi.hpsf.PropertySet;
|
||||||
|
import org.apache.poi.hpsf.PropertySetFactory;
|
||||||
|
import org.apache.poi.hpsf.SummaryInformation;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.poifs.property.NPropertyTable;
|
import org.apache.poi.poifs.property.NPropertyTable;
|
||||||
import org.apache.poi.poifs.property.Property;
|
import org.apache.poi.poifs.property.Property;
|
||||||
@ -446,12 +450,25 @@ public final class TestNPOIFSFileSystem extends TestCase {
|
|||||||
NPOIFSFileSystem fsD = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi"));
|
NPOIFSFileSystem fsD = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize4096.zvi"));
|
||||||
for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB,fsC,fsD}) {
|
for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB,fsC,fsD}) {
|
||||||
DirectoryEntry root = fs.getRoot();
|
DirectoryEntry root = fs.getRoot();
|
||||||
Entry dsi = root.getEntry("\u0005DocumentSummaryInformation");
|
Entry si = root.getEntry("\u0005SummaryInformation");
|
||||||
|
|
||||||
assertEquals(true, dsi.isDocumentEntry());
|
assertEquals(true, si.isDocumentEntry());
|
||||||
DocumentEntry doc = (DocumentEntry)dsi;
|
DocumentNode doc = (DocumentNode)si;
|
||||||
|
|
||||||
|
// Check we can read it
|
||||||
|
NDocumentInputStream inp = new NDocumentInputStream(doc);
|
||||||
|
byte[] contents = new byte[doc.getSize()];
|
||||||
|
assertEquals(doc.getSize(), inp.read(contents));
|
||||||
|
|
||||||
|
// Now try to build the property set
|
||||||
|
inp = new NDocumentInputStream(doc);
|
||||||
|
PropertySet ps = PropertySetFactory.create(inp);
|
||||||
|
SummaryInformation inf = (SummaryInformation)ps;
|
||||||
|
|
||||||
|
// Check some bits in it
|
||||||
|
assertEquals(null, inf.getApplicationName());
|
||||||
|
assertEquals(null, inf.getAuthor());
|
||||||
|
assertEquals(null, inf.getSubject());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user