Start to support the MiniStream, by extracting out the BlockStore superclass and implementing a SBAT/Stream based mini block version

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053007 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-27 06:50:05 +00:00
parent 17c775625c
commit bf9444f784
6 changed files with 407 additions and 63 deletions

View File

@ -33,6 +33,10 @@ public interface POIFSConstants
public static final POIFSBigBlockSize LARGER_BIG_BLOCK_SIZE_DETAILS =
new POIFSBigBlockSize(LARGER_BIG_BLOCK_SIZE, (short)12);
/** How big a block in the small block stream is. Fixed size */
public static final int SMALL_BLOCK_SIZE = 0x0040;
/** How big a single property is */
public static final int PROPERTY_SIZE = 0x0080;
/**

View File

@ -0,0 +1,105 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex;
/**
 * This abstract class describes a way to read, store, chain
 *  and free a series of blocks (be they Big or Small ones).
 * Subclasses supply the block size and the lookups; the nested
 *  {@link ChainLoopDetector} uses that block size to guard chain walks.
 */
public abstract class BlockStore {
    /**
     * Returns the size of the blocks managed through the block store.
     */
    protected abstract int getBlockStoreBlockSize();

    /**
     * Load the block at the given offset.
     */
    protected abstract ByteBuffer getBlockAt(final int offset) throws IOException;

    /**
     * Extends the file if required to hold blocks up to
     *  the specified offset, and return the block from there.
     */
    protected abstract ByteBuffer createBlockIfNeeded(final int offset) throws IOException;

    /**
     * Returns the BATBlock that handles the specified offset,
     *  and the relative index within it
     */
    protected abstract BATBlockAndIndex getBATBlockAndIndex(final int offset);

    /**
     * Works out what block follows the specified one.
     */
    protected abstract int getNextBlock(final int offset);

    /**
     * Changes the record of what block follows the specified one.
     */
    protected abstract void setNextBlock(final int offset, final int nextBlock);

    /**
     * Finds a free block, and returns its offset.
     * This method will extend the file/stream if needed, and if doing
     *  so, allocate new FAT blocks to address the extra space.
     */
    protected abstract int getFreeBlock() throws IOException;

    /**
     * Creates a Detector for loops in the chain
     */
    protected abstract ChainLoopDetector getChainLoopDetector() throws IOException;

    /**
     * Used to detect if a chain has a loop in it, so
     *  we can bail out with an error rather than
     *  spinning away for ever...
     */
    protected class ChainLoopDetector {
        /** One flag per block that existed when the detector was created */
        private final boolean[] used_blocks;

        /**
         * @param rawSize the size, in bytes, of the data the blocks live in
         */
        protected ChainLoopDetector(long rawSize) {
            final int blockSize = getBlockStoreBlockSize();

            // Round up by hand: dividing two integers truncates before
            //  Math.ceil could ever see a fraction, which would leave a
            //  trailing partial block without a slot
            long numBlocks = rawSize / blockSize;
            if (rawSize % blockSize > 0) {
                numBlocks++;
            }
            used_blocks = new boolean[(int) numBlocks];
        }

        /**
         * Records that the chain being followed has reached the given block.
         *
         * @param offset the block being visited
         * @throws IllegalStateException if the block was already visited
         */
        protected void claim(int offset) {
            if (offset >= used_blocks.length) {
                // They're writing, and have had new blocks requested
                //  for the write to proceed. That means they're into
                //  blocks we've allocated for them, so are safe
                return;
            }

            // Claiming an existing block, ensure there's no loop
            if (used_blocks[offset]) {
                throw new IllegalStateException(
                        "Potential loop detected - Block " + offset +
                        " was already claimed but was just requested again"
                );
            }
            used_blocks[offset] = true;
        }
    }
}

View File

@ -66,7 +66,7 @@ import org.apache.poi.util.POILogger;
* This is the new NIO version
*/
public class NPOIFSFileSystem
public class NPOIFSFileSystem extends BlockStore
implements POIFSViewable
{
private static final POILogger _logger =
@ -79,10 +79,11 @@ public class NPOIFSFileSystem
return new CloseIgnoringInputStream(is);
}
private NPOIFSMiniStore _mini_store;
private NPropertyTable _property_table;
private List<BATBlock> _bat_blocks;
private HeaderBlock _header;
private DirectoryNode _root;
private List<BATBlock> _bat_blocks;
private HeaderBlock _header;
private DirectoryNode _root;
private DataSource _data;
@ -102,6 +103,7 @@ public class NPOIFSFileSystem
{
_header = new HeaderBlock(bigBlockSize);
_property_table = new NPropertyTable(_header);
_mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), new ArrayList<BATBlock>(), _header);
_bat_blocks = new ArrayList<BATBlock>();
_root = null;
}
@ -264,7 +266,7 @@ public class NPOIFSFileSystem
// Each block should only ever be used by one of the
// FAT, XFAT or Property Table. Ensure it does
ChainLoopDetector loopDetector = new ChainLoopDetector();
ChainLoopDetector loopDetector = getChainLoopDetector();
// Read the FAT blocks
for(int fatAt : _header.getBATArray()) {
@ -291,6 +293,20 @@ public class NPOIFSFileSystem
// We're now able to load streams
// Use this to read in the properties
_property_table = new NPropertyTable(_header, this);
// Finally read the Small Stream FAT (SBAT) blocks
BATBlock sfat;
List<BATBlock> sbats = new ArrayList<BATBlock>();
_mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), sbats, _header);
nextAt = _header.getSBATStart();
for(int i=0; i<_header.getSBATCount(); i++) {
loopDetector.claim(nextAt);
ByteBuffer fatData = getBlockAt(nextAt);
sfat = BATBlock.createBATBlock(bigBlockSize, fatData);
sfat.setOurBlockIndex(nextAt);
sbats.add(sfat);
nextAt = getNextBlock(nextAt);
}
}
/**
@ -302,6 +318,24 @@ public class NPOIFSFileSystem
return _data.read(bigBlockSize.getBigBlockSize(), startAt);
}
/**
 * Load the block at the given offset,
 *  extending the file if needed
 *
 * @param offset the big block to fetch, not counting the header block
 * @return the buffer backing that block
 * @throws IOException if the underlying data source fails
 */
@Override
protected ByteBuffer createBlockIfNeeded(final int offset) throws IOException {
    try {
        return getBlockAt(offset);
    } catch(IndexOutOfBoundsException e) {
        final int blockSize = bigBlockSize.getBigBlockSize();

        // The header block doesn't count, so add one.
        // Do the multiplication as a long: as an int it wraps
        //  around once the byte position goes past 2GB
        long startAt = (offset + 1L) * blockSize;

        // Allocate and write
        ByteBuffer buffer = ByteBuffer.allocate(blockSize);
        _data.write(buffer, startAt);

        // Retrieve the properly backed block
        return getBlockAt(offset);
    }
}
/**
* Returns the BATBlock that handles the specified offset,
* and the relative index within it
@ -410,7 +444,12 @@ public class NPOIFSFileSystem
return offset+1;
}
/**
/**
 * Builds a loop detector sized from the current length
 *  of the underlying data.
 */
@Override
protected ChainLoopDetector getChainLoopDetector() throws IOException {
    final long rawSize = _data.size();
    return new ChainLoopDetector(rawSize);
}
/**
* For unit testing only! Returns the underlying
* properties table
*/
@ -418,6 +457,14 @@ public class NPOIFSFileSystem
return _property_table;
}
/**
 * Gives access to the MiniStore, the counterpart of this
 *  class that does the same low level work for the small blocks.
 *
 * @return the mini store held by this filesystem
 */
public NPOIFSMiniStore getMiniStore() {
    return this._mini_store;
}
/**
* Create a new document to be added to the root directory
*
@ -725,36 +772,6 @@ public class NPOIFSFileSystem
}
}
/**
 * Used to detect if a chain has a loop in it, so
 *  we can bail out with an error rather than
 *  spinning away for ever...
 */
protected class ChainLoopDetector {
    /** One flag per block that existed when the detector was created */
    private final boolean[] used_blocks;

    protected ChainLoopDetector() throws IOException {
        final long rawSize = _data.size();
        final int blockSize = bigBlockSize.getBigBlockSize();

        // Round up by hand: dividing two integers truncates before
        //  Math.ceil could ever see a fraction, which would leave a
        //  trailing partial block without a slot
        long numBlocks = rawSize / blockSize;
        if (rawSize % blockSize > 0) {
            numBlocks++;
        }
        used_blocks = new boolean[(int) numBlocks];
    }

    /**
     * Records that the chain being followed has reached the given block.
     *
     * @param offset the block being visited
     * @throws IllegalStateException if the block was already visited
     */
    protected void claim(int offset) {
        if (offset >= used_blocks.length) {
            // They're writing, and have had new blocks requested
            //  for the write to proceed. That means they're into
            //  blocks we've allocated for them, so are safe
            return;
        }

        // Claiming an existing block, ensure there's no loop
        if (used_blocks[offset]) {
            throw new IllegalStateException(
                    "Potential loop detected - Block " + offset +
                    " was already claimed but was just requested again"
            );
        }
        used_blocks[offset] = true;
    }
}
/* ********** START begin implementation of POIFSViewable ********** */
/**
@ -815,11 +832,13 @@ public class NPOIFSFileSystem
return "POIFS FileSystem";
}
/* ********** END begin implementation of POIFSViewable ********** */
/**
* @return The Big Block size, normally 512 bytes, sometimes 4096 bytes
*/
public int getBigBlockSize() {
return bigBlockSize.getBigBlockSize();
return bigBlockSize.getBigBlockSize();
}
/**
* @return The Big Block size, normally 512 bytes, sometimes 4096 bytes
@ -827,7 +846,8 @@ public class NPOIFSFileSystem
public POIFSBigBlockSize getBigBlockSizeDetails() {
return bigBlockSize;
}
/* ********** END begin implementation of POIFSViewable ********** */
// The blocks this store hands out are the big blocks, so report their size
protected int getBlockStoreBlockSize() {
    final int size = this.getBigBlockSize();
    return size;
}
}

View File

@ -0,0 +1,195 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.filesystem;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.List;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.property.RootProperty;
import org.apache.poi.poifs.storage.BATBlock;
import org.apache.poi.poifs.storage.HeaderBlock;
import org.apache.poi.poifs.storage.BATBlock.BATBlockAndIndex;
/**
 * This class handles the MiniStream (small block store)
 *  in the NIO case for {@link NPOIFSFileSystem}.
 * The small blocks live inside a stream of big blocks, and
 *  are chained together through the SBAT blocks.
 */
public class NPOIFSMiniStore extends BlockStore
{
    private final NPOIFSFileSystem _filesystem;
    private final NPOIFSStream     _mini_stream;
    private final List<BATBlock>   _sbat_blocks;
    private final HeaderBlock      _header;
    private final RootProperty     _root;

    /**
     * @param filesystem the filesystem whose big blocks hold the mini stream
     * @param root the root property; its start block and size describe the mini stream
     * @param sbats the SBAT blocks, in chain order. The list is kept by
     *  reference, so blocks added to it later are seen here too
     * @param header the header recording where the SBAT chain starts and how long it is
     */
    protected NPOIFSMiniStore(NPOIFSFileSystem filesystem, RootProperty root,
            List<BATBlock> sbats, HeaderBlock header)
    {
        this._filesystem = filesystem;
        this._sbat_blocks = sbats;
        this._header = header;
        this._root = root;

        // Open the stream at the root's start block. Using the "new stream"
        //  constructor here would leave it with no start block, so none of
        //  the existing mini blocks could ever be reached
        this._mini_stream = new NPOIFSStream(filesystem, root.getStartBlock());
    }

    /**
     * Load the block at the given offset.
     *
     * @param offset the small block to fetch
     * @return a buffer covering just that small block, sharing its
     *  content with the big block it lives in
     */
    @Override
    protected ByteBuffer getBlockAt(final int offset) throws IOException {
        // Which big block is this?
        final int bigBlockSize = _filesystem.getBigBlockSize();
        final int byteOffset = offset * POIFSConstants.SMALL_BLOCK_SIZE;
        final int bigBlockNumber = byteOffset / bigBlockSize;
        final int bigBlockOffset = byteOffset % bigBlockSize;

        // Now locate the data block for it
        Iterator<ByteBuffer> it = _mini_stream.getBlockIterator();
        for (int i = 0; i < bigBlockNumber; i++) {
            it.next();
        }
        ByteBuffer dataBlock = it.next();

        // Skip forward to the right place. Step relative to where the
        //  buffer currently stands, as it isn't guaranteed to begin at 0
        dataBlock.position(dataBlock.position() + bigBlockOffset);

        // Hand back a window of exactly one small block, so callers
        //  can't run on into the neighbouring small blocks
        ByteBuffer miniBuffer = dataBlock.slice();
        miniBuffer.limit(POIFSConstants.SMALL_BLOCK_SIZE);
        return miniBuffer;
    }

    /**
     * Load the block, extending the underlying stream if needed
     */
    @Override
    protected ByteBuffer createBlockIfNeeded(final int offset) throws IOException {
        // TODO Extend
        return getBlockAt(offset);
    }

    /**
     * Returns the BATBlock that handles the specified offset,
     *  and the relative index within it
     */
    @Override
    protected BATBlockAndIndex getBATBlockAndIndex(final int offset) {
        return BATBlock.getSBATBlockAndIndex(
                offset, _header, _sbat_blocks
        );
    }

    /**
     * Works out what block follows the specified one.
     */
    @Override
    protected int getNextBlock(final int offset) {
        BATBlockAndIndex bai = getBATBlockAndIndex(offset);
        return bai.getBlock().getValueAt( bai.getIndex() );
    }

    /**
     * Changes the record of what block follows the specified one.
     */
    @Override
    protected void setNextBlock(final int offset, final int nextBlock) {
        BATBlockAndIndex bai = getBATBlockAndIndex(offset);
        bai.getBlock().setValueAt(
                bai.getIndex(), nextBlock
        );
    }

    /**
     * Finds a free block, and returns its offset.
     * This method will extend the SBAT chain if needed, taking a
     *  big block from the filesystem to hold the new SBAT.
     * The block is not marked as used here; that only happens
     *  once the caller sets what follows it.
     */
    @Override
    protected int getFreeBlock() throws IOException {
        int sectorsPerSBAT = _filesystem.getBigBlockSizeDetails().getBATEntriesPerBlock();

        // First up, do we have any spare ones?
        int offset = 0;
        for (int i = 0; i < _sbat_blocks.size(); i++) {
            // Check this one
            BATBlock sbat = _sbat_blocks.get(i);
            if (sbat.hasFreeSectors()) {
                // Claim one of them and return it
                for (int j = 0; j < sectorsPerSBAT; j++) {
                    int sbatValue = sbat.getValueAt(j);
                    if (sbatValue == POIFSConstants.UNUSED_BLOCK) {
                        // Bingo
                        return offset + j;
                    }
                }
            }

            // Move onto the next SBAT
            offset += sectorsPerSBAT;
        }

        // If we get here, then there aren't any
        //  free sectors in any of the SBATs
        // So, we need to extend the chain and add another

        // Create a new BATBlock
        BATBlock newSBAT = BATBlock.createEmptyBATBlock(_filesystem.getBigBlockSizeDetails(), false);
        int batForSBAT = _filesystem.getFreeBlock();
        newSBAT.setOurBlockIndex(batForSBAT);

        // Are we the first SBAT?
        if (_header.getSBATCount() == 0) {
            _header.setSBATStart(batForSBAT);
            _header.setSBATBlockCount(1);
        } else {
            // Find the end of the SBAT stream, and add the sbat in there
            ChainLoopDetector loopDetector = _filesystem.getChainLoopDetector();
            int batOffset = _header.getSBATStart();
            while (true) {
                loopDetector.claim(batOffset);
                int nextBat = _filesystem.getNextBlock(batOffset);
                if (nextBat == POIFSConstants.END_OF_CHAIN) {
                    break;
                }
                batOffset = nextBat;
            }

            // Add it in at the end
            _filesystem.setNextBlock(batOffset, batForSBAT);

            // And update the count
            _header.setSBATBlockCount(
                    _header.getSBATCount() + 1
            );
        }

        // Finish allocating
        _filesystem.setNextBlock(batForSBAT, POIFSConstants.END_OF_CHAIN);
        _sbat_blocks.add(newSBAT);

        // Return our first spot
        return offset;
    }

    /**
     * Creates a loop detector sized from the size recorded
     *  on the root property.
     */
    @Override
    protected ChainLoopDetector getChainLoopDetector() throws IOException {
        return new ChainLoopDetector( _root.getSize() );
    }

    /**
     * Small blocks are always {@link POIFSConstants#SMALL_BLOCK_SIZE} bytes.
     */
    @Override
    protected int getBlockStoreBlockSize() {
        return POIFSConstants.SMALL_BLOCK_SIZE;
    }
}

View File

@ -24,7 +24,7 @@ import java.nio.ByteBuffer;
import java.util.Iterator;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem.ChainLoopDetector;
import org.apache.poi.poifs.filesystem.BlockStore.ChainLoopDetector;
import org.apache.poi.poifs.property.Property;
import org.apache.poi.poifs.storage.HeaderBlock;
@ -45,7 +45,7 @@ import org.apache.poi.poifs.storage.HeaderBlock;
public class NPOIFSStream implements Iterable<ByteBuffer>
{
private NPOIFSFileSystem filesystem;
private BlockStore blockStore;
private int startBlock;
/**
@ -53,8 +53,8 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
* to know how to get the start block (eg from a
* {@link HeaderBlock} or a {@link Property})
*/
public NPOIFSStream(NPOIFSFileSystem filesystem, int startBlock) {
this.filesystem = filesystem;
public NPOIFSStream(BlockStore blockStore, int startBlock) {
this.blockStore = blockStore;
this.startBlock = startBlock;
}
@ -62,8 +62,8 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
* Constructor for a new stream. A start block won't
* be allocated until you begin writing to it.
*/
public NPOIFSStream(NPOIFSFileSystem filesystem) {
this.filesystem = filesystem;
public NPOIFSStream(BlockStore blockStore) {
this.blockStore = blockStore;
this.startBlock = POIFSConstants.END_OF_CHAIN;
}
@ -101,51 +101,56 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
*/
public void updateContents(byte[] contents) throws IOException {
    // How many blocks are we going to need? Round up, so that a
    //  trailing partial block still gets somewhere to live (plain
    //  integer division would silently drop it)
    final int blockSize = blockStore.getBlockStoreBlockSize();
    final int blocks = contents.length / blockSize
            + (contents.length % blockSize == 0 ? 0 : 1);

    // Make sure we don't encounter a loop whilst overwriting
    //  the existing blocks
    ChainLoopDetector loopDetector = blockStore.getChainLoopDetector();

    // Start writing
    int prevBlock = POIFSConstants.END_OF_CHAIN;
    int nextBlock = startBlock;
    for (int i = 0; i < blocks; i++) {
        int thisBlock = nextBlock;

        // Allocate a block if needed, otherwise figure
        //  out what the next block will be
        if (thisBlock == POIFSConstants.END_OF_CHAIN) {
            thisBlock = blockStore.getFreeBlock();
            loopDetector.claim(thisBlock);

            // We're on the end of the chain
            nextBlock = POIFSConstants.END_OF_CHAIN;

            // Mark the block as taken right away, otherwise the next
            //  request for a free block could hand out the same one again
            blockStore.setNextBlock(thisBlock, POIFSConstants.END_OF_CHAIN);

            if (prevBlock != POIFSConstants.END_OF_CHAIN) {
                // Mark the previous block as carrying on to us
                blockStore.setNextBlock(prevBlock, thisBlock);
            } else {
                // Nothing before us, so this stream was new and
                //  we have just been given its first block
                startBlock = thisBlock;
            }
        } else {
            loopDetector.claim(thisBlock);
            nextBlock = blockStore.getNextBlock(thisBlock);
        }

        // Write it. The final block may only be partly filled, so
        //  never copy beyond the end of the supplied data
        final int from = i * blockSize;
        final int length = Math.min(blockSize, contents.length - from);
        ByteBuffer buffer = blockStore.createBlockIfNeeded(thisBlock);
        buffer.put(contents, from, length);

        // Update pointers
        prevBlock = thisBlock;
    }
    final int lastBlock = prevBlock;

    // If we're overwriting, free any remaining blocks
    while (nextBlock != POIFSConstants.END_OF_CHAIN) {
        int thisBlock = nextBlock;
        loopDetector.claim(thisBlock);
        nextBlock = blockStore.getNextBlock(thisBlock);
        blockStore.setNextBlock(thisBlock, POIFSConstants.UNUSED_BLOCK);
    }

    if (lastBlock != POIFSConstants.END_OF_CHAIN) {
        // Mark the end of the stream
        blockStore.setNextBlock(lastBlock, POIFSConstants.END_OF_CHAIN);
    } else {
        // Nothing was written, and anything we used to hold has
        //  just been freed, so we're back to having no start block
        startBlock = POIFSConstants.END_OF_CHAIN;
    }
}
// TODO Streaming write support too
@ -160,7 +165,7 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
protected StreamBlockByteBufferIterator(int firstBlock) {
this.nextBlock = firstBlock;
try {
this.loopDetector = filesystem.new ChainLoopDetector();
this.loopDetector = blockStore.getChainLoopDetector();
} catch(IOException e) {
throw new RuntimeException(e);
}
@ -180,8 +185,8 @@ public class NPOIFSStream implements Iterable<ByteBuffer>
try {
loopDetector.claim(nextBlock);
ByteBuffer data = filesystem.getBlockAt(nextBlock);
nextBlock = filesystem.getNextBlock(nextBlock);
ByteBuffer data = blockStore.getBlockAt(nextBlock);
nextBlock = blockStore.getNextBlock(nextBlock);
return data;
} catch(IOException e) {
throw new RuntimeException(e);

View File

@ -257,7 +257,7 @@ public final class BATBlock extends BigBlock {
* The List of BATBlocks must be in sequential order
*/
public static BATBlockAndIndex getBATBlockAndIndex(final int offset,
final HeaderBlock header, final List<BATBlock> blocks) {
final HeaderBlock header, final List<BATBlock> bats) {
POIFSBigBlockSize bigBlockSize = header.getBigBlockSize();
// Are we in the BAT or XBAT range
@ -267,7 +267,7 @@ public final class BATBlock extends BigBlock {
if(offset < batRangeEndsAt) {
int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
int index = offset % bigBlockSize.getBATEntriesPerBlock();
return new BATBlockAndIndex( index, blocks.get(whichBAT) );
return new BATBlockAndIndex( index, bats.get(whichBAT) );
}
// XBATs hold slightly less
@ -276,10 +276,25 @@ public final class BATBlock extends BigBlock {
int index = relOffset % bigBlockSize.getXBATEntriesPerBlock();
return new BATBlockAndIndex(
index,
blocks.get(header.getBATCount() + whichXBAT)
bats.get(header.getBATCount() + whichXBAT)
);
}
/**
 * Finds, for the mini stream, which SBAT block covers the
 *  given offset and the position of its entry inside that block.
 * The List of SBAT blocks must be in sequential order
 */
public static BATBlockAndIndex getSBATBlockAndIndex(final int offset,
        final HeaderBlock header, final List<BATBlock> sbats) {
    final POIFSBigBlockSize blockSizeDetails = header.getBigBlockSize();
    final int entriesPerBlock = blockSizeDetails.getBATEntriesPerBlock();

    // SBATs are a plain chained stream, so the quotient picks
    //  the block and the remainder picks the entry within it
    final BATBlock owner = sbats.get(offset / entriesPerBlock);
    final int entry = offset % entriesPerBlock;
    return new BATBlockAndIndex( entry, owner );
}
private void setXBATChain(final POIFSBigBlockSize bigBlockSize, int chainIndex)
{
int _entries_per_xbat_block = bigBlockSize.getXBATEntriesPerBlock();