XBAT logic in NPOIFS was incorrect - it's a chain of doubly indirect, not singly indirect BATs. Start to correct

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053495 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-29 01:05:01 +00:00
parent 029850e5cf
commit 01626c6626
4 changed files with 59 additions and 49 deletions

View File

@ -29,8 +29,6 @@ import org.apache.poi.util.LittleEndianInput;
/**
* This class provides methods to read a DocumentEntry managed by a
* {@link POIFSFileSystem} instance.
*
* @author Marc Johnson (mjohnson at apache dot org)
*/
public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
/** returned by read operations if we're at end of document */

View File

@ -77,6 +77,7 @@ public class NPOIFSFileSystem extends BlockStore
private NPOIFSMiniStore _mini_store;
private NPropertyTable _property_table;
private List<BATBlock> _xbat_blocks;
private List<BATBlock> _bat_blocks;
private HeaderBlock _header;
private DirectoryNode _root;
@ -98,6 +99,7 @@ public class NPOIFSFileSystem extends BlockStore
_header = new HeaderBlock(bigBlockSize);
_property_table = new NPropertyTable(_header);
_mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), new ArrayList<BATBlock>(), _header);
_xbat_blocks = new ArrayList<BATBlock>();
_bat_blocks = new ArrayList<BATBlock>();
_root = null;
}
@ -264,14 +266,10 @@ public class NPOIFSFileSystem extends BlockStore
// Read the FAT blocks
for(int fatAt : _header.getBATArray()) {
loopDetector.claim(fatAt);
ByteBuffer fatData = getBlockAt(fatAt);
BATBlock bat = BATBlock.createBATBlock(bigBlockSize, fatData);
bat.setOurBlockIndex(fatAt);
_bat_blocks.add(bat);
readBAT(fatAt, loopDetector);
}
// Now read the XFAT blocks
// Now read the XFAT blocks, and the FATs within them
BATBlock xfat;
int nextAt = _header.getXBATIndex();
for(int i=0; i<_header.getXBATCount(); i++) {
@ -280,8 +278,13 @@ public class NPOIFSFileSystem extends BlockStore
xfat = BATBlock.createBATBlock(bigBlockSize, fatData);
xfat.setOurBlockIndex(nextAt);
nextAt = xfat.getValueAt(bigBlockSize.getXBATEntriesPerBlock());
_xbat_blocks.add(xfat);
_bat_blocks.add(xfat);
for(int j=0; j<bigBlockSize.getXBATEntriesPerBlock(); j++) {
int fatAt = xfat.getValueAt(j);
if(fatAt == POIFSConstants.UNUSED_BLOCK) break;
readBAT(fatAt, loopDetector);
}
}
// We're now able to load streams
@ -302,6 +305,13 @@ public class NPOIFSFileSystem extends BlockStore
nextAt = getNextBlock(nextAt);
}
}
/**
* Loads the single BAT sector found at the given block index and appends
* it to the list of BAT blocks held by this filesystem.
*
* @param batAt block index of the BAT sector to load
* @param loopDetector detector with which the block index is claimed
*        before the sector is read
* @throws IOException declared for the underlying block read
*/
private void readBAT(int batAt, ChainLoopDetector loopDetector) throws IOException {
// Claim the index first, so it is registered before any data is read
loopDetector.claim(batAt);
ByteBuffer sector = getBlockAt(batAt);
BATBlock loaded = BATBlock.createBATBlock(bigBlockSize, sector);
// Remember where this BAT came from, then record it
loaded.setOurBlockIndex(batAt);
_bat_blocks.add(loaded);
}
/**
* Load the block at the given offset.

View File

@ -234,16 +234,33 @@ public final class BATBlock extends BigBlock {
/**
* Calculates the maximum size of a file which is addressable given the
* number of FAT (BAT and XBAT) sectors specified.
*
* For files with 109 or fewer BATs:
* The actual file size will be between [size of fatCount-1 blocks] and
* [size of fatCount blocks].
* For 512 byte block sizes, this means we may over-estimate by up to 65kb.
* For 4096 byte block sizes, this means we may over-estimate by up to 4mb.
*
* For files with more than 109 BATs (i.e. has XBATs):
* Each XBAT can hold 127/1023 BATs, which in turn address 128/1024 blocks.
* For 512 byte block sizes, this means we may over-estimate by up to 8mb.
* For 4096 byte block sizes, this means we may over-estimate by up to 4gb,
* but only for files of more than 436mb in size.
*
* NOTE(review): the result is an int. With the 1023 * 1024 * 4096 figures
* quoted above, a single XBAT on 4096 byte blocks would exceed
* Integer.MAX_VALUE - confirm callers tolerate the wrap-around.
*
* @param bigBlockSize supplies the entries-per-block counts and the block
*        size in bytes
* @param numBAT number of BAT sectors
* @param numXBAT number of XBAT sectors
* @return the computed sector count multiplied by the big block size
*/
public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize,
final int numBAT, final int numXBAT) {
int size = 1; // Header isn't FAT addressed
// The header contains up to 109 BATs, each of which can
// address 128/1024 blocks
size += (numBAT * bigBlockSize.getBATEntriesPerBlock());
// NOTE(review): the next statement counts each XBAT entry as one directly
// addressed block (singly indirect). It looks like the pre-change line of
// this commit; kept together with the doubly indirect term below, the
// XBAT range is counted twice - confirm which of the two is intended.
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock());
// Each XBAT holds up to 127/1023 BATs, each of which can
// address 128/1024 blocks
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock() *
bigBlockSize.getBATEntriesPerBlock());
// So far we've been in sector counts, turn into bytes
return size * bigBlockSize.getBigBlockSize();
}
public static int calculateMaximumSize(final HeaderBlock header)
@ -260,26 +277,11 @@ public final class BATBlock extends BigBlock {
final HeaderBlock header, final List<BATBlock> bats) {
POIFSBigBlockSize bigBlockSize = header.getBigBlockSize();
// Are we in the BAT or XBAT range
int batRangeEndsAt = bigBlockSize.getBATEntriesPerBlock() *
header.getBATCount();
if(offset < batRangeEndsAt) {
int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
int index = offset % bigBlockSize.getBATEntriesPerBlock();
return new BATBlockAndIndex( index, bats.get(whichBAT) );
}
// XBATs hold slightly less
int relOffset = offset - batRangeEndsAt;
int whichXBAT = (int)Math.floor(relOffset / bigBlockSize.getXBATEntriesPerBlock());
int index = relOffset % bigBlockSize.getXBATEntriesPerBlock();
return new BATBlockAndIndex(
index,
bats.get(header.getBATCount() + whichXBAT)
);
}
/**
* Returns the BATBlock that handles the specified offset,
* and the relative index within it, for the mini stream.

View File

@ -242,7 +242,8 @@ public final class TestBATBlock extends TestCase {
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0)
);
// Once we get into XBAT blocks, they address a little bit less
// One XBAT block holds 127/1023 individual BAT blocks, so they can address
// a fairly hefty amount of space themselves
assertEquals(
512 + 109*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0)
@ -253,20 +254,20 @@ public final class TestBATBlock extends TestCase {
);
assertEquals(
512 + 109*512*128 + 512*127,
512 + 109*512*128 + 512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
);
assertEquals(
4096 + 109*4096*1024 + 4096*1023,
4096 + 109*4096*1024 + 4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
);
assertEquals(
512 + 109*512*128 + 3*512*127,
512 + 109*512*128 + 3*512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
);
assertEquals(
4096 + 109*4096*1024 + 3*4096*1023,
4096 + 109*4096*1024 + 3*4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
);
}
@ -319,10 +320,9 @@ public final class TestBATBlock extends TestCase {
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
// And finally one with XBATs too
// This is a naughty file, but we should be able to cope...
// (We'll decide everything is XBAT not BAT)
header.setBATCount(0);
// The XBAT count makes no difference, as we flatten in memory
header.setBATCount(1);
header.setXBATCount(1);
offset = 0;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
@ -332,15 +332,15 @@ public final class TestBATBlock extends TestCase {
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 127;
assertEquals(127, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 128;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 128;
assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 129;
assertEquals(2, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
@ -356,11 +356,11 @@ public final class TestBATBlock extends TestCase {
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1023;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
assertEquals(1023, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1024;
assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
// Bigger block size, back to real BATs