XBAT logic in NPOIFS was incorrect - XBATs form a chain of doubly indirect blocks (each XBAT lists BATs, which in turn address the data blocks), not a chain of singly indirect BATs. Start to correct this

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1053495 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-12-29 01:05:01 +00:00
parent 029850e5cf
commit 01626c6626
4 changed files with 59 additions and 49 deletions

View File

@ -29,8 +29,6 @@ import org.apache.poi.util.LittleEndianInput;
/** /**
* This class provides methods to read a DocumentEntry managed by a * This class provides methods to read a DocumentEntry managed by a
* {@link POIFSFileSystem} instance. * {@link POIFSFileSystem} instance.
*
* @author Marc Johnson (mjohnson at apache dot org)
*/ */
public final class NDocumentInputStream extends InputStream implements LittleEndianInput { public final class NDocumentInputStream extends InputStream implements LittleEndianInput {
/** returned by read operations if we're at end of document */ /** returned by read operations if we're at end of document */

View File

@ -77,6 +77,7 @@ public class NPOIFSFileSystem extends BlockStore
private NPOIFSMiniStore _mini_store; private NPOIFSMiniStore _mini_store;
private NPropertyTable _property_table; private NPropertyTable _property_table;
private List<BATBlock> _xbat_blocks;
private List<BATBlock> _bat_blocks; private List<BATBlock> _bat_blocks;
private HeaderBlock _header; private HeaderBlock _header;
private DirectoryNode _root; private DirectoryNode _root;
@ -98,6 +99,7 @@ public class NPOIFSFileSystem extends BlockStore
_header = new HeaderBlock(bigBlockSize); _header = new HeaderBlock(bigBlockSize);
_property_table = new NPropertyTable(_header); _property_table = new NPropertyTable(_header);
_mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), new ArrayList<BATBlock>(), _header); _mini_store = new NPOIFSMiniStore(this, _property_table.getRoot(), new ArrayList<BATBlock>(), _header);
_xbat_blocks = new ArrayList<BATBlock>();
_bat_blocks = new ArrayList<BATBlock>(); _bat_blocks = new ArrayList<BATBlock>();
_root = null; _root = null;
} }
@ -264,14 +266,10 @@ public class NPOIFSFileSystem extends BlockStore
// Read the FAT blocks // Read the FAT blocks
for(int fatAt : _header.getBATArray()) { for(int fatAt : _header.getBATArray()) {
loopDetector.claim(fatAt); readBAT(fatAt, loopDetector);
ByteBuffer fatData = getBlockAt(fatAt);
BATBlock bat = BATBlock.createBATBlock(bigBlockSize, fatData);
bat.setOurBlockIndex(fatAt);
_bat_blocks.add(bat);
} }
// Now read the XFAT blocks // Now read the XFAT blocks, and the FATs within them
BATBlock xfat; BATBlock xfat;
int nextAt = _header.getXBATIndex(); int nextAt = _header.getXBATIndex();
for(int i=0; i<_header.getXBATCount(); i++) { for(int i=0; i<_header.getXBATCount(); i++) {
@ -280,8 +278,13 @@ public class NPOIFSFileSystem extends BlockStore
xfat = BATBlock.createBATBlock(bigBlockSize, fatData); xfat = BATBlock.createBATBlock(bigBlockSize, fatData);
xfat.setOurBlockIndex(nextAt); xfat.setOurBlockIndex(nextAt);
nextAt = xfat.getValueAt(bigBlockSize.getXBATEntriesPerBlock()); nextAt = xfat.getValueAt(bigBlockSize.getXBATEntriesPerBlock());
_xbat_blocks.add(xfat);
_bat_blocks.add(xfat); for(int j=0; j<bigBlockSize.getXBATEntriesPerBlock(); j++) {
int fatAt = xfat.getValueAt(j);
if(fatAt == POIFSConstants.UNUSED_BLOCK) break;
readBAT(fatAt, loopDetector);
}
} }
// We're now able to load streams // We're now able to load streams
@ -302,6 +305,13 @@ public class NPOIFSFileSystem extends BlockStore
nextAt = getNextBlock(nextAt); nextAt = getNextBlock(nextAt);
} }
} }
/**
 * Loads the BAT sector stored at the given block index and appends it
 * to the in-memory list of BAT blocks.
 *
 * @param batAt the block index at which the BAT sector lives
 * @param loopDetector the detector with which the block is claimed
 *        before its contents are read
 * @throws IOException declared by this method; presumably raised when
 *         the underlying block cannot be loaded
 */
private void readBAT(int batAt, ChainLoopDetector loopDetector) throws IOException {
    // Claim the sector before touching its data
    loopDetector.claim(batAt);

    // Load the raw sector and wrap it as a BAT block
    final ByteBuffer sectorData = getBlockAt(batAt);
    final BATBlock batBlock = BATBlock.createBATBlock(bigBlockSize, sectorData);

    // Record where the block came from, then register it
    batBlock.setOurBlockIndex(batAt);
    _bat_blocks.add(batBlock);
}
/** /**
* Load the block at the given offset. * Load the block at the given offset.

View File

@ -234,16 +234,33 @@ public final class BATBlock extends BigBlock {
/** /**
* Calculates the maximum size of a file which is addressable given the * Calculates the maximum size of a file which is addressable given the
* number of FAT (BAT and XBAT) sectors specified. * number of FAT (BAT and XBAT) sectors specified.
* The actual file size will be between [size of fatCount-1 blocks] and *
* [size of fatCount blocks]. * For files with 109 or fewer BATs:
* For 512 byte block sizes, this means we may over-estimate by up to 65kb. * The actual file size will be between [size of fatCount-1 blocks] and
* For 4096 byte block sizes, this means we may over-estimate by up to 4mb * [size of fatCount blocks].
* For 512 byte block sizes, this means we may over-estimate by up to 65kb.
* For 4096 byte block sizes, this means we may over-estimate by up to 4mb
*
* For files with more than 109 BATs (i.e. has XBATs):
* Each XBAT can hold 127/1023 BATs, which in turn address 128/1024 blocks.
* For 512 byte block sizes, this means we may over-estimate by up to 8mb
* For 4096 byte block sizes, this means we may over-estimate by up to 4gb,
* but only for files of more than 436mb in size
*/ */
public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize, public static int calculateMaximumSize(final POIFSBigBlockSize bigBlockSize,
final int numBAT, final int numXBAT) { final int numBAT, final int numXBAT) {
int size = 1; // Header isn't FAT addressed int size = 1; // Header isn't FAT addressed
// The header contains up to 109 BATs, each of which can
// address 128/1024 blocks
size += (numBAT * bigBlockSize.getBATEntriesPerBlock()); size += (numBAT * bigBlockSize.getBATEntriesPerBlock());
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock());
// Each XBAT holds up to 127/1023 BATs, each of which can
// address 128/1024 blocks
size += (numXBAT * bigBlockSize.getXBATEntriesPerBlock() *
bigBlockSize.getBATEntriesPerBlock());
// So far we've been in sector counts, turn into bytes
return size * bigBlockSize.getBigBlockSize(); return size * bigBlockSize.getBigBlockSize();
} }
public static int calculateMaximumSize(final HeaderBlock header) public static int calculateMaximumSize(final HeaderBlock header)
@ -260,24 +277,9 @@ public final class BATBlock extends BigBlock {
final HeaderBlock header, final List<BATBlock> bats) { final HeaderBlock header, final List<BATBlock> bats) {
POIFSBigBlockSize bigBlockSize = header.getBigBlockSize(); POIFSBigBlockSize bigBlockSize = header.getBigBlockSize();
// Are we in the BAT or XBAT range int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
int batRangeEndsAt = bigBlockSize.getBATEntriesPerBlock() * int index = offset % bigBlockSize.getBATEntriesPerBlock();
header.getBATCount(); return new BATBlockAndIndex( index, bats.get(whichBAT) );
if(offset < batRangeEndsAt) {
int whichBAT = (int)Math.floor(offset / bigBlockSize.getBATEntriesPerBlock());
int index = offset % bigBlockSize.getBATEntriesPerBlock();
return new BATBlockAndIndex( index, bats.get(whichBAT) );
}
// XBATs hold slightly less
int relOffset = offset - batRangeEndsAt;
int whichXBAT = (int)Math.floor(relOffset / bigBlockSize.getXBATEntriesPerBlock());
int index = relOffset % bigBlockSize.getXBATEntriesPerBlock();
return new BATBlockAndIndex(
index,
bats.get(header.getBATCount() + whichXBAT)
);
} }
/** /**

View File

@ -242,7 +242,8 @@ public final class TestBATBlock extends TestCase {
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 4, 0)
); );
// Once we get into XBAT blocks, they address a little bit less // One XBAT block holds 127/1023 individual BAT blocks, so they can address
// a fairly hefty amount of space themselves
assertEquals( assertEquals(
512 + 109*512*128, 512 + 109*512*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 0)
@ -253,20 +254,20 @@ public final class TestBATBlock extends TestCase {
); );
assertEquals( assertEquals(
512 + 109*512*128 + 512*127, 512 + 109*512*128 + 512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
); );
assertEquals( assertEquals(
4096 + 109*4096*1024 + 4096*1023, 4096 + 109*4096*1024 + 4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 1)
); );
assertEquals( assertEquals(
512 + 109*512*128 + 3*512*127, 512 + 109*512*128 + 3*512*127*128,
BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3) BATBlock.calculateMaximumSize(POIFSConstants.SMALLER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
); );
assertEquals( assertEquals(
4096 + 109*4096*1024 + 3*4096*1023, 4096 + 109*4096*1024 + 3*4096*1023*1024,
BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3) BATBlock.calculateMaximumSize(POIFSConstants.LARGER_BIG_BLOCK_SIZE_DETAILS, 109, 3)
); );
} }
@ -319,10 +320,9 @@ public final class TestBATBlock extends TestCase {
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
// And finally one with XBATs too // The XBAT count makes no difference, as we flatten in memory
// This is a naughty file, but we should be able to cope... header.setBATCount(1);
// (We'll decide everything is XBAT not BAT) header.setXBATCount(1);
header.setBATCount(0);
offset = 0; offset = 0;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex()); assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
@ -332,15 +332,15 @@ public final class TestBATBlock extends TestCase {
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 127; offset = 127;
assertEquals(127, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 128;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex()); assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 128;
assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 129; offset = 129;
assertEquals(2, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex()); assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
@ -356,11 +356,11 @@ public final class TestBATBlock extends TestCase {
assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1023; offset = 1023;
assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex()); assertEquals(1023, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(0, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
offset = 1024; offset = 1024;
assertEquals(1, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex()); assertEquals(0, BATBlock.getBATBlockAndIndex(offset, header, blocks).getIndex());
assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() )); assertEquals(1, blocks.indexOf( BATBlock.getBATBlockAndIndex(offset, header, blocks).getBlock() ));
// Bigger block size, back to real BATs // Bigger block size, back to real BATs