From 14a7ae63167c05181665daaea83f8a212c074834 Mon Sep 17 00:00:00 2001 From: Andreas Beeker Date: Mon, 21 Apr 2014 14:37:33 +0000 Subject: [PATCH] Bug 56437 - [PATCH] Streaming write support in NPOIFS git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1588887 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/poifs/filesystem/DirectoryNode.java | 6 +- .../poi/poifs/filesystem/NPOIFSDocument.java | 60 +++++-- .../poifs/filesystem/NPOIFSFileSystem.java | 32 ++-- .../poi/poifs/filesystem/NPOIFSStream.java | 152 +++++++++++------- .../poi/poifs/property/NPropertyTable.java | 27 +++- .../filesystem/TestNPOIFSFileSystem.java | 96 +++++++++-- 6 files changed, 262 insertions(+), 111 deletions(-) diff --git a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java index b9b7a51b5..cb705ff07 100644 --- a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java +++ b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java @@ -412,7 +412,11 @@ public class DirectoryNode final POIFSWriterListener writer) throws IOException { - return createDocument(new POIFSDocument(name, size, _path, writer)); + if(_nfilesystem != null) { + return createDocument(new NPOIFSDocument(name, size, _nfilesystem, writer)); + } else { + return createDocument(new POIFSDocument(name, size, _path, writer)); + } } /** diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java index f7de41594..a61c7dc65 100644 --- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSDocument.java @@ -17,10 +17,11 @@ package org.apache.poi.poifs.filesystem; -import java.io.ByteArrayInputStream; +import java.io.BufferedInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.Collections; import java.util.Iterator; @@ -30,7 +31,6 @@ import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.dev.POIFSViewable; import org.apache.poi.poifs.property.DocumentProperty; import org.apache.poi.util.HexDump; -import org.apache.poi.util.IOUtils; /** * This class manages a document in the NIO POIFS filesystem. @@ -72,21 +72,12 @@ public final class NPOIFSDocument implements POIFSViewable { { this._filesystem = filesystem; - // Buffer the contents into memory. This is a bit icky... - // TODO Replace with a buffer up to the mini stream size, then streaming write - byte[] contents; - if(stream instanceof ByteArrayInputStream) { - ByteArrayInputStream bais = (ByteArrayInputStream)stream; - contents = new byte[bais.available()]; - bais.read(contents); - } else { - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - IOUtils.copy(stream, baos); - contents = baos.toByteArray(); - } + final int bigBlockSize = POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE; + BufferedInputStream bis = new BufferedInputStream(stream, bigBlockSize+1); + bis.mark(bigBlockSize); // Do we need to store as a mini stream or a full one? - if(contents.length <= POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) { + if(bis.skip(bigBlockSize) < bigBlockSize) { _stream = new NPOIFSStream(filesystem.getMiniStore()); _block_size = _filesystem.getMiniStore().getBlockStoreBlockSize(); } else { @@ -94,14 +85,49 @@ public final class NPOIFSDocument implements POIFSViewable { _block_size = _filesystem.getBlockStoreBlockSize(); } + // start from the beginning + bis.reset(); + // Store it - _stream.updateContents(contents); + OutputStream os = _stream.getOutputStream(); + byte buf[] = new byte[1024]; + int length = 0; + + for (int readBytes; (readBytes = bis.read(buf)) != -1; length += readBytes) { + os.write(buf, 0, readBytes); + } // And build the property for it - this._property = new DocumentProperty(name, contents.length); + this._property = new DocumentProperty(name, length); _property.setStartBlock(_stream.getStartBlock()); } + public NPOIFSDocument(String name, int size, NPOIFSFileSystem filesystem, POIFSWriterListener writer) + throws IOException + { + this._filesystem = filesystem; + + if (size < POIFSConstants.BIG_BLOCK_MINIMUM_DOCUMENT_SIZE) { + _stream = new NPOIFSStream(filesystem.getMiniStore()); + _block_size = _filesystem.getMiniStore().getBlockStoreBlockSize(); + } else { + _stream = new NPOIFSStream(filesystem); + _block_size = _filesystem.getBlockStoreBlockSize(); + } + + OutputStream innerOs = _stream.getOutputStream(); + DocumentOutputStream os = new DocumentOutputStream(innerOs, size); + POIFSDocumentPath path = new POIFSDocumentPath(name.split("\\\\")); + String docName = path.getComponent(path.length()-1); + POIFSWriterEvent event = new POIFSWriterEvent(os, path, docName, size); + writer.processPOIFSWriterEvent(event); + innerOs.close(); + + // And build the property for it + this._property = new DocumentProperty(name, size); + _property.setStartBlock(_stream.getStartBlock()); + } + int getDocumentBlockSize() { return _block_size; } diff --git a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java index 9c1a6b31d..bf7d1f745 100644 --- a/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/NPOIFSFileSystem.java @@ -117,12 +117,14 @@ public class NPOIFSFileSystem extends BlockStore // Mark us as having a single empty BAT at offset 0 _header.setBATCount(1); _header.setBATArray(new int[] { 0 }); - _bat_blocks.add(BATBlock.createEmptyBATBlock(bigBlockSize, false)); + BATBlock bb = BATBlock.createEmptyBATBlock(bigBlockSize, false); + bb.setOurBlockIndex(0); + _bat_blocks.add(bb); + setNextBlock(0, POIFSConstants.FAT_SECTOR_BLOCK); - - // Now associate the properties with the empty block - _property_table.setStartBlock(1); setNextBlock(1, POIFSConstants.END_OF_CHAIN); + + _property_table.setStartBlock(POIFSConstants.END_OF_CHAIN); } /** @@ -483,13 +485,11 @@ public class NPOIFSFileSystem extends BlockStore */ @Override protected int getFreeBlock() throws IOException { + int numSectors = bigBlockSize.getBATEntriesPerBlock(); + // First up, do we have any spare ones? int offset = 0; - for(int i=0; i<_bat_blocks.size(); i++) { - int numSectors = bigBlockSize.getBATEntriesPerBlock(); - - // Check this one - BATBlock bat = _bat_blocks.get(i); + for (BATBlock bat : _bat_blocks) { if(bat.hasFreeSectors()) { // Claim one of them and return it for(int j=0; j { private BlockStore blockStore; private int startBlock; + private OutputStream outStream; /** * Constructor for an existing stream. It's up to you @@ -92,7 +94,7 @@ public class NPOIFSStream implements Iterable } return new StreamBlockByteBufferIterator(startBlock); } - + /** * Updates the contents of the stream to the new * set of bytes. @@ -100,62 +102,16 @@ public class NPOIFSStream implements Iterable * need to update the size in the property yourself */ public void updateContents(byte[] contents) throws IOException { - // How many blocks are we going to need? - int blockSize = blockStore.getBlockStoreBlockSize(); - int blocks = (int)Math.ceil( ((double)contents.length) / blockSize ); - - // Make sure we don't encounter a loop whilst overwriting - // the existing blocks - ChainLoopDetector loopDetector = blockStore.getChainLoopDetector(); - - // Start writing - int prevBlock = POIFSConstants.END_OF_CHAIN; - int nextBlock = startBlock; - for(int i=0; i throw new UnsupportedOperationException(); } } + + protected class StreamBlockByteBuffer extends OutputStream { + byte oneByte[] = new byte[1]; + ByteBuffer buffer; + // Make sure we don't encounter a loop whilst overwriting + // the existing blocks + ChainLoopDetector loopDetector; + int prevBlock, nextBlock; + + protected StreamBlockByteBuffer() throws IOException { + loopDetector = blockStore.getChainLoopDetector(); + prevBlock = POIFSConstants.END_OF_CHAIN; + nextBlock = startBlock; + } + + protected void createBlockIfNeeded() throws IOException { + if (buffer != null && buffer.hasRemaining()) return; + + int thisBlock = nextBlock; + + // Allocate a block if needed, otherwise figure + // out what the next block will be + if(thisBlock == POIFSConstants.END_OF_CHAIN) { + thisBlock = blockStore.getFreeBlock(); + loopDetector.claim(thisBlock); + + // We're on the end of the chain + nextBlock = POIFSConstants.END_OF_CHAIN; + + // Mark the previous block as carrying on to us if needed + if(prevBlock != POIFSConstants.END_OF_CHAIN) { + blockStore.setNextBlock(prevBlock, thisBlock); + } + blockStore.setNextBlock(thisBlock, POIFSConstants.END_OF_CHAIN); + + // If we've just written the first block on a + // new stream, save the start block offset + if(startBlock == POIFSConstants.END_OF_CHAIN) { + startBlock = thisBlock; + } + } else { + loopDetector.claim(thisBlock); + nextBlock = blockStore.getNextBlock(thisBlock); + } + + buffer = blockStore.createBlockIfNeeded(thisBlock); + + // Update pointers + prevBlock = thisBlock; + } + + public void write(int b) throws IOException { + oneByte[0] = (byte)(b & 0xFF); + write(oneByte); + } + + public void write(byte[] b, int off, int len) throws IOException { + if ((off < 0) || (off > b.length) || (len < 0) || + ((off + len) > b.length) || ((off + len) < 0)) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return; + } + + do { + createBlockIfNeeded(); + int writeBytes = Math.min(buffer.remaining(), len); + buffer.put(b, off, writeBytes); + off += writeBytes; + len -= writeBytes; + } while (len > 0); + } + + public void close() throws IOException { + // If we're overwriting, free any remaining blocks + NPOIFSStream toFree = new NPOIFSStream(blockStore, nextBlock); + toFree.free(loopDetector); + + // Mark the end of the stream + blockStore.setNextBlock(prevBlock, POIFSConstants.END_OF_CHAIN); + } + } } diff --git a/src/java/org/apache/poi/poifs/property/NPropertyTable.java b/src/java/org/apache/poi/poifs/property/NPropertyTable.java index aed08a521..eb2e51458 100644 --- a/src/java/org/apache/poi/poifs/property/NPropertyTable.java +++ b/src/java/org/apache/poi/poifs/property/NPropertyTable.java @@ -17,8 +17,8 @@ package org.apache.poi.poifs.property; -import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.OutputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Iterator; @@ -123,18 +123,35 @@ public final class NPropertyTable extends PropertyTableBase { return (int)Math.ceil(size / _bigBigBlockSize.getBigBlockSize()); } + /** + * Prepare to be written + */ + public void preWrite() { + List pList = new ArrayList(); + // give each property its index + int i=0; + for (Property p : _properties) { + // only handle non-null properties + if (p == null) continue; + p.setIndex(i++); + pList.add(p); + } + + // prepare each property for writing + for (Property p : pList) p.preWrite(); + } + /** * Writes the properties out into the given low-level stream */ public void write(NPOIFSStream stream) throws IOException { - // TODO - Use a streaming write - ByteArrayOutputStream baos = new ByteArrayOutputStream(); + OutputStream os = stream.getOutputStream(); for(Property property : _properties) { if(property != null) { - property.writeData(baos); + property.writeData(os); } } - stream.updateContents(baos.toByteArray()); + os.close(); // Update the start position if needed if(getStartBlock() != stream.getStartBlock()) { diff --git a/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java index ba6d1c0c0..3008fc5b9 100644 --- a/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java +++ b/src/testcases/org/apache/poi/poifs/filesystem/TestNPOIFSFileSystem.java @@ -17,13 +17,18 @@ package org.apache.poi.poifs.filesystem; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.fail; + import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; import java.nio.ByteBuffer; import java.util.Iterator; -import junit.framework.TestCase; - import org.apache.poi.POIDataSamples; import org.apache.poi.hpsf.PropertySet; import org.apache.poi.hpsf.PropertySetFactory; @@ -33,14 +38,17 @@ import org.apache.poi.poifs.property.NPropertyTable; import org.apache.poi.poifs.property.Property; import org.apache.poi.poifs.property.RootProperty; import org.apache.poi.poifs.storage.HeaderBlock; +import org.apache.poi.util.IOUtils; +import org.junit.Test; /** * Tests for the new NIO POIFSFileSystem implementation */ -public final class TestNPOIFSFileSystem extends TestCase { +public final class TestNPOIFSFileSystem { private static final POIDataSamples _inst = POIDataSamples.getPOIFSInstance(); - public void testBasicOpen() throws Exception { + @Test + public void basicOpen() throws Exception { NPOIFSFileSystem fsA, fsB; // With a simple 512 block file @@ -58,7 +66,8 @@ public final class TestNPOIFSFileSystem extends TestCase { } } - public void testPropertiesAndFatOnRead() throws Exception { + @Test + public void propertiesAndFatOnRead() throws Exception { NPOIFSFileSystem fsA, fsB; // With a simple 512 block file @@ -196,7 +205,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Check that for a given block, we can correctly figure * out what the next one is */ - public void testNextBlock() throws Exception { + @Test + public void nextBlock() throws Exception { NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) { @@ -253,7 +263,8 @@ public final class TestNPOIFSFileSystem extends TestCase { /** * Check we get the right data back for each block */ - public void testGetBlock() throws Exception { + @Test + public void getBlock() throws Exception { NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); for(NPOIFSFileSystem fs : new NPOIFSFileSystem[] {fsA,fsB}) { @@ -322,7 +333,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Ask for free blocks where there are some already * to be had from the FAT */ - public void testGetFreeBlockWithSpare() throws Exception { + @Test + public void getFreeBlockWithSpare() throws Exception { NPOIFSFileSystem fs = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); // Our first BAT block has spares @@ -349,7 +361,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Ask for free blocks where no free ones exist, and so the * file needs to be extended and another BAT/XBAT added */ - public void testGetFreeBlockWithNoneSpare() throws Exception { + @Test + public void getFreeBlockWithNoneSpare() throws Exception { NPOIFSFileSystem fs = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); int free; @@ -479,7 +492,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Test that we can correctly get the list of directory * entries, and the details on the files in them */ - public void testListEntries() throws Exception { + @Test + public void listEntries() throws Exception { NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); NPOIFSFileSystem fsC = new NPOIFSFileSystem(_inst.getFile("BlockSize4096.zvi")); @@ -519,7 +533,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Tests that we can get the correct contents for * a document in the filesystem */ - public void testGetDocumentEntry() throws Exception { + @Test + public void getDocumentEntry() throws Exception { NPOIFSFileSystem fsA = new NPOIFSFileSystem(_inst.getFile("BlockSize512.zvi")); NPOIFSFileSystem fsB = new NPOIFSFileSystem(_inst.openResourceAsStream("BlockSize512.zvi")); NPOIFSFileSystem fsC = new NPOIFSFileSystem(_inst.getFile("BlockSize4096.zvi")); @@ -552,7 +567,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Read a file, write it and read it again. * Then, alter+add some streams, write and read */ - public void testReadWriteRead() throws Exception { + @Test + public void readWriteRead() throws Exception { // TODO // TODO } @@ -561,7 +577,8 @@ public final class TestNPOIFSFileSystem extends TestCase { * Create a new file, write it and read it again * Then, add some streams, write and read */ - public void testCreateWriteRead() throws Exception { + @Test + public void createWriteRead() throws Exception { NPOIFSFileSystem fs = new NPOIFSFileSystem(); // Initially has a BAT but not SBAT @@ -577,10 +594,11 @@ public final class TestNPOIFSFileSystem extends TestCase { fs.writeFilesystem(baos); fs = new NPOIFSFileSystem(new ByteArrayInputStream(baos.toByteArray())); - // Check it's still like that + // Property table entries have been added to the blocks assertEquals(POIFSConstants.FAT_SECTOR_BLOCK, fs.getNextBlock(0)); assertEquals(POIFSConstants.END_OF_CHAIN, fs.getNextBlock(1)); - assertEquals(POIFSConstants.UNUSED_BLOCK, fs.getNextBlock(2)); + assertEquals(POIFSConstants.END_OF_CHAIN, fs.getNextBlock(2)); + assertEquals(POIFSConstants.UNUSED_BLOCK, fs.getNextBlock(3)); assertEquals(POIFSConstants.END_OF_CHAIN, fs.getRoot().getProperty().getStartBlock()); // Now add a normal stream and a mini stream @@ -588,6 +606,54 @@ public final class TestNPOIFSFileSystem extends TestCase { // TODO The rest of the test } + + @Test + public void writPOIFSWriterListener() throws Exception { + File testFile = POIDataSamples.getSpreadSheetInstance().getFile("Simple.xls"); + NPOIFSFileSystem src = new NPOIFSFileSystem(testFile); + byte wbDataExp[] = IOUtils.toByteArray(src.createDocumentInputStream("Workbook")); + + NPOIFSFileSystem nfs = new NPOIFSFileSystem(); + copy(src.getRoot(), nfs.getRoot()); + src.close(); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + nfs.writeFilesystem(bos); + nfs.close(); + + POIFSFileSystem pfs = new POIFSFileSystem(new ByteArrayInputStream(bos.toByteArray())); + byte wbDataAct[] = IOUtils.toByteArray(pfs.createDocumentInputStream("Workbook")); + + assertThat(wbDataExp, equalTo(wbDataAct)); + } + + private static void copy(final DirectoryNode src, final DirectoryNode dest) throws IOException { + Iterator srcIter = src.getEntries(); + while(srcIter.hasNext()) { + Entry entry = srcIter.next(); + if (entry.isDirectoryEntry()) { + DirectoryNode srcDir = (DirectoryNode)entry; + DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName()); + destDir.setStorageClsid(src.getStorageClsid()); + copy(srcDir, destDir); + } else { + final DocumentNode srcDoc = (DocumentNode)entry; + // dest.createDocument(srcDoc.getName(), src.createDocumentInputStream(srcDoc)); + dest.createDocument(srcDoc.getName(), srcDoc.getSize(), new POIFSWriterListener() { + public void processPOIFSWriterEvent(POIFSWriterEvent event) { + try { + DocumentInputStream dis = src.createDocumentInputStream(srcDoc); + IOUtils.copy(dis, event.getStream()); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + }); + } + } + + } + // TODO Directory/Document write tests }