From 79ecde0c8828d5e1f468c5f6ad9134017f915d8b Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 11 Aug 2011 18:49:35 +0000 Subject: [PATCH] reuse existing POIFS of hwpf document to preserve all OLE streams git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1156727 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hwpf/HWPFDocument.java | 68 +++++++++++++------ .../apache/poi/hwpf/model/CHPBinTable.java | 26 ++++--- .../poi/hwpf/model/ComplexFileTable.java | 30 ++++---- .../org/apache/poi/hwpf/model/FontTable.java | 12 ++-- .../apache/poi/hwpf/model/PAPBinTable.java | 26 ++++--- .../apache/poi/hwpf/model/SectionTable.java | 25 ++++--- .../apache/poi/hwpf/usermodel/TestBugs.java | 6 +- 7 files changed, 128 insertions(+), 65 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 4c0be3cf6..b25ecc1ea 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -23,8 +23,8 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import org.apache.poi.hwpf.usermodel.ObjectPoolImpl; - +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.model.BookmarksTables; import org.apache.poi.hwpf.model.CHPBinTable; import org.apache.poi.hwpf.model.CPSplitCalculator; @@ -636,7 +636,7 @@ public final class HWPFDocument extends HWPFDocumentCore { // initialize our streams for writing. HWPFFileSystem docSys = new HWPFFileSystem(); - HWPFOutputStream mainStream = docSys.getStream("WordDocument"); + HWPFOutputStream wordDocumentStream = docSys.getStream("WordDocument"); HWPFOutputStream tableStream = docSys.getStream("1Table"); //HWPFOutputStream dataStream = docSys.getStream("Data"); int tableOffset = 0; @@ -653,8 +653,8 @@ public final class HWPFDocument extends HWPFDocumentCore // preserve space for the FileInformationBlock because we will be writing // it after we write everything else. byte[] placeHolder = new byte[fibSize]; - mainStream.write(placeHolder); - int mainOffset = mainStream.getOffset(); + wordDocumentStream.write(placeHolder); + int mainOffset = wordDocumentStream.getOffset(); // write out the StyleSheet. _fib.setFcStshf(tableOffset); @@ -677,10 +677,10 @@ public final class HWPFDocument extends HWPFDocumentCore // write out the Complex table, includes text. _fib.setFcClx(tableOffset); - _cft.writeTo(docSys); + _cft.writeTo(wordDocumentStream, tableStream); _fib.setLcbClx(tableStream.getOffset() - tableOffset); tableOffset = tableStream.getOffset(); - int fcMac = mainStream.getOffset(); + int fcMac = wordDocumentStream.getOffset(); /* * dop (document properties record) Written immediately after the end of @@ -733,7 +733,7 @@ public final class HWPFDocument extends HWPFDocumentCore // write out the CHPBinTable. _fib.setFcPlcfbteChpx(tableOffset); - _cbt.writeTo(docSys, fcMin, _cft.getTextPieceTable()); + _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable()); _fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); tableOffset = tableStream.getOffset(); @@ -747,7 +747,7 @@ public final class HWPFDocument extends HWPFDocumentCore // write out the PAPBinTable. _fib.setFcPlcfbtePapx(tableOffset); - _pbt.writeTo(docSys, _cft.getTextPieceTable()); + _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable()); _fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); tableOffset = tableStream.getOffset(); @@ -804,7 +804,7 @@ public final class HWPFDocument extends HWPFDocumentCore // write out the SectionTable. _fib.setFcPlcfsed(tableOffset); - _st.writeTo(docSys, fcMin); + _st.writeTo(wordDocumentStream, tableStream); _fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset); tableOffset = tableStream.getOffset(); @@ -892,17 +892,17 @@ public final class HWPFDocument extends HWPFDocumentCore // write out the FontTable. _fib.setFcSttbfffn(tableOffset); - _ft.writeTo(docSys); + _ft.writeTo(tableStream); _fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset); tableOffset = tableStream.getOffset(); // set some variables in the FileInformationBlock. _fib.setFcMin(fcMin); _fib.setFcMac(fcMac); - _fib.setCbMac(mainStream.getOffset()); + _fib.setCbMac(wordDocumentStream.getOffset()); // make sure that the table, doc and data streams use big blocks. - byte[] mainBuf = mainStream.toByteArray(); + byte[] mainBuf = wordDocumentStream.toByteArray(); if (mainBuf.length < 4096) { byte[] tempBuf = new byte[4096]; @@ -934,15 +934,31 @@ public final class HWPFDocument extends HWPFDocumentCore dataBuf = tempBuf; } +// // spit out the Word document. +// POIFSFileSystem pfs = new POIFSFileSystem(); +// +// pfs.createDocument(new ByteArrayInputStream(mainBuf), "WordDocument"); +// pfs.createDocument(new ByteArrayInputStream(tableBuf), "1Table"); +// pfs.createDocument(new ByteArrayInputStream(dataBuf), "Data"); +// writeProperties(pfs); - // spit out the Word document. - POIFSFileSystem pfs = new POIFSFileSystem(); - pfs.createDocument(new ByteArrayInputStream(mainBuf), "WordDocument"); - pfs.createDocument(new ByteArrayInputStream(tableBuf), "1Table"); - pfs.createDocument(new ByteArrayInputStream(dataBuf), "Data"); - writeProperties(pfs); + POIFSFileSystem pfs = directory.getFileSystem(); + deleteEntrySafe( pfs, "WordDocument" ); + deleteEntrySafe( pfs, "0Table" ); + deleteEntrySafe( pfs, "1Table" ); + deleteEntrySafe( pfs, "Data" ); - pfs.writeFilesystem(out); + // read properties only if they were not read + getSummaryInformation(); + // update properties in case user changed them + deleteEntrySafe( pfs, SummaryInformation.DEFAULT_STREAM_NAME ); + deleteEntrySafe( pfs, DocumentSummaryInformation.DEFAULT_STREAM_NAME ); + writeProperties( pfs ); + + pfs.createDocument( new ByteArrayInputStream( mainBuf ), "WordDocument" ); + pfs.createDocument( new ByteArrayInputStream( tableBuf ), "1Table" ); + pfs.createDocument( new ByteArrayInputStream( dataBuf ), "Data" ); + pfs.writeFilesystem( out ); /* * since we updated all references in FIB and etc, using new arrays to @@ -952,6 +968,18 @@ public final class HWPFDocument extends HWPFDocumentCore this._dataStream = dataBuf; } + private static void deleteEntrySafe( POIFSFileSystem pfs, final String name ) + { + try + { + pfs.getRoot().getEntry( name ).delete(); + } + catch ( FileNotFoundException exc ) + { + // ok + } + } + @Internal public byte[] getDataStream() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index 25a2df35a..d2f949813 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -451,12 +451,20 @@ public class CHPBinTable return _textRuns; } - public void writeTo(HWPFFileSystem sys, int fcMin, CharIndexTranslator translator) - throws IOException - { + @Deprecated + public void writeTo( HWPFFileSystem sys, int fcMin, + CharIndexTranslator translator ) throws IOException + { + HWPFOutputStream docStream = sys.getStream( "WordDocument" ); + HWPFOutputStream tableStream = sys.getStream( "1Table" ); - HWPFOutputStream docStream = sys.getStream("WordDocument"); - OutputStream tableStream = sys.getStream("1Table"); + writeTo( docStream, tableStream, fcMin, translator ); + } + + public void writeTo( HWPFOutputStream wordDocumentStream, + HWPFOutputStream tableStream, int fcMin, + CharIndexTranslator translator ) throws IOException + { /* * Page 35: @@ -469,16 +477,16 @@ public class CHPBinTable PlexOfCps bte = new PlexOfCps( 4 ); // each FKP must start on a 512 byte page. - int docOffset = docStream.getOffset(); + int docOffset = wordDocumentStream.getOffset(); int mod = docOffset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE; if (mod != 0) { byte[] padding = new byte[POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod]; - docStream.write(padding); + wordDocumentStream.write(padding); } // get the page number for the first fkp - docOffset = docStream.getOffset(); + docOffset = wordDocumentStream.getOffset(); int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE; // get the ending fc @@ -499,7 +507,7 @@ public class CHPBinTable cfkp.fill(overflow); byte[] bufFkp = cfkp.toByteArray( translator ); - docStream.write(bufFkp); + wordDocumentStream.write(bufFkp); overflow = cfkp.getOverflow(); int end = endingFc; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java index 7b9eecb22..687b67e0b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ComplexFileTable.java @@ -80,20 +80,26 @@ public final class ComplexFileTable return _grpprls; } - public void writeTo(HWPFFileSystem sys) - throws IOException - { - HWPFOutputStream docStream = sys.getStream("WordDocument"); - HWPFOutputStream tableStream = sys.getStream("1Table"); + @Deprecated + public void writeTo( HWPFFileSystem sys ) throws IOException + { + HWPFOutputStream docStream = sys.getStream( "WordDocument" ); + HWPFOutputStream tableStream = sys.getStream( "1Table" ); - tableStream.write(TEXT_PIECE_TABLE_TYPE); + writeTo( docStream, tableStream ); + } - byte[] table = _tpt.writeTo(docStream); + public void writeTo( HWPFOutputStream wordDocumentStream, + HWPFOutputStream tableStream ) throws IOException + { + tableStream.write( TEXT_PIECE_TABLE_TYPE ); - byte[] numHolder = new byte[LittleEndian.INT_SIZE]; - LittleEndian.putInt(numHolder, table.length); - tableStream.write(numHolder); - tableStream.write(table); - } + byte[] table = _tpt.writeTo( wordDocumentStream ); + + byte[] numHolder = new byte[LittleEndian.INT_SIZE]; + LittleEndian.putInt( numHolder, table.length ); + tableStream.write( numHolder ); + tableStream.write( table ); + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java index b5ffa17a1..55bad615a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java @@ -115,11 +115,15 @@ public final class FontTable this._stringCount = stringCount; } - public void writeTo(HWPFFileSystem sys) - throws IOException - { - HWPFOutputStream tableStream = sys.getStream("1Table"); + @Deprecated + public void writeTo( HWPFFileSystem sys ) throws IOException + { + HWPFOutputStream tableStream = sys.getStream( "1Table" ); + writeTo( tableStream ); + } + public void writeTo( HWPFOutputStream tableStream ) throws IOException + { byte[] buf = new byte[LittleEndian.SHORT_SIZE]; LittleEndian.putShort(buf, _stringCount); tableStream.write(buf); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index b6ec456d2..60960dacf 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -397,26 +397,34 @@ public class PAPBinTable return _paragraphs; } - public void writeTo( HWPFFileSystem sys, CharIndexTranslator translator ) throws IOException + @Deprecated + public void writeTo( HWPFFileSystem sys, CharIndexTranslator translator ) + throws IOException { + HWPFOutputStream wordDocumentStream = sys.getStream( "WordDocument" ); + HWPFOutputStream tableStream = sys.getStream( "1Table" ); - HWPFOutputStream docStream = sys.getStream("WordDocument"); - OutputStream tableStream = sys.getStream("1Table"); - HWPFOutputStream dataStream = sys.getStream("1Table"); + writeTo( wordDocumentStream, tableStream, translator ); + } + + public void writeTo( HWPFOutputStream wordDocumentStream, + HWPFOutputStream tableStream, CharIndexTranslator translator ) + throws IOException + { PlexOfCps binTable = new PlexOfCps(4); // each FKP must start on a 512 byte page. - int docOffset = docStream.getOffset(); + int docOffset = wordDocumentStream.getOffset(); int mod = docOffset % POIFSConstants.SMALLER_BIG_BLOCK_SIZE; if (mod != 0) { byte[] padding = new byte[POIFSConstants.SMALLER_BIG_BLOCK_SIZE - mod]; - docStream.write(padding); + wordDocumentStream.write(padding); } // get the page number for the first fkp - docOffset = docStream.getOffset(); + docOffset = wordDocumentStream.getOffset(); int pageNum = docOffset/POIFSConstants.SMALLER_BIG_BLOCK_SIZE; // get the ending fc @@ -436,8 +444,8 @@ public class PAPBinTable PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(); pfkp.fill(overflow); - byte[] bufFkp = pfkp.toByteArray(dataStream, translator); - docStream.write(bufFkp); + byte[] bufFkp = pfkp.toByteArray(tableStream, translator); + wordDocumentStream.write(bufFkp); overflow = pfkp.getOverflow(); int end = endingFc; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 054011159..929216258 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -165,13 +165,20 @@ public class SectionTable return _sections; } - public void writeTo(HWPFFileSystem sys, int fcMin) - throws IOException - { - HWPFOutputStream docStream = sys.getStream("WordDocument"); - HWPFOutputStream tableStream = sys.getStream("1Table"); + @Deprecated + public void writeTo( HWPFFileSystem sys, int fcMin ) throws IOException + { + HWPFOutputStream docStream = sys.getStream( "WordDocument" ); + HWPFOutputStream tableStream = sys.getStream( "1Table" ); - int offset = docStream.getOffset(); + writeTo( docStream, tableStream ); + } + + public void writeTo( HWPFOutputStream wordDocumentStream, + HWPFOutputStream tableStream ) throws IOException + { + + int offset = wordDocumentStream.getOffset(); int len = _sections.size(); PlexOfCps plex = new PlexOfCps(SED_SIZE); @@ -185,8 +192,8 @@ public class SectionTable byte[] shortBuf = new byte[2]; LittleEndian.putShort(shortBuf, (short)grpprl.length); - docStream.write(shortBuf); - docStream.write(grpprl); + wordDocumentStream.write(shortBuf); + wordDocumentStream.write(grpprl); // set the fc in the section descriptor SectionDescriptor sed = sepx.getSectionDescriptor(); @@ -212,7 +219,7 @@ public class SectionTable plex.addProperty(property); - offset = docStream.getOffset(); + offset = wordDocumentStream.getOffset(); } tableStream.write(plex.toByteArray()); } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java index cd239c1b2..a657a903a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestBugs.java @@ -17,6 +17,7 @@ package org.apache.poi.hwpf.usermodel; import java.io.FileNotFoundException; +import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Arrays; @@ -551,7 +552,7 @@ public class TestBugs extends TestCase * @throws IOException * @throws FileNotFoundException */ - public void test51604p2() throws FileNotFoundException, IOException + public void test51604p2() throws Exception { HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug51604.doc" ); @@ -583,13 +584,14 @@ public class TestBugs extends TestCase totalLength += partLength; } + assertEquals( doc.getText().length(), totalLength ); } /** * [RESOLVED FIXED] Bug 51604 - replace text fails for doc ( poi 3.8 beta * release from download site ) */ - public void test51604p3() throws IOException + public void test51604p3() throws Exception { HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug51604.doc" );