diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index caed7b82d..1ce89377b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -250,8 +250,11 @@ public final class HWPFDocument extends HWPFDocumentCore // Now load the rest of the properties, which need to be adjusted // for where text really begin - _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true); - _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true); + _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt); + _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt); + + _cbt.rebuild( _cft ); + _pbt.rebuild( _dataStream, _cft ); // Read FSPA and Escher information _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java index dbf085f18..70b8b3840 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java @@ -407,7 +407,7 @@ public final class HWPFLister PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage( mainStream, doc.getDataStream(), pageOffset, - doc.getTextTable(), false ); + doc.getTextTable() ); System.out.println( "* PFKP: " + pfkp ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index aed936329..976c4a705 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -65,21 +65,20 @@ public class CHPBinTable * Constructor used to read a binTable in from a Word document. * * @deprecated Use - * {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)} + * {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable)} * instead */ public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin, TextPieceTable tpt ) { - this( documentStream, tableStream, offset, size, null, tpt, true ); + this( documentStream, tableStream, offset, size, tpt ); } /** * Constructor used to read a binTable in from a Word document. */ public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, - int size, ComplexFileTable complexFileTable, TextPieceTable tpt, - boolean reconstructChpxTable ) + int size, TextPieceTable tpt ) { long start = System.currentTimeMillis(); /* @@ -102,7 +101,7 @@ public class CHPBinTable int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, - pageOffset, tpt, reconstructChpxTable); + pageOffset, tpt); int fkpSize = cfkp.size(); @@ -116,16 +115,11 @@ public class CHPBinTable logger.log( POILogger.DEBUG, "CHPX FKPs loaded in ", Long.valueOf( System.currentTimeMillis() - start ), " ms (", Integer.valueOf( _textRuns.size() ), " elements)" ); - start = System.currentTimeMillis(); + } - if ( !reconstructChpxTable ) - { - Collections.sort( _textRuns ); - - logger.log( POILogger.DEBUG, "CHPX sorted in ", - Long.valueOf( System.currentTimeMillis() - start ), " ms" ); - return; - } + public void rebuild( ComplexFileTable complexFileTable ) + { + long start = System.currentTimeMillis(); if ( complexFileTable != null ) { @@ -359,14 +353,14 @@ public class CHPBinTable iterator.remove(); continue; } - + previous = current; } logger.log( POILogger.DEBUG, "CHPX compacted in ", Long.valueOf( System.currentTimeMillis() - start ), " ms (", Integer.valueOf( _textRuns.size() ), " elements)" ); -} + } private static int binarySearch( List chpxs, int startPosition ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java index 0dd077ad8..2144df04e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java @@ -20,6 +20,7 @@ package org.apache.poi.hwpf.model; import java.util.ArrayList; import java.util.List; +import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -60,14 +61,14 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage * read from a Word file). * * @deprecated Use - * {@link #CHPFormattedDiskPage(byte[],int,TextPieceTable,boolean)} + * {@link #CHPFormattedDiskPage(byte[],int,TextPieceTable)} * instead */ @SuppressWarnings( "unused" ) public CHPFormattedDiskPage( byte[] documentStream, int offset, int fcMin, TextPieceTable tpt ) { - this( documentStream, offset, tpt, true ); + this( documentStream, offset, tpt ); } /** @@ -75,39 +76,23 @@ public final class CHPFormattedDiskPage extends FormattedDiskPage * read from a Word file). */ public CHPFormattedDiskPage( byte[] documentStream, int offset, - TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces ) + TextPieceTable tpt ) { super( documentStream, offset ); for ( int x = 0; x < _crun; x++ ) { - int startAt = getStart( x ); - int endAt = getEnd( x ); + int bytesStartAt = getStart( x ); + int bytesEndAt = getEnd( x ); - if ( ignoreChpxWithoutTextPieces - && !tpt.isIndexInTable( startAt, endAt ) ) - { - logger.log( POILogger.WARN, "CHPX [", - Integer.valueOf( startAt ), "; ", - Integer.valueOf( endAt ), - ") (bytes) doesn't have corresponding text pieces " - + "and will be skipped" ); - - _chpxList.add( null ); - continue; - } - - CHPX chpx = new CHPX( startAt, endAt, tpt, getGrpprl( x ) ); - - if ( ignoreChpxWithoutTextPieces - && chpx.getStart() == chpx.getEnd() ) - { - logger.log( POILogger.WARN, chpx - + " references zero-length range and will be skipped" ); - _chpxList.add( null ); - continue; - } + int charStartAt = tpt.getCharIndex( bytesStartAt ); + int charEndAt = tpt.getCharIndex( bytesEndAt, charStartAt ); + // TODO: CHECK! + // CHPX chpx = new CHPX( bytesStartAt, bytesEndAt, tpt, getGrpprl( x + // ) ); + CHPX chpx = new CHPX( charStartAt, charEndAt, new SprmBuffer( + getGrpprl( x ), 0 ) ); _chpxList.add( chpx ); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java index 479bb3c95..fd8edc940 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldCHPBinTable.java @@ -55,7 +55,7 @@ public final class OldCHPBinTable extends CHPBinTable int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, - pageOffset, tpt, true); + pageOffset, tpt); int fkpSize = cfkp.size(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/OldPAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/OldPAPBinTable.java index 9f5a43fe6..34267e2cf 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldPAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldPAPBinTable.java @@ -47,7 +47,7 @@ public final class OldPAPBinTable extends PAPBinTable int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, - documentStream, pageOffset, tpt, true); + documentStream, pageOffset, tpt); int fkpSize = pfkp.size(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 07ccad1c9..4bb50e023 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -33,6 +33,7 @@ import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmIterator; import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; @@ -44,6 +45,7 @@ import org.apache.poi.util.POILogger; * * @author Ryan Ackley */ +@Internal public class PAPBinTable { private static final POILogger logger = POILogFactory @@ -69,13 +71,11 @@ public class PAPBinTable byte[] dataStream, int offset, int size, int fcMin, TextPieceTable tpt ) { - this( documentStream, tableStream, dataStream, offset, size, null, tpt, true ); + this( documentStream, tableStream, dataStream, offset, size, tpt ); } public PAPBinTable( byte[] documentStream, byte[] tableStream, - byte[] dataStream, int offset, int size, - ComplexFileTable complexFileTable, TextPieceTable tpt, - boolean reconstructPapxTable ) + byte[] dataStream, int offset, int size, TextPieceTable tpt ) { long start = System.currentTimeMillis(); @@ -93,8 +93,7 @@ public class PAPBinTable * pageNum; PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage( - documentStream, dataStream, pageOffset, tpt, - reconstructPapxTable ); + documentStream, dataStream, pageOffset, tpt ); int fkpSize = pfkp.size(); @@ -111,16 +110,11 @@ public class PAPBinTable logger.log( POILogger.DEBUG, "PAPX tables loaded in ", Long.valueOf( System.currentTimeMillis() - start ), " ms (", Integer.valueOf( _paragraphs.size() ), " elements)" ); - start = System.currentTimeMillis(); + } - if ( !reconstructPapxTable ) - { - Collections.sort( _paragraphs ); - - logger.log( POILogger.DEBUG, "PAPX sorted in ", - Long.valueOf( System.currentTimeMillis() - start ), " ms" ); - return; - } + public void rebuild( byte[] dataStream, ComplexFileTable complexFileTable ) + { + long start = System.currentTimeMillis(); if ( complexFileTable != null ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 05fff84f5..4edb3b23e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -22,10 +22,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import org.apache.poi.hwpf.sprm.SprmBuffer; +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.POILogFactory; -import org.apache.poi.util.POILogger; /** * Represents a PAP FKP. The style properties for paragraph and character runs @@ -43,10 +41,8 @@ import org.apache.poi.util.POILogger; * * @author Ryan Ackley */ +@Internal public final class PAPFormattedDiskPage extends FormattedDiskPage { - private static final POILogger logger = POILogFactory - .getLogger( PAPFormattedDiskPage.class ); - private static final int BX_SIZE = 13; private static final int FC_SIZE = 4; @@ -70,14 +66,14 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, int offset, int fcMin, TextPieceTable tpt ) { - this( documentStream, dataStream, offset, tpt, true ); + this( documentStream, dataStream, offset, tpt ); } /** * Creates a PAPFormattedDiskPage from a 512 byte array */ public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream, - int offset, TextPieceTable tpt, boolean ignorePapxWithoutTextPieces ) + int offset, TextPieceTable tpt ) { super( documentStream, offset ); for ( int x = 0; x < _crun; x++ ) @@ -85,31 +81,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage { int startAt = getStart( x ); int endAt = getEnd( x ); - if ( ignorePapxWithoutTextPieces - && !tpt.isIndexInTable( startAt, endAt ) ) - { - logger.log( POILogger.WARN, "PAPX [", - Integer.valueOf( startAt ), "; ", - Integer.valueOf( endAt ), - ") (bytes) doesn't have corresponding text pieces " - + "and will be skipped\n\tSkipped SPRM: " - + new SprmBuffer( getGrpprl( x ), 2 ) ); - _papxList.add( null ); - continue; - } - PAPX papx = new PAPX( startAt, endAt, tpt, getGrpprl( x ), getParagraphHeight( x ), dataStream ); - - if ( ignorePapxWithoutTextPieces - && papx.getStart() == papx.getEnd() ) - { - logger.log( POILogger.WARN, papx - + " references zero-length range and will be skipped" ); - _papxList.add( null ); - continue; - } - _papxList.add( papx ); } _fkp = null; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java index 38700f790..8e26e5ac1 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestCHPBinTable.java @@ -46,7 +46,7 @@ public final class TestCHPBinTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); - _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false); + _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -57,7 +57,7 @@ public final class TestCHPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false); + CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT); ArrayList oldTextRuns = _cHPBinTable._textRuns; ArrayList newTextRuns = newBinTable._textRuns; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java index 95ed6f7ad..c743d52d8 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java @@ -49,8 +49,7 @@ public final class TestPAPBinTable extends TestCase byte[] tableStream = _hWPFDocFixture._tableStream; PAPBinTable _pAPBinTable = new PAPBinTable( mainStream, tableStream, - null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, - fakeTPT, false ); + null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT ); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -62,8 +61,7 @@ public final class TestPAPBinTable extends TestCase byte[] newMainStream = mainOut.toByteArray(); PAPBinTable newBinTable = new PAPBinTable( newMainStream, - newTableStream, null, 0, newTableStream.length, null, fakeTPT, - false ); + newTableStream, null, 0, newTableStream.length, fakeTPT ); List oldTextRuns = _pAPBinTable.getParagraphs(); List newTextRuns = newBinTable.getParagraphs();