fix Bug 51524 -- PapBinTable constructor is slow (regression)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148002 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
eab1e5cfdb
commit
9f7257440b
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.8-beta4" date="2011-??-??">
|
||||
<action dev="poi-developers" type="fix">51524 - PapBinTable constructor is slow (regression)</action>
|
||||
<action dev="poi-developers" type="fix">51514 - allow HSSFObjectData to work with both POIFS and NPOIFS</action>
|
||||
<action dev="poi-developers" type="fix">51514 - avoid NPE when copying nodes from one HSSF workbook to a new one, when opened from NPOIFS</action>
|
||||
<action dev="poi-developers" type="fix">51504 - avoid NPE when DefaultRowHeight or DefaultColumnWidth records are missing</action>
|
||||
|
@ -21,8 +21,11 @@ import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Comparator;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
@ -69,8 +72,12 @@ public class PAPBinTable
|
||||
}
|
||||
|
||||
public PAPBinTable( byte[] documentStream, byte[] tableStream,
|
||||
byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
|
||||
TextPieceTable tpt, boolean reconstructPapxTable )
|
||||
byte[] dataStream, int offset, int size,
|
||||
ComplexFileTable complexFileTable, TextPieceTable tpt,
|
||||
boolean reconstructPapxTable )
|
||||
{
|
||||
long start = System.currentTimeMillis();
|
||||
|
||||
{
|
||||
PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 );
|
||||
this.tpt = tpt;
|
||||
@ -81,10 +88,12 @@ public class PAPBinTable
|
||||
GenericPropertyNode node = binTable.getProperty( x );
|
||||
|
||||
int pageNum = LittleEndian.getInt( node.getBytes() );
|
||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
|
||||
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
|
||||
* pageNum;
|
||||
|
||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
|
||||
dataStream, pageOffset, tpt, reconstructPapxTable);
|
||||
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
|
||||
documentStream, dataStream, pageOffset, tpt,
|
||||
reconstructPapxTable );
|
||||
|
||||
int fkpSize = pfkp.size();
|
||||
|
||||
@ -96,10 +105,19 @@ public class PAPBinTable
|
||||
_paragraphs.add( papx );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.DEBUG, "PAPX tables loaded in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
|
||||
Integer.valueOf( _paragraphs.size() ), " elements)" );
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
if ( !reconstructPapxTable )
|
||||
{
|
||||
Collections.sort( _paragraphs );
|
||||
|
||||
logger.log( POILogger.DEBUG, "PAPX sorted in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
|
||||
return;
|
||||
}
|
||||
|
||||
@ -107,7 +125,7 @@ public class PAPBinTable
|
||||
{
|
||||
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
|
||||
|
||||
// adding CHPX from fast-saved SPRMs
|
||||
// adding PAPX from fast-saved SPRMs
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
{
|
||||
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
|
||||
@ -145,6 +163,13 @@ public class PAPBinTable
|
||||
_paragraphs.add( papx );
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.DEBUG,
|
||||
"Merged (?) with PAPX from complex file table in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ),
|
||||
" ms (", Integer.valueOf( _paragraphs.size() ),
|
||||
" elements in total)" );
|
||||
start = System.currentTimeMillis();
|
||||
}
|
||||
|
||||
// rebuild document paragraphs structure
|
||||
@ -170,9 +195,35 @@ public class PAPBinTable
|
||||
docText.replace( textPiece.getStart(), textPiece.getStart()
|
||||
+ toAppendLength, toAppend );
|
||||
}
|
||||
logger.log( POILogger.DEBUG, "Document text rebuilded in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
|
||||
Integer.valueOf( docText.length() ), " chars)" );
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
|
||||
Collections.sort( oldPapxSortedByEndPos,
|
||||
PropertyNode.EndComparator.instance );
|
||||
|
||||
logger.log( POILogger.DEBUG, "PAPX sorted by end position in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
|
||||
{
|
||||
int counter = 0;
|
||||
for ( PAPX papx : _paragraphs )
|
||||
{
|
||||
papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.DEBUG, "PAPX's order map created in ",
|
||||
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
|
||||
start = System.currentTimeMillis();
|
||||
|
||||
List<PAPX> newPapxs = new LinkedList<PAPX>();
|
||||
int lastParStart = 0;
|
||||
int lastPapxIndex = 0;
|
||||
for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
|
||||
{
|
||||
final char c = docText.charAt( charIndex );
|
||||
@ -183,20 +234,19 @@ public class PAPBinTable
|
||||
final int endExclusive = charIndex + 1;
|
||||
|
||||
List<PAPX> papxs = new LinkedList<PAPX>();
|
||||
for ( PAPX papx : _paragraphs )
|
||||
for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
|
||||
.size(); papxIndex++ )
|
||||
{
|
||||
// TODO: Tests, check, etc
|
||||
for ( int f = papx.getEnd() - 1; f <= charIndex; f++ )
|
||||
{
|
||||
if ( f == charIndex )
|
||||
PAPX papx = oldPapxSortedByEndPos.get( papxIndex );
|
||||
|
||||
assert papx.getEnd() > startInclusive;
|
||||
if ( papx.getEnd() - 1 > charIndex )
|
||||
{
|
||||
lastPapxIndex = papxIndex;
|
||||
break;
|
||||
}
|
||||
|
||||
papxs.add( papx );
|
||||
break;
|
||||
}
|
||||
final char fChar = docText.charAt( charIndex );
|
||||
if ( fChar == 13 || fChar == 7 || fChar == 12 )
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( papxs.size() == 0 )
|
||||
@ -226,6 +276,17 @@ public class PAPBinTable
|
||||
}
|
||||
}
|
||||
|
||||
// restore file order of PAPX
|
||||
Collections.sort( papxs, new Comparator<PAPX>()
|
||||
{
|
||||
public int compare( PAPX o1, PAPX o2 )
|
||||
{
|
||||
Integer i1 = papxToFileOrder.get( o1 );
|
||||
Integer i2 = papxToFileOrder.get( o2 );
|
||||
return i1.compareTo( i2 );
|
||||
}
|
||||
} );
|
||||
|
||||
SprmBuffer sprmBuffer = null;
|
||||
for ( PAPX papx : papxs )
|
||||
{
|
||||
|
@ -35,6 +35,19 @@ import org.apache.poi.util.POILogger;
|
||||
public abstract class PropertyNode<T extends PropertyNode<T>> implements Comparable<T>, Cloneable
|
||||
{
|
||||
|
||||
static final class EndComparator implements Comparator<PropertyNode<?>>
|
||||
{
|
||||
static EndComparator instance = new EndComparator();
|
||||
|
||||
public int compare( PropertyNode<?> o1, PropertyNode<?> o2 )
|
||||
{
|
||||
int thisVal = o1.getEnd();
|
||||
int anotherVal = o2.getEnd();
|
||||
return ( thisVal < anotherVal ? -1 : ( thisVal == anotherVal ? 0
|
||||
: 1 ) );
|
||||
}
|
||||
}
|
||||
|
||||
static final class StartComparator implements Comparator<PropertyNode<?>>
|
||||
{
|
||||
static StartComparator instance = new StartComparator();
|
||||
|
6
src/scratchpad/testcases/log4j.properties
Normal file
6
src/scratchpad/testcases/log4j.properties
Normal file
@ -0,0 +1,6 @@
|
||||
log4j.rootLogger=ALL,CONSOLE
|
||||
|
||||
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.CONSOLE.target=System.out
|
||||
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.CONSOLE.layout.ConversionPattern=%d{dd.MM HH:mm:ss} %-30.30c %5p %m%n
|
@ -16,13 +16,23 @@
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
import java.io.*;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
public class HWPFTestDataSamples {
|
||||
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( HWPFTestDataSamples.class );
|
||||
|
||||
public static HWPFDocument openSampleFile(String sampleFileName) {
|
||||
try {
|
||||
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);
|
||||
@ -31,6 +41,55 @@ public class HWPFTestDataSamples {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public static HWPFDocument openSampleFileFromArchive( String sampleFileName )
|
||||
{
|
||||
final long start = System.currentTimeMillis();
|
||||
try
|
||||
{
|
||||
ZipInputStream is = new ZipInputStream( POIDataSamples
|
||||
.getDocumentInstance()
|
||||
.openResourceAsStream( sampleFileName ) );
|
||||
try
|
||||
{
|
||||
is.getNextEntry();
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
try
|
||||
{
|
||||
IOUtils.copy( is, baos );
|
||||
}
|
||||
finally
|
||||
{
|
||||
baos.close();
|
||||
}
|
||||
|
||||
final long endUnzip = System.currentTimeMillis();
|
||||
byte[] byteArray = baos.toByteArray();
|
||||
|
||||
logger.log( POILogger.DEBUG, "Unzipped in ",
|
||||
Long.valueOf( endUnzip - start ), " ms -- ",
|
||||
Long.valueOf( byteArray.length ), " byte(s)" );
|
||||
|
||||
ByteArrayInputStream bais = new ByteArrayInputStream( byteArray );
|
||||
HWPFDocument doc = new HWPFDocument( bais );
|
||||
final long endParse = System.currentTimeMillis();
|
||||
|
||||
logger.log( POILogger.DEBUG, "Parsed in ",
|
||||
Long.valueOf( endParse - start ), " ms" );
|
||||
|
||||
return doc;
|
||||
}
|
||||
finally
|
||||
{
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
catch ( IOException e )
|
||||
{
|
||||
throw new RuntimeException( e );
|
||||
}
|
||||
}
|
||||
|
||||
public static HWPFOldDocument openOldSampleFile(String sampleFileName) {
|
||||
try {
|
||||
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);
|
||||
|
@ -22,6 +22,7 @@ import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.AssertionFailedError;
|
||||
|
||||
import org.apache.commons.codec.digest.DigestUtils;
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
@ -43,6 +44,7 @@ import org.apache.poi.util.IOUtils;
|
||||
*/
|
||||
public final class TestProblems extends HWPFTestCase {
|
||||
|
||||
|
||||
/**
|
||||
* ListEntry passed no ListTable
|
||||
*/
|
||||
@ -825,4 +827,12 @@ public final class TestProblems extends HWPFTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Bug 51524 - PapBinTable constructor is slow
|
||||
*/
|
||||
public void test51524()
|
||||
{
|
||||
HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user