fix Bug 51524 -- PapBinTable constructor is slow (regression)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148002 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-18 18:44:03 +00:00
parent eab1e5cfdb
commit 9f7257440b
6 changed files with 185 additions and 35 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta4" date="2011-??-??">
<action dev="poi-developers" type="fix">51524 - PapBinTable constructor is slow (regression)</action>
<action dev="poi-developers" type="fix">51514 - allow HSSFObjectData to work with both POIFS and NPOIFS</action>
<action dev="poi-developers" type="fix">51514 - avoid NPE when copying nodes from one HSSF workbook to a new one, when opened from NPOIFS</action>
<action dev="poi-developers" type="fix">51504 - avoid NPE when DefaultRowHeight or DefaultColumnWidth records are missing</action>

View File

@ -21,8 +21,11 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.IdentityHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
@ -69,37 +72,52 @@ public class PAPBinTable
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
TextPieceTable tpt, boolean reconstructPapxTable )
byte[] dataStream, int offset, int size,
ComplexFileTable complexFileTable, TextPieceTable tpt,
boolean reconstructPapxTable )
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
long start = System.currentTimeMillis();
int length = binTable.length();
for (int x = 0; x < length; x++)
{
GenericPropertyNode node = binTable.getProperty(x);
{
PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 );
this.tpt = tpt;
int pageNum = LittleEndian.getInt(node.getBytes());
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum;
int length = binTable.length();
for ( int x = 0; x < length; x++ )
{
GenericPropertyNode node = binTable.getProperty( x );
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
dataStream, pageOffset, tpt, reconstructPapxTable);
int pageNum = LittleEndian.getInt( node.getBytes() );
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
* pageNum;
int fkpSize = pfkp.size();
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
documentStream, dataStream, pageOffset, tpt,
reconstructPapxTable );
for (int y = 0; y < fkpSize; y++)
{
PAPX papx = pfkp.getPAPX(y);
int fkpSize = pfkp.size();
if (papx != null)
_paragraphs.add(papx);
}
}
for ( int y = 0; y < fkpSize; y++ )
{
PAPX papx = pfkp.getPAPX( y );
if ( papx != null )
_paragraphs.add( papx );
}
}
}
logger.log( POILogger.DEBUG, "PAPX tables loaded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _paragraphs.size() ), " elements)" );
start = System.currentTimeMillis();
if ( !reconstructPapxTable )
{
Collections.sort( _paragraphs );
logger.log( POILogger.DEBUG, "PAPX sorted in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
return;
}
@ -107,7 +125,7 @@ public class PAPBinTable
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
// adding PAPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
@ -137,7 +155,7 @@ public class PAPBinTable
if ( hasPap )
{
SprmBuffer newSprmBuffer = new SprmBuffer(2);
SprmBuffer newSprmBuffer = new SprmBuffer( 2 );
newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(),
@ -145,6 +163,13 @@ public class PAPBinTable
_paragraphs.add( papx );
}
}
logger.log( POILogger.DEBUG,
"Merged (?) with PAPX from complex file table in ",
Long.valueOf( System.currentTimeMillis() - start ),
" ms (", Integer.valueOf( _paragraphs.size() ),
" elements in total)" );
start = System.currentTimeMillis();
}
// rebuild document paragraphs structure
@ -170,9 +195,35 @@ public class PAPBinTable
docText.replace( textPiece.getStart(), textPiece.getStart()
+ toAppendLength, toAppend );
}
logger.log( POILogger.DEBUG, "Document text rebuilded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( docText.length() ), " chars)" );
start = System.currentTimeMillis();
List<PAPX> oldPapxSortedByEndPos = new ArrayList<PAPX>( _paragraphs );
Collections.sort( oldPapxSortedByEndPos,
PropertyNode.EndComparator.instance );
logger.log( POILogger.DEBUG, "PAPX sorted by end position in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
start = System.currentTimeMillis();
final Map<PAPX, Integer> papxToFileOrder = new IdentityHashMap<PAPX, Integer>();
{
int counter = 0;
for ( PAPX papx : _paragraphs )
{
papxToFileOrder.put( papx, Integer.valueOf( counter++ ) );
}
}
logger.log( POILogger.DEBUG, "PAPX's order map created in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
start = System.currentTimeMillis();
List<PAPX> newPapxs = new LinkedList<PAPX>();
int lastParStart = 0;
int lastPapxIndex = 0;
for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
{
final char c = docText.charAt( charIndex );
@ -183,20 +234,19 @@ public class PAPBinTable
final int endExclusive = charIndex + 1;
List<PAPX> papxs = new LinkedList<PAPX>();
for ( PAPX papx : _paragraphs )
for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos
.size(); papxIndex++ )
{
// TODO: Tests, check, etc
for ( int f = papx.getEnd() - 1; f <= charIndex; f++ )
PAPX papx = oldPapxSortedByEndPos.get( papxIndex );
assert papx.getEnd() > startInclusive;
if ( papx.getEnd() - 1 > charIndex )
{
if ( f == charIndex )
{
papxs.add( papx );
break;
}
final char fChar = docText.charAt( charIndex );
if ( fChar == 13 || fChar == 7 || fChar == 12 )
break;
lastPapxIndex = papxIndex;
break;
}
papxs.add( papx );
}
if ( papxs.size() == 0 )
@ -226,6 +276,17 @@ public class PAPBinTable
}
}
// restore file order of PAPX
Collections.sort( papxs, new Comparator<PAPX>()
{
public int compare( PAPX o1, PAPX o2 )
{
Integer i1 = papxToFileOrder.get( o1 );
Integer i2 = papxToFileOrder.get( o2 );
return i1.compareTo( i2 );
}
} );
SprmBuffer sprmBuffer = null;
for ( PAPX papx : papxs )
{

View File

@ -35,6 +35,19 @@ import org.apache.poi.util.POILogger;
public abstract class PropertyNode<T extends PropertyNode<T>> implements Comparable<T>, Cloneable
{
static final class EndComparator implements Comparator<PropertyNode<?>>
{
static EndComparator instance = new EndComparator();
public int compare( PropertyNode<?> o1, PropertyNode<?> o2 )
{
int thisVal = o1.getEnd();
int anotherVal = o2.getEnd();
return ( thisVal < anotherVal ? -1 : ( thisVal == anotherVal ? 0
: 1 ) );
}
}
static final class StartComparator implements Comparator<PropertyNode<?>>
{
static StartComparator instance = new StartComparator();

View File

@ -0,0 +1,6 @@
log4j.rootLogger=ALL,CONSOLE
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.target=System.out
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d{dd.MM HH:mm:ss} %-30.30c %5p %m%n

View File

@ -16,13 +16,23 @@
==================================================================== */
package org.apache.poi.hwpf;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipInputStream;
import org.apache.poi.POIDataSamples;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.*;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
public class HWPFTestDataSamples {
private static final POILogger logger = POILogFactory
.getLogger( HWPFTestDataSamples.class );
public static HWPFDocument openSampleFile(String sampleFileName) {
try {
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);
@ -31,6 +41,55 @@ public class HWPFTestDataSamples {
throw new RuntimeException(e);
}
}
public static HWPFDocument openSampleFileFromArchive( String sampleFileName )
{
final long start = System.currentTimeMillis();
try
{
ZipInputStream is = new ZipInputStream( POIDataSamples
.getDocumentInstance()
.openResourceAsStream( sampleFileName ) );
try
{
is.getNextEntry();
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try
{
IOUtils.copy( is, baos );
}
finally
{
baos.close();
}
final long endUnzip = System.currentTimeMillis();
byte[] byteArray = baos.toByteArray();
logger.log( POILogger.DEBUG, "Unzipped in ",
Long.valueOf( endUnzip - start ), " ms -- ",
Long.valueOf( byteArray.length ), " byte(s)" );
ByteArrayInputStream bais = new ByteArrayInputStream( byteArray );
HWPFDocument doc = new HWPFDocument( bais );
final long endParse = System.currentTimeMillis();
logger.log( POILogger.DEBUG, "Parsed in ",
Long.valueOf( endParse - start ), " ms" );
return doc;
}
finally
{
is.close();
}
}
catch ( IOException e )
{
throw new RuntimeException( e );
}
}
public static HWPFOldDocument openOldSampleFile(String sampleFileName) {
try {
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName);

View File

@ -22,6 +22,7 @@ import java.io.InputStream;
import java.util.List;
import junit.framework.AssertionFailedError;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDataSamples;
@ -43,6 +44,7 @@ import org.apache.poi.util.IOUtils;
*/
public final class TestProblems extends HWPFTestCase {
/**
* ListEntry passed no ListTable
*/
@ -825,4 +827,12 @@ public final class TestProblems extends HWPFTestCase {
}
}
/**
* Bug 51524 - PapBinTable constructor is slow
*/
public void test51524()
{
HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" );
}
}