From 9f7257440b24fe9a7122df0d8923808a2512f0cd Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Mon, 18 Jul 2011 18:44:03 +0000 Subject: [PATCH] fix Bug 51524 -- PapBinTable constructor is slow (regression) git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148002 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../apache/poi/hwpf/model/PAPBinTable.java | 127 +++++++++++++----- .../apache/poi/hwpf/model/PropertyNode.java | 13 ++ src/scratchpad/testcases/log4j.properties | 6 + .../apache/poi/hwpf/HWPFTestDataSamples.java | 63 ++++++++- .../poi/hwpf/usermodel/TestProblems.java | 10 ++ 6 files changed, 185 insertions(+), 35 deletions(-) create mode 100644 src/scratchpad/testcases/log4j.properties diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 707aca954..9abeb80d1 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 51524 - PapBinTable constructor is slow (regression) 51514 - allow HSSFObjectData to work with both POIFS and NPOIFS 51514 - avoid NPE when copying nodes from one HSSF workbook to a new one, when opened from NPOIFS 51504 - avoid NPE when DefaultRowHeight or DefaultColumnWidth records are missing diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index 4aaa8e226..53b92fcfa 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -21,8 +21,11 @@ import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; import java.util.Collections; +import java.util.Comparator; +import java.util.IdentityHashMap; import java.util.LinkedList; import java.util.List; +import java.util.Map; import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFOutputStream; @@ -69,37 +72,52 @@ public class PAPBinTable } public PAPBinTable( byte[] documentStream, byte[] tableStream, - byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable, - TextPieceTable tpt, boolean reconstructPapxTable ) + byte[] dataStream, int offset, int size, + ComplexFileTable complexFileTable, TextPieceTable tpt, + boolean reconstructPapxTable ) { - PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); - this.tpt = tpt; + long start = System.currentTimeMillis(); - int length = binTable.length(); - for (int x = 0; x < length; x++) - { - GenericPropertyNode node = binTable.getProperty(x); + { + PlexOfCps binTable = new PlexOfCps( tableStream, offset, size, 4 ); + this.tpt = tpt; - int pageNum = LittleEndian.getInt(node.getBytes()); - int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE * pageNum; + int length = binTable.length(); + for ( int x = 0; x < length; x++ ) + { + GenericPropertyNode node = binTable.getProperty( x ); - PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, - dataStream, pageOffset, tpt, reconstructPapxTable); + int pageNum = LittleEndian.getInt( node.getBytes() ); + int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE + * pageNum; - int fkpSize = pfkp.size(); + PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage( + documentStream, dataStream, pageOffset, tpt, + reconstructPapxTable ); - for (int y = 0; y < fkpSize; y++) - { - PAPX papx = pfkp.getPAPX(y); + int fkpSize = pfkp.size(); - if (papx != null) - _paragraphs.add(papx); - } - } + for ( int y = 0; y < fkpSize; y++ ) + { + PAPX papx = pfkp.getPAPX( y ); + + if ( papx != null ) + _paragraphs.add( papx ); + } + } + } + + logger.log( POILogger.DEBUG, "PAPX tables loaded in ", + Long.valueOf( System.currentTimeMillis() - start ), " ms (", + Integer.valueOf( _paragraphs.size() ), " elements)" ); + start = System.currentTimeMillis(); if ( !reconstructPapxTable ) { Collections.sort( _paragraphs ); + + logger.log( POILogger.DEBUG, "PAPX sorted in ", + Long.valueOf( System.currentTimeMillis() - start ), " ms" ); return; } @@ -107,7 +125,7 @@ public class PAPBinTable { SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls(); - // adding CHPX from fast-saved SPRMs + // adding PAPX from fast-saved SPRMs for ( TextPiece textPiece : tpt.getTextPieces() ) { PropertyModifier prm = textPiece.getPieceDescriptor().getPrm(); @@ -137,7 +155,7 @@ public class PAPBinTable if ( hasPap ) { - SprmBuffer newSprmBuffer = new SprmBuffer(2); + SprmBuffer newSprmBuffer = new SprmBuffer( 2 ); newSprmBuffer.append( sprmBuffer.toByteArray() ); PAPX papx = new PAPX( textPiece.getStart(), @@ -145,6 +163,13 @@ public class PAPBinTable _paragraphs.add( papx ); } } + + logger.log( POILogger.DEBUG, + "Merged (?) with PAPX from complex file table in ", + Long.valueOf( System.currentTimeMillis() - start ), + " ms (", Integer.valueOf( _paragraphs.size() ), + " elements in total)" ); + start = System.currentTimeMillis(); } // rebuild document paragraphs structure @@ -170,9 +195,35 @@ public class PAPBinTable docText.replace( textPiece.getStart(), textPiece.getStart() + toAppendLength, toAppend ); } + logger.log( POILogger.DEBUG, "Document text rebuilded in ", + Long.valueOf( System.currentTimeMillis() - start ), " ms (", + Integer.valueOf( docText.length() ), " chars)" ); + start = System.currentTimeMillis(); + + List oldPapxSortedByEndPos = new ArrayList( _paragraphs ); + Collections.sort( oldPapxSortedByEndPos, + PropertyNode.EndComparator.instance ); + + logger.log( POILogger.DEBUG, "PAPX sorted by end position in ", + Long.valueOf( System.currentTimeMillis() - start ), " ms" ); + start = System.currentTimeMillis(); + + final Map papxToFileOrder = new IdentityHashMap(); + { + int counter = 0; + for ( PAPX papx : _paragraphs ) + { + papxToFileOrder.put( papx, Integer.valueOf( counter++ ) ); + } + } + + logger.log( POILogger.DEBUG, "PAPX's order map created in ", + Long.valueOf( System.currentTimeMillis() - start ), " ms" ); + start = System.currentTimeMillis(); List newPapxs = new LinkedList(); int lastParStart = 0; + int lastPapxIndex = 0; for ( int charIndex = 0; charIndex < docText.length(); charIndex++ ) { final char c = docText.charAt( charIndex ); @@ -183,20 +234,19 @@ public class PAPBinTable final int endExclusive = charIndex + 1; List papxs = new LinkedList(); - for ( PAPX papx : _paragraphs ) + for ( int papxIndex = lastPapxIndex; papxIndex < oldPapxSortedByEndPos + .size(); papxIndex++ ) { - // TODO: Tests, check, etc - for ( int f = papx.getEnd() - 1; f <= charIndex; f++ ) + PAPX papx = oldPapxSortedByEndPos.get( papxIndex ); + + assert papx.getEnd() > startInclusive; + if ( papx.getEnd() - 1 > charIndex ) { - if ( f == charIndex ) - { - papxs.add( papx ); - break; - } - final char fChar = docText.charAt( charIndex ); - if ( fChar == 13 || fChar == 7 || fChar == 12 ) - break; + lastPapxIndex = papxIndex; + break; } + + papxs.add( papx ); } if ( papxs.size() == 0 ) @@ -226,6 +276,17 @@ public class PAPBinTable } } + // restore file order of PAPX + Collections.sort( papxs, new Comparator() + { + public int compare( PAPX o1, PAPX o2 ) + { + Integer i1 = papxToFileOrder.get( o1 ); + Integer i2 = papxToFileOrder.get( o2 ); + return i1.compareTo( i2 ); + } + } ); + SprmBuffer sprmBuffer = null; for ( PAPX papx : papxs ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java index f0f6f4e92..78f4b35bf 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java @@ -35,6 +35,19 @@ import org.apache.poi.util.POILogger; public abstract class PropertyNode> implements Comparable, Cloneable { + static final class EndComparator implements Comparator> + { + static EndComparator instance = new EndComparator(); + + public int compare( PropertyNode o1, PropertyNode o2 ) + { + int thisVal = o1.getEnd(); + int anotherVal = o2.getEnd(); + return ( thisVal < anotherVal ? -1 : ( thisVal == anotherVal ? 0 + : 1 ) ); + } + } + static final class StartComparator implements Comparator> { static StartComparator instance = new StartComparator(); diff --git a/src/scratchpad/testcases/log4j.properties b/src/scratchpad/testcases/log4j.properties new file mode 100644 index 000000000..083b5da07 --- /dev/null +++ b/src/scratchpad/testcases/log4j.properties @@ -0,0 +1,6 @@ +log4j.rootLogger=ALL,CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.target=System.out +log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%d{dd.MM HH:mm:ss} %-30.30c %5p %m%n diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestDataSamples.java b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestDataSamples.java index 2c5253d9c..4f0f81457 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestDataSamples.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/HWPFTestDataSamples.java @@ -16,13 +16,23 @@ ==================================================================== */ package org.apache.poi.hwpf; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.zip.ZipInputStream; + import org.apache.poi.POIDataSamples; import org.apache.poi.poifs.filesystem.POIFSFileSystem; - -import java.io.*; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.POILogFactory; +import org.apache.poi.util.POILogger; public class HWPFTestDataSamples { + private static final POILogger logger = POILogFactory + .getLogger( HWPFTestDataSamples.class ); + public static HWPFDocument openSampleFile(String sampleFileName) { try { InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName); @@ -31,6 +41,55 @@ public class HWPFTestDataSamples { throw new RuntimeException(e); } } + + public static HWPFDocument openSampleFileFromArchive( String sampleFileName ) + { + final long start = System.currentTimeMillis(); + try + { + ZipInputStream is = new ZipInputStream( POIDataSamples + .getDocumentInstance() + .openResourceAsStream( sampleFileName ) ); + try + { + is.getNextEntry(); + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + try + { + IOUtils.copy( is, baos ); + } + finally + { + baos.close(); + } + + final long endUnzip = System.currentTimeMillis(); + byte[] byteArray = baos.toByteArray(); + + logger.log( POILogger.DEBUG, "Unzipped in ", + Long.valueOf( endUnzip - start ), " ms -- ", + Long.valueOf( byteArray.length ), " byte(s)" ); + + ByteArrayInputStream bais = new ByteArrayInputStream( byteArray ); + HWPFDocument doc = new HWPFDocument( bais ); + final long endParse = System.currentTimeMillis(); + + logger.log( POILogger.DEBUG, "Parsed in ", + Long.valueOf( endParse - start ), " ms" ); + + return doc; + } + finally + { + is.close(); + } + } + catch ( IOException e ) + { + throw new RuntimeException( e ); + } + } + public static HWPFOldDocument openOldSampleFile(String sampleFileName) { try { InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream(sampleFileName); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index 5b13f781f..60b4df7a3 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.util.List; import junit.framework.AssertionFailedError; + import org.apache.commons.codec.digest.DigestUtils; import org.apache.poi.EncryptedDocumentException; import org.apache.poi.POIDataSamples; @@ -43,6 +44,7 @@ import org.apache.poi.util.IOUtils; */ public final class TestProblems extends HWPFTestCase { + /** * ListEntry passed no ListTable */ @@ -825,4 +827,12 @@ public final class TestProblems extends HWPFTestCase { } } + /** + * Bug 51524 - PapBinTable constructor is slow + */ + public void test51524() + { + HWPFTestDataSamples.openSampleFileFromArchive( "Bug51524.zip" ); + } + }