speed improvement for rebuilding CHPX

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148115 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-19 00:50:17 +00:00
parent 2e6aa3d153
commit 1ba8c3e781

View File

@ -21,9 +21,12 @@ import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet; import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map;
import java.util.Set; import java.util.Set;
import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFFileSystem;
@ -76,6 +79,7 @@ public class CHPBinTable
int size, ComplexFileTable complexFileTable, TextPieceTable tpt, int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
boolean reconstructChpxTable ) boolean reconstructChpxTable )
{ {
long start = System.currentTimeMillis();
/* /*
* Page 35: * Page 35:
* *
@ -107,10 +111,17 @@ public class CHPBinTable
_textRuns.add(chpx); _textRuns.add(chpx);
} }
} }
logger.log( POILogger.DEBUG, "CHPX FKPs loaded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _textRuns.size() ), " elements)" );
start = System.currentTimeMillis();
if ( !reconstructChpxTable ) if ( !reconstructChpxTable )
{ {
Collections.sort( _textRuns ); Collections.sort( _textRuns );
logger.log( POILogger.DEBUG, "CHPX sorted in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
return; return;
} }
@ -164,6 +175,12 @@ public class CHPBinTable
_textRuns.add( chpx ); _textRuns.add( chpx );
} }
} }
logger.log( POILogger.DEBUG,
"Merged with CHPX from complex file table in ",
Long.valueOf( System.currentTimeMillis() - start ),
" ms (", Integer.valueOf( _textRuns.size() ),
" elements in total)" );
start = System.currentTimeMillis();
} }
// rebuild document paragraphs structure // rebuild document paragraphs structure
@ -189,7 +206,43 @@ public class CHPBinTable
docText.replace( textPiece.getStart(), textPiece.getStart() docText.replace( textPiece.getStart(), textPiece.getStart()
+ toAppendLength, toAppend ); + toAppendLength, toAppend );
} }
logger.log( POILogger.DEBUG, "Document text rebuilded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( docText.length() ), " chars)" );
start = System.currentTimeMillis();
List<CHPX> oldChpxSortedByStartPos = new ArrayList<CHPX>( _textRuns );
Collections.sort( oldChpxSortedByStartPos,
PropertyNode.StartComparator.instance );
logger.log( POILogger.DEBUG, "CHPX sorted by start position in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
start = System.currentTimeMillis();
final Map<CHPX, Integer> chpxToFileOrder = new IdentityHashMap<CHPX, Integer>();
{
int counter = 0;
for ( CHPX chpx : _textRuns )
{
chpxToFileOrder.put( chpx, Integer.valueOf( counter++ ) );
}
}
final Comparator<CHPX> chpxFileOrderComparator = new Comparator<CHPX>()
{
public int compare( CHPX o1, CHPX o2 )
{
Integer i1 = chpxToFileOrder.get( o1 );
Integer i2 = chpxToFileOrder.get( o2 );
return i1.compareTo( i2 );
}
};
logger.log( POILogger.DEBUG, "CHPX's order map created in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
start = System.currentTimeMillis();
List<Integer> textRunsBoundariesList;
{
Set<Integer> textRunsBoundariesSet = new HashSet<Integer>(); Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
for ( CHPX chpx : _textRuns ) for ( CHPX chpx : _textRuns )
{ {
@ -197,21 +250,41 @@ public class CHPBinTable
textRunsBoundariesSet.add( Integer.valueOf( chpx.getEnd() ) ); textRunsBoundariesSet.add( Integer.valueOf( chpx.getEnd() ) );
} }
textRunsBoundariesSet.remove( Integer.valueOf( 0 ) ); textRunsBoundariesSet.remove( Integer.valueOf( 0 ) );
List<Integer> textRunsBoundariesList = new ArrayList<Integer>( textRunsBoundariesList = new ArrayList<Integer>(
textRunsBoundariesSet ); textRunsBoundariesSet );
Collections.sort( textRunsBoundariesList ); Collections.sort( textRunsBoundariesList );
}
logger.log( POILogger.DEBUG, "Texts CHPX boundaries collected in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms" );
start = System.currentTimeMillis();
List<CHPX> newChpxs = new LinkedList<CHPX>(); List<CHPX> newChpxs = new LinkedList<CHPX>();
int lastTextRunStart = 0; int lastTextRunStart = 0;
for ( Integer boundary : textRunsBoundariesList ) for ( Integer objBoundary : textRunsBoundariesList )
{ {
final int boundary = objBoundary.intValue();
final int startInclusive = lastTextRunStart; final int startInclusive = lastTextRunStart;
final int endExclusive = boundary.intValue(); final int endExclusive = boundary;
lastTextRunStart = endExclusive; lastTextRunStart = endExclusive;
int startPosition = binarySearch( oldChpxSortedByStartPos, boundary );
startPosition = Math.abs( startPosition );
while ( startPosition >= oldChpxSortedByStartPos.size() )
startPosition--;
while ( startPosition > 0
&& oldChpxSortedByStartPos.get( startPosition ).getStart() >= boundary )
startPosition--;
List<CHPX> chpxs = new LinkedList<CHPX>(); List<CHPX> chpxs = new LinkedList<CHPX>();
for ( CHPX chpx : _textRuns ) for ( int c = startPosition; c < oldChpxSortedByStartPos.size(); c++ )
{ {
CHPX chpx = oldChpxSortedByStartPos.get( c );
if ( boundary < chpx.getStart() )
break;
int left = Math.max( startInclusive, chpx.getStart() ); int left = Math.max( startInclusive, chpx.getStart() );
int right = Math.min( endExclusive, chpx.getEnd() ); int right = Math.min( endExclusive, chpx.getEnd() );
@ -246,6 +319,8 @@ public class CHPBinTable
} }
} }
Collections.sort( chpxs, chpxFileOrderComparator );
SprmBuffer sprmBuffer = new SprmBuffer( 0 ); SprmBuffer sprmBuffer = new SprmBuffer( 0 );
for ( CHPX chpx : chpxs ) for ( CHPX chpx : chpxs )
{ {
@ -257,6 +332,31 @@ public class CHPBinTable
continue; continue;
} }
this._textRuns = new ArrayList<CHPX>( newChpxs ); this._textRuns = new ArrayList<CHPX>( newChpxs );
logger.log( POILogger.DEBUG, "CHPX rebuilded in ",
Long.valueOf( System.currentTimeMillis() - start ), " ms (",
Integer.valueOf( _textRuns.size() ), " elements)" );
}
private static int binarySearch( List<CHPX> chpxs, int startPosition )
{
int low = 0;
int high = chpxs.size() - 1;
while ( low <= high )
{
int mid = ( low + high ) >>> 1;
CHPX midVal = chpxs.get( mid );
int midValue = midVal.getStart();
if ( midValue < startPosition )
low = mid + 1;
else if ( midValue > startPosition )
high = mid - 1;
else
return mid; // key found
}
return -( low + 1 ); // key not found.
} }
public void adjustForDelete(int listIndex, int offset, int length) public void adjustForDelete(int listIndex, int offset, int length)