From 2acd64779a39368290d85f5ec37541b68af96f16 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 18 Aug 2011 14:29:59 +0000 Subject: [PATCH] fix 51678 -- Extracting text from Bug51524.zip is slow git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1159244 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/status.xml | 1 + .../org/apache/poi/hwpf/usermodel/Range.java | 133 ++++++++++++++++-- .../hwpf/usermodel/TestHWPFOldDocument.java | 4 +- 3 files changed, 126 insertions(+), 12 deletions(-) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 99ab3a10e..5be88f7e8 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 51678 - Extracting text from Bug51524.zip is slow 51671 - HWPFDocument.write based on NPOIFSFileSystem throws a NullPointerException support for tables and hyperlinks in XSLF 51535 - correct signed vs unsigned short reading in NDocumentInputStream diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index f368be197..b8fe62c5c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -21,6 +21,8 @@ import java.lang.ref.WeakReference; import java.util.List; import java.util.NoSuchElementException; +import org.apache.poi.hwpf.model.BytePropertyNode; + import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.model.CHPX; @@ -770,16 +772,28 @@ public class Range { // TODO -instantiable superclass return null; } - int[] point = findRange( _paragraphs, _parStart, - Math.max( chpx.getStart(), _start ), chpx.getEnd() ); - - if ( point[0] >= _paragraphs.size() ) + short istd; + if ( this instanceof Paragraph ) { - return null; + istd = ((Paragraph) this)._istd; } + else + { + int[] point = findRange( _paragraphs, + Math.max( chpx.getStart(), _start ), + Math.min( chpx.getEnd(), _end ) ); - PAPX papx = _paragraphs.get( point[0] ); - short istd = papx.getIstd(); + initParagraphs(); + int parStart = Math.max( point[0], _parStart ); + + if ( parStart >= _paragraphs.size() ) + { + return null; + } + + PAPX papx = _paragraphs.get( point[0] ); + istd = papx.getIstd(); + } CharacterRun chp = new CharacterRun( chpx, _doc.getStyleSheet(), istd, this ); @@ -924,7 +938,7 @@ public class Range { // TODO -instantiable superclass */ private void initParagraphs() { if (!_parRangeFound) { - int[] point = findRange(_paragraphs, _parStart, _start, _end); + int[] point = findRange(_paragraphs, _start, _end); _parStart = point[0]; _parEnd = point[1]; _parRangeFound = true; @@ -936,7 +950,7 @@ public class Range { // TODO -instantiable superclass */ private void initCharacterRuns() { if (!_charRangeFound) { - int[] point = findRange(_characters, _charStart, _start, _end); + int[] point = findRange(_characters, _start, _end); _charStart = point[0]; _charEnd = point[1]; _charRangeFound = true; @@ -955,6 +969,105 @@ public class Range { // TODO -instantiable superclass } } + private static int binarySearchStart( List> rpl, + int start ) + { + if ( rpl.get( 0 ).getStart() >= start ) + return 0; + + int low = 0; + int high = rpl.size() - 1; + + while ( low <= high ) + { + int mid = ( low + high ) >>> 1; + PropertyNode node = rpl.get( mid ); + + if ( node.getStart() < start ) + { + low = mid + 1; + } + else if ( node.getStart() > start ) + { + high = mid - 1; + } + else + { + assert node.getStart() == start; + return mid; + } + } + assert low != 0; + return low - 1; + } + + private static int binarySearchEnd( List> rpl, + int foundStart, int end ) + { + if ( rpl.get( rpl.size() - 1 ).getEnd() <= end ) + return rpl.size() - 1; + + int low = foundStart; + int high = rpl.size() - 1; + + while ( low <= high ) + { + int mid = ( low + high ) >>> 1; + PropertyNode node = rpl.get( mid ); + + if ( node.getEnd() < end ) + { + low = mid + 1; + } + else if ( node.getEnd() > end ) + { + high = mid - 1; + } + else + { + assert node.getEnd() == end; + return mid; + } + } + assert 0 <= low && low < rpl.size(); + + return low; + } + + /** + * Used to find the list indexes of a particular property. + * + * @param rpl + * A list of property nodes. + * @param min + * A hint on where to start looking. + * @param start + * The starting character offset. + * @param end + * The ending character offset. + * @return An int array of length 2. The first int is the start index and + * the second int is the end index. + */ + private int[] findRange( List> rpl, int start, + int end ) + { + int startIndex = binarySearchStart( rpl, start ); + while ( startIndex > 0 && rpl.get( startIndex - 1 ).getStart() >= start ) + startIndex--; + + int endIndex = binarySearchEnd( rpl, startIndex, end ); + while ( endIndex < rpl.size() - 1 + && rpl.get( endIndex + 1 ).getEnd() <= end ) + endIndex--; + + if ( startIndex < 0 || startIndex >= rpl.size() + || startIndex > endIndex || endIndex < 0 + || endIndex >= rpl.size() ) + throw new AssertionError(); + + return new int[] { startIndex, endIndex + 1 }; + } + /** * Used to find the list indexes of a particular property. * @@ -971,7 +1084,7 @@ public class Range { // TODO -instantiable superclass */ private int[] findRange(List> rpl, int min, int start, int end) { int x = min; - + if ( rpl.size() == min ) return new int[] { min, min }; diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java index 8eca387f0..46bd00ded 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestHWPFOldDocument.java @@ -42,7 +42,7 @@ public final class TestHWPFOldDocument extends HWPFTestCase { // Check assertEquals(1, doc.getRange().numSections()); assertEquals(1, doc.getRange().numParagraphs()); - assertEquals(1, doc.getRange().numCharacterRuns()); + assertEquals(2, doc.getRange().numCharacterRuns()); assertEquals( "The quick brown fox jumps over the lazy dog\r", @@ -96,7 +96,7 @@ public final class TestHWPFOldDocument extends HWPFTestCase { assertEquals(5, doc.getRange().getParagraph(4).numCharacterRuns()); assertEquals(1, doc.getRange().getParagraph(5).numCharacterRuns()); // Normal, superscript for 4th, normal - assertEquals(3, doc.getRange().getParagraph(6).numCharacterRuns()); + assertEquals(4, doc.getRange().getParagraph(6).numCharacterRuns()); } /**