diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index acea33a3a..8e5199d0a 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -39,6 +39,7 @@
49508 - Allow the addition of paragraphs to XWPF Table Cells
49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text
XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though
+ HWPF: Improve reading of auto-saved ("complex") documents
49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
index 211dc9a6b..4226c7f49 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java
@@ -86,7 +86,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
TextPiece tp = new TextPiece(
0, textData.length, textData, pd, 0
);
- tpt.getTextPieces().add(tp);
+ tpt.add(tp);
text.append(tp.getStringBuffer());
}
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
index 81849c6fb..fae08e03f 100644
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@@ -17,14 +17,15 @@
package org.apache.poi.hwpf.model;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.poifs.common.POIFSConstants;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+
/**
* The piece table for matching up character positions to bits of text. This
* mostly works in bytes, but the TextPieces themselves work in characters. This
@@ -34,6 +35,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
*/
public final class TextPieceTable implements CharIndexTranslator {
protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
+ protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>(); // same pieces as _textPieces, kept sorted by FCComparator
// int _multiple;
int _cpMin;
@@ -96,11 +98,9 @@ public final class TextPieceTable implements CharIndexTranslator {
// In the interest of our sanity, now sort the text pieces
// into order, if they're not already
- TextPiece[] tp = _textPieces.toArray(new TextPiece[_textPieces.size()]);
- Arrays.sort(tp);
- for (int i = 0; i < tp.length; i++) {
- _textPieces.set(i, tp[i]);
- }
+ Collections.sort(_textPieces);
+ _textPiecesFCOrder = new ArrayList(_textPieces);
+ Collections.sort(_textPiecesFCOrder, new FCComparator());
}
public int getCpMin() {
@@ -111,6 +111,13 @@ public final class TextPieceTable implements CharIndexTranslator {
return _textPieces;
}
+ public void add(TextPiece piece) { // registers a piece in both internal lists and restores each list's ordering
+ _textPieces.add(piece);
+ _textPiecesFCOrder.add(piece);
+ Collections.sort(_textPieces); // natural (Comparable) ordering of TextPiece
+ Collections.sort(_textPiecesFCOrder, new FCComparator()); // ordering by PieceDescriptor.fc
+ }
+
/**
* Is the text at the given Character offset unicode, or plain old ascii? In
* a very evil fashion, you have to actually know this to make sense of
@@ -238,7 +245,7 @@ public final class TextPieceTable implements CharIndexTranslator {
public int getCharIndex(int bytePos) {
int charCount = 0;
- for(TextPiece tp : _textPieces) {
+ for(TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
if (pieceStart >= bytePos) {
break;
@@ -259,4 +266,15 @@ public final class TextPieceTable implements CharIndexTranslator {
return charCount;
}
+ private static class FCComparator implements Comparator {
+ public int compare(TextPiece textPiece, TextPiece textPiece1) {
+ if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
+ return 1;
+ } else if (textPiece.getPieceDescriptor().fc