diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index acea33a3a..8e5199d0a 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -39,6 +39,7 @@ 49508 - Allow the addition of paragraphs to XWPF Table Cells 49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though + HWPF: Improve reading of auto-saved ("complex") documents 49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java index 211dc9a6b..4226c7f49 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFOldDocument.java @@ -86,7 +86,7 @@ public class HWPFOldDocument extends HWPFDocumentCore { TextPiece tp = new TextPiece( 0, textData.length, textData, pd, 0 ); - tpt.getTextPieces().add(tp); + tpt.add(tp); text.append(tp.getStringBuffer()); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index 81849c6fb..fae08e03f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -17,14 +17,15 @@ package org.apache.poi.hwpf.model; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.poifs.common.POIFSConstants; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; + /** * The piece table for matching up character positions to bits of text. This * mostly works in bytes, but the TextPieces themselves work in characters. This @@ -34,6 +35,7 @@ import org.apache.poi.poifs.common.POIFSConstants; */ public final class TextPieceTable implements CharIndexTranslator { protected ArrayList _textPieces = new ArrayList(); + protected ArrayList _textPiecesFCOrder = new ArrayList(); // int _multiple; int _cpMin; @@ -96,11 +98,9 @@ public final class TextPieceTable implements CharIndexTranslator { // In the interest of our sanity, now sort the text pieces // into order, if they're not already - TextPiece[] tp = _textPieces.toArray(new TextPiece[_textPieces.size()]); - Arrays.sort(tp); - for (int i = 0; i < tp.length; i++) { - _textPieces.set(i, tp[i]); - } + Collections.sort(_textPieces); + _textPiecesFCOrder = new ArrayList(_textPieces); + Collections.sort(_textPiecesFCOrder, new FCComparator()); } public int getCpMin() { @@ -111,6 +111,13 @@ public final class TextPieceTable implements CharIndexTranslator { return _textPieces; } + public void add(TextPiece piece) { + _textPieces.add(piece); + _textPiecesFCOrder.add(piece); + Collections.sort(_textPieces); + Collections.sort(_textPiecesFCOrder, new FCComparator()); + } + /** * Is the text at the given Character offset unicode, or plain old ascii? In * a very evil fashion, you have to actually know this to make sense of @@ -238,7 +245,7 @@ public final class TextPieceTable implements CharIndexTranslator { public int getCharIndex(int bytePos) { int charCount = 0; - for(TextPiece tp : _textPieces) { + for(TextPiece tp : _textPiecesFCOrder) { int pieceStart = tp.getPieceDescriptor().getFilePosition(); if (pieceStart >= bytePos) { break; @@ -259,4 +266,15 @@ public final class TextPieceTable implements CharIndexTranslator { return charCount; } + private static class FCComparator implements Comparator { + public int compare(TextPiece textPiece, TextPiece textPiece1) { + if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) { + return 1; + } else if (textPiece.getPieceDescriptor().fc