HWPF: Improve reading of auto-saved ("complex") documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@960587 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8c42604abf
commit
78b0c18ade
@ -39,6 +39,7 @@
|
|||||||
<action dev="POI-DEVELOPERS" type="add">49508 - Allow the addition of paragraphs to XWPF Table Cells</action>
|
<action dev="POI-DEVELOPERS" type="add">49508 - Allow the addition of paragraphs to XWPF Table Cells</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text</action>
|
<action dev="POI-DEVELOPERS" type="fix">49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
|
<action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">HWPF: Improve reading of auto-saved ("complex") documents</action>
|
||||||
</release>
|
</release>
|
||||||
<release version="3.7-beta1" date="2010-06-20">
|
<release version="3.7-beta1" date="2010-06-20">
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster</action>
|
<action dev="POI-DEVELOPERS" type="fix">49432 - Lazy caching of XSSFComment CTComment objects by reference, to make repeated comment searching faster</action>
|
||||||
|
@ -86,7 +86,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
TextPiece tp = new TextPiece(
|
TextPiece tp = new TextPiece(
|
||||||
0, textData.length, textData, pd, 0
|
0, textData.length, textData, pd, 0
|
||||||
);
|
);
|
||||||
tpt.getTextPieces().add(tp);
|
tpt.add(tp);
|
||||||
|
|
||||||
text.append(tp.getStringBuffer());
|
text.append(tp.getStringBuffer());
|
||||||
}
|
}
|
||||||
|
@ -17,14 +17,15 @@
|
|||||||
|
|
||||||
package org.apache.poi.hwpf.model;
|
package org.apache.poi.hwpf.model;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||||
import org.apache.poi.poifs.common.POIFSConstants;
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The piece table for matching up character positions to bits of text. This
|
* The piece table for matching up character positions to bits of text. This
|
||||||
* mostly works in bytes, but the TextPieces themselves work in characters. This
|
* mostly works in bytes, but the TextPieces themselves work in characters. This
|
||||||
@ -34,6 +35,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
|
|||||||
*/
|
*/
|
||||||
public final class TextPieceTable implements CharIndexTranslator {
|
public final class TextPieceTable implements CharIndexTranslator {
|
||||||
protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
|
protected ArrayList<TextPiece> _textPieces = new ArrayList<TextPiece>();
|
||||||
|
protected ArrayList<TextPiece> _textPiecesFCOrder = new ArrayList<TextPiece>();
|
||||||
// int _multiple;
|
// int _multiple;
|
||||||
int _cpMin;
|
int _cpMin;
|
||||||
|
|
||||||
@ -96,11 +98,9 @@ public final class TextPieceTable implements CharIndexTranslator {
|
|||||||
|
|
||||||
// In the interest of our sanity, now sort the text pieces
|
// In the interest of our sanity, now sort the text pieces
|
||||||
// into order, if they're not already
|
// into order, if they're not already
|
||||||
TextPiece[] tp = _textPieces.toArray(new TextPiece[_textPieces.size()]);
|
Collections.sort(_textPieces);
|
||||||
Arrays.sort(tp);
|
_textPiecesFCOrder = new ArrayList<TextPiece>(_textPieces);
|
||||||
for (int i = 0; i < tp.length; i++) {
|
Collections.sort(_textPiecesFCOrder, new FCComparator());
|
||||||
_textPieces.set(i, tp[i]);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getCpMin() {
|
public int getCpMin() {
|
||||||
@ -111,6 +111,13 @@ public final class TextPieceTable implements CharIndexTranslator {
|
|||||||
return _textPieces;
|
return _textPieces;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void add(TextPiece piece) {
|
||||||
|
_textPieces.add(piece);
|
||||||
|
_textPiecesFCOrder.add(piece);
|
||||||
|
Collections.sort(_textPieces);
|
||||||
|
Collections.sort(_textPiecesFCOrder, new FCComparator());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is the text at the given Character offset unicode, or plain old ascii? In
|
* Is the text at the given Character offset unicode, or plain old ascii? In
|
||||||
* a very evil fashion, you have to actually know this to make sense of
|
* a very evil fashion, you have to actually know this to make sense of
|
||||||
@ -238,7 +245,7 @@ public final class TextPieceTable implements CharIndexTranslator {
|
|||||||
public int getCharIndex(int bytePos) {
|
public int getCharIndex(int bytePos) {
|
||||||
int charCount = 0;
|
int charCount = 0;
|
||||||
|
|
||||||
for(TextPiece tp : _textPieces) {
|
for(TextPiece tp : _textPiecesFCOrder) {
|
||||||
int pieceStart = tp.getPieceDescriptor().getFilePosition();
|
int pieceStart = tp.getPieceDescriptor().getFilePosition();
|
||||||
if (pieceStart >= bytePos) {
|
if (pieceStart >= bytePos) {
|
||||||
break;
|
break;
|
||||||
@ -259,4 +266,15 @@ public final class TextPieceTable implements CharIndexTranslator {
|
|||||||
return charCount;
|
return charCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static class FCComparator implements Comparator<TextPiece> {
|
||||||
|
public int compare(TextPiece textPiece, TextPiece textPiece1) {
|
||||||
|
if (textPiece.getPieceDescriptor().fc>textPiece1.getPieceDescriptor().fc) {
|
||||||
|
return 1;
|
||||||
|
} else if (textPiece.getPieceDescriptor().fc<textPiece1.getPieceDescriptor().fc) {
|
||||||
|
return -1;
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user