Finally get all HWPF tests to pass again, by working around how evil PAPX/CHPX/SEPX byte references are
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684986 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5067bab579
commit
6624d6420e
@ -37,6 +37,7 @@
|
|||||||
|
|
||||||
<!-- Don't forget to update status.xml too! -->
|
<!-- Don't forget to update status.xml too! -->
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
|
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
|
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
|
||||||
|
@ -34,6 +34,7 @@
|
|||||||
<!-- Don't forget to update changes.xml too! -->
|
<!-- Don't forget to update changes.xml too! -->
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
|
<action dev="POI-DEVELOPERS" type="add">Include headers and footers in the extracted text from HWPF's WordExtractor</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
|
<action dev="POI-DEVELOPERS" type="add">Added support to HWPF for headers and footers</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
|
<action dev="POI-DEVELOPERS" type="fix">Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text</action>
|
||||||
|
@ -253,7 +253,7 @@ public class HWPFDocument extends POIDocument
|
|||||||
// read in the pictures stream
|
// read in the pictures stream
|
||||||
_pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
|
_pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg);
|
||||||
|
|
||||||
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces());
|
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
|
||||||
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
|
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
|
||||||
_ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
|
_ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());
|
||||||
|
|
||||||
|
@ -36,6 +36,7 @@ public abstract class BytePropertyNode extends PropertyNode {
|
|||||||
generateCp(fcEnd, isUnicode),
|
generateCp(fcEnd, isUnicode),
|
||||||
buf
|
buf
|
||||||
);
|
);
|
||||||
|
this.isUnicode = isUnicode;
|
||||||
}
|
}
|
||||||
private static int generateCp(int val, boolean isUnicode) {
|
private static int generateCp(int val, boolean isUnicode) {
|
||||||
if(isUnicode)
|
if(isUnicode)
|
||||||
|
@ -119,7 +119,7 @@ public class CHPBinTable
|
|||||||
|
|
||||||
public void insert(int listIndex, int cpStart, SprmBuffer buf)
|
public void insert(int listIndex, int cpStart, SprmBuffer buf)
|
||||||
{
|
{
|
||||||
boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
|
boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
|
||||||
|
|
||||||
CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
|
CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode);
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
|
|||||||
|
|
||||||
for (int x = 0; x < _crun; x++)
|
for (int x = 0; x < _crun; x++)
|
||||||
{
|
{
|
||||||
boolean isUnicode = tpt.isUnicodeAt( getStart(x) );
|
boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) );
|
||||||
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
|
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -76,7 +76,7 @@ public class PAPBinTable
|
|||||||
|
|
||||||
public void insert(int listIndex, int cpStart, SprmBuffer buf)
|
public void insert(int listIndex, int cpStart, SprmBuffer buf)
|
||||||
{
|
{
|
||||||
boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart);
|
boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart);
|
||||||
|
|
||||||
PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
|
PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode);
|
||||||
|
|
||||||
|
@ -67,7 +67,8 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
|
|||||||
for (int x = 0; x < _crun; x++) {
|
for (int x = 0; x < _crun; x++) {
|
||||||
int startAt = getStart(x) - fcMin;
|
int startAt = getStart(x) - fcMin;
|
||||||
int endAt = getEnd(x) - fcMin;
|
int endAt = getEnd(x) - fcMin;
|
||||||
boolean isUnicode = tpt.isUnicodeAt(startAt);
|
boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt);
|
||||||
|
//System.err.println(startAt + " -> " + endAt + " = " + isUnicode);
|
||||||
|
|
||||||
_papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
|
_papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode));
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,11 @@ public abstract class PropertyNode implements Comparable, Cloneable
|
|||||||
_cpStart = fcStart;
|
_cpStart = fcStart;
|
||||||
_cpEnd = fcEnd;
|
_cpEnd = fcEnd;
|
||||||
_buf = buf;
|
_buf = buf;
|
||||||
|
|
||||||
|
if(_cpStart < 0) {
|
||||||
|
System.err.println("A property claimed to start before zero, at " + _cpStart + "! Resetting it to zero, and hoping for the best");
|
||||||
|
_cpStart = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -25,17 +25,15 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
|
|||||||
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* TODO - figure out if this works in characters, like most
|
|
||||||
* things do, or in bytes as PAPX / CHPX does.
|
|
||||||
*/
|
*/
|
||||||
public class SEPX extends PropertyNode
|
public class SEPX extends BytePropertyNode
|
||||||
{
|
{
|
||||||
|
|
||||||
SectionDescriptor _sed;
|
SectionDescriptor _sed;
|
||||||
|
|
||||||
public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl)
|
public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode)
|
||||||
{
|
{
|
||||||
super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0));
|
super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode);
|
||||||
_sed = sed;
|
_sed = sed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -34,6 +34,9 @@ public class SectionTable
|
|||||||
protected ArrayList _sections = new ArrayList();
|
protected ArrayList _sections = new ArrayList();
|
||||||
protected List _text;
|
protected List _text;
|
||||||
|
|
||||||
|
/** So we can know if things are unicode or not */
|
||||||
|
private TextPieceTable tpt;
|
||||||
|
|
||||||
public SectionTable()
|
public SectionTable()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
@ -41,10 +44,11 @@ public class SectionTable
|
|||||||
|
|
||||||
public SectionTable(byte[] documentStream, byte[] tableStream, int offset,
|
public SectionTable(byte[] documentStream, byte[] tableStream, int offset,
|
||||||
int size, int fcMin,
|
int size, int fcMin,
|
||||||
List tpt)
|
TextPieceTable tpt, CPSplitCalculator cps)
|
||||||
{
|
{
|
||||||
PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
|
PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
|
||||||
_text = tpt;
|
this.tpt = tpt;
|
||||||
|
this._text = tpt.getTextPieces();
|
||||||
|
|
||||||
int length = sedPlex.length();
|
int length = sedPlex.length();
|
||||||
|
|
||||||
@ -54,11 +58,16 @@ public class SectionTable
|
|||||||
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
|
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
|
||||||
|
|
||||||
int fileOffset = sed.getFc();
|
int fileOffset = sed.getFc();
|
||||||
|
int startAt = CPtoFC(node.getStart());
|
||||||
|
int endAt = CPtoFC(node.getEnd());
|
||||||
|
|
||||||
|
boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt );
|
||||||
|
// System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart);
|
||||||
|
|
||||||
// check for the optimization
|
// check for the optimization
|
||||||
if (fileOffset == 0xffffffff)
|
if (fileOffset == 0xffffffff)
|
||||||
{
|
{
|
||||||
_sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), new byte[0]));
|
_sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -67,7 +76,32 @@ public class SectionTable
|
|||||||
byte[] buf = new byte[sepxSize];
|
byte[] buf = new byte[sepxSize];
|
||||||
fileOffset += LittleEndian.SHORT_SIZE;
|
fileOffset += LittleEndian.SHORT_SIZE;
|
||||||
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
||||||
_sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), buf));
|
_sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Some files seem to lie about their unicode status, which
|
||||||
|
// is very very pesky. Try to work around these, but this
|
||||||
|
// is getting on for black magic...
|
||||||
|
int mainEndsAt = cps.getMainDocumentEnd();
|
||||||
|
boolean matchAt = false;
|
||||||
|
boolean matchHalf = false;
|
||||||
|
for(int i=0; i<_sections.size(); i++) {
|
||||||
|
SEPX s = (SEPX)_sections.get(i);
|
||||||
|
if(s.getEnd() == mainEndsAt) {
|
||||||
|
matchAt = true;
|
||||||
|
} else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) {
|
||||||
|
matchHalf = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if(! matchAt && matchHalf) {
|
||||||
|
System.err.println("Your document seemed to be mostly unicode, but the section definition was in bytes! Trying anyway, but things may well go wrong!");
|
||||||
|
for(int i=0; i<_sections.size(); i++) {
|
||||||
|
SEPX s = (SEPX)_sections.get(i);
|
||||||
|
GenericPropertyNode node = sedPlex.getProperty(i);
|
||||||
|
|
||||||
|
s.setStart( CPtoFC(node.getStart()) );
|
||||||
|
s.setEnd( CPtoFC(node.getEnd()) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -171,7 +205,7 @@ public class SectionTable
|
|||||||
|
|
||||||
// Line using Ryan's FCtoCP() conversion method -
|
// Line using Ryan's FCtoCP() conversion method -
|
||||||
// unable to observe any effect on our testcases when using this code - piers
|
// unable to observe any effect on our testcases when using this code - piers
|
||||||
GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()), sed.toByteArray());
|
GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray());
|
||||||
|
|
||||||
|
|
||||||
plex.addProperty(property);
|
plex.addProperty(property);
|
||||||
|
@ -25,6 +25,8 @@ import org.apache.poi.poifs.common.POIFSConstants;
|
|||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Hashtable;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
@ -103,6 +105,15 @@ public class TextPieceTable
|
|||||||
// And now build the piece
|
// And now build the piece
|
||||||
_textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.getStart()));
|
_textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.getStart()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In the interest of our sanity, now sort the text pieces
|
||||||
|
// into order, if they're not already
|
||||||
|
TextPiece[] tp = (TextPiece[])
|
||||||
|
_textPieces.toArray(new TextPiece[_textPieces.size()]);
|
||||||
|
Arrays.sort(tp);
|
||||||
|
for(int i=0; i<tp.length; i++) {
|
||||||
|
_textPieces.set(i, tp[i]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getCpMin()
|
public int getCpMin()
|
||||||
@ -123,9 +134,8 @@ public class TextPieceTable
|
|||||||
* paragraph properties :(
|
* paragraph properties :(
|
||||||
* @param cp The character offset to check about
|
* @param cp The character offset to check about
|
||||||
*/
|
*/
|
||||||
public boolean isUnicodeAt(int cp) {
|
public boolean isUnicodeAtCharOffset(int cp) {
|
||||||
boolean lastWas = false;
|
boolean lastWas = false;
|
||||||
int lastAt = 0;
|
|
||||||
|
|
||||||
Iterator it = _textPieces.iterator();
|
Iterator it = _textPieces.iterator();
|
||||||
while(it.hasNext()) {
|
while(it.hasNext()) {
|
||||||
@ -135,9 +145,37 @@ public class TextPieceTable
|
|||||||
return tp.isUnicode();
|
return tp.isUnicode();
|
||||||
}
|
}
|
||||||
// Otherwise keep track for the last one
|
// Otherwise keep track for the last one
|
||||||
if(tp.getStart() > lastAt) {
|
|
||||||
lastWas = tp.isUnicode();
|
lastWas = tp.isUnicode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If they ask off the end, just go with the last one...
|
||||||
|
return lastWas;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Is the text at the given byte offset
|
||||||
|
* unicode, or plain old ascii?
|
||||||
|
* In a very evil fashion, you have to actually
|
||||||
|
* know this to make sense of character and
|
||||||
|
* paragraph properties :(
|
||||||
|
* @param bytePos The byte offset to check about
|
||||||
|
*/
|
||||||
|
public boolean isUnicodeAtByteOffset(int bytePos) {
|
||||||
|
boolean lastWas = false;
|
||||||
|
int curByte = 0;
|
||||||
|
|
||||||
|
Iterator it = _textPieces.iterator();
|
||||||
|
while(it.hasNext()) {
|
||||||
|
TextPiece tp = (TextPiece)it.next();
|
||||||
|
int nextByte = curByte + tp.bytesLength();
|
||||||
|
|
||||||
|
// If the text piece covers the byte offset, all good
|
||||||
|
if(curByte <= bytePos && nextByte >= bytePos) {
|
||||||
|
return tp.isUnicode();
|
||||||
|
}
|
||||||
|
// Otherwise keep track for the last one
|
||||||
|
lastWas = tp.isUnicode();
|
||||||
|
// Move along
|
||||||
|
curByte = nextByte;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If they ask off the end, just go with the last one...
|
// If they ask off the end, just go with the last one...
|
||||||
|
@ -155,6 +155,8 @@ public class Range
|
|||||||
_characters = _doc.getCharacterTable().getTextRuns();
|
_characters = _doc.getCharacterTable().getTextRuns();
|
||||||
_text = _doc.getTextTable().getTextPieces();
|
_text = _doc.getTextTable().getTextPieces();
|
||||||
_parent = new WeakReference(null);
|
_parent = new WeakReference(null);
|
||||||
|
|
||||||
|
sanityCheckStartEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -175,6 +177,8 @@ public class Range
|
|||||||
_characters = parent._characters;
|
_characters = parent._characters;
|
||||||
_text = parent._text;
|
_text = parent._text;
|
||||||
_parent = new WeakReference(parent);
|
_parent = new WeakReference(parent);
|
||||||
|
|
||||||
|
sanityCheckStartEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -226,6 +230,22 @@ public class Range
|
|||||||
_textRangeFound = true;
|
_textRangeFound = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
sanityCheckStartEnd();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Ensures that the start and end we were given
|
||||||
|
* are actually valid, to avoid issues later on
|
||||||
|
* if they're not
|
||||||
|
*/
|
||||||
|
private void sanityCheckStartEnd() {
|
||||||
|
if(_start < 0) {
|
||||||
|
throw new IllegalArgumentException("Range start must not be negative. Given " + _start);
|
||||||
|
}
|
||||||
|
if(_end < _start) {
|
||||||
|
throw new IllegalArgumentException("The end (" + _end + ") must not be before the start ("+_start+")");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -537,13 +557,17 @@ public class Range
|
|||||||
for (int x = _parStart; x < numParagraphs; x++)
|
for (int x = _parStart; x < numParagraphs; x++)
|
||||||
{
|
{
|
||||||
PAPX papx = (PAPX)_paragraphs.get(x);
|
PAPX papx = (PAPX)_paragraphs.get(x);
|
||||||
|
//System.err.println("Paragraph " + x + " was " + papx.getStart() + " -> " + papx.getEnd());
|
||||||
papx.adjustForDelete(_start, _end - _start);
|
papx.adjustForDelete(_start, _end - _start);
|
||||||
|
//System.err.println("Paragraph " + x + " is now " + papx.getStart() + " -> " + papx.getEnd());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int x = _sectionStart; x < numSections; x++)
|
for (int x = _sectionStart; x < numSections; x++)
|
||||||
{
|
{
|
||||||
SEPX sepx = (SEPX)_sections.get(x);
|
SEPX sepx = (SEPX)_sections.get(x);
|
||||||
|
//System.err.println("Section " + x + " was " + sepx.getStart() + " -> " + sepx.getEnd());
|
||||||
sepx.adjustForDelete(_start, _end - _start);
|
sepx.adjustForDelete(_start, _end - _start);
|
||||||
|
//System.err.println("Section " + x + " is now " + sepx.getStart() + " -> " + sepx.getEnd());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int x = _textStart; x < numTextPieces; x++)
|
for (int x = _textStart; x < numTextPieces; x++)
|
||||||
@ -806,6 +830,10 @@ public class Range
|
|||||||
{
|
{
|
||||||
throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range");
|
throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range");
|
||||||
}
|
}
|
||||||
|
if (tableEnd < 0)
|
||||||
|
{
|
||||||
|
throw new ArrayIndexOutOfBoundsException("The table's end is negative, which isn't allowed!");
|
||||||
|
}
|
||||||
return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
|
return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -45,13 +45,15 @@ public class TestSectionTable
|
|||||||
byte[] tableStream = _hWPFDocFixture._tableStream;
|
byte[] tableStream = _hWPFDocFixture._tableStream;
|
||||||
int fcMin = fib.getFcMin();
|
int fcMin = fib.getFcMin();
|
||||||
|
|
||||||
|
CPSplitCalculator cps = new CPSplitCalculator(fib);
|
||||||
|
|
||||||
ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.getFcClx(), fcMin);
|
ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.getFcClx(), fcMin);
|
||||||
TextPieceTable tpt = cft.getTextPieceTable();
|
TextPieceTable tpt = cft.getTextPieceTable();
|
||||||
|
|
||||||
SectionTable sectionTable = new SectionTable(mainStream, tableStream,
|
SectionTable sectionTable = new SectionTable(mainStream, tableStream,
|
||||||
fib.getFcPlcfsed(),
|
fib.getFcPlcfsed(),
|
||||||
fib.getLcbPlcfsed(),
|
fib.getLcbPlcfsed(),
|
||||||
fcMin, tpt.getTextPieces());
|
fcMin, tpt, cps);
|
||||||
HWPFFileSystem fileSys = new HWPFFileSystem();
|
HWPFFileSystem fileSys = new HWPFFileSystem();
|
||||||
|
|
||||||
sectionTable.writeTo(fileSys, 0);
|
sectionTable.writeTo(fileSys, 0);
|
||||||
@ -61,7 +63,9 @@ public class TestSectionTable
|
|||||||
byte[] newTableStream = tableOut.toByteArray();
|
byte[] newTableStream = tableOut.toByteArray();
|
||||||
byte[] newMainStream = mainOut.toByteArray();
|
byte[] newMainStream = mainOut.toByteArray();
|
||||||
|
|
||||||
SectionTable newSectionTable = new SectionTable(newMainStream, newTableStream, 0, newTableStream.length, 0, tpt.getTextPieces());
|
SectionTable newSectionTable = new SectionTable(
|
||||||
|
newMainStream, newTableStream, 0,
|
||||||
|
newTableStream.length, 0, tpt, cps);
|
||||||
|
|
||||||
ArrayList oldSections = sectionTable.getSections();
|
ArrayList oldSections = sectionTable.getSections();
|
||||||
ArrayList newSections = newSectionTable.getSections();
|
ArrayList newSections = newSectionTable.getSections();
|
||||||
|
@ -81,9 +81,16 @@ public class TestProblems extends TestCase {
|
|||||||
HWPFDocument doc = new HWPFDocument(new FileInputStream(
|
HWPFDocument doc = new HWPFDocument(new FileInputStream(
|
||||||
new File(dirname, "Bug44292.doc")));
|
new File(dirname, "Bug44292.doc")));
|
||||||
Range r = doc.getRange();
|
Range r = doc.getRange();
|
||||||
|
assertEquals(6, r.numParagraphs());
|
||||||
|
assertEquals(0, r.getStartOffset());
|
||||||
|
assertEquals(87, r.getEndOffset());
|
||||||
|
|
||||||
//get the table
|
// Paragraph with table
|
||||||
Paragraph p = r.getParagraph(0);
|
Paragraph p = r.getParagraph(0);
|
||||||
|
assertEquals(0, p.getStartOffset());
|
||||||
|
assertEquals(20, p.getEndOffset());
|
||||||
|
|
||||||
|
// Get the table
|
||||||
Table t = r.getTable(p);
|
Table t = r.getTable(p);
|
||||||
|
|
||||||
//get the only row
|
//get the only row
|
||||||
|
@ -23,6 +23,7 @@ import java.io.FileInputStream;
|
|||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
import org.apache.poi.hwpf.model.PAPX;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test to see if Range.delete() works even if the Range contains a
|
* Test to see if Range.delete() works even if the Range contains a
|
||||||
@ -37,6 +38,8 @@ public class TestRangeDelete extends TestCase {
|
|||||||
"${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
|
"${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
|
||||||
private String originalText =
|
private String originalText =
|
||||||
"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
|
"It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r";
|
||||||
|
private String lastText =
|
||||||
|
"Thank you, ${organization} ${delete}!\r";
|
||||||
private String searchText = "${delete}";
|
private String searchText = "${delete}";
|
||||||
private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
|
private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r";
|
||||||
private String expectedText2 =
|
private String expectedText2 =
|
||||||
@ -69,32 +72,60 @@ public class TestRangeDelete extends TestCase {
|
|||||||
Range range;
|
Range range;
|
||||||
Section section;
|
Section section;
|
||||||
Paragraph para;
|
Paragraph para;
|
||||||
|
PAPX paraDef;
|
||||||
|
|
||||||
// First, check overall
|
// First, check overall
|
||||||
range = daDoc.getOverallRange();
|
range = daDoc.getOverallRange();
|
||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
assertEquals(4, range.numParagraphs());
|
assertEquals(5, range.numParagraphs());
|
||||||
|
|
||||||
|
|
||||||
// Now, onto just the doc bit
|
// Now, onto just the doc bit
|
||||||
range = daDoc.getRange();
|
range = daDoc.getRange();
|
||||||
|
|
||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
|
assertEquals(1, daDoc.getSectionTable().getSections().size());
|
||||||
section = range.getSection(0);
|
section = range.getSection(0);
|
||||||
|
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
para = section.getParagraph(0);
|
para = section.getParagraph(0);
|
||||||
assertEquals(1, para.numCharacterRuns());
|
assertEquals(1, para.numCharacterRuns());
|
||||||
assertEquals(introText, para.text());
|
assertEquals(introText, para.text());
|
||||||
|
|
||||||
para = section.getParagraph(1);
|
para = section.getParagraph(1);
|
||||||
assertEquals(2, para.numCharacterRuns());
|
assertEquals(5, para.numCharacterRuns());
|
||||||
assertEquals(fillerText, para.text());
|
assertEquals(fillerText, para.text());
|
||||||
|
|
||||||
|
|
||||||
|
paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(2);
|
||||||
|
assertEquals(132, paraDef.getStart());
|
||||||
|
assertEquals(400, paraDef.getEnd());
|
||||||
|
|
||||||
para = section.getParagraph(2);
|
para = section.getParagraph(2);
|
||||||
assertEquals(6, para.numCharacterRuns());
|
assertEquals(5, para.numCharacterRuns());
|
||||||
assertEquals(originalText, para.text());
|
assertEquals(originalText, para.text());
|
||||||
|
|
||||||
|
|
||||||
|
paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(3);
|
||||||
|
assertEquals(400, paraDef.getStart());
|
||||||
|
assertEquals(438, paraDef.getEnd());
|
||||||
|
|
||||||
|
para = section.getParagraph(3);
|
||||||
|
assertEquals(1, para.numCharacterRuns());
|
||||||
|
assertEquals(lastText, para.text());
|
||||||
|
|
||||||
|
|
||||||
|
// Check things match on text length
|
||||||
|
assertEquals(439, range.text().length());
|
||||||
|
assertEquals(439, section.text().length());
|
||||||
|
assertEquals(439,
|
||||||
|
section.getParagraph(0).text().length() +
|
||||||
|
section.getParagraph(1).text().length() +
|
||||||
|
section.getParagraph(2).text().length() +
|
||||||
|
section.getParagraph(3).text().length() +
|
||||||
|
section.getParagraph(4).text().length()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -108,7 +139,7 @@ public class TestRangeDelete extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
|
|
||||||
Section section = range.getSection(0);
|
Section section = range.getSection(0);
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
|
|
||||||
@ -131,7 +162,7 @@ public class TestRangeDelete extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
section = range.getSection(0);
|
section = range.getSection(0);
|
||||||
|
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
para = section.getParagraph(2);
|
para = section.getParagraph(2);
|
||||||
|
|
||||||
text = para.text();
|
text = para.text();
|
||||||
@ -154,7 +185,7 @@ public class TestRangeDelete extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
|
|
||||||
Section section = range.getSection(0);
|
Section section = range.getSection(0);
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
|
|
||||||
@ -188,7 +219,7 @@ public class TestRangeDelete extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
section = range.getSection(0);
|
section = range.getSection(0);
|
||||||
|
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
para = section.getParagraph(0);
|
para = section.getParagraph(0);
|
||||||
text = para.text();
|
text = para.text();
|
||||||
|
@ -71,14 +71,11 @@ public class TestRangeInsertion extends TestCase {
|
|||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
assertEquals(originalText, para.text());
|
assertEquals(originalText, para.text());
|
||||||
|
|
||||||
assertEquals(6, para.numCharacterRuns());
|
assertEquals(3, para.numCharacterRuns());
|
||||||
String text =
|
String text =
|
||||||
para.getCharacterRun(0).text() +
|
para.getCharacterRun(0).text() +
|
||||||
para.getCharacterRun(1).text() +
|
para.getCharacterRun(1).text() +
|
||||||
para.getCharacterRun(2).text() +
|
para.getCharacterRun(2).text()
|
||||||
para.getCharacterRun(3).text() +
|
|
||||||
para.getCharacterRun(4).text() +
|
|
||||||
para.getCharacterRun(5).text()
|
|
||||||
;
|
;
|
||||||
|
|
||||||
assertEquals(originalText, text);
|
assertEquals(originalText, text);
|
||||||
@ -116,14 +113,11 @@ public class TestRangeInsertion extends TestCase {
|
|||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
assertEquals((textToInsert + originalText), para.text());
|
assertEquals((textToInsert + originalText), para.text());
|
||||||
|
|
||||||
assertEquals(6, para.numCharacterRuns());
|
assertEquals(3, para.numCharacterRuns());
|
||||||
String text =
|
String text =
|
||||||
para.getCharacterRun(0).text() +
|
para.getCharacterRun(0).text() +
|
||||||
para.getCharacterRun(1).text() +
|
para.getCharacterRun(1).text() +
|
||||||
para.getCharacterRun(2).text() +
|
para.getCharacterRun(2).text()
|
||||||
para.getCharacterRun(3).text() +
|
|
||||||
para.getCharacterRun(4).text() +
|
|
||||||
para.getCharacterRun(5).text()
|
|
||||||
;
|
;
|
||||||
|
|
||||||
// System.out.println(text);
|
// System.out.println(text);
|
||||||
|
@ -87,6 +87,16 @@ public class TestRangeProperties extends TestCase {
|
|||||||
r.text()
|
r.text()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assertEquals(1, r.numSections());
|
||||||
|
assertEquals(1, a.getSectionTable().getSections().size());
|
||||||
|
Section s = r.getSection(0);
|
||||||
|
assertEquals(
|
||||||
|
a_page_1 +
|
||||||
|
page_break + "\r" +
|
||||||
|
a_page_2,
|
||||||
|
s.text()
|
||||||
|
);
|
||||||
|
|
||||||
assertEquals(
|
assertEquals(
|
||||||
7,
|
7,
|
||||||
r.numParagraphs()
|
r.numParagraphs()
|
||||||
@ -162,6 +172,20 @@ public class TestRangeProperties extends TestCase {
|
|||||||
408, r.text().length()
|
408, r.text().length()
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
assertEquals(1, r.numSections());
|
||||||
|
assertEquals(1, u.getSectionTable().getSections().size());
|
||||||
|
Section s = r.getSection(0);
|
||||||
|
assertEquals(
|
||||||
|
u_page_1 +
|
||||||
|
page_break + "\r" +
|
||||||
|
u_page_2,
|
||||||
|
s.text()
|
||||||
|
);
|
||||||
|
assertEquals(0, s.getStartOffset());
|
||||||
|
assertEquals(408, s.getEndOffset());
|
||||||
|
|
||||||
|
|
||||||
List pDefs = r._paragraphs;
|
List pDefs = r._paragraphs;
|
||||||
assertEquals(35, pDefs.size());
|
assertEquals(35, pDefs.size());
|
||||||
|
|
||||||
|
@ -66,21 +66,22 @@ public class TestRangeReplacement extends TestCase {
|
|||||||
HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
|
HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile));
|
||||||
|
|
||||||
Range range = daDoc.getRange();
|
Range range = daDoc.getRange();
|
||||||
|
assertEquals(414, range.text().length());
|
||||||
|
|
||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
Section section = range.getSection(0);
|
Section section = range.getSection(0);
|
||||||
|
assertEquals(414, section.text().length());
|
||||||
|
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
|
|
||||||
assertEquals(6, para.numCharacterRuns());
|
assertEquals(5, para.numCharacterRuns());
|
||||||
String text =
|
String text =
|
||||||
para.getCharacterRun(0).text() +
|
para.getCharacterRun(0).text() +
|
||||||
para.getCharacterRun(1).text() +
|
para.getCharacterRun(1).text() +
|
||||||
para.getCharacterRun(2).text() +
|
para.getCharacterRun(2).text() +
|
||||||
para.getCharacterRun(3).text() +
|
para.getCharacterRun(3).text() +
|
||||||
para.getCharacterRun(4).text() +
|
para.getCharacterRun(4).text()
|
||||||
para.getCharacterRun(5).text()
|
|
||||||
;
|
;
|
||||||
|
|
||||||
assertEquals(originalText, text);
|
assertEquals(originalText, text);
|
||||||
@ -97,7 +98,7 @@ public class TestRangeReplacement extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
|
|
||||||
Section section = range.getSection(0);
|
Section section = range.getSection(0);
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
|
|
||||||
@ -130,7 +131,7 @@ public class TestRangeReplacement extends TestCase {
|
|||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
|
|
||||||
Section section = range.getSection(0);
|
Section section = range.getSection(0);
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
Paragraph para = section.getParagraph(2);
|
Paragraph para = section.getParagraph(2);
|
||||||
|
|
||||||
@ -141,7 +142,7 @@ public class TestRangeReplacement extends TestCase {
|
|||||||
|
|
||||||
assertEquals(1, range.numSections());
|
assertEquals(1, range.numSections());
|
||||||
section = range.getSection(0);
|
section = range.getSection(0);
|
||||||
assertEquals(4, section.numParagraphs());
|
assertEquals(5, section.numParagraphs());
|
||||||
|
|
||||||
para = section.getParagraph(2);
|
para = section.getParagraph(2);
|
||||||
text = para.text();
|
text = para.text();
|
||||||
|
Loading…
Reference in New Issue
Block a user