diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 9c9a4b702..62b53e541 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF Include headers and footers int he extracted text from HWPF's WordExtractor Added support to HWPF for headers and footers Improve how HWPF deals with unicode internally. Should avoid some odd behaviour when manipulating unicode text diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 617031475..a020a892a 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF Include headers and footers int he extracted text from HWPF's WordExtractor Added support to HWPF for headers and footers Improve how HWPF deals with unicode internally. 
Should avoid some odd behaviour when manipulating unicode text diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 71b36a4f4..daf1c8e17 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -253,7 +253,7 @@ public class HWPFDocument extends POIDocument // read in the pictures stream _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspa, _dgg); - _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, getTextTable().getTextPieces()); + _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit); _ss = new StyleSheet(_tableStream, _fib.getFcStshf()); _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn()); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java index c1b592801..d22edb632 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/BytePropertyNode.java @@ -36,6 +36,7 @@ public abstract class BytePropertyNode extends PropertyNode { generateCp(fcEnd, isUnicode), buf ); + this.isUnicode = isUnicode; } private static int generateCp(int val, boolean isUnicode) { if(isUnicode) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java index 69e0a67d9..f46aee80a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPBinTable.java @@ -119,7 +119,7 @@ public class CHPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart); + boolean needsToBeUnicode = 
tpt.isUnicodeAtCharOffset(cpStart); CHPX insertChpx = new CHPX(0, 0, buf, needsToBeUnicode); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java index d5fb602b8..e6b302300 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CHPFormattedDiskPage.java @@ -61,7 +61,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage for (int x = 0; x < _crun; x++) { - boolean isUnicode = tpt.isUnicodeAt( getStart(x) ); + boolean isUnicode = tpt.isUnicodeAtByteOffset( getStart(x) ); _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), isUnicode)); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index cde563ec0..ed47b59c5 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -76,7 +76,7 @@ public class PAPBinTable public void insert(int listIndex, int cpStart, SprmBuffer buf) { - boolean needsToBeUnicode = tpt.isUnicodeAt(cpStart); + boolean needsToBeUnicode = tpt.isUnicodeAtCharOffset(cpStart); PAPX forInsert = new PAPX(0, 0, buf, _dataStream, needsToBeUnicode); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index fefcf442b..1a9a7bad5 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -67,7 +67,8 @@ public class PAPFormattedDiskPage extends FormattedDiskPage for (int x = 0; x < _crun; x++) { int startAt = getStart(x) - fcMin; int endAt = getEnd(x) - fcMin; - boolean isUnicode = tpt.isUnicodeAt(startAt); + boolean isUnicode = tpt.isUnicodeAtByteOffset(startAt); + 
//System.err.println(startAt + " -> " + endAt + " = " + isUnicode); _papxList.add(new PAPX(startAt, endAt, getGrpprl(x), getParagraphHeight(x), dataStream, isUnicode)); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java index f369c169d..5a1491796 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PropertyNode.java @@ -48,6 +48,11 @@ public abstract class PropertyNode implements Comparable, Cloneable _cpStart = fcStart; _cpEnd = fcEnd; _buf = buf; + + if(_cpStart < 0) { + System.err.println("A property claimed to start before zero, at " + _cpStart + "! Resetting it to zero, and hoping for the best"); + _cpStart = 0; + } } /** diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java index 92ec6cfbb..7987280ed 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SEPX.java @@ -25,17 +25,15 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor; import org.apache.poi.hwpf.usermodel.SectionProperties; /** - * TODO - figure out if this works in characters, like most - * things do, or in bytes as PAPX / CHPX does. 
*/ -public class SEPX extends PropertyNode +public class SEPX extends BytePropertyNode { SectionDescriptor _sed; - public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl) + public SEPX(SectionDescriptor sed, int start, int end, byte[] grpprl, boolean isUnicode) { - super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0)); + super(start, end, SectionSprmUncompressor.uncompressSEP(grpprl, 0), isUnicode); _sed = sed; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 7b9c23325..b88edbb95 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -34,6 +34,9 @@ public class SectionTable protected ArrayList _sections = new ArrayList(); protected List _text; + /** So we can know if things are unicode or not */ + private TextPieceTable tpt; + public SectionTable() { } @@ -41,10 +44,11 @@ public class SectionTable public SectionTable(byte[] documentStream, byte[] tableStream, int offset, int size, int fcMin, - List tpt) + TextPieceTable tpt, CPSplitCalculator cps) { PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE); - _text = tpt; + this.tpt = tpt; + this._text = tpt.getTextPieces(); int length = sedPlex.length(); @@ -54,11 +58,16 @@ public class SectionTable SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0); int fileOffset = sed.getFc(); + int startAt = CPtoFC(node.getStart()); + int endAt = CPtoFC(node.getEnd()); + + boolean isUnicodeAtStart = tpt.isUnicodeAtByteOffset( startAt ); +// System.err.println(startAt + " -> " + endAt + " = " + isUnicodeAtStart); // check for the optimization if (fileOffset == 0xffffffff) { - _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), new byte[0])); + _sections.add(new SEPX(sed, startAt, endAt, new byte[0], isUnicodeAtStart)); } else { @@ -67,9 +76,34 @@ public class 
SectionTable byte[] buf = new byte[sepxSize]; fileOffset += LittleEndian.SHORT_SIZE; System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); - _sections.add(new SEPX(sed, CPtoFC(node.getStart()), CPtoFC(node.getEnd()), buf)); + _sections.add(new SEPX(sed, startAt, endAt, buf, isUnicodeAtStart)); } } + + // Some files seem to lie about their unicode status, which + // is very very pesky. Try to work around these, but this + // is getting on for black magic... + int mainEndsAt = cps.getMainDocumentEnd(); + boolean matchAt = false; + boolean matchHalf = false; + for(int i=0; i<_sections.size(); i++) { + SEPX s = (SEPX)_sections.get(i); + if(s.getEnd() == mainEndsAt) { + matchAt = true; + } else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) { + matchHalf = true; + } + } + if(! matchAt && matchHalf) { + System.err.println("Your document seemed to be mostly unicode, but the section definition was in bytes! Trying anyway, but things may well go wrong!"); + for(int i=0; i<_sections.size(); i++) { + SEPX s = (SEPX)_sections.get(i); + GenericPropertyNode node = sedPlex.getProperty(i); + + s.setStart( CPtoFC(node.getStart()) ); + s.setEnd( CPtoFC(node.getEnd()) ); + } + } } public void adjustForInsert(int listIndex, int length) @@ -171,7 +205,7 @@ public class SectionTable // Line using Ryan's FCtoCP() conversion method - // unable to observe any effect on our testcases when using this code - piers - GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()), sed.toByteArray()); + GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStartBytes()), FCtoCP(sepx.getEndBytes()), sed.toByteArray()); plex.addProperty(property); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index 7e856f1ee..129603463 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ 
b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -25,6 +25,8 @@ import org.apache.poi.poifs.common.POIFSConstants; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Hashtable; import java.util.Iterator; import java.util.List; @@ -103,6 +105,15 @@ public class TextPieceTable // And now build the piece _textPieces.add(new TextPiece(nodeStartChars, nodeEndChars, buf, pieces[x], node.getStart())); } + + // In the interest of our sanity, now sort the text pieces + // into order, if they're not already + TextPiece[] tp = (TextPiece[]) + _textPieces.toArray(new TextPiece[_textPieces.size()]); + Arrays.sort(tp); + for(int i=0; i lastAt) { - lastWas = tp.isUnicode(); + lastWas = tp.isUnicode(); + } + + // If they ask off the end, just go with the last one... + return lastWas; + } + /** + * Is the text at the given byte offset + * unicode, or plain old ascii? + * In a very evil fashion, you have to actually + * know this to make sense of character and + * paragraph properties :( + * @param bytePos The byte offset to check about + */ + public boolean isUnicodeAtByteOffset(int bytePos) { + boolean lastWas = false; + int curByte = 0; + + Iterator it = _textPieces.iterator(); + while(it.hasNext()) { + TextPiece tp = (TextPiece)it.next(); + int nextByte = curByte + tp.bytesLength(); + + // If the text piece covers the byte offset, all good + if(curByte <= bytePos && nextByte >= bytePos) { + return tp.isUnicode(); } + // Otherwise keep track for the last one + lastWas = tp.isUnicode(); + // Move along + curByte = nextByte; } // If they ask off the end, just go with the last one... 
diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 6e230a5dd..0a145d5fd 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -155,6 +155,8 @@ public class Range _characters = _doc.getCharacterTable().getTextRuns(); _text = _doc.getTextTable().getTextPieces(); _parent = new WeakReference(null); + + sanityCheckStartEnd(); } @@ -175,6 +177,8 @@ public class Range _characters = parent._characters; _text = parent._text; _parent = new WeakReference(parent); + + sanityCheckStartEnd(); } /** @@ -226,6 +230,22 @@ public class Range _textRangeFound = true; break; } + + sanityCheckStartEnd(); + } + + /** + * Ensures that the start and end we were given + * are actually valid, to avoid issues later on + * if they're not + */ + private void sanityCheckStartEnd() { + if(_start < 0) { + throw new IllegalArgumentException("Range start must not be negative. 
Given " + _start); + } + if(_end < _start) { + throw new IllegalArgumentException("The end (" + _end + ") must not be before the start ("+_start+")"); + } } /** @@ -537,13 +557,17 @@ public class Range for (int x = _parStart; x < numParagraphs; x++) { PAPX papx = (PAPX)_paragraphs.get(x); + //System.err.println("Paragraph " + x + " was " + papx.getStart() + " -> " + papx.getEnd()); papx.adjustForDelete(_start, _end - _start); + //System.err.println("Paragraph " + x + " is now " + papx.getStart() + " -> " + papx.getEnd()); } for (int x = _sectionStart; x < numSections; x++) { SEPX sepx = (SEPX)_sections.get(x); + //System.err.println("Section " + x + " was " + sepx.getStart() + " -> " + sepx.getEnd()); sepx.adjustForDelete(_start, _end - _start); + //System.err.println("Section " + x + " is now " + sepx.getStart() + " -> " + sepx.getEnd()); } for (int x = _textStart; x < numTextPieces; x++) @@ -806,6 +830,10 @@ public class Range { throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range"); } + if (tableEnd < 0) + { + throw new ArrayIndexOutOfBoundsException("The table's end is negative, which isn't allowed!"); + } return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel()); } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java index 0912daaf7..2f5ad1ccb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestSectionTable.java @@ -45,13 +45,15 @@ public class TestSectionTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); + CPSplitCalculator cps = new CPSplitCalculator(fib); + ComplexFileTable cft = new ComplexFileTable(mainStream, tableStream, fib.getFcClx(), fcMin); TextPieceTable tpt = cft.getTextPieceTable(); SectionTable sectionTable = new SectionTable(mainStream, tableStream, 
fib.getFcPlcfsed(), fib.getLcbPlcfsed(), - fcMin, tpt.getTextPieces()); + fcMin, tpt, cps); HWPFFileSystem fileSys = new HWPFFileSystem(); sectionTable.writeTo(fileSys, 0); @@ -61,7 +63,9 @@ public class TestSectionTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - SectionTable newSectionTable = new SectionTable(newMainStream, newTableStream, 0, newTableStream.length, 0, tpt.getTextPieces()); + SectionTable newSectionTable = new SectionTable( + newMainStream, newTableStream, 0, + newTableStream.length, 0, tpt, cps); ArrayList oldSections = sectionTable.getSections(); ArrayList newSections = newSectionTable.getSections(); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index 764b3239d..db28cbd45 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -81,9 +81,16 @@ public class TestProblems extends TestCase { HWPFDocument doc = new HWPFDocument(new FileInputStream( new File(dirname, "Bug44292.doc"))); Range r = doc.getRange(); + assertEquals(6, r.numParagraphs()); + assertEquals(0, r.getStartOffset()); + assertEquals(87, r.getEndOffset()); - //get the table + // Paragraph with table Paragraph p = r.getParagraph(0); + assertEquals(0, p.getStartOffset()); + assertEquals(20, p.getEndOffset()); + + // Get the table Table t = r.getTable(p); //get the only row diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java index 4c7d7b92e..4adc5b9cf 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeDelete.java @@ -23,6 +23,7 @@ import java.io.FileInputStream; import junit.framework.TestCase; 
import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.model.PAPX; /** * Test to see if Range.delete() works even if the Range contains a @@ -37,6 +38,8 @@ public class TestRangeDelete extends TestCase { "${delete} This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r"; private String originalText = "It is used to confirm that text delete works even if Unicode characters (such as \u201c\u2014\u201d (U+2014), \u201c\u2e8e\u201d (U+2E8E), or \u201c\u2714\u201d (U+2714)) are present. Everybody should be thankful to the ${organization} ${delete} and all the POI contributors for their assistance in this matter.\r"; + private String lastText = + "Thank you, ${organization} ${delete}!\r"; private String searchText = "${delete}"; private String expectedText1 = " This is an MS-Word 97 formatted document created using NeoOffice v. 2.2.4 Patch 0 (OpenOffice.org v. 2.2.1).\r"; private String expectedText2 = @@ -69,32 +72,60 @@ public class TestRangeDelete extends TestCase { Range range; Section section; Paragraph para; + PAPX paraDef; // First, check overall range = daDoc.getOverallRange(); assertEquals(1, range.numSections()); - assertEquals(4, range.numParagraphs()); + assertEquals(5, range.numParagraphs()); // Now, onto just the doc bit range = daDoc.getRange(); assertEquals(1, range.numSections()); + assertEquals(1, daDoc.getSectionTable().getSections().size()); section = range.getSection(0); - - assertEquals(4, section.numParagraphs()); + + assertEquals(5, section.numParagraphs()); para = section.getParagraph(0); assertEquals(1, para.numCharacterRuns()); assertEquals(introText, para.text()); para = section.getParagraph(1); - assertEquals(2, para.numCharacterRuns()); + assertEquals(5, para.numCharacterRuns()); assertEquals(fillerText, para.text()); + + paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(2); + assertEquals(132, paraDef.getStart()); + assertEquals(400, paraDef.getEnd()); + para 
= section.getParagraph(2); - assertEquals(6, para.numCharacterRuns()); + assertEquals(5, para.numCharacterRuns()); assertEquals(originalText, para.text()); + + + paraDef = (PAPX)daDoc.getParagraphTable().getParagraphs().get(3); + assertEquals(400, paraDef.getStart()); + assertEquals(438, paraDef.getEnd()); + + para = section.getParagraph(3); + assertEquals(1, para.numCharacterRuns()); + assertEquals(lastText, para.text()); + + + // Check things match on text length + assertEquals(439, range.text().length()); + assertEquals(439, section.text().length()); + assertEquals(439, + section.getParagraph(0).text().length() + + section.getParagraph(1).text().length() + + section.getParagraph(2).text().length() + + section.getParagraph(3).text().length() + + section.getParagraph(4).text().length() + ); } /** @@ -108,7 +139,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -131,7 +162,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); para = section.getParagraph(2); text = para.text(); @@ -154,7 +185,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -188,7 +219,7 @@ public class TestRangeDelete extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); para = section.getParagraph(0); text = para.text(); diff --git 
a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java index 69be319cf..a520b953f 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeInsertion.java @@ -71,14 +71,11 @@ public class TestRangeInsertion extends TestCase { Paragraph para = section.getParagraph(2); assertEquals(originalText, para.text()); - assertEquals(6, para.numCharacterRuns()); + assertEquals(3, para.numCharacterRuns()); String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + - para.getCharacterRun(2).text() + - para.getCharacterRun(3).text() + - para.getCharacterRun(4).text() + - para.getCharacterRun(5).text() + para.getCharacterRun(2).text() ; assertEquals(originalText, text); @@ -116,14 +113,11 @@ public class TestRangeInsertion extends TestCase { Paragraph para = section.getParagraph(2); assertEquals((textToInsert + originalText), para.text()); - assertEquals(6, para.numCharacterRuns()); + assertEquals(3, para.numCharacterRuns()); String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + - para.getCharacterRun(2).text() + - para.getCharacterRun(3).text() + - para.getCharacterRun(4).text() + - para.getCharacterRun(5).text() + para.getCharacterRun(2).text() ; // System.out.println(text); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java index 59754fc21..1578ebdaf 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeProperties.java @@ -87,6 +87,16 @@ public class TestRangeProperties extends TestCase { r.text() ); + assertEquals(1, r.numSections()); + assertEquals(1, a.getSectionTable().getSections().size()); 
+ Section s = r.getSection(0); + assertEquals( + a_page_1 + + page_break + "\r" + + a_page_2, + s.text() + ); + assertEquals( 7, r.numParagraphs() @@ -161,6 +171,20 @@ public class TestRangeProperties extends TestCase { assertEquals( 408, r.text().length() ); + + + assertEquals(1, r.numSections()); + assertEquals(1, u.getSectionTable().getSections().size()); + Section s = r.getSection(0); + assertEquals( + u_page_1 + + page_break + "\r" + + u_page_2, + s.text() + ); + assertEquals(0, s.getStartOffset()); + assertEquals(408, s.getEndOffset()); + List pDefs = r._paragraphs; assertEquals(35, pDefs.size()); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java index 05dec843a..7c4766844 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestRangeReplacement.java @@ -66,21 +66,22 @@ public class TestRangeReplacement extends TestCase { HWPFDocument daDoc = new HWPFDocument(new FileInputStream(illustrativeDocFile)); Range range = daDoc.getRange(); + assertEquals(414, range.text().length()); assertEquals(1, range.numSections()); Section section = range.getSection(0); + assertEquals(414, section.text().length()); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); Paragraph para = section.getParagraph(2); - assertEquals(6, para.numCharacterRuns()); + assertEquals(5, para.numCharacterRuns()); String text = para.getCharacterRun(0).text() + para.getCharacterRun(1).text() + para.getCharacterRun(2).text() + para.getCharacterRun(3).text() + - para.getCharacterRun(4).text() + - para.getCharacterRun(5).text() + para.getCharacterRun(4).text() ; assertEquals(originalText, text); @@ -97,7 +98,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - 
assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -130,7 +131,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); Section section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); Paragraph para = section.getParagraph(2); @@ -141,7 +142,7 @@ public class TestRangeReplacement extends TestCase { assertEquals(1, range.numSections()); section = range.getSection(0); - assertEquals(4, section.numParagraphs()); + assertEquals(5, section.numParagraphs()); para = section.getParagraph(2); text = para.text();