Fix range detection for documents in which a section spans multiple document parts.
Bug46817.doc is an example of a document in which a single section contains all document parts - the main part as well as the additional part from the textbox. The previous implementation didn't correctly calculate the last paragraph for a Section (the intersection of the SEPX and the main document range). A test case is added. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d24bfcea93
commit
320a3ab737
@ -1001,22 +1001,32 @@ public class Range { // TODO -instantiable superclass
|
|||||||
node = rpl.get(x);
|
node = rpl.get(x);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node.getStart()>end) {
|
if ( node.getStart() > end )
|
||||||
return new int[] {0, 0};
|
{
|
||||||
|
return new int[] { 0, 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node.getEnd() <= start) {
|
if ( node.getEnd() <= start )
|
||||||
return new int[] { rpl.size(), rpl.size() };
|
{
|
||||||
}
|
return new int[] { rpl.size(), rpl.size() };
|
||||||
|
}
|
||||||
|
|
||||||
int y = x;
|
for ( int y = x; y < rpl.size(); y++ )
|
||||||
node = rpl.get(y);
|
{
|
||||||
while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
|
node = rpl.get( y );
|
||||||
y++;
|
if ( node == null )
|
||||||
node = rpl.get(y);
|
continue;
|
||||||
}
|
|
||||||
return new int[] { x, y + 1 };
|
if ( node.getStart() < end && node.getEnd() <= end )
|
||||||
}
|
continue;
|
||||||
|
|
||||||
|
if ( node.getStart() < end )
|
||||||
|
return new int[] { x, y +1 };
|
||||||
|
|
||||||
|
return new int[] { x, y };
|
||||||
|
}
|
||||||
|
return new int[] { x, rpl.size() };
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* resets the list indexes.
|
* resets the list indexes.
|
||||||
@ -1109,4 +1119,11 @@ public class Range { // TODO -instantiable superclass
|
|||||||
protected HWPFDocumentCore getDocument() {
|
protected HWPFDocumentCore getDocument() {
|
||||||
return _doc;
|
return _doc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "Range from " + getStartOffset() + " to " + getEndOffset()
|
||||||
|
+ " (chars)";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,12 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testAIOOBTap() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "AIOOB-Tap.doc" );
|
||||||
|
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||||
|
}
|
||||||
|
|
||||||
public void testBug33519() throws Exception
|
public void testBug33519() throws Exception
|
||||||
{
|
{
|
||||||
getHtmlText( "Bug33519.doc" );
|
getHtmlText( "Bug33519.doc" );
|
||||||
@ -73,6 +79,12 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
assertTrue( result
|
assertTrue( result
|
||||||
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
|
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBug46817() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "Bug46817.doc" );
|
||||||
|
assertTrue( result.contains( "<table>" ) );
|
||||||
|
}
|
||||||
|
|
||||||
public void testEquation() throws Exception
|
public void testEquation() throws Exception
|
||||||
{
|
{
|
||||||
@ -82,13 +94,6 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
.contains( "<!--Image link to '0.emf' can be here-->" ) );
|
.contains( "<!--Image link to '0.emf' can be here-->" ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testAIOOBTap() throws Exception
|
|
||||||
{
|
|
||||||
String result = getHtmlText( "AIOOB-Tap.doc" );
|
|
||||||
|
|
||||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testHyperlink() throws Exception
|
public void testHyperlink() throws Exception
|
||||||
{
|
{
|
||||||
String result = getHtmlText( "hyperlink.doc" );
|
String result = getHtmlText( "hyperlink.doc" );
|
||||||
@ -102,6 +107,13 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
getHtmlText( "innertable.doc" );
|
getHtmlText( "innertable.doc" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMBD001D0B89() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "MBD001D0B89.doc" );
|
||||||
|
|
||||||
|
assertTrue( result.contains( "<table>" ) );
|
||||||
|
}
|
||||||
|
|
||||||
public void testPageref() throws Exception
|
public void testPageref() throws Exception
|
||||||
{
|
{
|
||||||
String result = getHtmlText( "pageref.doc" );
|
String result = getHtmlText( "pageref.doc" );
|
||||||
|
@ -17,38 +17,76 @@
|
|||||||
|
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.model.SEPX;
|
||||||
|
|
||||||
|
import org.apache.poi.POIDataSamples;
|
||||||
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests for Range which aren't around deletion, insertion,
|
* Tests for Range which aren't around deletion, insertion, text replacement or
|
||||||
* text replacement or textual contents
|
* textual contents
|
||||||
*/
|
*/
|
||||||
public final class TestRange extends TestCase {
|
public final class TestRange extends TestCase
|
||||||
public void testFieldStripping() {
|
{
|
||||||
String exp = "This is some text.";
|
public void testFieldStripping()
|
||||||
|
{
|
||||||
|
String exp = "This is some text.";
|
||||||
|
|
||||||
String single = "This is some \u0013Blah!\u0015text.";
|
String single = "This is some \u0013Blah!\u0015text.";
|
||||||
String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
|
String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
|
||||||
String withNested =
|
String withNested = "This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
|
||||||
"This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
|
String withNested14 = "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
|
||||||
String withNested14 =
|
String withNestedIn14 = "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
|
||||||
"This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
|
|
||||||
String withNestedIn14 =
|
|
||||||
"This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
|
|
||||||
|
|
||||||
// Check all comes out right
|
// Check all comes out right
|
||||||
assertEquals(exp, Range.stripFields(exp));
|
assertEquals( exp, Range.stripFields( exp ) );
|
||||||
assertEquals(exp, Range.stripFields(single));
|
assertEquals( exp, Range.stripFields( single ) );
|
||||||
assertEquals(exp, Range.stripFields(with14));
|
assertEquals( exp, Range.stripFields( with14 ) );
|
||||||
assertEquals(exp, Range.stripFields(withNested));
|
assertEquals( exp, Range.stripFields( withNested ) );
|
||||||
assertEquals(exp, Range.stripFields(withNested14));
|
assertEquals( exp, Range.stripFields( withNested14 ) );
|
||||||
assertEquals(exp, Range.stripFields(withNestedIn14));
|
assertEquals( exp, Range.stripFields( withNestedIn14 ) );
|
||||||
|
|
||||||
// Ones that are odd and we won't change
|
// Ones that are odd and we won't change
|
||||||
String odd1 = "This\u0015 is \u0013 odd";
|
String odd1 = "This\u0015 is \u0013 odd";
|
||||||
String odd2 = "This\u0015 is \u0014 also \u0013 odd";
|
String odd2 = "This\u0015 is \u0014 also \u0013 odd";
|
||||||
|
|
||||||
assertEquals(odd1, Range.stripFields(odd1));
|
assertEquals( odd1, Range.stripFields( odd1 ) );
|
||||||
assertEquals(odd2, Range.stripFields(odd2));
|
assertEquals( odd2, Range.stripFields( odd2 ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBug46817() throws Exception
|
||||||
|
{
|
||||||
|
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
|
||||||
|
.getDocumentInstance().openResourceAsStream( "Bug46817.doc" ) );
|
||||||
|
|
||||||
|
final ArrayList<SEPX> sections = hwpfDocument.getSectionTable()
|
||||||
|
.getSections();
|
||||||
|
assertEquals( sections.size(), 1 );
|
||||||
|
|
||||||
|
// whole document, including additional text from shape
|
||||||
|
SEPX sepx = sections.get( 0 );
|
||||||
|
assertEquals( sepx.getStartBytes(), 1024 );
|
||||||
|
assertEquals( sepx.getEndBytes(), 3880 );
|
||||||
|
assertEquals( sepx.getStart(), 0 );
|
||||||
|
assertEquals( sepx.getEnd(), 1428 );
|
||||||
|
|
||||||
|
// only main range
|
||||||
|
Range range = hwpfDocument.getRange();
|
||||||
|
assertEquals( range.getStartOffset(), 0 );
|
||||||
|
assertEquals( range.getEndOffset(), 766 );
|
||||||
|
|
||||||
|
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() );
|
||||||
|
assertTrue( lastInMainRange.getEndOffset() <= 766 );
|
||||||
|
|
||||||
|
Section section = range.getSection( 0 );
|
||||||
|
assertTrue( section.getEndOffset() <= 766 );
|
||||||
|
|
||||||
|
Paragraph lastInMainSection = section.getParagraph( section
|
||||||
|
.numParagraphs() );
|
||||||
|
assertTrue( lastInMainSection.getEndOffset() <= 766 );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user