Fix range detection for documents whose section spans several document parts.
Bug46817.doc is an example of a document where the section contains all document parts - including the main part and the additional part from a textbox. The previous implementation didn't correctly calculate the last paragraph for a Section (the intersection of the SEPX and the main document range). A test case is added. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d24bfcea93
commit
320a3ab737
@ -1001,21 +1001,31 @@ public class Range { // TODO -instantiable superclass
|
||||
node = rpl.get(x);
|
||||
}
|
||||
|
||||
if (node.getStart()>end) {
|
||||
return new int[] {0, 0};
|
||||
if ( node.getStart() > end )
|
||||
{
|
||||
return new int[] { 0, 0 };
|
||||
}
|
||||
|
||||
if (node.getEnd() <= start) {
|
||||
if ( node.getEnd() <= start )
|
||||
{
|
||||
return new int[] { rpl.size(), rpl.size() };
|
||||
}
|
||||
|
||||
int y = x;
|
||||
node = rpl.get(y);
|
||||
while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
|
||||
y++;
|
||||
node = rpl.get(y);
|
||||
for ( int y = x; y < rpl.size(); y++ )
|
||||
{
|
||||
node = rpl.get( y );
|
||||
if ( node == null )
|
||||
continue;
|
||||
|
||||
if ( node.getStart() < end && node.getEnd() <= end )
|
||||
continue;
|
||||
|
||||
if ( node.getStart() < end )
|
||||
return new int[] { x, y +1 };
|
||||
|
||||
return new int[] { x, y };
|
||||
}
|
||||
return new int[] { x, y + 1 };
|
||||
return new int[] { x, rpl.size() };
|
||||
}
|
||||
|
||||
/**
|
||||
@ -1109,4 +1119,11 @@ public class Range { // TODO -instantiable superclass
|
||||
protected HWPFDocumentCore getDocument() {
|
||||
return _doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return "Range from " + getStartOffset() + " to " + getEndOffset()
|
||||
+ " (chars)";
|
||||
}
|
||||
}
|
||||
|
@ -62,6 +62,12 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
return result;
|
||||
}
|
||||
|
||||
public void testAIOOBTap() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "AIOOB-Tap.doc" );
|
||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testBug33519() throws Exception
|
||||
{
|
||||
getHtmlText( "Bug33519.doc" );
|
||||
@ -74,6 +80,12 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
|
||||
}
|
||||
|
||||
public void testBug46817() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug46817.doc" );
|
||||
assertTrue( result.contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testEquation() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "equation.doc" );
|
||||
@ -82,13 +94,6 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
.contains( "<!--Image link to '0.emf' can be here-->" ) );
|
||||
}
|
||||
|
||||
public void testAIOOBTap() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "AIOOB-Tap.doc" );
|
||||
|
||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testHyperlink() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "hyperlink.doc" );
|
||||
@ -102,6 +107,13 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
getHtmlText( "innertable.doc" );
|
||||
}
|
||||
|
||||
public void testMBD001D0B89() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "MBD001D0B89.doc" );
|
||||
|
||||
assertTrue( result.contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testPageref() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "pageref.doc" );
|
||||
|
@ -17,38 +17,76 @@
|
||||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.poi.hwpf.model.SEPX;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Tests for Range which aren't around deletion, insertion,
|
||||
* text replacement or textual contents
|
||||
* Tests for Range which aren't around deletion, insertion, text replacement or
|
||||
* textual contents
|
||||
*/
|
||||
public final class TestRange extends TestCase {
|
||||
public void testFieldStripping() {
|
||||
public final class TestRange extends TestCase
|
||||
{
|
||||
public void testFieldStripping()
|
||||
{
|
||||
String exp = "This is some text.";
|
||||
|
||||
String single = "This is some \u0013Blah!\u0015text.";
|
||||
String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
|
||||
String withNested =
|
||||
"This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
|
||||
String withNested14 =
|
||||
"This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
|
||||
String withNestedIn14 =
|
||||
"This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
|
||||
String withNested = "This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
|
||||
String withNested14 = "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
|
||||
String withNestedIn14 = "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
|
||||
|
||||
// Check all comes out right
|
||||
assertEquals(exp, Range.stripFields(exp));
|
||||
assertEquals(exp, Range.stripFields(single));
|
||||
assertEquals(exp, Range.stripFields(with14));
|
||||
assertEquals(exp, Range.stripFields(withNested));
|
||||
assertEquals(exp, Range.stripFields(withNested14));
|
||||
assertEquals(exp, Range.stripFields(withNestedIn14));
|
||||
assertEquals( exp, Range.stripFields( exp ) );
|
||||
assertEquals( exp, Range.stripFields( single ) );
|
||||
assertEquals( exp, Range.stripFields( with14 ) );
|
||||
assertEquals( exp, Range.stripFields( withNested ) );
|
||||
assertEquals( exp, Range.stripFields( withNested14 ) );
|
||||
assertEquals( exp, Range.stripFields( withNestedIn14 ) );
|
||||
|
||||
// Ones that are odd and we won't change
|
||||
String odd1 = "This\u0015 is \u0013 odd";
|
||||
String odd2 = "This\u0015 is \u0014 also \u0013 odd";
|
||||
|
||||
assertEquals(odd1, Range.stripFields(odd1));
|
||||
assertEquals(odd2, Range.stripFields(odd2));
|
||||
assertEquals( odd1, Range.stripFields( odd1 ) );
|
||||
assertEquals( odd2, Range.stripFields( odd2 ) );
|
||||
}
|
||||
|
||||
public void testBug46817() throws Exception
|
||||
{
|
||||
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
|
||||
.getDocumentInstance().openResourceAsStream( "Bug46817.doc" ) );
|
||||
|
||||
final ArrayList<SEPX> sections = hwpfDocument.getSectionTable()
|
||||
.getSections();
|
||||
assertEquals( sections.size(), 1 );
|
||||
|
||||
// whole document, including additional text from shape
|
||||
SEPX sepx = sections.get( 0 );
|
||||
assertEquals( sepx.getStartBytes(), 1024 );
|
||||
assertEquals( sepx.getEndBytes(), 3880 );
|
||||
assertEquals( sepx.getStart(), 0 );
|
||||
assertEquals( sepx.getEnd(), 1428 );
|
||||
|
||||
// only main range
|
||||
Range range = hwpfDocument.getRange();
|
||||
assertEquals( range.getStartOffset(), 0 );
|
||||
assertEquals( range.getEndOffset(), 766 );
|
||||
|
||||
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() );
|
||||
assertTrue( lastInMainRange.getEndOffset() <= 766 );
|
||||
|
||||
Section section = range.getSection( 0 );
|
||||
assertTrue( section.getEndOffset() <= 766 );
|
||||
|
||||
Paragraph lastInMainSection = section.getParagraph( section
|
||||
.numParagraphs() );
|
||||
assertTrue( lastInMainSection.getEndOffset() <= 766 );
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user