Fix range detection for documents whose section spans multiple document parts.

Bug46817.doc is an example of a document where a single section contains all document parts - including the main part and the additional text from a textbox. The previous implementation didn't correctly calculate the last paragraph for a Section (the intersection of the SEPX and the main document range). A test case is added.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-05 12:02:02 +00:00
parent d24bfcea93
commit 320a3ab737
3 changed files with 113 additions and 46 deletions

View File

@ -1001,22 +1001,32 @@ public class Range { // TODO -instantiable superclass
node = rpl.get(x);
}
if (node.getStart()>end) {
return new int[] {0, 0};
if ( node.getStart() > end )
{
return new int[] { 0, 0 };
}
if (node.getEnd() <= start) {
return new int[] { rpl.size(), rpl.size() };
}
if ( node.getEnd() <= start )
{
return new int[] { rpl.size(), rpl.size() };
}
int y = x;
node = rpl.get(y);
while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) {
y++;
node = rpl.get(y);
}
return new int[] { x, y + 1 };
}
for ( int y = x; y < rpl.size(); y++ )
{
node = rpl.get( y );
if ( node == null )
continue;
if ( node.getStart() < end && node.getEnd() <= end )
continue;
if ( node.getStart() < end )
return new int[] { x, y +1 };
return new int[] { x, y };
}
return new int[] { x, rpl.size() };
}
/**
* resets the list indexes.
@ -1109,4 +1119,11 @@ public class Range { // TODO -instantiable superclass
protected HWPFDocumentCore getDocument() {
return _doc;
}
/**
 * Builds a short description of this range from the values returned by
 * {@link #getStartOffset()} and {@link #getEndOffset()}.
 *
 * @return text of the form {@code Range from START to END (chars)}
 */
@Override
public String toString()
{
    final StringBuilder description = new StringBuilder( "Range from " );
    description.append( getStartOffset() );
    description.append( " to " );
    description.append( getEndOffset() );
    description.append( " (chars)" );
    return description.toString();
}
}

View File

@ -62,6 +62,12 @@ public class TestWordToHtmlConverter extends TestCase
return result;
}
/**
 * Converts AIOOB-Tap.doc to HTML and checks that a table element appears
 * within the first 2000 characters of the output.
 */
public void testAIOOBTap() throws Exception
{
    String result = getHtmlText( "AIOOB-Tap.doc" );
    // Clamp the prefix length: substring( 0, 2000 ) throws
    // StringIndexOutOfBoundsException when the output is shorter than
    // 2000 characters, which would hide the real assertion result.
    int prefixLength = Math.min( result.length(), 2000 );
    assertTrue( result.substring( 0, prefixLength ).contains( "<table>" ) );
}
public void testBug33519() throws Exception
{
getHtmlText( "Bug33519.doc" );
@ -74,6 +80,12 @@ public class TestWordToHtmlConverter extends TestCase
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
}
/**
 * Converts Bug46817.doc to HTML and checks that the output contains a
 * table element.
 */
public void testBug46817() throws Exception
{
    final String html = getHtmlText( "Bug46817.doc" );
    final boolean hasTable = html.contains( "<table>" );
    assertTrue( hasTable );
}
public void testEquation() throws Exception
{
String result = getHtmlText( "equation.doc" );
@ -82,13 +94,6 @@ public class TestWordToHtmlConverter extends TestCase
.contains( "<!--Image link to '0.emf' can be here-->" ) );
}
/**
 * Converts AIOOB-Tap.doc to HTML and checks that a table element appears
 * within the first 2000 characters of the output.
 */
public void testAIOOBTap() throws Exception
{
    String result = getHtmlText( "AIOOB-Tap.doc" );
    // Clamp the prefix length: substring( 0, 2000 ) throws
    // StringIndexOutOfBoundsException when the output is shorter than
    // 2000 characters, which would hide the real assertion result.
    int prefixLength = Math.min( result.length(), 2000 );
    assertTrue( result.substring( 0, prefixLength ).contains( "<table>" ) );
}
public void testHyperlink() throws Exception
{
String result = getHtmlText( "hyperlink.doc" );
@ -102,6 +107,13 @@ public class TestWordToHtmlConverter extends TestCase
getHtmlText( "innertable.doc" );
}
/**
 * Converts MBD001D0B89.doc to HTML and checks that the output contains a
 * table element.
 */
public void testMBD001D0B89() throws Exception
{
    final String html = getHtmlText( "MBD001D0B89.doc" );
    final boolean hasTable = html.contains( "<table>" );
    assertTrue( hasTable );
}
public void testPageref() throws Exception
{
String result = getHtmlText( "pageref.doc" );

View File

@ -17,38 +17,76 @@
package org.apache.poi.hwpf.usermodel;
import java.util.ArrayList;
import org.apache.poi.hwpf.model.SEPX;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import junit.framework.TestCase;
/**
* Tests for Range which aren't around deletion, insertion,
* text replacement or textual contents
* Tests for Range which aren't around deletion, insertion, text replacement or
* textual contents
*/
public final class TestRange extends TestCase {
public void testFieldStripping() {
String exp = "This is some text.";
public final class TestRange extends TestCase
{
public void testFieldStripping()
{
String exp = "This is some text.";
String single = "This is some \u0013Blah!\u0015text.";
String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
String withNested =
"This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
String withNested14 =
"This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
String withNestedIn14 =
"This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
String single = "This is some \u0013Blah!\u0015text.";
String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
String withNested = "This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
String withNested14 = "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
String withNestedIn14 = "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
// Check all comes out right
assertEquals(exp, Range.stripFields(exp));
assertEquals(exp, Range.stripFields(single));
assertEquals(exp, Range.stripFields(with14));
assertEquals(exp, Range.stripFields(withNested));
assertEquals(exp, Range.stripFields(withNested14));
assertEquals(exp, Range.stripFields(withNestedIn14));
// Check all comes out right
assertEquals( exp, Range.stripFields( exp ) );
assertEquals( exp, Range.stripFields( single ) );
assertEquals( exp, Range.stripFields( with14 ) );
assertEquals( exp, Range.stripFields( withNested ) );
assertEquals( exp, Range.stripFields( withNested14 ) );
assertEquals( exp, Range.stripFields( withNestedIn14 ) );
// Ones that are odd and we won't change
String odd1 = "This\u0015 is \u0013 odd";
String odd2 = "This\u0015 is \u0014 also \u0013 odd";
// Ones that are odd and we won't change
String odd1 = "This\u0015 is \u0013 odd";
String odd2 = "This\u0015 is \u0014 also \u0013 odd";
assertEquals(odd1, Range.stripFields(odd1));
assertEquals(odd2, Range.stripFields(odd2));
}
assertEquals( odd1, Range.stripFields( odd1 ) );
assertEquals( odd2, Range.stripFields( odd2 ) );
}
/**
 * Checks range detection for Bug46817.doc, whose single section covers
 * more text (0..1428) than the main document range (0..766): the section
 * and the last paragraphs reachable through the main range must all end
 * at or before offset 766.
 */
public void testBug46817() throws Exception
{
    HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
            .getDocumentInstance().openResourceAsStream( "Bug46817.doc" ) );

    final ArrayList<SEPX> sections = hwpfDocument.getSectionTable()
            .getSections();
    // JUnit convention: expected value first, actual value second, so
    // that failure messages report the values the right way round.
    assertEquals( 1, sections.size() );

    // whole document, including additional text from shape
    SEPX sepx = sections.get( 0 );
    assertEquals( 1024, sepx.getStartBytes() );
    assertEquals( 3880, sepx.getEndBytes() );
    assertEquals( 0, sepx.getStart() );
    assertEquals( 1428, sepx.getEnd() );

    // only main range
    Range range = hwpfDocument.getRange();
    assertEquals( 0, range.getStartOffset() );
    assertEquals( 766, range.getEndOffset() );

    // The last paragraph sits at index numParagraphs() - 1; using
    // numParagraphs() itself addresses the paragraph after the range.
    // NOTE(review): assumes getParagraph takes a zero-based index, as
    // getSection( 0 ) below suggests - confirm against Range.
    Paragraph lastInMainRange = range
            .getParagraph( range.numParagraphs() - 1 );
    assertTrue( lastInMainRange.getEndOffset() <= 766 );

    Section section = range.getSection( 0 );
    assertTrue( section.getEndOffset() <= 766 );

    Paragraph lastInMainSection = section.getParagraph( section
            .numParagraphs() - 1 );
    assertTrue( lastInMainSection.getEndOffset() <= 766 );
}
}