Fix range detection for documents in which a section spans several document parts.

Bug46817.doc is an example of a document in which a single section covers all document parts - the main part as well as the additional text from a textbox. The previous implementation did not correctly calculate the last paragraph of a Section (the intersection of the SEPX and the main document range). A test case is added.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143014 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-05 12:02:02 +00:00
parent d24bfcea93
commit 320a3ab737
3 changed files with 113 additions and 46 deletions

View File

@ -1001,21 +1001,31 @@ public class Range { // TODO -instantiable superclass
node = rpl.get(x); node = rpl.get(x);
} }
if (node.getStart()>end) { if ( node.getStart() > end )
return new int[] {0, 0}; {
return new int[] { 0, 0 };
} }
if (node.getEnd() <= start) { if ( node.getEnd() <= start )
{
return new int[] { rpl.size(), rpl.size() }; return new int[] { rpl.size(), rpl.size() };
} }
int y = x; for ( int y = x; y < rpl.size(); y++ )
node = rpl.get(y); {
while (node==null || (node.getEnd() < end && y < rpl.size() - 1)) { node = rpl.get( y );
y++; if ( node == null )
node = rpl.get(y); continue;
if ( node.getStart() < end && node.getEnd() <= end )
continue;
if ( node.getStart() < end )
return new int[] { x, y +1 };
return new int[] { x, y };
} }
return new int[] { x, y + 1 }; return new int[] { x, rpl.size() };
} }
/** /**
@ -1109,4 +1119,11 @@ public class Range { // TODO -instantiable superclass
protected HWPFDocumentCore getDocument() { protected HWPFDocumentCore getDocument() {
return _doc; return _doc;
} }
/**
 * Describes this range by its start and end offsets.
 *
 * @return a string of the form {@code Range from <start> to <end> (chars)},
 *         using the values of getStartOffset() and getEndOffset()
 */
@Override
public String toString()
{
    // Build the two halves separately; the offsets are read in the same
    // order (start first, then end) as a single concatenation would.
    final String fromPart = "Range from " + getStartOffset();
    final String toPart = " to " + getEndOffset();
    return fromPart + toPart + " (chars)";
}
} }

View File

@ -62,6 +62,12 @@ public class TestWordToHtmlConverter extends TestCase
return result; return result;
} }
/**
 * Checks that the text returned by getHtmlText for "AIOOB-Tap.doc" has a
 * {@code <table>} tag within its first 2000 characters. The substring call
 * throws if the text is shorter than 2000 characters.
 */
public void testAIOOBTap() throws Exception
{
    final String html = getHtmlText( "AIOOB-Tap.doc" );
    final String head = html.substring( 0, 2000 );
    assertTrue( head.contains( "<table>" ) );
}
public void testBug33519() throws Exception public void testBug33519() throws Exception
{ {
getHtmlText( "Bug33519.doc" ); getHtmlText( "Bug33519.doc" );
@ -74,6 +80,12 @@ public class TestWordToHtmlConverter extends TestCase
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) ); .contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
} }
/**
 * Checks that the text returned by getHtmlText for "Bug46817.doc" contains
 * a {@code <table>} tag.
 */
public void testBug46817() throws Exception
{
    final String html = getHtmlText( "Bug46817.doc" );
    final boolean hasTable = html.contains( "<table>" );
    assertTrue( hasTable );
}
public void testEquation() throws Exception public void testEquation() throws Exception
{ {
String result = getHtmlText( "equation.doc" ); String result = getHtmlText( "equation.doc" );
@ -82,13 +94,6 @@ public class TestWordToHtmlConverter extends TestCase
.contains( "<!--Image link to '0.emf' can be here-->" ) ); .contains( "<!--Image link to '0.emf' can be here-->" ) );
} }
/**
 * Checks that the text returned by getHtmlText for "AIOOB-Tap.doc" has a
 * {@code <table>} tag within its first 2000 characters. The substring call
 * throws if the text is shorter than 2000 characters.
 */
public void testAIOOBTap() throws Exception
{
    final String html = getHtmlText( "AIOOB-Tap.doc" );
    final String head = html.substring( 0, 2000 );
    assertTrue( head.contains( "<table>" ) );
}
public void testHyperlink() throws Exception public void testHyperlink() throws Exception
{ {
String result = getHtmlText( "hyperlink.doc" ); String result = getHtmlText( "hyperlink.doc" );
@ -102,6 +107,13 @@ public class TestWordToHtmlConverter extends TestCase
getHtmlText( "innertable.doc" ); getHtmlText( "innertable.doc" );
} }
/**
 * Checks that the text returned by getHtmlText for "MBD001D0B89.doc"
 * contains a {@code <table>} tag.
 */
public void testMBD001D0B89() throws Exception
{
    final String html = getHtmlText( "MBD001D0B89.doc" );
    final boolean hasTable = html.contains( "<table>" );
    assertTrue( hasTable );
}
public void testPageref() throws Exception public void testPageref() throws Exception
{ {
String result = getHtmlText( "pageref.doc" ); String result = getHtmlText( "pageref.doc" );

View File

@ -17,38 +17,76 @@
package org.apache.poi.hwpf.usermodel; package org.apache.poi.hwpf.usermodel;
import java.util.ArrayList;
import org.apache.poi.hwpf.model.SEPX;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import junit.framework.TestCase; import junit.framework.TestCase;
/** /**
* Tests for Range which aren't around deletion, insertion, * Tests for Range which aren't around deletion, insertion, text replacement or
* text replacement or textual contents * textual contents
*/ */
public final class TestRange extends TestCase { public final class TestRange extends TestCase
public void testFieldStripping() { {
public void testFieldStripping()
{
String exp = "This is some text."; String exp = "This is some text.";
String single = "This is some \u0013Blah!\u0015text."; String single = "This is some \u0013Blah!\u0015text.";
String with14 = "This is \u0013Blah!\u0014some\u0015 text."; String with14 = "This is \u0013Blah!\u0014some\u0015 text.";
String withNested = String withNested = "This is \u0013Blah!\u0013Blah!\u0015\u0015some text.";
"This is \u0013Blah!\u0013Blah!\u0015\u0015some text."; String withNested14 = "This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
String withNested14 = String withNestedIn14 = "This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
"This is \u0013Blah!\u0013Blah!\u0014don't see me\u0015 blah!\u0015some text.";
String withNestedIn14 =
"This is \u0013Blah!\u0014some\u0013Blah!\u0015 \u0015text.";
// Check all comes out right // Check all comes out right
assertEquals(exp, Range.stripFields(exp)); assertEquals( exp, Range.stripFields( exp ) );
assertEquals(exp, Range.stripFields(single)); assertEquals( exp, Range.stripFields( single ) );
assertEquals(exp, Range.stripFields(with14)); assertEquals( exp, Range.stripFields( with14 ) );
assertEquals(exp, Range.stripFields(withNested)); assertEquals( exp, Range.stripFields( withNested ) );
assertEquals(exp, Range.stripFields(withNested14)); assertEquals( exp, Range.stripFields( withNested14 ) );
assertEquals(exp, Range.stripFields(withNestedIn14)); assertEquals( exp, Range.stripFields( withNestedIn14 ) );
// Ones that are odd and we won't change // Ones that are odd and we won't change
String odd1 = "This\u0015 is \u0013 odd"; String odd1 = "This\u0015 is \u0013 odd";
String odd2 = "This\u0015 is \u0014 also \u0013 odd"; String odd2 = "This\u0015 is \u0014 also \u0013 odd";
assertEquals(odd1, Range.stripFields(odd1)); assertEquals( odd1, Range.stripFields( odd1 ) );
assertEquals(odd2, Range.stripFields(odd2)); assertEquals( odd2, Range.stripFields( odd2 ) );
}
/**
 * Regression test using the sample "Bug46817.doc". It checks that the
 * section table holds exactly one SEPX spanning characters 0..1428, that
 * the range returned by getRange() spans 0..766, and that the section and
 * the paragraphs fetched through that range do not end past offset 766.
 */
public void testBug46817() throws Exception
{
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
.getDocumentInstance().openResourceAsStream( "Bug46817.doc" ) );
final ArrayList<SEPX> sections = hwpfDocument.getSectionTable()
.getSections();
// NOTE(review): the assertEquals calls in this method pass the actual value
// first and the literal second; JUnit's convention is (expected, actual),
// so a failure message would label the two values the wrong way round.
assertEquals( sections.size(), 1 );
// whole document, including additional text from shape
SEPX sepx = sections.get( 0 );
assertEquals( sepx.getStartBytes(), 1024 );
assertEquals( sepx.getEndBytes(), 3880 );
assertEquals( sepx.getStart(), 0 );
assertEquals( sepx.getEnd(), 1428 );
// only main range
Range range = hwpfDocument.getRange();
assertEquals( range.getStartOffset(), 0 );
assertEquals( range.getEndOffset(), 766 );
// NOTE(review): the paragraph count is passed as the index here; if
// getParagraph is zero-based this addresses one past the last paragraph -
// confirm whether numParagraphs() - 1 was intended.
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() );
assertTrue( lastInMainRange.getEndOffset() <= 766 );
Section section = range.getSection( 0 );
assertTrue( section.getEndOffset() <= 766 );
// NOTE(review): same count-as-index pattern as above - TODO confirm.
Paragraph lastInMainSection = section.getParagraph( section
.numParagraphs() );
assertTrue( lastInMainSection.getEndOffset() <= 766 );
} }
} }