More range tests, which show that we do have a bug in the HWPF Unicode support

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@684309 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-08-09 17:58:35 +00:00
parent 0ed496289a
commit 1400017540

View File

@ -29,7 +29,18 @@ import junit.framework.TestCase;
* the different ranges * the different ranges
*/ */
public class TestHWPFRangeParts extends TestCase { public class TestHWPFRangeParts extends TestCase {
private static final String page_1 = private static final char page_break = (char)12;
private static final String headerDef =
"\u0003\r\r" +
"\u0004\r\r" +
"\u0003\r\r" +
"\u0004\r\r"
;
private static final String footerDef = "\r";
private static final String endHeaderFooter = "\r\r";
private static final String a_page_1 =
"This is a sample word document. It has two pages. It has a three column heading, and a three column footer\r" + "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\r" +
"\r" + "\r" +
"HEADING TEXT\r" + "HEADING TEXT\r" +
@ -38,37 +49,60 @@ public class TestHWPFRangeParts extends TestCase {
"\r\r" + "\r\r" +
"End of page 1\r" "End of page 1\r"
; ;
private static final char page_break = (char)12; private static final String a_page_2 =
private static final String page_2 =
"This is page two. It also has a three column heading, and a three column footer.\r" "This is page two. It also has a three column heading, and a three column footer.\r"
; ;
private static final String headerDef = private static final String a_header =
"\u0003\r\r" +
"\u0004\r\r" +
"\u0003\r\r" +
"\u0004\r\r"
;
private static final String header =
"First header column!\tMid header Right header!\r" "First header column!\tMid header Right header!\r"
; ;
private static final String footerDef = private static final String a_footer =
"\r"
;
private static final String footer =
"Footer Left\tFooter Middle Footer Right\r" "Footer Left\tFooter Middle Footer Right\r"
; ;
private static final String endHeaderFooter =
"\r\r"
private static final String u_page_1 =
"This is a fairly simple word document, over two pages, with headers and footers.\r" +
"The trick with this one is that it contains some Unicode based strings in it.\r" +
"Firstly, some currency symbols:\r" +
"\tGBP - \u00a3\r" +
"\tEUR - \u20ac\r" +
"Now, we\u2019ll have some French text, in bold and big:\r" +
"\tMoli\u00e8re\r" +
"And some normal French text:\r" +
"\tL'Avare ou l'\u00c9cole du mensonge\r" +
"That\u2019s it for page one\r"
;
private static final String u_page_2 =
"This is page two. Les Pr\u00e9cieuses ridicules. The end.\r"
; ;
private HWPFDocument doc; private static final String u_header =
"This is a simple header, with a \u20ac euro symbol in it.\r"
;
private static final String u_footer =
"The footer, with Moli\u00e8re, has Unicode in it.\r"
;
/**
* A document made up only of basic ASCII text
*/
private HWPFDocument docAscii;
/**
* A document with some unicode in it too
*/
private HWPFDocument docUnicode;
public void setUp() throws Exception { public void setUp() throws Exception {
String filename = System.getProperty("HWPF.testdata.path"); String dirname = System.getProperty("HWPF.testdata.path");
filename = filename + "/ThreeColHeadFoot.doc";
doc = new HWPFDocument( String filename = dirname + "/HeaderFooterUnicode.doc";
docUnicode = new HWPFDocument(
new FileInputStream(filename)
);
filename = dirname + "/ThreeColHeadFoot.doc";
docAscii = new HWPFDocument(
new FileInputStream(filename) new FileInputStream(filename)
); );
} }
@ -77,32 +111,32 @@ public class TestHWPFRangeParts extends TestCase {
// First check the start and end bits // First check the start and end bits
assertEquals( assertEquals(
0, 0,
doc._cpSplit.getMainDocumentStart() docAscii._cpSplit.getMainDocumentStart()
); );
assertEquals( assertEquals(
page_1.length() + a_page_1.length() +
2 + // page break 2 + // page break
page_2.length(), a_page_2.length(),
doc._cpSplit.getMainDocumentEnd() docAscii._cpSplit.getMainDocumentEnd()
); );
assertEquals( assertEquals(
238, 238,
doc._cpSplit.getFootnoteStart() docAscii._cpSplit.getFootnoteStart()
); );
assertEquals( assertEquals(
238, 238,
doc._cpSplit.getFootnoteEnd() docAscii._cpSplit.getFootnoteEnd()
); );
assertEquals( assertEquals(
238, 238,
doc._cpSplit.getHeaderStoryStart() docAscii._cpSplit.getHeaderStoryStart()
); );
assertEquals( assertEquals(
238 + headerDef.length() + header.length() + 238 + headerDef.length() + a_header.length() +
footerDef.length() + footer.length() + endHeaderFooter.length(), footerDef.length() + a_footer.length() + endHeaderFooter.length(),
doc._cpSplit.getHeaderStoryEnd() docAscii._cpSplit.getHeaderStoryEnd()
); );
} }
@ -110,33 +144,104 @@ public class TestHWPFRangeParts extends TestCase {
Range r; Range r;
// Now check the real ranges // Now check the real ranges
r = doc.getRange(); r = docAscii.getRange();
assertEquals( assertEquals(
page_1 + a_page_1 +
page_break + "\r" + page_break + "\r" +
page_2, a_page_2,
r.text() r.text()
); );
r = doc.getHeaderStoryRange(); r = docAscii.getHeaderStoryRange();
assertEquals( assertEquals(
headerDef + headerDef +
header + a_header +
footerDef + footerDef +
footer + a_footer +
endHeaderFooter, endHeaderFooter,
r.text() r.text()
); );
r = doc.getOverallRange(); r = docAscii.getOverallRange();
assertEquals( assertEquals(
page_1 + a_page_1 +
page_break + "\r" + page_break + "\r" +
page_2 + a_page_2 +
headerDef + headerDef +
header + a_header +
footerDef + footerDef +
footer + a_footer +
endHeaderFooter +
"\r",
r.text()
);
}
public void testBasicsUnicode() throws Exception {
// First check the start and end bits
assertEquals(
0,
docUnicode._cpSplit.getMainDocumentStart()
);
assertEquals(
u_page_1.length() +
2 + // page break
u_page_2.length(),
docUnicode._cpSplit.getMainDocumentEnd()
);
assertEquals(
408,
docUnicode._cpSplit.getFootnoteStart()
);
assertEquals(
408,
docUnicode._cpSplit.getFootnoteEnd()
);
assertEquals(
408,
docUnicode._cpSplit.getHeaderStoryStart()
);
// TODO - fix this one
assertEquals(
408 + headerDef.length() + u_header.length() +
footerDef.length() + u_footer.length() + endHeaderFooter.length(),
docUnicode._cpSplit.getHeaderStoryEnd()
);
}
public void testContentsUnicode() throws Exception {
Range r;
// Now check the real ranges
r = docUnicode.getRange();
assertEquals(
u_page_1 +
page_break + "\r" +
u_page_2,
r.text()
);
r = docUnicode.getHeaderStoryRange();
assertEquals(
headerDef +
u_header +
footerDef +
u_footer +
endHeaderFooter,
r.text()
);
r = docUnicode.getOverallRange();
assertEquals(
u_page_1 +
page_break + "\r" +
u_page_2 +
headerDef +
u_header +
footerDef +
u_footer +
endHeaderFooter + endHeaderFooter +
"\r", "\r",
r.text() r.text()