Unit test for bugs #54880 & #55030 - seems ok so far

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1488403 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2013-05-31 21:17:55 +00:00
parent f6a3cf4d62
commit c4f77a02e2
2 changed files with 81 additions and 16 deletions

View File

@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase {
assertContains(text, masterText);
}
/**
 * Regression test for bug #54880 (Chinese text not extracted properly).
 * Runs the text extractor over the 54880_chinese.ppt sample and checks
 * that both the English and the non-English fragments are present in
 * the extracted text.
 */
public void testChineseText() throws Exception {
    ppe = new PowerPointExtractor(
            new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt")));
    String extracted = ppe.getText();

    // Fragments that must show up, listed in the order they are checked
    String[] expectedFragments = {
        // The English-only text line
        "Single byte",
        // The English part of the mixed line
        "Mix",
        // The non-English part of the mixed line (code point U+8868)
        "\u8868",
        // The non-English-only line: U+FF8A U+FF9D U+FF76 U+FF78,
        // which are halfwidth katakana code points
        "\uff8a\uff9d\uff76\uff78",
    };
    for (String fragment : expectedFragments) {
        assertContains(extracted, fragment);
    }
}
/**
* Tests that we can work with both {@link POIFSFileSystem}

View File

@ -38,24 +38,25 @@ import org.apache.poi.POIDataSamples;
* @author Nick Burch (nick at torchbox dot com)
*/
public final class TestRichTextRun extends TestCase {
private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
// SlideShow primed on the test data
private SlideShow ss;
private SlideShow ssRichA;
private SlideShow ssRichB;
private SlideShow ssRichC;
private HSLFSlideShow hss;
private HSLFSlideShow hssRichA;
private HSLFSlideShow hssRichB;
private HSLFSlideShow hssRichC;
private static String filenameC;
// SlideShow primed on the test data
private SlideShow ss;
private SlideShow ssRichA;
private SlideShow ssRichB;
private SlideShow ssRichC;
private SlideShow ssChinese;
private HSLFSlideShow hss;
private HSLFSlideShow hssRichA;
private HSLFSlideShow hssRichB;
private HSLFSlideShow hssRichC;
private HSLFSlideShow hssChinese;
private static String filenameC;
protected void setUp() throws Exception {
// Basic (non rich) test file
hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
ss = new SlideShow(hss);
protected void setUp() throws Exception {
// Basic (non rich) test file
hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
ss = new SlideShow(hss);
// Rich test file A
hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt"));
@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase {
filenameC = "ParagraphStylesShorterThanCharStyles.ppt";
hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC));
ssRichC = new SlideShow(hssRichC);
// Rich test file with Chinese + English text in it
hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt"));
ssChinese = new SlideShow(hssChinese);
}
/**
 * Asserts that {@code haystack} contains {@code needle}, failing with a
 * message that shows both the expected fragment and the text searched.
 * A {@code null} haystack is reported as an ordinary assertion failure
 * (with that same message) instead of a NullPointerException.
 *
 * @param haystack the text to search in; may be {@code null}
 * @param needle the fragment expected within {@code haystack};
 *        must not be {@code null}
 */
private static void assertContains(String haystack, String needle) {
    // Guard the null case so a missing text surfaces as a test failure
    // carrying the expected fragment, not as a context-free error
    boolean found = haystack != null && haystack.contains(needle);
    assertTrue(
        "Unable to find expected text '" + needle + "' in text:\n" + haystack,
        found
    );
}
/**
* Test the stuff about getting/setting bold
* on a non rich text run
@ -623,4 +634,37 @@ if(false) {
// FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt");
// ppt.write(fout);
}
/**
 * Walks the slide show held in {@code ssChinese} down to its single
 * rich text run, then checks that run's text content, bold/italic
 * flags and font name.
 */
public void testChineseParagraphs() throws Exception {
    // The show is expected to hold exactly one slide
    Slide[] allSlides = ssChinese.getSlides();
    assertEquals(1, allSlides.length);

    // That slide is expected to hold exactly one block of text
    TextRun[] textBlocks = allSlides[0].getTextRuns();
    assertEquals(1, textBlocks.length);

    // The block is expected to be a single rich run, i.e. one style throughout
    // TODO Is this completely correct?
    RichTextRun[] richRuns = textBlocks[0].getRichTextRuns();
    assertEquals(1, richRuns.length);
    RichTextRun onlyRun = richRuns[0];

    // Both the English text and the U+FF8A U+FF9D U+FF76 U+FF78 sequence
    // must be readable from the run
    String runText = onlyRun.getText();
    assertContains(runText, "Single byte");
    assertContains(runText, "\uff8a\uff9d\uff76\uff78");

    // Styling: neither bold nor italic, and set in Calibri
    assertFalse(onlyRun.isBold());
    assertFalse(onlyRun.isItalic());
    assertEquals("Calibri", onlyRun.getFontName());
}
}