git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1488403 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f6a3cf4d62
commit
c4f77a02e2
@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase {
|
|||||||
assertContains(text, masterText);
|
assertContains(text, masterText);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bug #54880 Chinese text not extracted properly
|
||||||
|
*/
|
||||||
|
public void testChineseText() throws Exception {
|
||||||
|
HSLFSlideShow hslf = new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt"));
|
||||||
|
ppe = new PowerPointExtractor(hslf);
|
||||||
|
|
||||||
|
String text = ppe.getText();
|
||||||
|
|
||||||
|
// Check for the english text line
|
||||||
|
assertContains(text, "Single byte");
|
||||||
|
|
||||||
|
// Check for the english text in the mixed line
|
||||||
|
assertContains(text, "Mix");
|
||||||
|
|
||||||
|
// Check for the chinese text in the mixed line - 表
|
||||||
|
assertContains(text, "\u8868");
|
||||||
|
|
||||||
|
// Check for the chinese only text line - ハンカク
|
||||||
|
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that we can work with both {@link POIFSFileSystem}
|
* Tests that we can work with both {@link POIFSFileSystem}
|
||||||
|
@ -38,24 +38,25 @@ import org.apache.poi.POIDataSamples;
|
|||||||
* @author Nick Burch (nick at torchbox dot com)
|
* @author Nick Burch (nick at torchbox dot com)
|
||||||
*/
|
*/
|
||||||
public final class TestRichTextRun extends TestCase {
|
public final class TestRichTextRun extends TestCase {
|
||||||
private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
|
private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance();
|
||||||
|
|
||||||
// SlideShow primed on the test data
|
// SlideShow primed on the test data
|
||||||
private SlideShow ss;
|
private SlideShow ss;
|
||||||
private SlideShow ssRichA;
|
private SlideShow ssRichA;
|
||||||
private SlideShow ssRichB;
|
private SlideShow ssRichB;
|
||||||
private SlideShow ssRichC;
|
private SlideShow ssRichC;
|
||||||
private HSLFSlideShow hss;
|
private SlideShow ssChinese;
|
||||||
private HSLFSlideShow hssRichA;
|
private HSLFSlideShow hss;
|
||||||
private HSLFSlideShow hssRichB;
|
private HSLFSlideShow hssRichA;
|
||||||
private HSLFSlideShow hssRichC;
|
private HSLFSlideShow hssRichB;
|
||||||
private static String filenameC;
|
private HSLFSlideShow hssRichC;
|
||||||
|
private HSLFSlideShow hssChinese;
|
||||||
|
private static String filenameC;
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
|
// Basic (non rich) test file
|
||||||
// Basic (non rich) test file
|
hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
|
||||||
hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt"));
|
ss = new SlideShow(hss);
|
||||||
ss = new SlideShow(hss);
|
|
||||||
|
|
||||||
// Rich test file A
|
// Rich test file A
|
||||||
hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt"));
|
hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt"));
|
||||||
@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase {
|
|||||||
filenameC = "ParagraphStylesShorterThanCharStyles.ppt";
|
filenameC = "ParagraphStylesShorterThanCharStyles.ppt";
|
||||||
hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC));
|
hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC));
|
||||||
ssRichC = new SlideShow(hssRichC);
|
ssRichC = new SlideShow(hssRichC);
|
||||||
|
|
||||||
|
// Rich test file with Chinese + English text in it
|
||||||
|
hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt"));
|
||||||
|
ssChinese = new SlideShow(hssChinese);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void assertContains(String haystack, String needle) {
|
||||||
|
assertTrue(
|
||||||
|
"Unable to find expected text '" + needle + "' in text:\n" + haystack,
|
||||||
|
haystack.contains(needle)
|
||||||
|
);
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Test the stuff about getting/setting bold
|
* Test the stuff about getting/setting bold
|
||||||
* on a non rich text run
|
* on a non rich text run
|
||||||
@ -623,4 +634,37 @@ if(false) {
|
|||||||
// FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt");
|
// FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt");
|
||||||
// ppt.write(fout);
|
// ppt.write(fout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testChineseParagraphs() throws Exception {
|
||||||
|
RichTextRun[] rts;
|
||||||
|
RichTextRun rt;
|
||||||
|
TextRun[] txt;
|
||||||
|
Slide[] slides = ssChinese.getSlides();
|
||||||
|
|
||||||
|
// One slide
|
||||||
|
assertEquals(1, slides.length);
|
||||||
|
|
||||||
|
// One block of text within that
|
||||||
|
txt = slides[0].getTextRuns();
|
||||||
|
assertEquals(1, txt.length);
|
||||||
|
|
||||||
|
// One rich block of text in that - text is all the same style
|
||||||
|
// TODO Is this completely correct?
|
||||||
|
rts = txt[0].getRichTextRuns();
|
||||||
|
assertEquals(1, rts.length);
|
||||||
|
rt = rts[0];
|
||||||
|
|
||||||
|
// Check we can get the english text out of that
|
||||||
|
String text = rt.getText();
|
||||||
|
assertContains(text, "Single byte");
|
||||||
|
// And the chinese - ハンカク
|
||||||
|
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
||||||
|
|
||||||
|
// It isn't bold or italic
|
||||||
|
assertFalse(rt.isBold());
|
||||||
|
assertFalse(rt.isItalic());
|
||||||
|
|
||||||
|
// Font is Calibri
|
||||||
|
assertEquals("Calibri", rt.getFontName());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user