From c4f77a02e21571040243f5d309cf6ffc65928dc1 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 31 May 2013 21:17:55 +0000 Subject: [PATCH] Unit test for bugs #54880 & #55030 - seems ok so far git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1488403 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hslf/extractor/TestExtractor.java | 21 +++++ .../poi/hslf/usermodel/TestRichTextRun.java | 76 +++++++++++++++---- 2 files changed, 81 insertions(+), 16 deletions(-) diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java index cddacf9fe..104637c27 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TestExtractor.java @@ -329,6 +329,27 @@ public final class TestExtractor extends TestCase { assertContains(text, masterText); } + /** + * Bug #54880 Chinese text not extracted properly + */ + public void testChineseText() throws Exception { + HSLFSlideShow hslf = new HSLFSlideShow(slTests.openResourceAsStream("54880_chinese.ppt")); + ppe = new PowerPointExtractor(hslf); + + String text = ppe.getText(); + + // Check for the english text line + assertContains(text, "Single byte"); + + // Check for the english text in the mixed line + assertContains(text, "Mix"); + + // Check for the chinese text in the mixed line - 表 + assertContains(text, "\u8868"); + + // Check for the chinese only text line - ハンカク + assertContains(text, "\uff8a\uff9d\uff76\uff78"); + } /** * Tests that we can work with both {@link POIFSFileSystem} diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java index c94f91f2a..63acbaceb 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestRichTextRun.java @@ -38,24 +38,25 @@ import org.apache.poi.POIDataSamples; * @author Nick Burch (nick at torchbox dot com) */ public final class TestRichTextRun extends TestCase { - private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance(); + private static POIDataSamples _slTests = POIDataSamples.getSlideShowInstance(); - // SlideShow primed on the test data - private SlideShow ss; - private SlideShow ssRichA; - private SlideShow ssRichB; - private SlideShow ssRichC; - private HSLFSlideShow hss; - private HSLFSlideShow hssRichA; - private HSLFSlideShow hssRichB; - private HSLFSlideShow hssRichC; - private static String filenameC; + // SlideShow primed on the test data + private SlideShow ss; + private SlideShow ssRichA; + private SlideShow ssRichB; + private SlideShow ssRichC; + private SlideShow ssChinese; + private HSLFSlideShow hss; + private HSLFSlideShow hssRichA; + private HSLFSlideShow hssRichB; + private HSLFSlideShow hssRichC; + private HSLFSlideShow hssChinese; + private static String filenameC; - protected void setUp() throws Exception { - - // Basic (non rich) test file - hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt")); - ss = new SlideShow(hss); + protected void setUp() throws Exception { + // Basic (non rich) test file + hss = new HSLFSlideShow(_slTests.openResourceAsStream("basic_test_ppt_file.ppt")); + ss = new SlideShow(hss); // Rich test file A hssRichA = new HSLFSlideShow(_slTests.openResourceAsStream("Single_Coloured_Page.ppt")); @@ -70,8 +71,18 @@ public final class TestRichTextRun extends TestCase { filenameC = "ParagraphStylesShorterThanCharStyles.ppt"; hssRichC = new HSLFSlideShow(_slTests.openResourceAsStream(filenameC)); ssRichC = new SlideShow(hssRichC); + + // Rich test file with Chinese + English text in it + hssChinese = new HSLFSlideShow(_slTests.openResourceAsStream("54880_chinese.ppt")); + ssChinese = new SlideShow(hssChinese); } + private static void assertContains(String haystack, String needle) { + assertTrue( + "Unable to find expected text '" + needle + "' in text:\n" + haystack, + haystack.contains(needle) + ); + } /** * Test the stuff about getting/setting bold * on a non rich text run @@ -623,4 +634,37 @@ if(false) { // FileOutputStream fout = new FileOutputStream("/tmp/foo.ppt"); // ppt.write(fout); } + + public void testChineseParagraphs() throws Exception { + RichTextRun[] rts; + RichTextRun rt; + TextRun[] txt; + Slide[] slides = ssChinese.getSlides(); + + // One slide + assertEquals(1, slides.length); + + // One block of text within that + txt = slides[0].getTextRuns(); + assertEquals(1, txt.length); + + // One rich block of text in that - text is all the same style + // TODO Is this completely correct? + rts = txt[0].getRichTextRuns(); + assertEquals(1, rts.length); + rt = rts[0]; + + // Check we can get the english text out of that + String text = rt.getText(); + assertContains(text, "Single byte"); + // And the chinese - ハンカク + assertContains(text, "\uff8a\uff9d\uff76\uff78"); + + // It isn't bold or italic + assertFalse(rt.isBold()); + assertFalse(rt.isItalic()); + + // Font is Calibri + assertEquals("Calibri", rt.getFontName()); + } }