diff --git a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java index 9728dcfe6..bcbf176d2 100644 --- a/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java +++ b/src/ooxml/java/org/apache/poi/xslf/extractor/XSLFPowerPointExtractor.java @@ -16,28 +16,18 @@ ==================================================================== */ package org.apache.poi.xslf.extractor; -import java.io.IOException; - import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xslf.usermodel.DrawingParagraph; import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFCommonSlideData; import org.apache.poi.xslf.usermodel.XSLFSlide; import org.apache.xmlbeans.XmlException; -import org.apache.xmlbeans.XmlObject; -import org.apache.xmlbeans.XmlCursor; -import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; -import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; -import org.openxmlformats.schemas.presentationml.x2006.main.CTComment; -import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList; -import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTShape; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide; -import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; +import org.openxmlformats.schemas.presentationml.x2006.main.*; + +import java.io.IOException; public class XSLFPowerPointExtractor extends POIXMLTextExtractor { private XMLSlideShow slideshow; @@ -110,7 +100,7 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { slideshow._getXSLFSlideShow().getSlideComments(slideId); if(slideText) { - extractText(rawSlide.getCSld().getSpTree(), text); + extractText(slides[i].getCommonSlideData(), text); // Comments too for the slide if(comments != null) { @@ -123,8 +113,9 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { } } } + if(notesText && notes != null) { - extractText(notes.getCSld().getSpTree(), text); + extractText(new XSLFCommonSlideData(notes.getCSld()), text); } } catch(Exception e) { throw new RuntimeException(e); @@ -134,31 +125,10 @@ public class XSLFPowerPointExtractor extends POIXMLTextExtractor { return text.toString(); } - private void extractText(CTGroupShape gs, StringBuffer text) { - CTShape[] shapes = gs.getSpArray(); - for (int i = 0; i < shapes.length; i++) { - CTTextBody textBody = - shapes[i].getTxBody(); - if(textBody != null) { - CTTextParagraph[] paras = - textBody.getPArray(); - for (int j = 0; j < paras.length; j++) { - XmlCursor c = paras[j].newCursor(); - c.selectPath("./*"); - while (c.toNextSelection()) { - XmlObject o = c.getObject(); - if(o instanceof CTRegularTextRun){ - CTRegularTextRun txrun = (CTRegularTextRun)o; - text.append( txrun.getT() ); - } else if (o instanceof CTTextLineBreak){ - text.append('\n'); - } - } - - // End each paragraph with a new line - text.append("\n"); - } - } - } - } + private void extractText(XSLFCommonSlideData data, StringBuffer text) { + for (DrawingParagraph p : data.getText()) { + text.append(p.getText()); + text.append("\n"); + } + } } diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java new file mode 100644 index 000000000..fd61b26b3 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingParagraph.java @@ -0,0 +1,33 @@ +package org.apache.poi.xslf.usermodel; + +import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; +import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun; +import org.openxmlformats.schemas.drawingml.x2006.main.CTTextLineBreak; +import org.apache.xmlbeans.XmlCursor; +import org.apache.xmlbeans.XmlObject; + +public class DrawingParagraph { + private final CTTextParagraph p; + + public DrawingParagraph(CTTextParagraph p) { + this.p = p; + } + + public CharSequence getText() { + StringBuilder text = new StringBuilder(); + + XmlCursor c = p.newCursor(); + c.selectPath("./*"); + while (c.toNextSelection()) { + XmlObject o = c.getObject(); + if (o instanceof CTRegularTextRun) { + CTRegularTextRun txrun = (CTRegularTextRun) o; + text.append(txrun.getT()); + } else if (o instanceof CTTextLineBreak) { + text.append('\n'); + } + } + + return text; + } +} diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java new file mode 100644 index 000000000..ab39cf024 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/DrawingTable.java @@ -0,0 +1,23 @@ +package org.apache.poi.xslf.usermodel; + +import org.openxmlformats.schemas.drawingml.x2006.main.CTTable; +import org.openxmlformats.schemas.drawingml.x2006.main.CTTableRow; + +public class DrawingTable { + private final CTTable table; + + public DrawingTable(CTTable table) { + this.table = table; + } + + public DrawingTableRow[] getRows() { + CTTableRow[] ctTableRows = table.getTrArray(); + DrawingTableRow[] o = new DrawingTableRow[ctTableRows.length]; + + for (int i=0; i getText() { + CTGroupShape gs = data.getSpTree(); + + List out = new ArrayList(); + + CTShape[] shapes = gs.getSpArray(); + for (int i = 0; i < shapes.length; i++) { + CTTextBody ctTextBody = shapes[i].getTxBody(); + if (ctTextBody==null) { + continue; + } + + DrawingTextBody textBody = new DrawingTextBody(ctTextBody); + + out.addAll(Arrays.asList(textBody.getParagraphs())); + } + + CTGraphicalObjectFrame[] graphicFrames = gs.getGraphicFrameArray(); + for (CTGraphicalObjectFrame frame: graphicFrames) { + CTGraphicalObjectData data = frame.getGraphic().getGraphicData(); + XmlCursor c = data.newCursor(); + c.selectPath("./*"); + + while (c.toNextSelection()) { + XmlObject o = c.getObject(); + + if (o instanceof CTTable) { + DrawingTable table = new DrawingTable((CTTable) o); + + for (DrawingTableRow row : table.getRows()) { + for (DrawingTableCell cell : row.getCells()) { + DrawingTextBody textBody = cell.getTextBody(); + + out.addAll(Arrays.asList(textBody.getParagraphs())); + } + } + } + } + } + + return out; + } + +} diff --git a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java index 90de6e8a0..29af4941e 100644 --- a/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java +++ b/src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFSlide.java @@ -26,11 +26,13 @@ import org.openxmlformats.schemas.presentationml.x2006.main.CTSlideIdListEntry; public class XSLFSlide extends XSLFSheet implements Slide { private CTSlide slide; private CTSlideIdListEntry slideId; + private XSLFCommonSlideData data; public XSLFSlide(CTSlide slide, CTSlideIdListEntry slideId, SlideShow parent) { super(parent); this.slide = slide; this.slideId = slideId; + this.data = new XSLFCommonSlideData(slide.getCSld()); } /** @@ -88,4 +90,8 @@ public class XSLFSlide extends XSLFSheet implements Slide { // TODO Auto-generated method stub } + + public XSLFCommonSlideData getCommonSlideData() { + return data; + } } diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index 334e55646..cfb4e6518 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -113,4 +113,17 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check comments are there assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); } + + public void testTable() throws Exception { + POIDataSamples slTests = POIDataSamples.getSlideShowInstance(); + xmlA = new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); + XSLFPowerPointExtractor extractor = + new XSLFPowerPointExtractor(xmlA); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check comments are there + assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); + } } diff --git a/test-data/slideshow/present1.pptx b/test-data/slideshow/present1.pptx new file mode 100644 index 000000000..c7f4c9f62 Binary files /dev/null and b/test-data/slideshow/present1.pptx differ