2007-12-30 13:11:55 -05:00
|
|
|
/* ====================================================================
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
==================================================================== */
|
2008-03-09 10:21:34 -04:00
|
|
|
package org.apache.poi.xslf.extractor;
|
2007-12-30 13:11:55 -05:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
import java.io.IOException;
|
2015-04-27 16:13:43 -04:00
|
|
|
import java.util.List;
|
2011-09-04 16:51:21 -04:00
|
|
|
|
2007-12-30 13:11:55 -05:00
|
|
|
import org.apache.poi.POIXMLTextExtractor;
|
2009-03-18 14:54:01 -04:00
|
|
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
|
|
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
2008-03-09 10:21:34 -04:00
|
|
|
import org.apache.poi.xslf.XSLFSlideShow;
|
2010-01-11 09:27:54 -05:00
|
|
|
import org.apache.poi.xslf.usermodel.DrawingParagraph;
|
2011-09-26 10:37:50 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.DrawingTextBody;
|
|
|
|
import org.apache.poi.xslf.usermodel.DrawingTextPlaceholder;
|
2008-08-05 14:05:29 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
2011-09-04 17:16:32 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFCommentAuthors;
|
2011-09-04 16:51:21 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFComments;
|
2010-01-11 09:27:54 -05:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
|
2011-09-04 16:51:21 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFNotes;
|
2010-07-29 07:57:08 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
2008-08-05 14:05:29 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
2011-09-26 10:37:50 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFSlideLayout;
|
2011-09-21 12:32:52 -04:00
|
|
|
import org.apache.poi.xslf.usermodel.XSLFSlideMaster;
|
2007-12-30 13:11:55 -05:00
|
|
|
import org.apache.xmlbeans.XmlException;
|
2011-08-11 04:38:19 -04:00
|
|
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTComment;
|
2011-09-04 17:16:32 -04:00
|
|
|
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentAuthor;
|
2007-12-30 13:11:55 -05:00
|
|
|
|
2008-03-09 10:21:34 -04:00
|
|
|
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
2010-07-29 07:57:08 -04:00
|
|
|
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] {
|
|
|
|
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
|
|
|
|
XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE,
|
|
|
|
XSLFRelation.PRESENTATION_MACRO
|
|
|
|
};
|
|
|
|
|
2008-08-05 14:05:29 -04:00
|
|
|
private XMLSlideShow slideshow;
|
2008-01-04 08:19:23 -05:00
|
|
|
private boolean slidesByDefault = true;
|
|
|
|
private boolean notesByDefault = false;
|
2011-09-21 12:54:22 -04:00
|
|
|
private boolean masterByDefault = false;
|
2007-12-30 13:11:55 -05:00
|
|
|
|
2008-08-05 14:05:29 -04:00
|
|
|
public XSLFPowerPointExtractor(XMLSlideShow slideshow) {
|
2011-08-11 04:38:19 -04:00
|
|
|
super(slideshow);
|
2008-08-05 14:05:29 -04:00
|
|
|
this.slideshow = slideshow;
|
|
|
|
}
|
|
|
|
public XSLFPowerPointExtractor(XSLFSlideShow slideshow) throws XmlException, IOException {
|
2011-08-11 04:38:19 -04:00
|
|
|
this(new XMLSlideShow(slideshow.getPackage()));
|
2008-08-05 14:05:29 -04:00
|
|
|
}
|
2009-03-18 14:54:01 -04:00
|
|
|
public XSLFPowerPointExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
|
2008-03-09 10:21:34 -04:00
|
|
|
this(new XSLFSlideShow(container));
|
2007-12-30 13:11:55 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
public static void main(String[] args) throws Exception {
|
|
|
|
if(args.length < 1) {
|
|
|
|
System.err.println("Use:");
|
2011-08-11 04:38:19 -04:00
|
|
|
System.err.println(" XSLFPowerPointExtractor <filename.pptx>");
|
2007-12-30 13:11:55 -05:00
|
|
|
System.exit(1);
|
|
|
|
}
|
|
|
|
POIXMLTextExtractor extractor =
|
2008-03-09 10:21:34 -04:00
|
|
|
new XSLFPowerPointExtractor(
|
|
|
|
new XSLFSlideShow(args[0]));
|
2007-12-30 13:11:55 -05:00
|
|
|
System.out.println(extractor.getText());
|
2014-04-24 11:07:20 -04:00
|
|
|
extractor.close();
|
2007-12-30 13:11:55 -05:00
|
|
|
}
|
2008-01-04 08:19:23 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Should a call to getText() return slide text?
|
|
|
|
* Default is yes
|
|
|
|
*/
|
|
|
|
public void setSlidesByDefault(boolean slidesByDefault) {
|
|
|
|
this.slidesByDefault = slidesByDefault;
|
|
|
|
}
|
|
|
|
/**
|
|
|
|
* Should a call to getText() return notes text?
|
|
|
|
* Default is no
|
|
|
|
*/
|
|
|
|
public void setNotesByDefault(boolean notesByDefault) {
|
|
|
|
this.notesByDefault = notesByDefault;
|
|
|
|
}
|
2007-12-30 13:11:55 -05:00
|
|
|
|
2011-09-21 12:54:22 -04:00
|
|
|
/**
|
|
|
|
* Should a call to getText() return text from master? Default is no
|
|
|
|
*/
|
|
|
|
public void setMasterByDefault(boolean masterByDefault) {
|
|
|
|
this.masterByDefault = masterByDefault;
|
|
|
|
}
|
|
|
|
|
2007-12-30 13:11:55 -05:00
|
|
|
/**
|
2008-01-04 08:19:23 -05:00
|
|
|
* Gets the slide text, but not the notes text
|
2007-12-30 13:11:55 -05:00
|
|
|
*/
|
|
|
|
public String getText() {
|
2008-01-04 08:19:23 -05:00
|
|
|
return getText(slidesByDefault, notesByDefault);
|
2007-12-30 13:11:55 -05:00
|
|
|
}
|
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
/**
|
|
|
|
* Gets the requested text from the file
|
|
|
|
* @param slideText Should we retrieve text from slides?
|
|
|
|
* @param notesText Should we retrieve text from notes?
|
|
|
|
*/
|
|
|
|
public String getText(boolean slideText, boolean notesText) {
|
2011-09-21 12:54:22 -04:00
|
|
|
return getText(slideText, notesText, masterByDefault);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the requested text from the file
|
|
|
|
* @param slideText Should we retrieve text from slides?
|
|
|
|
* @param notesText Should we retrieve text from notes?
|
|
|
|
* @param masterText Should we retrieve text from master slides?
|
|
|
|
*/
|
2014-08-27 20:08:41 -04:00
|
|
|
@SuppressWarnings("deprecation")
|
2011-09-21 12:54:22 -04:00
|
|
|
public String getText(boolean slideText, boolean notesText, boolean masterText) {
|
2011-09-04 16:51:21 -04:00
|
|
|
StringBuffer text = new StringBuffer();
|
|
|
|
|
2015-04-27 16:13:43 -04:00
|
|
|
List<XSLFSlide> slides = slideshow.getSlides();
|
2011-09-04 17:16:32 -04:00
|
|
|
XSLFCommentAuthors commentAuthors = slideshow.getCommentAuthors();
|
2008-08-05 14:05:29 -04:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
for (XSLFSlide slide : slides) {
|
|
|
|
try {
|
|
|
|
XSLFNotes notes = slide.getNotes();
|
|
|
|
XSLFComments comments = slide.getComments();
|
2011-09-26 10:37:50 -04:00
|
|
|
XSLFSlideLayout layout = slide.getSlideLayout();
|
2011-11-07 04:12:16 -05:00
|
|
|
XSLFSlideMaster master = layout.getSlideMaster();
|
2010-01-11 09:27:54 -05:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
// TODO Do the slide's name
|
2011-09-21 12:32:52 -04:00
|
|
|
// (Stored in docProps/app.xml)
|
2011-08-11 04:38:19 -04:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
// Do the slide's text if requested
|
|
|
|
if (slideText) {
|
2011-09-26 10:37:50 -04:00
|
|
|
extractText(slide.getCommonSlideData(), false, text);
|
2011-09-21 12:32:52 -04:00
|
|
|
|
2011-09-26 10:37:50 -04:00
|
|
|
// If requested, get text from the master and it's layout
|
|
|
|
if(masterText) {
|
|
|
|
if(layout != null) {
|
|
|
|
extractText(layout.getCommonSlideData(), true, text);
|
|
|
|
}
|
|
|
|
if(master != null) {
|
|
|
|
extractText(master.getCommonSlideData(), true, text);
|
|
|
|
}
|
2011-09-21 12:32:52 -04:00
|
|
|
}
|
2011-08-11 04:38:19 -04:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
// If the slide has comments, do those too
|
|
|
|
if (comments != null) {
|
2014-08-27 20:08:41 -04:00
|
|
|
for (CTComment comment : comments.getCTCommentsList().getCmArray()) {
|
2011-09-04 17:16:32 -04:00
|
|
|
// Do the author if we can
|
|
|
|
if (commentAuthors != null) {
|
|
|
|
CTCommentAuthor author = commentAuthors.getAuthorById(comment.getAuthorId());
|
|
|
|
if(author != null) {
|
|
|
|
text.append(author.getName() + ": ");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Then the comment text, with a new line afterwards
|
|
|
|
text.append(comment.getText());
|
|
|
|
text.append("\n");
|
2011-09-04 16:51:21 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2011-08-11 04:38:19 -04:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
// Do the notes if requested
|
|
|
|
if (notesText && notes != null) {
|
2011-09-26 10:37:50 -04:00
|
|
|
extractText(notes.getCommonSlideData(), false, text);
|
2011-08-11 04:38:19 -04:00
|
|
|
}
|
2011-09-04 16:51:21 -04:00
|
|
|
} catch (Exception e) {
|
2011-08-11 04:38:19 -04:00
|
|
|
throw new RuntimeException(e);
|
2011-09-04 16:51:21 -04:00
|
|
|
}
|
|
|
|
}
|
2011-08-11 04:38:19 -04:00
|
|
|
|
2011-09-04 16:51:21 -04:00
|
|
|
return text.toString();
|
|
|
|
}
|
2007-12-30 13:11:55 -05:00
|
|
|
|
2011-09-26 10:37:50 -04:00
|
|
|
private void extractText(XSLFCommonSlideData data, boolean skipPlaceholders, StringBuffer text) {
|
|
|
|
for(DrawingTextBody textBody : data.getDrawingText()) {
|
|
|
|
if(skipPlaceholders && textBody instanceof DrawingTextPlaceholder) {
|
|
|
|
DrawingTextPlaceholder ph = (DrawingTextPlaceholder)textBody;
|
|
|
|
if(! ph.isPlaceholderCustom()) {
|
|
|
|
// Skip non-customised placeholder text
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for (DrawingParagraph p : textBody.getParagraphs()) {
|
2010-01-11 09:27:54 -05:00
|
|
|
text.append(p.getText());
|
|
|
|
text.append("\n");
|
2011-09-26 10:37:50 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2007-12-30 13:11:55 -05:00
|
|
|
}
|