#62319 - Decommission XSLF-/PowerPointExtractor

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1829653 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2018-04-20 12:52:59 +00:00
parent bc436fcc3d
commit ab390ce170
27 changed files with 824 additions and 1248 deletions

View File

@ -330,8 +330,6 @@ public class TestAllFiles {
); );
private static final Set<String> IGNORED = unmodifiableHashSet( private static final Set<String> IGNORED = unmodifiableHashSet(
// need JDK8+ - https://bugs.openjdk.java.net/browse/JDK-8038081
"slideshow/42474-2.ppt",
// OPC handler works / XSSF handler fails // OPC handler works / XSSF handler fails
"spreadsheet/57181.xlsm", "spreadsheet/57181.xlsm",
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop "spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop

View File

@ -24,6 +24,7 @@ import java.io.FileInputStream;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XSLFSlideShow;
@ -53,12 +54,19 @@ public class XSLFFileHandler extends SlideShowHandler {
// additionally try the other getText() methods // additionally try the other getText() methods
try (XSLFPowerPointExtractor extractor = (XSLFPowerPointExtractor) ExtractorFactory.createExtractor(file)) { try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
assertNotNull(extractor); assertNotNull(extractor);
extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(true);
extractor.setMasterByDefault(true);
assertNotNull(extractor.getText(true, true, true)); assertNotNull(extractor.getText());
assertEquals("With all options disabled we should not get text",
"", extractor.getText(false, false, false)); extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(false);
assertEquals("With all options disabled we should not get text", "", extractor.getText());
} }
} }

View File

@ -105,6 +105,7 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
* *
* @return the underlying POIDocument * @return the underlying POIDocument
*/ */
@Override
public POIDocument getDocument() { public POIDocument getDocument() {
return document; return document;
} }

View File

@ -74,4 +74,9 @@ public abstract class POITextExtractor implements Closeable {
fsToClose.close(); fsToClose.close();
} }
} }
/**
* @return the processed document
*/
public abstract Object getDocument();
} }

View File

@ -115,26 +115,23 @@ public class OLE2ExtractorFactory {
return threadPreferEventExtractors.get(); return threadPreferEventExtractors.get();
} }
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
// Only ever an OLE2 one from the root of the FS return (T)createExtractor(fs.getRoot());
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
} }
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException { public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException {
// Only ever an OLE2 one from the root of the FS return (T)createExtractor(fs.getRoot());
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
} }
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException { public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException {
// Only ever an OLE2 one from the root of the FS return (T)createExtractor(fs.getRoot());
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
} }
public static POITextExtractor createExtractor(InputStream input) throws IOException { public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
Class<?> cls = getOOXMLClass(); Class<?> cls = getOOXMLClass();
if (cls != null) { if (cls != null) {
// Use Reflection to get us the full OOXML-enabled version // Use Reflection to get us the full OOXML-enabled version
try { try {
Method m = cls.getDeclaredMethod("createExtractor", InputStream.class); Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
return (POITextExtractor)m.invoke(null, input); return (T)m.invoke(null, input);
} catch (IllegalArgumentException iae) { } catch (IllegalArgumentException iae) {
throw iae; throw iae;
} catch (Exception e) { } catch (Exception e) {

View File

@ -44,8 +44,30 @@ public class DocumentFactoryHelper {
* @throws IOException If an error occurs while decrypting or if the password does not match * @throws IOException If an error occurs while decrypting or if the password does not match
*/ */
public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password) public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
throws IOException {
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
// as well when the resulting OPCPackage is closed
return new FilterInputStream(getDecryptedStream(fs.getRoot(), password)) {
@Override
public void close() throws IOException {
fs.close();
super.close();
}
};
}
/**
* Wrap the OLE2 data of the DirectoryNode into a decrypted stream by using
* the given password.
*
* @param root The OLE2 directory node for the document
* @param password The password, null if the default password should be used
* @return A stream for reading the decrypted data
* @throws IOException If an error occurs while decrypting or if the password does not match
*/
public static InputStream getDecryptedStream(final DirectoryNode root, String password)
throws IOException { throws IOException {
EncryptionInfo info = new EncryptionInfo(fs); EncryptionInfo info = new EncryptionInfo(root);
Decryptor d = Decryptor.getInstance(info); Decryptor d = Decryptor.getInstance(info);
try { try {
@ -58,21 +80,11 @@ public class DocumentFactoryHelper {
} }
if (passwordCorrect) { if (passwordCorrect) {
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem return d.getDataStream(root);
// as well when the resulting OPCPackage is closed } else if (password != null) {
return new FilterInputStream(d.getDataStream(fs.getRoot())) { throw new EncryptedDocumentException("Password incorrect");
@Override
public void close() throws IOException {
fs.close();
super.close();
}
};
} else { } else {
if (password != null) throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
throw new EncryptedDocumentException("Password incorrect");
else
throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
} }
} catch (GeneralSecurityException e) { } catch (GeneralSecurityException e) {
throw new IOException(e); throw new IOException(e);

View File

@ -1,3 +1,20 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.sl.extractor; package org.apache.poi.sl.extractor;
import java.util.ArrayList; import java.util.ArrayList;
@ -48,6 +65,16 @@ public class SlideShowExtractor<
this.slideshow = slideshow; this.slideshow = slideshow;
} }
/**
* Returns opened document
*
* @return the opened document
*/
@Override
public final Object getDocument() {
return slideshow.getPersistDocument();
}
/** /**
* Should a call to getText() return slide text? Default is yes * Should a call to getText() return slide text? Default is yes
*/ */
@ -219,7 +246,6 @@ public class SlideShowExtractor<
return; return;
} }
for (final P para : paraList) { for (final P para : paraList) {
final int oldLen = sb.length();
for (final TextRun tr : para) { for (final TextRun tr : para) {
final String str = tr.getRawText().replace("\r", ""); final String str = tr.getRawText().replace("\r", "");
final String newStr; final String newStr;

View File

@ -126,4 +126,13 @@ public interface SlideShow<
* @since POI 4.0.0 * @since POI 4.0.0
*/ */
POITextExtractor getMetadataTextExtractor(); POITextExtractor getMetadataTextExtractor();
/**
* @return the instance which handles the persisting of the slideshow,
* which is either a subclass of {@link org.apache.poi.POIDocument}
* or {@link org.apache.poi.POIXMLDocument}
*
* @since POI 4.0.0
*/
Object getPersistDocument();
} }

View File

@ -60,13 +60,40 @@ public class SlideShowFactory {
* @throws IOException if an error occurs while reading the data * @throws IOException if an error occurs while reading the data
*/ */
public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException { public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
DirectoryNode root = fs.getRoot(); return create(fs.getRoot(), password);
}
/**
* Creates a SlideShow from the given {@link DirectoryNode}.
*
* @param root The {@link DirectoryNode} to start reading the document from
*
* @return The created SlideShow
*
* @throws IOException if an error occurs while reading the data
*/
public static SlideShow<?,?> create(final DirectoryNode root) throws IOException {
return create(root, null);
}
/**
* Creates a SlideShow from the given {@link DirectoryNode}, which may
* be password protected
*
* @param root The {@link DirectoryNode} to start reading the document from
* @param password The password that should be used or null if no password is necessary.
*
* @return The created SlideShow
*
* @throws IOException if an error occurs while reading the data
*/
public static SlideShow<?,?> create(final DirectoryNode root, String password) throws IOException {
// Encrypted OOXML files go inside OLE2 containers, is this one? // Encrypted OOXML files go inside OLE2 containers, is this one?
if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
InputStream stream = null; InputStream stream = null;
try { try {
stream = DocumentFactoryHelper.getDecryptedStream(fs, password); stream = DocumentFactoryHelper.getDecryptedStream(root, password);
return createXSLFSlideShow(stream); return createXSLFSlideShow(stream);
} finally { } finally {
@ -82,7 +109,7 @@ public class SlideShowFactory {
passwordSet = true; passwordSet = true;
} }
try { try {
return createHSLFSlideShow(fs); return createHSLFSlideShow(root);
} finally { } finally {
if (passwordSet) { if (passwordSet) {
Biff8EncryptionKey.setCurrentUserPassword(null); Biff8EncryptionKey.setCurrentUserPassword(null);

View File

@ -68,6 +68,7 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
* *
* @return the opened document * @return the opened document
*/ */
@Override
public final POIXMLDocument getDocument() { public final POIXMLDocument getDocument() {
return _document; return _document;
} }

View File

@ -51,6 +51,7 @@ import org.apache.poi.poifs.filesystem.NotOLE2FileException;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.NotImplemented; import org.apache.poi.util.NotImplemented;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
@ -58,6 +59,7 @@ import org.apache.poi.util.POILogger;
import org.apache.poi.util.Removal; import org.apache.poi.util.Removal;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor; import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
@ -127,20 +129,20 @@ public class ExtractorFactory {
return OLE2ExtractorFactory.getPreferEventExtractor(); return OLE2ExtractorFactory.getPreferEventExtractor();
} }
public static POITextExtractor createExtractor(File f) throws IOException, OpenXML4JException, XmlException { public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
NPOIFSFileSystem fs = null; NPOIFSFileSystem fs = null;
try { try {
fs = new NPOIFSFileSystem(f); fs = new NPOIFSFileSystem(f);
if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
return createEncryptedOOXMLExtractor(fs); return (T)createEncryptedOOXMLExtractor(fs);
} }
POIOLE2TextExtractor extractor = createExtractor(fs); POITextExtractor extractor = createExtractor(fs);
extractor.setFilesystem(fs); extractor.setFilesystem(fs);
return extractor; return (T)extractor;
} catch (OfficeXmlFileException e) { } catch (OfficeXmlFileException e) {
// ensure file-handle release // ensure file-handle release
IOUtils.closeQuietly(fs); IOUtils.closeQuietly(fs);
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ)); return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
} catch (NotOLE2FileException ne) { } catch (NotOLE2FileException ne) {
// ensure file-handle release // ensure file-handle release
IOUtils.closeQuietly(fs); IOUtils.closeQuietly(fs);
@ -179,7 +181,7 @@ public class ExtractorFactory {
* @throws XmlException If an XML parsing error occurs. * @throws XmlException If an XML parsing error occurs.
* @throws IllegalArgumentException If no matching file type could be found. * @throws IllegalArgumentException If no matching file type could be found.
*/ */
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
try { try {
// Check for the normal Office core document // Check for the normal Office core document
PackageRelationshipCollection core; PackageRelationshipCollection core;
@ -226,13 +228,13 @@ public class ExtractorFactory {
// Is it XSLF? // Is it XSLF?
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) { for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
if ( rel.getContentType().equals( contentType ) ) { if ( rel.getContentType().equals( contentType ) ) {
return new XSLFPowerPointExtractor(pkg); return new SlideShowExtractor(new XMLSlideShow(pkg));
} }
} }
// special handling for SlideShow-Theme-files, // special handling for SlideShow-Theme-files,
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) { if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); return new SlideShowExtractor(new XMLSlideShow(pkg));
} }
// How about xlsb? // How about xlsb?
@ -252,28 +254,28 @@ public class ExtractorFactory {
} }
} }
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
return OLE2ExtractorFactory.createExtractor(fs); return createExtractor(fs.getRoot());
} }
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
return OLE2ExtractorFactory.createExtractor(fs); return createExtractor(fs.getRoot());
} }
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException { public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
return OLE2ExtractorFactory.createExtractor(fs); return createExtractor(fs.getRoot());
} }
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
{ {
// First, check for OOXML // First, check for OOXML
for (String entryName : poifsDir.getEntryNames()) { for (String entryName : poifsDir.getEntryNames()) {
if (entryName.equals("Package")) { if (entryName.equals("Package")) {
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package")); OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
return createExtractor(pkg); return (T)createExtractor(pkg);
} }
} }
// If not, ask the OLE2 code to check, with Scratchpad if possible // If not, ask the OLE2 code to check, with Scratchpad if possible
return OLE2ExtractorFactory.createExtractor(poifsDir); return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
} }
/** /**
@ -403,7 +405,7 @@ public class ExtractorFactory {
throw new IllegalStateException("Not yet supported"); throw new IllegalStateException("Not yet supported");
} }
private static POIXMLTextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs) private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
throws IOException { throws IOException {
String pass = Biff8EncryptionKey.getCurrentUserPassword(); String pass = Biff8EncryptionKey.getCurrentUserPassword();
if (pass == null) { if (pass == null) {

View File

@ -37,7 +37,7 @@ import org.apache.xmlbeans.XmlException;
* @deprecated use {@link SlideShowExtractor} * @deprecated use {@link SlideShowExtractor}
*/ */
@Deprecated @Deprecated
@Removal(version="4.2.0") @Removal(version="5.0.0")
public class XSLFPowerPointExtractor extends POIXMLTextExtractor { public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{ public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE, XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,

View File

@ -631,4 +631,9 @@ public class XMLSlideShow extends POIXMLDocument
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
return new POIXMLPropertiesTextExtractor(this); return new POIXMLPropertiesTextExtractor(this);
} }
@Override
public Object getPersistDocument() {
return this;
}
} }

View File

@ -1,3 +1,20 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xslf.usermodel; package org.apache.poi.xslf.usermodel;
import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS; import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;

View File

@ -182,12 +182,20 @@ implements Slide<XSLFShape,XSLFTextParagraph> {
*/ */
public XSLFCommentAuthors getCommentAuthorsPart() { public XSLFCommentAuthors getCommentAuthorsPart() {
if(_commentAuthors == null) { if(_commentAuthors == null) {
// first scan the slide relations
for (POIXMLDocumentPart p : getRelations()) { for (POIXMLDocumentPart p : getRelations()) {
if (p instanceof XSLFCommentAuthors) { if (p instanceof XSLFCommentAuthors) {
_commentAuthors = (XSLFCommentAuthors)p; _commentAuthors = (XSLFCommentAuthors)p;
return _commentAuthors; return _commentAuthors;
} }
} }
// then scan the presentation relations
for (POIXMLDocumentPart p : getSlideShow().getRelations()) {
if (p instanceof XSLFCommentAuthors) {
_commentAuthors = (XSLFCommentAuthors)p;
return _commentAuthors;
}
}
} }
return null; return null;

View File

@ -120,10 +120,10 @@ public class TestHxxFEncryption {
public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException { public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
Biff8EncryptionKey.setCurrentUserPassword(password); Biff8EncryptionKey.setCurrentUserPassword(password);
File f = sampleDir.getFile(file); File f = sampleDir.getFile(file);
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f); POITextExtractor te1 = ExtractorFactory.createExtractor(f);
Biff8EncryptionKey.setCurrentUserPassword(newPass); Biff8EncryptionKey.setCurrentUserPassword(newPass);
ByteArrayOutputStream bos = new ByteArrayOutputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream();
POIDocument doc = te1.getDocument(); POIDocument doc = (POIDocument)te1.getDocument();
doc.write(bos); doc.write(bos);
doc.close(); doc.close();
te1.close(); te1.close();
@ -140,25 +140,25 @@ public class TestHxxFEncryption {
ByteArrayOutputStream bos = new ByteArrayOutputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream();
Biff8EncryptionKey.setCurrentUserPassword(password); Biff8EncryptionKey.setCurrentUserPassword(password);
File f = sampleDir.getFile(file); File f = sampleDir.getFile(file);
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f); POITextExtractor te1 = ExtractorFactory.createExtractor(f);
// first remove encryption // first remove encryption
Biff8EncryptionKey.setCurrentUserPassword(null); Biff8EncryptionKey.setCurrentUserPassword(null);
POIDocument doc = te1.getDocument(); POIDocument doc = (POIDocument)te1.getDocument();
doc.write(bos); doc.write(bos);
doc.close(); doc.close();
te1.close(); te1.close();
// then use default setting, which is cryptoapi // then use default setting, which is cryptoapi
String newPass = "newPass"; String newPass = "newPass";
POIOLE2TextExtractor te2 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray())); POITextExtractor te2 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
Biff8EncryptionKey.setCurrentUserPassword(newPass); Biff8EncryptionKey.setCurrentUserPassword(newPass);
doc = te2.getDocument(); doc = (POIDocument)te2.getDocument();
bos.reset(); bos.reset();
doc.write(bos); doc.write(bos);
doc.close(); doc.close();
te2.close(); te2.close();
// and finally update cryptoapi setting // and finally update cryptoapi setting
POIOLE2TextExtractor te3 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray())); POITextExtractor te3 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
doc = te3.getDocument(); doc = (POIDocument)te3.getDocument();
// need to cache data (i.e. read all data) before changing the key size // need to cache data (i.e. read all data) before changing the key size
if (doc instanceof HSLFSlideShowImpl) { if (doc instanceof HSLFSlideShowImpl) {
HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc; HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
@ -175,8 +175,8 @@ public class TestHxxFEncryption {
doc.close(); doc.close();
te3.close(); te3.close();
// check the setting // check the setting
POIOLE2TextExtractor te4 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray())); POITextExtractor te4 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
doc = te4.getDocument(); doc = (POIDocument)te4.getDocument();
ei = doc.getEncryptionInfo(); ei = doc.getEncryptionInfo();
assertNotNull(ei); assertNotNull(ei);
assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader); assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);

View File

@ -50,6 +50,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePartName; import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.sl.draw.DrawPaint; import org.apache.poi.sl.draw.DrawPaint;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.PaintStyle; import org.apache.poi.sl.usermodel.PaintStyle;
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint; import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint; import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
@ -221,28 +222,27 @@ public class TestXSLFBugs {
* rID2 -> slide3.xml * rID2 -> slide3.xml
*/ */
@Test @Test
public void bug54916() throws Exception { public void bug54916() throws IOException {
XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx"); try (XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx")) {
XSLFSlide slide; XSLFSlide slide;
// Should find 4 slides // Should find 4 slides
assertEquals(4, ss.getSlides().size()); assertEquals(4, ss.getSlides().size());
// Check the text, to see we got them in order // Check the text, to see we got them in order
slide = ss.getSlides().get(0); slide = ss.getSlides().get(0);
assertContains(getSlideText(slide), "POI cannot read this"); assertContains(getSlideText(ss, slide), "POI cannot read this");
slide = ss.getSlides().get(1); slide = ss.getSlides().get(1);
assertContains(getSlideText(slide), "POI can read this"); assertContains(getSlideText(ss, slide), "POI can read this");
assertContains(getSlideText(slide), "Has a relationship to another slide"); assertContains(getSlideText(ss, slide), "Has a relationship to another slide");
slide = ss.getSlides().get(2); slide = ss.getSlides().get(2);
assertContains(getSlideText(slide), "POI can read this"); assertContains(getSlideText(ss, slide), "POI can read this");
slide = ss.getSlides().get(3); slide = ss.getSlides().get(3);
assertContains(getSlideText(slide), "POI can read this"); assertContains(getSlideText(ss, slide), "POI can read this");
}
ss.close();
} }
/** /**
@ -311,8 +311,15 @@ public class TestXSLFBugs {
ss.close(); ss.close();
} }
protected String getSlideText(XSLFSlide slide) { protected String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
return XSLFPowerPointExtractor.getText(slide, true, false, false); try (SlideShowExtractor extr = new SlideShowExtractor(ppt)) {
// do not auto-close the slideshow
extr.setFilesystem(null);
extr.setSlidesByDefault(true);
extr.setNotesByDefault(false);
extr.setMasterByDefault(false);
return extr.getText(slide);
}
} }
@Test @Test
@ -458,7 +465,7 @@ public class TestXSLFBugs {
for (int i = 0; i < slideTexts.length; i++) { for (int i = 0; i < slideTexts.length; i++) {
XSLFSlide slide = ss.getSlides().get(i); XSLFSlide slide = ss.getSlides().get(i);
assertContains(getSlideText(slide), slideTexts[i]); assertContains(getSlideText(ss, slide), slideTexts[i]);
} }
} }

View File

@ -24,16 +24,17 @@ import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow; import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import org.junit.Ignore;
import org.junit.Test; import org.junit.Test;
/** /**
@ -44,188 +45,189 @@ public class TestXSLFPowerPointExtractor {
/** /**
* Get text out of the simple file * Get text out of the simple file
* @throws XmlException
* @throws OpenXML4JException
*/ */
@Test @Test
public void testGetSimpleText() public void testGetSimpleText() throws IOException {
throws IOException, XmlException, OpenXML4JException { try (XMLSlideShow xmlA = openPPTX("sample.pptx");
XMLSlideShow xmlA = openPPTX("sample.pptx"); SlideShowExtractor extractor = new SlideShowExtractor(xmlA)) {
@SuppressWarnings("resource")
OPCPackage pkg = xmlA.getPackage();
new XSLFPowerPointExtractor(xmlA).close(); extractor.getText();
new XSLFPowerPointExtractor(pkg).close();
XSLFPowerPointExtractor extractor = String text = extractor.getText();
new XSLFPowerPointExtractor(xmlA); assertTrue(text.length() > 0);
extractor.getText();
String text = extractor.getText(); // Check Basics
assertTrue(text.length() > 0); assertStartsWith(text, "Lorem ipsum dolor sit amet\n");
assertContains(text, "amet\n\n");
// Check Basics // Our placeholder master text
assertStartsWith(text, "Lorem ipsum dolor sit amet\n"); // This shouldn't show up in the output
assertContains(text, "amet\n\n"); // String masterText =
// "Click to edit Master title style\n" +
// "Click to edit Master subtitle style\n" +
// "\n\n\n\n\n\n" +
// "Click to edit Master title style\n" +
// "Click to edit Master text styles\n" +
// "Second level\n" +
// "Third level\n" +
// "Fourth level\n" +
// "Fifth level\n";
// Our placeholder master text // Just slides, no notes
// This shouldn't show up in the output extractor.setSlidesByDefault(true);
// String masterText = extractor.setNotesByDefault(false);
// "Click to edit Master title style\n" + extractor.setMasterByDefault(false);
// "Click to edit Master subtitle style\n" + text = extractor.getText();
// "\n\n\n\n\n\n" + String slideText =
// "Click to edit Master title style\n" + "Lorem ipsum dolor sit amet\n" +
// "Click to edit Master text styles\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
// "Second level\n" + "\n" +
// "Third level\n" + "Lorem ipsum dolor sit amet\n" +
// "Fourth level\n" + "Lorem\n" +
// "Fifth level\n"; "ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n";
assertEquals(slideText, text);
// Just slides, no notes // Just notes, no slides
text = extractor.getText(true, false, false); extractor.setSlidesByDefault(false);
String slideText = extractor.setNotesByDefault(true);
"Lorem ipsum dolor sit amet\n" + text = extractor.getText();
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + assertEquals("\n\n1\n\n\n2\n", text);
"\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n";
assertEquals(slideText, text);
// Just notes, no slides // Both
text = extractor.getText(false, true); extractor.setSlidesByDefault(true);
assertEquals("\n\n1\n\n\n2\n", text); extractor.setNotesByDefault(true);
text = extractor.getText();
String bothText =
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n\n\n1\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n2\n";
assertEquals(bothText, text);
// Both // With Slides and Master Text
text = extractor.getText(true, true, false); extractor.setSlidesByDefault(true);
String bothText = extractor.setNotesByDefault(false);
"Lorem ipsum dolor sit amet\n" + extractor.setMasterByDefault(true);
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + text = extractor.getText();
"\n\n\n1\n" + String smText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"ipsum\n" + "\n" +
"dolor\n" + "Lorem ipsum dolor sit amet\n" +
"sit\n" + "Lorem\n" +
"amet\n" + "ipsum\n" +
"\n\n\n2\n"; "dolor\n" +
assertEquals(bothText, text); "sit\n" +
"amet\n" +
"\n";
assertEquals(smText, text);
// With Slides and Master Text // With Slides, Notes and Master Text
text = extractor.getText(true, false, true); extractor.setSlidesByDefault(true);
String smText = extractor.setNotesByDefault(true);
"Lorem ipsum dolor sit amet\n" + extractor.setMasterByDefault(true);
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + text = extractor.getText();
"\n" + String snmText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"ipsum\n" + "\n\n\n1\n" +
"dolor\n" + "Lorem ipsum dolor sit amet\n" +
"sit\n" + "Lorem\n" +
"amet\n" + "ipsum\n" +
"\n"; "dolor\n" +
assertEquals(smText, text); "sit\n" +
"amet\n" +
"\n\n\n2\n";
assertEquals(snmText, text);
// With Slides, Notes and Master Text // Via set defaults
text = extractor.getText(true, true, true); extractor.setSlidesByDefault(false);
String snmText = extractor.setNotesByDefault(true);
"Lorem ipsum dolor sit amet\n" + text = extractor.getText();
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + assertEquals("\n\n1\n\n\n2\n", text);
"\n\n\n1\n" + }
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n2\n";
assertEquals(snmText, text);
// Via set defaults
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(true);
text = extractor.getText();
assertEquals("\n\n1\n\n\n2\n", text);
extractor.close();
xmlA.close();
} }
@Test
public void testGetComments() throws IOException { public void testGetComments() throws IOException {
XMLSlideShow xml = openPPTX("45545_Comment.pptx"); try (XMLSlideShow xml = openPPTX("45545_Comment.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml); SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
extractor.setCommentsByDefault(true);
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
// Check comments are there // Check comments are there
assertContains(text, "testdoc"); assertContains(text, "testdoc");
assertContains(text, "test phrase"); assertContains(text, "test phrase");
// Check the authors came through too // Check the authors came through too
assertContains(text, "XPVMWARE01"); assertContains(text, "XPVMWARE01");
}
extractor.close();
xml.close();
} }
@Test
@Ignore("currently slidelayouts aren't yet supported")
public void testGetMasterText() throws Exception { public void testGetMasterText() throws Exception {
XMLSlideShow xml = openPPTX("WithMaster.pptx"); try (XMLSlideShow xml = openPPTX("WithMaster.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml); SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
extractor.setSlidesByDefault(true); extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(false); extractor.setNotesByDefault(false);
extractor.setMasterByDefault(true); extractor.setMasterByDefault(true);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check master text is there String text = extractor.getText();
assertContains(text, "Footer from the master slide"); assertTrue(text.length() > 0);
// Theme text shouldn't show up // Check master text is there
// String themeText = assertContains(text, "Footer from the master slide");
// "Theme Master Title\n" +
// "Theme Master first level\n" +
// "And the 2nd level\n" +
// "Our 3rd level goes here\n" +
// "And onto the 4th, such fun....\n" +
// "Finally is the Fifth level\n";
// Check the whole text // Theme text shouldn't show up
String wholeText = // String themeText =
"First page title\n" + // "Theme Master Title\n" +
"First page subtitle\n" + // "Theme Master first level\n" +
"This is the Master Title\n" + // "And the 2nd level\n" +
"This text comes from the Master Slide\n" + // "Our 3rd level goes here\n" +
"\n" + // "And onto the 4th, such fun....\n" +
// TODO Detect we didn't have a title, and include the master one // "Finally is the Fifth level\n";
"2nd page subtitle\n" +
"Footer from the master slide\n" +
"This is the Master Title\n" +
"This text comes from the Master Slide\n";
assertEquals(wholeText, text);
extractor.close(); // Check the whole text
xml.close(); String wholeText =
"First page title\n" +
"First page subtitle\n" +
"This is the Master Title\n" +
"This text comes from the Master Slide\n" +
"\n" +
// TODO Detect we didn't have a title, and include the master one
"2nd page subtitle\n" +
"Footer from the master slide\n" +
"This is the Master Title\n" +
"This text comes from the Master Slide\n";
assertEquals(wholeText, text);
}
} }
@Test @Test
public void testTable() throws Exception { public void testTable() throws Exception {
XMLSlideShow xml = openPPTX("present1.pptx"); try (XMLSlideShow xml = openPPTX("present1.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml); SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
// Check comments are there // Check comments are there
assertContains(text, "TEST"); assertContains(text, "TEST");
}
extractor.close();
xml.close();
} }
/** /**
@ -241,74 +243,76 @@ public class TestXSLFPowerPointExtractor {
}; };
for(String extension : extensions) { for(String extension : extensions) {
String filename = "testPPT." + extension; String filename = "testPPT." + extension;
XMLSlideShow xml = openPPTX(filename);
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); try (XMLSlideShow xml = openPPTX(filename);
if (extension.equals("thmx")) { SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
// Theme file doesn't have any textual content
assertEquals(filename, 0, text.length()); String text = extractor.getText();
continue; if (extension.equals("thmx")) {
// Theme file doesn't have any textual content
assertEquals(filename, 0, text.length());
continue;
}
assertTrue(filename, text.length() > 0);
assertContains(filename, text, "Attachment Test");
assertContains(filename, text, "This is a test file data with the same content");
assertContains(filename, text, "content parsing");
assertContains(filename, text, "Different words to test against");
assertContains(filename, text, "Mystery");
} }
assertTrue(filename, text.length() > 0);
assertContains(filename, text, "Attachment Test");
assertContains(filename, text, "This is a test file data with the same content");
assertContains(filename, text, "content parsing");
assertContains(filename, text, "Different words to test against");
assertContains(filename, text, "Mystery");
extractor.close();
xml.close();
} }
} }
@Test @Test
public void test45541() throws Exception { public void test45541() throws IOException, OpenXML4JException, XmlException {
// extract text from a powerpoint that has a header in the notes-element // extract text from a powerpoint that has a header in the notes-element
POITextExtractor extr = ExtractorFactory.createExtractor( final File headerFile = slTests.getFile("45541_Header.pptx");
slTests.getFile("45541_Header.pptx")); try (final SlideShowExtractor extr = ExtractorFactory.createExtractor(headerFile)) {
String text = extr.getText(); String text = extr.getText();
assertNotNull(text); assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc")); assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true); extr.setSlidesByDefault(false);
assertContains(text, "testdoc"); extr.setNotesByDefault(true);
extr.close();
assertNotNull(text); text = extr.getText();
assertContains(text, "testdoc");
assertNotNull(text);
}
// extract text from a powerpoint that has a footer in the master-slide // extract text from a powerpoint that has a footer in the master-slide
extr = ExtractorFactory.createExtractor( final File footerFile = slTests.getFile("45541_Footer.pptx");
slTests.getFile("45541_Footer.pptx")); try (SlideShowExtractor extr = ExtractorFactory.createExtractor(footerFile)) {
text = extr.getText(); String text = extr.getText();
assertNotContained(text, "testdoc"); assertNotContained(text, "testdoc");
text = ((XSLFPowerPointExtractor)extr).getText(false, true); extr.setSlidesByDefault(false);
assertNotContained(text, "testdoc"); extr.setNotesByDefault(true);
text = extr.getText();
assertNotContained(text, "testdoc");
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true); extr.setSlidesByDefault(false);
assertNotContained(text, "testdoc"); extr.setNotesByDefault(false);
extr.setMasterByDefault(true);
extr.close(); text = extr.getText();
assertNotContained(text, "testdoc");
}
} }
@Test @Test
public void bug54570() throws IOException { public void bug54570() throws IOException {
XMLSlideShow xml = openPPTX("bug54570.pptx"); try (XMLSlideShow xml = openPPTX("bug54570.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml); SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
String text = extractor.getText(); String text = extractor.getText();
assertNotNull(text); assertNotNull(text);
extractor.close(); }
xml.close();
} }
private XMLSlideShow openPPTX(String file) throws IOException { private XMLSlideShow openPPTX(String file) throws IOException {
InputStream is = slTests.openResourceAsStream(file); try (InputStream is = slTests.openResourceAsStream(file)) {
try {
return new XMLSlideShow(is); return new XMLSlideShow(is);
} finally {
is.close();
} }
} }
} }

View File

@ -38,6 +38,8 @@ import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShowFactory;
/** /**
* Scratchpad-specific logic for {@link OLE2ExtractorFactory} and * Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
@ -65,7 +67,7 @@ public class OLE2ScratchpadExtractorFactory {
} }
if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) { if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
return new PowerPointExtractor(poifsDir); return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
} }
if (poifsDir.hasEntry("VisioDocument")) { if (poifsDir.hasEntry("VisioDocument")) {

View File

@ -34,6 +34,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.extractor.SlideShowExtractor; import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.SlideShowFactory; import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.Removal;
/** /**
* This class can be used to extract text from a PowerPoint file. Can optionally * This class can be used to extract text from a PowerPoint file. Can optionally
@ -43,6 +44,7 @@ import org.apache.poi.sl.usermodel.SlideShowFactory;
*/ */
@SuppressWarnings("WeakerAccess") @SuppressWarnings("WeakerAccess")
@Deprecated @Deprecated
@Removal(version="5.0.0")
public final class PowerPointExtractor extends POIOLE2TextExtractor { public final class PowerPointExtractor extends POIOLE2TextExtractor {
private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate; private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;

View File

@ -1139,4 +1139,9 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
public void close() throws IOException { public void close() throws IOException {
_hslfSlideShow.close(); _hslfSlideShow.close();
} }
@Override
public Object getPersistDocument() {
return getSlideShowImpl();
}
} }

View File

@ -19,8 +19,8 @@ package org.apache.poi.hslf.usermodel;
import java.io.IOException; import java.io.IOException;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory; import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
@ -31,12 +31,20 @@ import org.apache.poi.util.Internal;
@Internal @Internal
public class HSLFSlideShowFactory extends SlideShowFactory { public class HSLFSlideShowFactory extends SlideShowFactory {
/** /**
* Creates a HSLFSlideShow from the given NPOIFSFileSystem * Creates a HSLFSlideShow from the given NPOIFSFileSystem<p>
* <p>Note that in order to properly release resources the * Note that in order to properly release resources the
* SlideShow should be closed after use. * SlideShow should be closed after use.
*/ */
public static SlideShow<?,?> createSlideShow(NPOIFSFileSystem fs) throws IOException { public static HSLFSlideShow createSlideShow(final NPOIFSFileSystem fs) throws IOException {
return new HSLFSlideShow(fs); return new HSLFSlideShow(fs);
} }
/**
* Creates a HSLFSlideShow from the given DirectoryNode<p>
* Note that in order to properly release resources the
* SlideShow should be closed after use.
*/
public static HSLFSlideShow createSlideShow(final DirectoryNode root) throws IOException {
return new HSLFSlideShow(root);
}
} }

View File

@ -846,9 +846,13 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
@Override @Override
public void close() throws IOException { public void close() throws IOException {
NPOIFSFileSystem fs = getDirectory().getFileSystem(); // only close the filesystem, if we are based on the root node.
if (fs != null) { // embedded documents/slideshows shouldn't close the parent container
fs.close(); if (getDirectory().getParent() == null) {
NPOIFSFileSystem fs = getDirectory().getFileSystem();
if (fs != null) {
fs.close();
}
} }
} }

View File

@ -42,6 +42,10 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.ObjectShape;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.junit.Test; import org.junit.Test;
@ -76,43 +80,46 @@ public final class TestExtractor {
// ppe.close(); // ppe.close();
// } // }
private PowerPointExtractor openExtractor(String fileName) throws IOException { private SlideShowExtractor<?,?> openExtractor(String fileName) throws IOException {
InputStream is = slTests.openResourceAsStream(fileName); try (InputStream is = slTests.openResourceAsStream(fileName)) {
try { return new SlideShowExtractor(SlideShowFactory.create(is));
return new PowerPointExtractor(is);
} finally {
is.close();
} }
} }
@Test @Test
public void testReadSheetText() throws IOException { public void testReadSheetText() throws IOException {
// Basic 2 page example // Basic 2 page example
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
assertEquals(expectText, ppe.getText()); assertEquals(expectText, ppe.getText());
ppe.close(); }
// 1 page example with text boxes // 1 page example with text boxes
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt"); try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
assertEquals(expectText2, ppe2.getText()); assertEquals(expectText2, ppe.getText());
ppe2.close(); }
} }
@Test @Test
public void testReadNoteText() throws IOException { public void testReadNoteText() throws IOException {
// Basic 2 page example // Basic 2 page example
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
String notesText = ppe.getNotes(); ppe.setNotesByDefault(true);
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n"; ppe.setSlidesByDefault(false);
assertEquals(expText, notesText); ppe.setMasterByDefault(false);
ppe.close(); String notesText = ppe.getText();
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
assertEquals(expText, notesText);
}
// Other one doesn't have notes // Other one doesn't have notes
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt"); try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
notesText = ppe2.getNotes(); ppe.setNotesByDefault(true);
expText = ""; ppe.setSlidesByDefault(false);
assertEquals(expText, notesText); ppe.setMasterByDefault(false);
ppe2.close(); String notesText = ppe.getText();
String expText = "";
assertEquals(expText, notesText);
}
} }
@Test @Test
@ -126,19 +133,19 @@ public final class TestExtractor {
"\nThese are the notes on page two, again lacking formatting\n" "\nThese are the notes on page two, again lacking formatting\n"
}; };
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt"); try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
ppe.setSlidesByDefault(true); ppe.setSlidesByDefault(true);
ppe.setNotesByDefault(false); ppe.setNotesByDefault(false);
assertEquals(slText[0] + slText[1], ppe.getText()); assertEquals(slText[0] + slText[1], ppe.getText());
ppe.setSlidesByDefault(false); ppe.setSlidesByDefault(false);
ppe.setNotesByDefault(true); ppe.setNotesByDefault(true);
assertEquals(ntText[0] + ntText[1], ppe.getText()); assertEquals(ntText[0] + ntText[1], ppe.getText());
ppe.setSlidesByDefault(true); ppe.setSlidesByDefault(true);
ppe.setNotesByDefault(true); ppe.setNotesByDefault(true);
assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText()); assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
ppe.close(); }
} }
/** /**
@ -149,45 +156,46 @@ public final class TestExtractor {
*/ */
@Test @Test
public void testMissingCoreRecords() throws IOException { public void testMissingCoreRecords() throws IOException {
PowerPointExtractor ppe = openExtractor("missing_core_records.ppt"); try (SlideShowExtractor<?,?> ppe = openExtractor("missing_core_records.ppt")) {
ppe.setSlidesByDefault(true);
ppe.setNotesByDefault(false);
String text = ppe.getText();
ppe.setSlidesByDefault(false);
ppe.setNotesByDefault(true);
String nText = ppe.getText();
String text = ppe.getText(true, false); assertNotNull(text);
String nText = ppe.getNotes(); assertNotNull(nText);
assertNotNull(text); // Notes record were corrupt, so don't expect any
assertNotNull(nText); assertEquals(nText.length(), 0);
// Notes record were corrupt, so don't expect any // Slide records were fine
assertEquals(nText.length(), 0); assertContains(text, "Using Disease Surveillance and Response");
}
// Slide records were fine
assertContains(text, "Using Disease Surveillance and Response");
ppe.close();
} }
@Test @Test
public void testExtractFromEmbeded() throws IOException { public void testExtractFromEmbeded() throws IOException {
InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls"); try (final InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
POIFSFileSystem fs = new POIFSFileSystem(is); final POIFSFileSystem fs = new POIFSFileSystem(is)) {
DirectoryNode root = fs.getRoot(); final DirectoryNode root = fs.getRoot();
PowerPointExtractor ppe1 = assertExtractFromEmbedded(root, "MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n");
PowerPointExtractor ppe2 = assertExtractFromEmbedded(root, "MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n");
ppe2.close();
ppe1.close();
fs.close();
}
private PowerPointExtractor assertExtractFromEmbedded(DirectoryNode root, String entryName, String expected)
throws IOException {
DirectoryNode dir = (DirectoryNode)root.getEntry(entryName);
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
// Check the first file final String[] TEST_SET = {
HSLFSlideShowImpl ppt = new HSLFSlideShowImpl(dir); "MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
PowerPointExtractor ppe = new PowerPointExtractor(ppt); "MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n"
assertEquals(expected, ppe.getText(true, false)); };
return ppe;
for (int i=0; i<TEST_SET.length; i+=2) {
DirectoryNode dir = (DirectoryNode)root.getEntry(TEST_SET[i]);
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
try (final SlideShow<?,?> ppt = SlideShowFactory.create(dir);
final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
assertEquals(TEST_SET[i+1], ppe.getText());
}
}
}
} }
/** /**
@ -195,32 +203,32 @@ public final class TestExtractor {
*/ */
@Test @Test
public void testExtractFromOwnEmbeded() throws IOException { public void testExtractFromOwnEmbeded() throws IOException {
PowerPointExtractor ppe = openExtractor("ppt_with_embeded.ppt"); try (SlideShowExtractor<?,?> ppe = openExtractor("ppt_with_embeded.ppt")) {
List<HSLFObjectShape> shapes = ppe.getOLEShapes(); List<? extends ObjectShape> shapes = ppe.getOLEShapes();
assertEquals("Expected 6 ole shapes", 6, shapes.size()); assertEquals("Expected 6 ole shapes", 6, shapes.size());
int num_ppt = 0, num_doc = 0, num_xls = 0; int num_ppt = 0, num_doc = 0, num_xls = 0;
for (HSLFObjectShape ole : shapes) { for (ObjectShape ole : shapes) {
String name = ole.getInstanceName(); String name = ((HSLFObjectShape)ole).getInstanceName();
InputStream data = ole.getObjectData().getInputStream(); InputStream data = ole.getObjectData().getInputStream();
if ("Worksheet".equals(name)) { if ("Worksheet".equals(name)) {
HSSFWorkbook wb = new HSSFWorkbook(data); HSSFWorkbook wb = new HSSFWorkbook(data);
num_xls++; num_xls++;
wb.close(); wb.close();
} else if ("Document".equals(name)) { } else if ("Document".equals(name)) {
HWPFDocument doc = new HWPFDocument(data); HWPFDocument doc = new HWPFDocument(data);
num_doc++; num_doc++;
doc.close(); doc.close();
} else if ("Presentation".equals(name)) { } else if ("Presentation".equals(name)) {
num_ppt++; num_ppt++;
HSLFSlideShow ppt = new HSLFSlideShow(data); HSLFSlideShow ppt = new HSLFSlideShow(data);
ppt.close(); ppt.close();
}
data.close();
} }
data.close(); assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
} }
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
ppe.close();
} }
/** /**
@ -228,11 +236,11 @@ public final class TestExtractor {
*/ */
@Test @Test
public void test52991() throws IOException { public void test52991() throws IOException {
PowerPointExtractor ppe = openExtractor("badzip.ppt"); try (SlideShowExtractor<?,?> ppe = openExtractor("badzip.ppt")) {
for (HSLFObjectShape shape : ppe.getOLEShapes()) { for (ObjectShape shape : ppe.getOLEShapes()) {
IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream()); IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream());
}
} }
ppe.close();
} }
/** /**
@ -240,27 +248,27 @@ public final class TestExtractor {
*/ */
@Test @Test
public void testWithComments() throws IOException { public void testWithComments() throws IOException {
PowerPointExtractor ppe1 = openExtractor("WithComments.ppt"); try (final SlideShowExtractor ppe = openExtractor("WithComments.ppt")) {
String text = ppe1.getText(); String text = ppe.getText();
assertFalse("Comments not in by default", text.contains("This is a test comment")); assertFalse("Comments not in by default", text.contains("This is a test comment"));
ppe1.setCommentsByDefault(true); ppe.setCommentsByDefault(true);
text = ppe1.getText(); text = ppe.getText();
assertContains(text, "This is a test comment"); assertContains(text, "This is a test comment");
ppe1.close(); }
// And another file // And another file
PowerPointExtractor ppe2 = openExtractor("45543.ppt"); try (SlideShowExtractor ppe = openExtractor("45543.ppt")) {
text = ppe2.getText(); String text = ppe.getText();
assertFalse("Comments not in by default", text.contains("testdoc")); assertFalse("Comments not in by default", text.contains("testdoc"));
ppe2.setCommentsByDefault(true); ppe.setCommentsByDefault(true);
text = ppe2.getText(); text = ppe.getText();
assertContains(text, "testdoc"); assertContains(text, "testdoc");
ppe2.close(); }
} }
/** /**
@ -268,48 +276,37 @@ public final class TestExtractor {
*/ */
@Test @Test
public void testHeaderFooter() throws IOException { public void testHeaderFooter() throws IOException {
String text;
// With a header on the notes // With a header on the notes
InputStream is1 = slTests.openResourceAsStream("45537_Header.ppt"); try (InputStream is = slTests.openResourceAsStream("45537_Header.ppt");
HSLFSlideShow ppt1 = new HSLFSlideShow(is1); HSLFSlideShow ppt = new HSLFSlideShow(is)) {
is1.close();
assertNotNull(ppt1.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ppt1.getNotesHeadersFooters().getHeaderText());
PowerPointExtractor ppe1 = new PowerPointExtractor(ppt1.getSlideShowImpl()); assertNotNull(ppt.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getHeaderText());
text = ppe1.getText(); testHeaderFooterInner(ppt);
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); }
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
ppe1.setNotesByDefault(true);
text = ppe1.getText();
assertContains(text, "testdoc");
assertContains(text, "test phrase");
ppe1.close();
ppt1.close();
// And with a footer, also on notes // And with a footer, also on notes
InputStream is2 = slTests.openResourceAsStream("45537_Footer.ppt"); try (final InputStream is = slTests.openResourceAsStream("45537_Footer.ppt");
HSLFSlideShow ppt2 = new HSLFSlideShow(is2); final HSLFSlideShow ppt = new HSLFSlideShow(is)) {
is2.close(); assertNotNull(ppt.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getFooterText());
assertNotNull(ppt2.getNotesHeadersFooters());
assertEquals("testdoc test phrase", ppt2.getNotesHeadersFooters().getFooterText());
ppt2.close();
PowerPointExtractor ppe2 = openExtractor("45537_Footer.ppt"); testHeaderFooterInner(ppt);
}
}
text = ppe2.getText(); private void testHeaderFooterInner(final HSLFSlideShow ppt) throws IOException {
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc")); try (final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase")); String text = ppe.getText();
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
ppe2.setNotesByDefault(true); ppe.setNotesByDefault(true);
text = ppe2.getText(); text = ppe.getText();
assertContains(text, "testdoc"); assertContains(text, "testdoc");
assertContains(text, "test phrase"); assertContains(text, "test phrase");
ppe2.close(); }
} }
@SuppressWarnings("unused") @SuppressWarnings("unused")
@ -318,41 +315,40 @@ public final class TestExtractor {
String masterTitleText = "This is the Master Title"; String masterTitleText = "This is the Master Title";
String masterRandomText = "This text comes from the Master Slide"; String masterRandomText = "This text comes from the Master Slide";
String masterFooterText = "Footer from the master slide"; String masterFooterText = "Footer from the master slide";
PowerPointExtractor ppe = openExtractor("WithMaster.ppt"); try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
ppe.setMasterByDefault(true); ppe.setMasterByDefault(true);
String text = ppe.getText(); String text = ppe.getText();
assertContains(text, masterRandomText); assertContains(text, masterRandomText);
assertContains(text, masterFooterText); assertContains(text, masterFooterText);
ppe.close(); }
} }
@Test @Test
public void testMasterText() throws IOException { public void testMasterText() throws IOException {
PowerPointExtractor ppe1 = openExtractor("master_text.ppt"); try (final SlideShowExtractor ppe = openExtractor("master_text.ppt")) {
// Initially not there
String text = ppe.getText();
assertFalse(text.contains("Text that I added to the master slide"));
// Initially not there // Enable, shows up
String text = ppe1.getText(); ppe.setMasterByDefault(true);
assertFalse(text.contains("Text that I added to the master slide")); text = ppe.getText();
assertContains(text, "Text that I added to the master slide");
// Enable, shows up // Make sure placeholder text does not come out
ppe1.setMasterByDefault(true); assertNotContained(text, "Click to edit Master");
text = ppe1.getText(); }
assertContains(text, "Text that I added to the master slide");
// Make sure placeholder text does not come out
assertNotContained(text, "Click to edit Master");
ppe1.close();
// Now with another file only containing master text // Now with another file only containing master text
// Will always show up // Will always show up
PowerPointExtractor ppe2 = openExtractor("WithMaster.ppt"); try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
String masterText = "Footer from the master slide"; String masterText = "Footer from the master slide";
text = ppe2.getText(); String text = ppe.getText();
assertContainsIgnoreCase(text, "master"); assertContainsIgnoreCase(text, "master");
assertContains(text, masterText); assertContains(text, masterText);
ppe2.close(); }
} }
/** /**
@ -360,22 +356,21 @@ public final class TestExtractor {
*/ */
@Test @Test
public void testChineseText() throws IOException { public void testChineseText() throws IOException {
PowerPointExtractor ppe = openExtractor("54880_chinese.ppt"); try (final SlideShowExtractor ppe = openExtractor("54880_chinese.ppt")) {
String text = ppe.getText();
String text = ppe.getText(); // Check for the english text line
assertContains(text, "Single byte");
// Check for the english text line // Check for the english text in the mixed line
assertContains(text, "Single byte"); assertContains(text, "Mix");
// Check for the english text in the mixed line // Check for the chinese text in the mixed line
assertContains(text, "Mix"); assertContains(text, "\u8868");
// Check for the chinese text in the mixed line // Check for the chinese only text line
assertContains(text, "\u8868"); assertContains(text, "\uff8a\uff9d\uff76\uff78");
}
// Check for the chinese only text line
assertContains(text, "\uff8a\uff9d\uff76\uff78");
ppe.close();
} }
/** /**
@ -387,67 +382,59 @@ public final class TestExtractor {
public void testDifferentPOIFS() throws IOException { public void testDifferentPOIFS() throws IOException {
// Open the two filesystems // Open the two filesystems
File pptFile = slTests.getFile("basic_test_ppt_file.ppt"); File pptFile = slTests.getFile("basic_test_ppt_file.ppt");
InputStream is1 = new FileInputStream(pptFile); try (final InputStream is1 = new FileInputStream(pptFile);
OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1); final NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile)) {
is1.close();
NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile);
DirectoryNode[] files = { opoifs.getRoot(), npoifs.getRoot() };
// Open directly final OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1);
for (DirectoryNode dir : files) {
PowerPointExtractor extractor = new PowerPointExtractor(dir); DirectoryNode[] files = {opoifs.getRoot(), npoifs.getRoot()};
assertEquals(expectText, extractor.getText());
// Open directly
for (DirectoryNode dir : files) {
try (SlideShow<?,?> ppt = SlideShowFactory.create(dir);
SlideShowExtractor<?,?> extractor = new SlideShowExtractor(ppt)) {
assertEquals(expectText, extractor.getText());
}
}
} }
// Open via a HSLFSlideShow
for (DirectoryNode dir : files) {
HSLFSlideShowImpl slideshow = new HSLFSlideShowImpl(dir);
PowerPointExtractor extractor = new PowerPointExtractor(slideshow);
assertEquals(expectText, extractor.getText());
extractor.close();
slideshow.close();
}
npoifs.close();
} }
@Test @Test
public void testTable() throws Exception { public void testTable() throws Exception {
PowerPointExtractor ppe1 = openExtractor("54111.ppt"); try (SlideShowExtractor ppe = openExtractor("54111.ppt")) {
String text1 = ppe1.getText(); String text = ppe.getText();
String target1 = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n"+ String target = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n" +
"Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n"+ "Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n" +
"Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n"+ "Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n" +
"Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n"+ "Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n" +
"Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n"+ "Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n" +
"Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n"; "Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n";
assertContains(text1, target1); assertContains(text, target);
ppe1.close(); }
PowerPointExtractor ppe2 = openExtractor("54722.ppt"); try (SlideShowExtractor ppe = openExtractor("54722.ppt")) {
String text2 = ppe2.getText(); String text = ppe.getText();
String target2 = "this\tText\tis\twithin\ta\n" + String target = "this\tText\tis\twithin\ta\n" +
"table\t1\t2\t3\t4"; "table\t1\t2\t3\t4";
assertContains(text2, target2); assertContains(text, target);
ppe2.close(); }
} }
// bug 60003 // bug 60003
@Test @Test
public void testExtractMasterSlideFooterText() throws Exception { public void testExtractMasterSlideFooterText() throws Exception {
PowerPointExtractor ppe = openExtractor("60003.ppt"); try (SlideShowExtractor ppe = openExtractor("60003.ppt")) {
ppe.setMasterByDefault(true); ppe.setMasterByDefault(true);
String text = ppe.getText(); String text = ppe.getText();
assertContains(text, "Prague"); assertContains(text, "Prague");
ppe.close(); }
} }
@Test @Test
public void testExtractGroupedShapeText() throws Exception { public void testExtractGroupedShapeText() throws Exception {
try (final PowerPointExtractor ppe = openExtractor("bug62092.ppt")) { try (final SlideShowExtractor ppe = openExtractor("bug62092.ppt")) {
final String text = ppe.getText(); final String text = ppe.getText();
//this tests that we're ignoring text shapes at depth=0 //this tests that we're ignoring text shapes at depth=0

View File

@ -73,6 +73,7 @@ import org.apache.poi.poifs.macros.VBAMacroReader;
import org.apache.poi.sl.draw.DrawFactory; import org.apache.poi.sl.draw.DrawFactory;
import org.apache.poi.sl.draw.DrawPaint; import org.apache.poi.sl.draw.DrawPaint;
import org.apache.poi.sl.draw.DrawTextParagraph; import org.apache.poi.sl.draw.DrawTextParagraph;
import org.apache.poi.sl.extractor.SlideShowExtractor;
import org.apache.poi.sl.usermodel.ColorStyle; import org.apache.poi.sl.usermodel.ColorStyle;
import org.apache.poi.sl.usermodel.PaintStyle; import org.apache.poi.sl.usermodel.PaintStyle;
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint; import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
@ -800,18 +801,18 @@ public final class TestBugs {
String files[] = { "bug58718_008524.ppt","bug58718_008558.ppt","bug58718_349008.ppt","bug58718_008495.ppt", }; String files[] = { "bug58718_008524.ppt","bug58718_008558.ppt","bug58718_349008.ppt","bug58718_008495.ppt", };
for (String f : files) { for (String f : files) {
File sample = HSLFTestDataSamples.getSampleFile(f); File sample = HSLFTestDataSamples.getSampleFile(f);
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath()); try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
assertNotNull(ex.getText()); assertNotNull(ex.getText());
ex.close(); }
} }
} }
@Test @Test
public void bug58733() throws IOException { public void bug58733() throws IOException {
File sample = HSLFTestDataSamples.getSampleFile("bug58733_671884.ppt"); File sample = HSLFTestDataSamples.getSampleFile("bug58733_671884.ppt");
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath()); try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
assertNotNull(ex.getText()); assertNotNull(ex.getText());
ex.close(); }
} }
@Test @Test

Binary file not shown.