#62319 - Decommission XSLF-/PowerPointExtractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1829653 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bc436fcc3d
commit
ab390ce170
@ -330,8 +330,6 @@ public class TestAllFiles {
|
||||
);
|
||||
|
||||
private static final Set<String> IGNORED = unmodifiableHashSet(
|
||||
// need JDK8+ - https://bugs.openjdk.java.net/browse/JDK-8038081
|
||||
"slideshow/42474-2.ppt",
|
||||
// OPC handler works / XSSF handler fails
|
||||
"spreadsheet/57181.xlsm",
|
||||
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
|
||||
|
@ -24,6 +24,7 @@ import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||
@ -53,12 +54,19 @@ public class XSLFFileHandler extends SlideShowHandler {
|
||||
|
||||
// additionally try the other getText() methods
|
||||
|
||||
try (XSLFPowerPointExtractor extractor = (XSLFPowerPointExtractor) ExtractorFactory.createExtractor(file)) {
|
||||
try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
|
||||
assertNotNull(extractor);
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(true);
|
||||
extractor.setMasterByDefault(true);
|
||||
|
||||
assertNotNull(extractor.getText(true, true, true));
|
||||
assertEquals("With all options disabled we should not get text",
|
||||
"", extractor.getText(false, false, false));
|
||||
assertNotNull(extractor.getText());
|
||||
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(false);
|
||||
|
||||
assertEquals("With all options disabled we should not get text", "", extractor.getText());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -105,6 +105,7 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
|
||||
*
|
||||
* @return the underlying POIDocument
|
||||
*/
|
||||
@Override
|
||||
public POIDocument getDocument() {
|
||||
return document;
|
||||
}
|
||||
|
@ -74,4 +74,9 @@ public abstract class POITextExtractor implements Closeable {
|
||||
fsToClose.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the processed document
|
||||
*/
|
||||
public abstract Object getDocument();
|
||||
}
|
||||
|
@ -115,26 +115,23 @@ public class OLE2ExtractorFactory {
|
||||
return threadPreferEventExtractors.get();
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
|
||||
return (T)createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException {
|
||||
return (T)createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException {
|
||||
// Only ever an OLE2 one from the root of the FS
|
||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
||||
public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException {
|
||||
return (T)createExtractor(fs.getRoot());
|
||||
}
|
||||
|
||||
public static POITextExtractor createExtractor(InputStream input) throws IOException {
|
||||
public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
|
||||
Class<?> cls = getOOXMLClass();
|
||||
if (cls != null) {
|
||||
// Use Reflection to get us the full OOXML-enabled version
|
||||
try {
|
||||
Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
|
||||
return (POITextExtractor)m.invoke(null, input);
|
||||
return (T)m.invoke(null, input);
|
||||
} catch (IllegalArgumentException iae) {
|
||||
throw iae;
|
||||
} catch (Exception e) {
|
||||
|
@ -44,8 +44,30 @@ public class DocumentFactoryHelper {
|
||||
* @throws IOException If an error occurs while decrypting or if the password does not match
|
||||
*/
|
||||
public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
|
||||
throws IOException {
|
||||
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
|
||||
// as well when the resulting OPCPackage is closed
|
||||
return new FilterInputStream(getDecryptedStream(fs.getRoot(), password)) {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
fs.close();
|
||||
super.close();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrap the OLE2 data of the DirectoryNode into a decrypted stream by using
|
||||
* the given password.
|
||||
*
|
||||
* @param root The OLE2 directory node for the document
|
||||
* @param password The password, null if the default password should be used
|
||||
* @return A stream for reading the decrypted data
|
||||
* @throws IOException If an error occurs while decrypting or if the password does not match
|
||||
*/
|
||||
public static InputStream getDecryptedStream(final DirectoryNode root, String password)
|
||||
throws IOException {
|
||||
EncryptionInfo info = new EncryptionInfo(fs);
|
||||
EncryptionInfo info = new EncryptionInfo(root);
|
||||
Decryptor d = Decryptor.getInstance(info);
|
||||
|
||||
try {
|
||||
@ -58,21 +80,11 @@ public class DocumentFactoryHelper {
|
||||
}
|
||||
|
||||
if (passwordCorrect) {
|
||||
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
|
||||
// as well when the resulting OPCPackage is closed
|
||||
return new FilterInputStream(d.getDataStream(fs.getRoot())) {
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
fs.close();
|
||||
|
||||
super.close();
|
||||
}
|
||||
};
|
||||
return d.getDataStream(root);
|
||||
} else if (password != null) {
|
||||
throw new EncryptedDocumentException("Password incorrect");
|
||||
} else {
|
||||
if (password != null)
|
||||
throw new EncryptedDocumentException("Password incorrect");
|
||||
else
|
||||
throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
|
||||
throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
|
||||
}
|
||||
} catch (GeneralSecurityException e) {
|
||||
throw new IOException(e);
|
||||
|
@ -1,3 +1,20 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.sl.extractor;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@ -48,6 +65,16 @@ public class SlideShowExtractor<
|
||||
this.slideshow = slideshow;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the opened document
|
||||
*
|
||||
* @return the opened document
|
||||
*/
|
||||
@Override
|
||||
public final Object getDocument() {
|
||||
return slideshow.getPersistDocument();
|
||||
}
|
||||
|
||||
/**
|
||||
* Should a call to getText() return slide text? Default is yes
|
||||
*/
|
||||
@ -219,7 +246,6 @@ public class SlideShowExtractor<
|
||||
return;
|
||||
}
|
||||
for (final P para : paraList) {
|
||||
final int oldLen = sb.length();
|
||||
for (final TextRun tr : para) {
|
||||
final String str = tr.getRawText().replace("\r", "");
|
||||
final String newStr;
|
||||
|
@ -126,4 +126,13 @@ public interface SlideShow<
|
||||
* @since POI 4.0.0
|
||||
*/
|
||||
POITextExtractor getMetadataTextExtractor();
|
||||
|
||||
/**
|
||||
* @return the instance which handles the persisting of the slideshow,
|
||||
* which is either a subclass of {@link org.apache.poi.POIDocument}
|
||||
* or {@link org.apache.poi.POIXMLDocument}
|
||||
*
|
||||
* @since POI 4.0.0
|
||||
*/
|
||||
Object getPersistDocument();
|
||||
}
|
||||
|
@ -60,13 +60,40 @@ public class SlideShowFactory {
|
||||
* @throws IOException if an error occurs while reading the data
|
||||
*/
|
||||
public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
|
||||
DirectoryNode root = fs.getRoot();
|
||||
return create(fs.getRoot(), password);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a SlideShow from the given DirectoryNode.
|
||||
*
|
||||
* @param root The {@link DirectoryNode} to start reading the document from
|
||||
*
|
||||
* @return The created SlideShow
|
||||
*
|
||||
* @throws IOException if an error occurs while reading the data
|
||||
*/
|
||||
public static SlideShow<?,?> create(final DirectoryNode root) throws IOException {
|
||||
return create(root, null);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Creates a SlideShow from the given DirectoryNode, which may
|
||||
* be password protected
|
||||
*
|
||||
* @param root The {@link DirectoryNode} to start reading the document from
|
||||
* @param password The password that should be used or null if no password is necessary.
|
||||
*
|
||||
* @return The created SlideShow
|
||||
*
|
||||
* @throws IOException if an error occurs while reading the data
|
||||
*/
|
||||
public static SlideShow<?,?> create(final DirectoryNode root, String password) throws IOException {
|
||||
// Encrypted OOXML files go inside OLE2 containers, is this one?
|
||||
if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
||||
InputStream stream = null;
|
||||
try {
|
||||
stream = DocumentFactoryHelper.getDecryptedStream(fs, password);
|
||||
stream = DocumentFactoryHelper.getDecryptedStream(root, password);
|
||||
|
||||
return createXSLFSlideShow(stream);
|
||||
} finally {
|
||||
@ -82,7 +109,7 @@ public class SlideShowFactory {
|
||||
passwordSet = true;
|
||||
}
|
||||
try {
|
||||
return createHSLFSlideShow(fs);
|
||||
return createHSLFSlideShow(root);
|
||||
} finally {
|
||||
if (passwordSet) {
|
||||
Biff8EncryptionKey.setCurrentUserPassword(null);
|
||||
|
@ -68,6 +68,7 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
|
||||
*
|
||||
* @return the opened document
|
||||
*/
|
||||
@Override
|
||||
public final POIXMLDocument getDocument() {
|
||||
return _document;
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ import org.apache.poi.poifs.filesystem.NotOLE2FileException;
|
||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.NotImplemented;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
@ -58,6 +59,7 @@ import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.Removal;
|
||||
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
|
||||
@ -127,20 +129,20 @@ public class ExtractorFactory {
|
||||
return OLE2ExtractorFactory.getPreferEventExtractor();
|
||||
}
|
||||
|
||||
public static POITextExtractor createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
|
||||
public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
|
||||
NPOIFSFileSystem fs = null;
|
||||
try {
|
||||
fs = new NPOIFSFileSystem(f);
|
||||
if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
||||
return createEncryptedOOXMLExtractor(fs);
|
||||
return (T)createEncryptedOOXMLExtractor(fs);
|
||||
}
|
||||
POIOLE2TextExtractor extractor = createExtractor(fs);
|
||||
POITextExtractor extractor = createExtractor(fs);
|
||||
extractor.setFilesystem(fs);
|
||||
return extractor;
|
||||
return (T)extractor;
|
||||
} catch (OfficeXmlFileException e) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||
return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||
} catch (NotOLE2FileException ne) {
|
||||
// ensure file-handle release
|
||||
IOUtils.closeQuietly(fs);
|
||||
@ -179,7 +181,7 @@ public class ExtractorFactory {
|
||||
* @throws XmlException If an XML parsing error occurs.
|
||||
* @throws IllegalArgumentException If no matching file type could be found.
|
||||
*/
|
||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||
public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||
try {
|
||||
// Check for the normal Office core document
|
||||
PackageRelationshipCollection core;
|
||||
@ -226,13 +228,13 @@ public class ExtractorFactory {
|
||||
// Is it XSLF?
|
||||
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||
if ( rel.getContentType().equals( contentType ) ) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
return new SlideShowExtractor(new XMLSlideShow(pkg));
|
||||
}
|
||||
}
|
||||
|
||||
// special handling for SlideShow-Theme-files,
|
||||
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
||||
return new SlideShowExtractor(new XMLSlideShow(pkg));
|
||||
}
|
||||
|
||||
// How about xlsb?
|
||||
@ -252,28 +254,28 @@ public class ExtractorFactory {
|
||||
}
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return OLE2ExtractorFactory.createExtractor(fs);
|
||||
public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return OLE2ExtractorFactory.createExtractor(fs);
|
||||
public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return createExtractor(fs.getRoot());
|
||||
}
|
||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return OLE2ExtractorFactory.createExtractor(fs);
|
||||
public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||
return createExtractor(fs.getRoot());
|
||||
}
|
||||
|
||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
||||
public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
||||
{
|
||||
// First, check for OOXML
|
||||
for (String entryName : poifsDir.getEntryNames()) {
|
||||
if (entryName.equals("Package")) {
|
||||
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
|
||||
return createExtractor(pkg);
|
||||
return (T)createExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
// If not, ask the OLE2 code to check, with Scratchpad if possible
|
||||
return OLE2ExtractorFactory.createExtractor(poifsDir);
|
||||
return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -403,7 +405,7 @@ public class ExtractorFactory {
|
||||
throw new IllegalStateException("Not yet supported");
|
||||
}
|
||||
|
||||
private static POIXMLTextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
|
||||
private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
|
||||
throws IOException {
|
||||
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
||||
if (pass == null) {
|
||||
|
@ -37,7 +37,7 @@ import org.apache.xmlbeans.XmlException;
|
||||
* @deprecated use {@link SlideShowExtractor}
|
||||
*/
|
||||
@Deprecated
|
||||
@Removal(version="4.2.0")
|
||||
@Removal(version="5.0.0")
|
||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
|
||||
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
|
||||
|
@ -631,4 +631,9 @@ public class XMLSlideShow extends POIXMLDocument
|
||||
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
|
||||
return new POIXMLPropertiesTextExtractor(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getPersistDocument() {
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1,20 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xslf.usermodel;
|
||||
|
||||
import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;
|
||||
|
@ -182,12 +182,20 @@ implements Slide<XSLFShape,XSLFTextParagraph> {
|
||||
*/
|
||||
public XSLFCommentAuthors getCommentAuthorsPart() {
|
||||
if(_commentAuthors == null) {
|
||||
// first scan the slide relations
|
||||
for (POIXMLDocumentPart p : getRelations()) {
|
||||
if (p instanceof XSLFCommentAuthors) {
|
||||
_commentAuthors = (XSLFCommentAuthors)p;
|
||||
return _commentAuthors;
|
||||
}
|
||||
}
|
||||
// then scan the presentation relations
|
||||
for (POIXMLDocumentPart p : getSlideShow().getRelations()) {
|
||||
if (p instanceof XSLFCommentAuthors) {
|
||||
_commentAuthors = (XSLFCommentAuthors)p;
|
||||
return _commentAuthors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -120,10 +120,10 @@ public class TestHxxFEncryption {
|
||||
public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
|
||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||
File f = sampleDir.getFile(file);
|
||||
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
|
||||
POITextExtractor te1 = ExtractorFactory.createExtractor(f);
|
||||
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
POIDocument doc = te1.getDocument();
|
||||
POIDocument doc = (POIDocument)te1.getDocument();
|
||||
doc.write(bos);
|
||||
doc.close();
|
||||
te1.close();
|
||||
@ -140,25 +140,25 @@ public class TestHxxFEncryption {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||
File f = sampleDir.getFile(file);
|
||||
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
|
||||
POITextExtractor te1 = ExtractorFactory.createExtractor(f);
|
||||
// first remove encryption
|
||||
Biff8EncryptionKey.setCurrentUserPassword(null);
|
||||
POIDocument doc = te1.getDocument();
|
||||
POIDocument doc = (POIDocument)te1.getDocument();
|
||||
doc.write(bos);
|
||||
doc.close();
|
||||
te1.close();
|
||||
// then use default setting, which is cryptoapi
|
||||
String newPass = "newPass";
|
||||
POIOLE2TextExtractor te2 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
POITextExtractor te2 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
||||
doc = te2.getDocument();
|
||||
doc = (POIDocument)te2.getDocument();
|
||||
bos.reset();
|
||||
doc.write(bos);
|
||||
doc.close();
|
||||
te2.close();
|
||||
// and finally update cryptoapi setting
|
||||
POIOLE2TextExtractor te3 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
doc = te3.getDocument();
|
||||
POITextExtractor te3 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
doc = (POIDocument)te3.getDocument();
|
||||
// need to cache data (i.e. read all data) before changing the key size
|
||||
if (doc instanceof HSLFSlideShowImpl) {
|
||||
HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
|
||||
@ -175,8 +175,8 @@ public class TestHxxFEncryption {
|
||||
doc.close();
|
||||
te3.close();
|
||||
// check the setting
|
||||
POIOLE2TextExtractor te4 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
doc = te4.getDocument();
|
||||
POITextExtractor te4 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||
doc = (POIDocument)te4.getDocument();
|
||||
ei = doc.getEncryptionInfo();
|
||||
assertNotNull(ei);
|
||||
assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);
|
||||
|
@ -50,6 +50,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePartName;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.sl.draw.DrawPaint;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.PaintStyle;
|
||||
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
||||
import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
|
||||
@ -221,28 +222,27 @@ public class TestXSLFBugs {
|
||||
* rID2 -> slide3.xml
|
||||
*/
|
||||
@Test
|
||||
public void bug54916() throws Exception {
|
||||
XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx");
|
||||
XSLFSlide slide;
|
||||
public void bug54916() throws IOException {
|
||||
try (XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx")) {
|
||||
XSLFSlide slide;
|
||||
|
||||
// Should find 4 slides
|
||||
assertEquals(4, ss.getSlides().size());
|
||||
// Should find 4 slides
|
||||
assertEquals(4, ss.getSlides().size());
|
||||
|
||||
// Check the text, to see we got them in order
|
||||
slide = ss.getSlides().get(0);
|
||||
assertContains(getSlideText(slide), "POI cannot read this");
|
||||
// Check the text, to see we got them in order
|
||||
slide = ss.getSlides().get(0);
|
||||
assertContains(getSlideText(ss, slide), "POI cannot read this");
|
||||
|
||||
slide = ss.getSlides().get(1);
|
||||
assertContains(getSlideText(slide), "POI can read this");
|
||||
assertContains(getSlideText(slide), "Has a relationship to another slide");
|
||||
slide = ss.getSlides().get(1);
|
||||
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||
assertContains(getSlideText(ss, slide), "Has a relationship to another slide");
|
||||
|
||||
slide = ss.getSlides().get(2);
|
||||
assertContains(getSlideText(slide), "POI can read this");
|
||||
slide = ss.getSlides().get(2);
|
||||
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||
|
||||
slide = ss.getSlides().get(3);
|
||||
assertContains(getSlideText(slide), "POI can read this");
|
||||
|
||||
ss.close();
|
||||
slide = ss.getSlides().get(3);
|
||||
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -311,8 +311,15 @@ public class TestXSLFBugs {
|
||||
ss.close();
|
||||
}
|
||||
|
||||
protected String getSlideText(XSLFSlide slide) {
|
||||
return XSLFPowerPointExtractor.getText(slide, true, false, false);
|
||||
protected String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
|
||||
try (SlideShowExtractor extr = new SlideShowExtractor(ppt)) {
|
||||
// do not auto-close the slideshow
|
||||
extr.setFilesystem(null);
|
||||
extr.setSlidesByDefault(true);
|
||||
extr.setNotesByDefault(false);
|
||||
extr.setMasterByDefault(false);
|
||||
return extr.getText(slide);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -458,7 +465,7 @@ public class TestXSLFBugs {
|
||||
|
||||
for (int i = 0; i < slideTexts.length; i++) {
|
||||
XSLFSlide slide = ss.getSlides().get(i);
|
||||
assertContains(getSlideText(slide), slideTexts[i]);
|
||||
assertContains(getSlideText(ss, slide), slideTexts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,16 +24,17 @@ import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.extractor.ExtractorFactory;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.junit.Ignore;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
@ -44,188 +45,189 @@ public class TestXSLFPowerPointExtractor {
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
* @throws XmlException
|
||||
* @throws OpenXML4JException
|
||||
*/
|
||||
@Test
|
||||
public void testGetSimpleText()
|
||||
throws IOException, XmlException, OpenXML4JException {
|
||||
XMLSlideShow xmlA = openPPTX("sample.pptx");
|
||||
@SuppressWarnings("resource")
|
||||
OPCPackage pkg = xmlA.getPackage();
|
||||
public void testGetSimpleText() throws IOException {
|
||||
try (XMLSlideShow xmlA = openPPTX("sample.pptx");
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xmlA)) {
|
||||
|
||||
new XSLFPowerPointExtractor(xmlA).close();
|
||||
new XSLFPowerPointExtractor(pkg).close();
|
||||
extractor.getText();
|
||||
|
||||
XSLFPowerPointExtractor extractor =
|
||||
new XSLFPowerPointExtractor(xmlA);
|
||||
extractor.getText();
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
// Check Basics
|
||||
assertStartsWith(text, "Lorem ipsum dolor sit amet\n");
|
||||
assertContains(text, "amet\n\n");
|
||||
|
||||
// Check Basics
|
||||
assertStartsWith(text, "Lorem ipsum dolor sit amet\n");
|
||||
assertContains(text, "amet\n\n");
|
||||
// Our placeholder master text
|
||||
// This shouldn't show up in the output
|
||||
// String masterText =
|
||||
// "Click to edit Master title style\n" +
|
||||
// "Click to edit Master subtitle style\n" +
|
||||
// "\n\n\n\n\n\n" +
|
||||
// "Click to edit Master title style\n" +
|
||||
// "Click to edit Master text styles\n" +
|
||||
// "Second level\n" +
|
||||
// "Third level\n" +
|
||||
// "Fourth level\n" +
|
||||
// "Fifth level\n";
|
||||
|
||||
// Our placeholder master text
|
||||
// This shouldn't show up in the output
|
||||
// String masterText =
|
||||
// "Click to edit Master title style\n" +
|
||||
// "Click to edit Master subtitle style\n" +
|
||||
// "\n\n\n\n\n\n" +
|
||||
// "Click to edit Master title style\n" +
|
||||
// "Click to edit Master text styles\n" +
|
||||
// "Second level\n" +
|
||||
// "Third level\n" +
|
||||
// "Fourth level\n" +
|
||||
// "Fifth level\n";
|
||||
// Just slides, no notes
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(false);
|
||||
text = extractor.getText();
|
||||
String slideText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n";
|
||||
assertEquals(slideText, text);
|
||||
|
||||
// Just slides, no notes
|
||||
text = extractor.getText(true, false, false);
|
||||
String slideText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n";
|
||||
assertEquals(slideText, text);
|
||||
// Just notes, no slides
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(true);
|
||||
text = extractor.getText();
|
||||
assertEquals("\n\n1\n\n\n2\n", text);
|
||||
|
||||
// Just notes, no slides
|
||||
text = extractor.getText(false, true);
|
||||
assertEquals("\n\n1\n\n\n2\n", text);
|
||||
// Both
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(true);
|
||||
text = extractor.getText();
|
||||
String bothText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n\n\n1\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n\n\n2\n";
|
||||
assertEquals(bothText, text);
|
||||
|
||||
// Both
|
||||
text = extractor.getText(true, true, false);
|
||||
String bothText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n\n\n1\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n\n\n2\n";
|
||||
assertEquals(bothText, text);
|
||||
// With Slides and Master Text
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(true);
|
||||
text = extractor.getText();
|
||||
String smText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n";
|
||||
assertEquals(smText, text);
|
||||
|
||||
// With Slides and Master Text
|
||||
text = extractor.getText(true, false, true);
|
||||
String smText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n";
|
||||
assertEquals(smText, text);
|
||||
// With Slides, Notes and Master Text
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(true);
|
||||
extractor.setMasterByDefault(true);
|
||||
text = extractor.getText();
|
||||
String snmText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n\n\n1\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n\n\n2\n";
|
||||
assertEquals(snmText, text);
|
||||
|
||||
// With Slides, Notes and Master Text
|
||||
text = extractor.getText(true, true, true);
|
||||
String snmText =
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||
"\n\n\n1\n" +
|
||||
"Lorem ipsum dolor sit amet\n" +
|
||||
"Lorem\n" +
|
||||
"ipsum\n" +
|
||||
"dolor\n" +
|
||||
"sit\n" +
|
||||
"amet\n" +
|
||||
"\n\n\n2\n";
|
||||
assertEquals(snmText, text);
|
||||
|
||||
// Via set defaults
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(true);
|
||||
text = extractor.getText();
|
||||
assertEquals("\n\n1\n\n\n2\n", text);
|
||||
|
||||
extractor.close();
|
||||
xmlA.close();
|
||||
// Via set defaults
|
||||
extractor.setSlidesByDefault(false);
|
||||
extractor.setNotesByDefault(true);
|
||||
text = extractor.getText();
|
||||
assertEquals("\n\n1\n\n\n2\n", text);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetComments() throws IOException {
|
||||
XMLSlideShow xml = openPPTX("45545_Comment.pptx");
|
||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
||||
try (XMLSlideShow xml = openPPTX("45545_Comment.pptx");
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||
extractor.setCommentsByDefault(true);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check comments are there
|
||||
assertContains(text, "testdoc");
|
||||
assertContains(text, "test phrase");
|
||||
// Check comments are there
|
||||
assertContains(text, "testdoc");
|
||||
assertContains(text, "test phrase");
|
||||
|
||||
// Check the authors came through too
|
||||
assertContains(text, "XPVMWARE01");
|
||||
|
||||
extractor.close();
|
||||
xml.close();
|
||||
// Check the authors came through too
|
||||
assertContains(text, "XPVMWARE01");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@Ignore("currently slidelayouts aren't yet supported")
|
||||
public void testGetMasterText() throws Exception {
|
||||
XMLSlideShow xml = openPPTX("WithMaster.pptx");
|
||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(true);
|
||||
try (XMLSlideShow xml = openPPTX("WithMaster.pptx");
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||
extractor.setSlidesByDefault(true);
|
||||
extractor.setNotesByDefault(false);
|
||||
extractor.setMasterByDefault(true);
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check master text is there
|
||||
assertContains(text, "Footer from the master slide");
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Theme text shouldn't show up
|
||||
// String themeText =
|
||||
// "Theme Master Title\n" +
|
||||
// "Theme Master first level\n" +
|
||||
// "And the 2nd level\n" +
|
||||
// "Our 3rd level goes here\n" +
|
||||
// "And onto the 4th, such fun....\n" +
|
||||
// "Finally is the Fifth level\n";
|
||||
// Check master text is there
|
||||
assertContains(text, "Footer from the master slide");
|
||||
|
||||
// Check the whole text
|
||||
String wholeText =
|
||||
"First page title\n" +
|
||||
"First page subtitle\n" +
|
||||
"This is the Master Title\n" +
|
||||
"This text comes from the Master Slide\n" +
|
||||
"\n" +
|
||||
// TODO Detect we didn't have a title, and include the master one
|
||||
"2nd page subtitle\n" +
|
||||
"Footer from the master slide\n" +
|
||||
"This is the Master Title\n" +
|
||||
"This text comes from the Master Slide\n";
|
||||
assertEquals(wholeText, text);
|
||||
// Theme text shouldn't show up
|
||||
// String themeText =
|
||||
// "Theme Master Title\n" +
|
||||
// "Theme Master first level\n" +
|
||||
// "And the 2nd level\n" +
|
||||
// "Our 3rd level goes here\n" +
|
||||
// "And onto the 4th, such fun....\n" +
|
||||
// "Finally is the Fifth level\n";
|
||||
|
||||
extractor.close();
|
||||
xml.close();
|
||||
// Check the whole text
|
||||
String wholeText =
|
||||
"First page title\n" +
|
||||
"First page subtitle\n" +
|
||||
"This is the Master Title\n" +
|
||||
"This text comes from the Master Slide\n" +
|
||||
"\n" +
|
||||
// TODO Detect we didn't have a title, and include the master one
|
||||
"2nd page subtitle\n" +
|
||||
"Footer from the master slide\n" +
|
||||
"This is the Master Title\n" +
|
||||
"This text comes from the Master Slide\n";
|
||||
assertEquals(wholeText, text);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTable() throws Exception {
|
||||
XMLSlideShow xml = openPPTX("present1.pptx");
|
||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
||||
try (XMLSlideShow xml = openPPTX("present1.pptx");
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check comments are there
|
||||
assertContains(text, "TEST");
|
||||
|
||||
extractor.close();
|
||||
xml.close();
|
||||
// Check comments are there
|
||||
assertContains(text, "TEST");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -241,74 +243,76 @@ public class TestXSLFPowerPointExtractor {
|
||||
};
|
||||
for(String extension : extensions) {
|
||||
String filename = "testPPT." + extension;
|
||||
XMLSlideShow xml = openPPTX(filename);
|
||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
||||
|
||||
String text = extractor.getText();
|
||||
if (extension.equals("thmx")) {
|
||||
// Theme file doesn't have any textual content
|
||||
assertEquals(filename, 0, text.length());
|
||||
continue;
|
||||
try (XMLSlideShow xml = openPPTX(filename);
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||
|
||||
String text = extractor.getText();
|
||||
if (extension.equals("thmx")) {
|
||||
// Theme file doesn't have any textual content
|
||||
assertEquals(filename, 0, text.length());
|
||||
continue;
|
||||
}
|
||||
|
||||
assertTrue(filename, text.length() > 0);
|
||||
assertContains(filename, text, "Attachment Test");
|
||||
assertContains(filename, text, "This is a test file data with the same content");
|
||||
assertContains(filename, text, "content parsing");
|
||||
assertContains(filename, text, "Different words to test against");
|
||||
assertContains(filename, text, "Mystery");
|
||||
}
|
||||
|
||||
assertTrue(filename, text.length() > 0);
|
||||
assertContains(filename, text, "Attachment Test");
|
||||
assertContains(filename, text, "This is a test file data with the same content");
|
||||
assertContains(filename, text, "content parsing");
|
||||
assertContains(filename, text, "Different words to test against");
|
||||
assertContains(filename, text, "Mystery");
|
||||
|
||||
extractor.close();
|
||||
xml.close();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test45541() throws Exception {
|
||||
public void test45541() throws IOException, OpenXML4JException, XmlException {
|
||||
// extract text from a powerpoint that has a header in the notes-element
|
||||
POITextExtractor extr = ExtractorFactory.createExtractor(
|
||||
slTests.getFile("45541_Header.pptx"));
|
||||
String text = extr.getText();
|
||||
assertNotNull(text);
|
||||
assertFalse("Had: " + text, text.contains("testdoc"));
|
||||
final File headerFile = slTests.getFile("45541_Header.pptx");
|
||||
try (final SlideShowExtractor extr = ExtractorFactory.createExtractor(headerFile)) {
|
||||
String text = extr.getText();
|
||||
assertNotNull(text);
|
||||
assertFalse("Had: " + text, text.contains("testdoc"));
|
||||
|
||||
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
|
||||
assertContains(text, "testdoc");
|
||||
extr.close();
|
||||
assertNotNull(text);
|
||||
extr.setSlidesByDefault(false);
|
||||
extr.setNotesByDefault(true);
|
||||
|
||||
text = extr.getText();
|
||||
assertContains(text, "testdoc");
|
||||
assertNotNull(text);
|
||||
}
|
||||
|
||||
// extract text from a powerpoint that has a footer in the master-slide
|
||||
extr = ExtractorFactory.createExtractor(
|
||||
slTests.getFile("45541_Footer.pptx"));
|
||||
text = extr.getText();
|
||||
assertNotContained(text, "testdoc");
|
||||
final File footerFile = slTests.getFile("45541_Footer.pptx");
|
||||
try (SlideShowExtractor extr = ExtractorFactory.createExtractor(footerFile)) {
|
||||
String text = extr.getText();
|
||||
assertNotContained(text, "testdoc");
|
||||
|
||||
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
|
||||
assertNotContained(text, "testdoc");
|
||||
extr.setSlidesByDefault(false);
|
||||
extr.setNotesByDefault(true);
|
||||
text = extr.getText();
|
||||
assertNotContained(text, "testdoc");
|
||||
|
||||
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
|
||||
assertNotContained(text, "testdoc");
|
||||
|
||||
extr.close();
|
||||
extr.setSlidesByDefault(false);
|
||||
extr.setNotesByDefault(false);
|
||||
extr.setMasterByDefault(true);
|
||||
text = extr.getText();
|
||||
assertNotContained(text, "testdoc");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void bug54570() throws IOException {
|
||||
XMLSlideShow xml = openPPTX("bug54570.pptx");
|
||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
||||
String text = extractor.getText();
|
||||
assertNotNull(text);
|
||||
extractor.close();
|
||||
xml.close();
|
||||
try (XMLSlideShow xml = openPPTX("bug54570.pptx");
|
||||
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||
String text = extractor.getText();
|
||||
assertNotNull(text);
|
||||
}
|
||||
}
|
||||
|
||||
private XMLSlideShow openPPTX(String file) throws IOException {
|
||||
InputStream is = slTests.openResourceAsStream(file);
|
||||
try {
|
||||
try (InputStream is = slTests.openResourceAsStream(file)) {
|
||||
return new XMLSlideShow(is);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -38,6 +38,8 @@ import org.apache.poi.hwpf.extractor.WordExtractor;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||
|
||||
/**
|
||||
* Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
|
||||
@ -65,7 +67,7 @@ public class OLE2ScratchpadExtractorFactory {
|
||||
}
|
||||
|
||||
if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
|
||||
return new PowerPointExtractor(poifsDir);
|
||||
return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
|
||||
}
|
||||
|
||||
if (poifsDir.hasEntry("VisioDocument")) {
|
||||
|
@ -34,6 +34,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||
import org.apache.poi.util.Removal;
|
||||
|
||||
/**
|
||||
* This class can be used to extract text from a PowerPoint file. Can optionally
|
||||
@ -43,6 +44,7 @@ import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||
*/
|
||||
@SuppressWarnings("WeakerAccess")
|
||||
@Deprecated
|
||||
@Removal(version="5.0.0")
|
||||
public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
||||
private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;
|
||||
|
||||
|
@ -1139,4 +1139,9 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
|
||||
public void close() throws IOException {
|
||||
_hslfSlideShow.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getPersistDocument() {
|
||||
return getSlideShowImpl();
|
||||
}
|
||||
}
|
||||
|
@ -19,8 +19,8 @@ package org.apache.poi.hslf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.sl.usermodel.SlideShow;
|
||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
@ -31,12 +31,20 @@ import org.apache.poi.util.Internal;
|
||||
@Internal
|
||||
public class HSLFSlideShowFactory extends SlideShowFactory {
|
||||
/**
|
||||
* Creates a HSLFSlideShow from the given NPOIFSFileSystem
|
||||
* <p>Note that in order to properly release resources the
|
||||
* SlideShow should be closed after use.
|
||||
* Creates a HSLFSlideShow from the given NPOIFSFileSystem<p>
|
||||
* Note that in order to properly release resources the
|
||||
* SlideShow should be closed after use.
|
||||
*/
|
||||
public static SlideShow<?,?> createSlideShow(NPOIFSFileSystem fs) throws IOException {
|
||||
public static HSLFSlideShow createSlideShow(final NPOIFSFileSystem fs) throws IOException {
|
||||
return new HSLFSlideShow(fs);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a HSLFSlideShow from the given DirectoryNode<p>
|
||||
* Note that in order to properly release resources the
|
||||
* SlideShow should be closed after use.
|
||||
*/
|
||||
public static HSLFSlideShow createSlideShow(final DirectoryNode root) throws IOException {
|
||||
return new HSLFSlideShow(root);
|
||||
}
|
||||
}
|
||||
|
@ -846,9 +846,13 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
NPOIFSFileSystem fs = getDirectory().getFileSystem();
|
||||
if (fs != null) {
|
||||
fs.close();
|
||||
// only close the filesystem, if we are based on the root node.
|
||||
// embedded documents/slideshows shouldn't close the parent container
|
||||
if (getDirectory().getParent() == null) {
|
||||
NPOIFSFileSystem fs = getDirectory().getFileSystem();
|
||||
if (fs != null) {
|
||||
fs.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -42,6 +42,10 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.ObjectShape;
|
||||
import org.apache.poi.sl.usermodel.SlideShow;
|
||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -76,43 +80,46 @@ public final class TestExtractor {
|
||||
// ppe.close();
|
||||
// }
|
||||
|
||||
private PowerPointExtractor openExtractor(String fileName) throws IOException {
|
||||
InputStream is = slTests.openResourceAsStream(fileName);
|
||||
try {
|
||||
return new PowerPointExtractor(is);
|
||||
} finally {
|
||||
is.close();
|
||||
private SlideShowExtractor<?,?> openExtractor(String fileName) throws IOException {
|
||||
try (InputStream is = slTests.openResourceAsStream(fileName)) {
|
||||
return new SlideShowExtractor(SlideShowFactory.create(is));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadSheetText() throws IOException {
|
||||
// Basic 2 page example
|
||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
||||
assertEquals(expectText, ppe.getText());
|
||||
ppe.close();
|
||||
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||
assertEquals(expectText, ppe.getText());
|
||||
}
|
||||
|
||||
// 1 page example with text boxes
|
||||
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
|
||||
assertEquals(expectText2, ppe2.getText());
|
||||
ppe2.close();
|
||||
try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
|
||||
assertEquals(expectText2, ppe.getText());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadNoteText() throws IOException {
|
||||
// Basic 2 page example
|
||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
||||
String notesText = ppe.getNotes();
|
||||
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
|
||||
assertEquals(expText, notesText);
|
||||
ppe.close();
|
||||
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||
ppe.setNotesByDefault(true);
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setMasterByDefault(false);
|
||||
String notesText = ppe.getText();
|
||||
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
|
||||
assertEquals(expText, notesText);
|
||||
}
|
||||
|
||||
// Other one doesn't have notes
|
||||
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
|
||||
notesText = ppe2.getNotes();
|
||||
expText = "";
|
||||
assertEquals(expText, notesText);
|
||||
ppe2.close();
|
||||
try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
|
||||
ppe.setNotesByDefault(true);
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setMasterByDefault(false);
|
||||
String notesText = ppe.getText();
|
||||
String expText = "";
|
||||
assertEquals(expText, notesText);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
@ -126,19 +133,19 @@ public final class TestExtractor {
|
||||
"\nThese are the notes on page two, again lacking formatting\n"
|
||||
};
|
||||
|
||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(false);
|
||||
assertEquals(slText[0] + slText[1], ppe.getText());
|
||||
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(false);
|
||||
assertEquals(slText[0] + slText[1], ppe.getText());
|
||||
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(ntText[0] + ntText[1], ppe.getText());
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(ntText[0] + ntText[1], ppe.getText());
|
||||
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
|
||||
ppe.close();
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(true);
|
||||
assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -149,45 +156,46 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void testMissingCoreRecords() throws IOException {
|
||||
PowerPointExtractor ppe = openExtractor("missing_core_records.ppt");
|
||||
try (SlideShowExtractor<?,?> ppe = openExtractor("missing_core_records.ppt")) {
|
||||
ppe.setSlidesByDefault(true);
|
||||
ppe.setNotesByDefault(false);
|
||||
String text = ppe.getText();
|
||||
ppe.setSlidesByDefault(false);
|
||||
ppe.setNotesByDefault(true);
|
||||
String nText = ppe.getText();
|
||||
|
||||
String text = ppe.getText(true, false);
|
||||
String nText = ppe.getNotes();
|
||||
assertNotNull(text);
|
||||
assertNotNull(nText);
|
||||
|
||||
assertNotNull(text);
|
||||
assertNotNull(nText);
|
||||
// Notes record were corrupt, so don't expect any
|
||||
assertEquals(nText.length(), 0);
|
||||
|
||||
// Notes record were corrupt, so don't expect any
|
||||
assertEquals(nText.length(), 0);
|
||||
|
||||
// Slide records were fine
|
||||
assertContains(text, "Using Disease Surveillance and Response");
|
||||
|
||||
ppe.close();
|
||||
// Slide records were fine
|
||||
assertContains(text, "Using Disease Surveillance and Response");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractFromEmbeded() throws IOException {
|
||||
InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
|
||||
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||
DirectoryNode root = fs.getRoot();
|
||||
PowerPointExtractor ppe1 = assertExtractFromEmbedded(root, "MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n");
|
||||
PowerPointExtractor ppe2 = assertExtractFromEmbedded(root, "MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n");
|
||||
ppe2.close();
|
||||
ppe1.close();
|
||||
fs.close();
|
||||
}
|
||||
|
||||
private PowerPointExtractor assertExtractFromEmbedded(DirectoryNode root, String entryName, String expected)
|
||||
throws IOException {
|
||||
DirectoryNode dir = (DirectoryNode)root.getEntry(entryName);
|
||||
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
|
||||
try (final InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
|
||||
final POIFSFileSystem fs = new POIFSFileSystem(is)) {
|
||||
final DirectoryNode root = fs.getRoot();
|
||||
|
||||
// Check the first file
|
||||
HSLFSlideShowImpl ppt = new HSLFSlideShowImpl(dir);
|
||||
PowerPointExtractor ppe = new PowerPointExtractor(ppt);
|
||||
assertEquals(expected, ppe.getText(true, false));
|
||||
return ppe;
|
||||
final String[] TEST_SET = {
|
||||
"MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
|
||||
"MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n"
|
||||
};
|
||||
|
||||
for (int i=0; i<TEST_SET.length; i+=2) {
|
||||
DirectoryNode dir = (DirectoryNode)root.getEntry(TEST_SET[i]);
|
||||
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
|
||||
|
||||
try (final SlideShow<?,?> ppt = SlideShowFactory.create(dir);
|
||||
final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
|
||||
assertEquals(TEST_SET[i+1], ppe.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -195,32 +203,32 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void testExtractFromOwnEmbeded() throws IOException {
|
||||
PowerPointExtractor ppe = openExtractor("ppt_with_embeded.ppt");
|
||||
List<HSLFObjectShape> shapes = ppe.getOLEShapes();
|
||||
assertEquals("Expected 6 ole shapes", 6, shapes.size());
|
||||
int num_ppt = 0, num_doc = 0, num_xls = 0;
|
||||
for (HSLFObjectShape ole : shapes) {
|
||||
String name = ole.getInstanceName();
|
||||
InputStream data = ole.getObjectData().getInputStream();
|
||||
if ("Worksheet".equals(name)) {
|
||||
HSSFWorkbook wb = new HSSFWorkbook(data);
|
||||
num_xls++;
|
||||
wb.close();
|
||||
} else if ("Document".equals(name)) {
|
||||
HWPFDocument doc = new HWPFDocument(data);
|
||||
num_doc++;
|
||||
doc.close();
|
||||
} else if ("Presentation".equals(name)) {
|
||||
num_ppt++;
|
||||
HSLFSlideShow ppt = new HSLFSlideShow(data);
|
||||
ppt.close();
|
||||
try (SlideShowExtractor<?,?> ppe = openExtractor("ppt_with_embeded.ppt")) {
|
||||
List<? extends ObjectShape> shapes = ppe.getOLEShapes();
|
||||
assertEquals("Expected 6 ole shapes", 6, shapes.size());
|
||||
int num_ppt = 0, num_doc = 0, num_xls = 0;
|
||||
for (ObjectShape ole : shapes) {
|
||||
String name = ((HSLFObjectShape)ole).getInstanceName();
|
||||
InputStream data = ole.getObjectData().getInputStream();
|
||||
if ("Worksheet".equals(name)) {
|
||||
HSSFWorkbook wb = new HSSFWorkbook(data);
|
||||
num_xls++;
|
||||
wb.close();
|
||||
} else if ("Document".equals(name)) {
|
||||
HWPFDocument doc = new HWPFDocument(data);
|
||||
num_doc++;
|
||||
doc.close();
|
||||
} else if ("Presentation".equals(name)) {
|
||||
num_ppt++;
|
||||
HSLFSlideShow ppt = new HSLFSlideShow(data);
|
||||
ppt.close();
|
||||
}
|
||||
data.close();
|
||||
}
|
||||
data.close();
|
||||
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
|
||||
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
|
||||
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
|
||||
}
|
||||
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
|
||||
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
|
||||
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
|
||||
ppe.close();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -228,11 +236,11 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void test52991() throws IOException {
|
||||
PowerPointExtractor ppe = openExtractor("badzip.ppt");
|
||||
for (HSLFObjectShape shape : ppe.getOLEShapes()) {
|
||||
IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream());
|
||||
try (SlideShowExtractor<?,?> ppe = openExtractor("badzip.ppt")) {
|
||||
for (ObjectShape shape : ppe.getOLEShapes()) {
|
||||
IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream());
|
||||
}
|
||||
}
|
||||
ppe.close();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -240,27 +248,27 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void testWithComments() throws IOException {
|
||||
PowerPointExtractor ppe1 = openExtractor("WithComments.ppt");
|
||||
String text = ppe1.getText();
|
||||
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
||||
try (final SlideShowExtractor ppe = openExtractor("WithComments.ppt")) {
|
||||
String text = ppe.getText();
|
||||
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
||||
|
||||
ppe1.setCommentsByDefault(true);
|
||||
ppe.setCommentsByDefault(true);
|
||||
|
||||
text = ppe1.getText();
|
||||
assertContains(text, "This is a test comment");
|
||||
ppe1.close();
|
||||
text = ppe.getText();
|
||||
assertContains(text, "This is a test comment");
|
||||
}
|
||||
|
||||
|
||||
// And another file
|
||||
PowerPointExtractor ppe2 = openExtractor("45543.ppt");
|
||||
text = ppe2.getText();
|
||||
assertFalse("Comments not in by default", text.contains("testdoc"));
|
||||
try (SlideShowExtractor ppe = openExtractor("45543.ppt")) {
|
||||
String text = ppe.getText();
|
||||
assertFalse("Comments not in by default", text.contains("testdoc"));
|
||||
|
||||
ppe2.setCommentsByDefault(true);
|
||||
ppe.setCommentsByDefault(true);
|
||||
|
||||
text = ppe2.getText();
|
||||
assertContains(text, "testdoc");
|
||||
ppe2.close();
|
||||
text = ppe.getText();
|
||||
assertContains(text, "testdoc");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -268,48 +276,37 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void testHeaderFooter() throws IOException {
|
||||
String text;
|
||||
|
||||
// With a header on the notes
|
||||
InputStream is1 = slTests.openResourceAsStream("45537_Header.ppt");
|
||||
HSLFSlideShow ppt1 = new HSLFSlideShow(is1);
|
||||
is1.close();
|
||||
assertNotNull(ppt1.getNotesHeadersFooters());
|
||||
assertEquals("testdoc test phrase", ppt1.getNotesHeadersFooters().getHeaderText());
|
||||
try (InputStream is = slTests.openResourceAsStream("45537_Header.ppt");
|
||||
HSLFSlideShow ppt = new HSLFSlideShow(is)) {
|
||||
|
||||
PowerPointExtractor ppe1 = new PowerPointExtractor(ppt1.getSlideShowImpl());
|
||||
assertNotNull(ppt.getNotesHeadersFooters());
|
||||
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getHeaderText());
|
||||
|
||||
text = ppe1.getText();
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
||||
|
||||
ppe1.setNotesByDefault(true);
|
||||
text = ppe1.getText();
|
||||
assertContains(text, "testdoc");
|
||||
assertContains(text, "test phrase");
|
||||
ppe1.close();
|
||||
ppt1.close();
|
||||
testHeaderFooterInner(ppt);
|
||||
}
|
||||
|
||||
// And with a footer, also on notes
|
||||
InputStream is2 = slTests.openResourceAsStream("45537_Footer.ppt");
|
||||
HSLFSlideShow ppt2 = new HSLFSlideShow(is2);
|
||||
is2.close();
|
||||
|
||||
assertNotNull(ppt2.getNotesHeadersFooters());
|
||||
assertEquals("testdoc test phrase", ppt2.getNotesHeadersFooters().getFooterText());
|
||||
ppt2.close();
|
||||
try (final InputStream is = slTests.openResourceAsStream("45537_Footer.ppt");
|
||||
final HSLFSlideShow ppt = new HSLFSlideShow(is)) {
|
||||
assertNotNull(ppt.getNotesHeadersFooters());
|
||||
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getFooterText());
|
||||
|
||||
PowerPointExtractor ppe2 = openExtractor("45537_Footer.ppt");
|
||||
testHeaderFooterInner(ppt);
|
||||
}
|
||||
}
|
||||
|
||||
text = ppe2.getText();
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
||||
private void testHeaderFooterInner(final HSLFSlideShow ppt) throws IOException {
|
||||
try (final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
|
||||
String text = ppe.getText();
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
||||
|
||||
ppe2.setNotesByDefault(true);
|
||||
text = ppe2.getText();
|
||||
assertContains(text, "testdoc");
|
||||
assertContains(text, "test phrase");
|
||||
ppe2.close();
|
||||
ppe.setNotesByDefault(true);
|
||||
text = ppe.getText();
|
||||
assertContains(text, "testdoc");
|
||||
assertContains(text, "test phrase");
|
||||
}
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
@ -318,41 +315,40 @@ public final class TestExtractor {
|
||||
String masterTitleText = "This is the Master Title";
|
||||
String masterRandomText = "This text comes from the Master Slide";
|
||||
String masterFooterText = "Footer from the master slide";
|
||||
PowerPointExtractor ppe = openExtractor("WithMaster.ppt");
|
||||
ppe.setMasterByDefault(true);
|
||||
try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
|
||||
ppe.setMasterByDefault(true);
|
||||
|
||||
String text = ppe.getText();
|
||||
assertContains(text, masterRandomText);
|
||||
assertContains(text, masterFooterText);
|
||||
ppe.close();
|
||||
String text = ppe.getText();
|
||||
assertContains(text, masterRandomText);
|
||||
assertContains(text, masterFooterText);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testMasterText() throws IOException {
|
||||
PowerPointExtractor ppe1 = openExtractor("master_text.ppt");
|
||||
try (final SlideShowExtractor ppe = openExtractor("master_text.ppt")) {
|
||||
// Initially not there
|
||||
String text = ppe.getText();
|
||||
assertFalse(text.contains("Text that I added to the master slide"));
|
||||
|
||||
// Initially not there
|
||||
String text = ppe1.getText();
|
||||
assertFalse(text.contains("Text that I added to the master slide"));
|
||||
// Enable, shows up
|
||||
ppe.setMasterByDefault(true);
|
||||
text = ppe.getText();
|
||||
assertContains(text, "Text that I added to the master slide");
|
||||
|
||||
// Enable, shows up
|
||||
ppe1.setMasterByDefault(true);
|
||||
text = ppe1.getText();
|
||||
assertContains(text, "Text that I added to the master slide");
|
||||
|
||||
// Make sure placeholder text does not come out
|
||||
assertNotContained(text, "Click to edit Master");
|
||||
ppe1.close();
|
||||
// Make sure placeholder text does not come out
|
||||
assertNotContained(text, "Click to edit Master");
|
||||
}
|
||||
|
||||
// Now with another file only containing master text
|
||||
// Will always show up
|
||||
PowerPointExtractor ppe2 = openExtractor("WithMaster.ppt");
|
||||
String masterText = "Footer from the master slide";
|
||||
try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
|
||||
String masterText = "Footer from the master slide";
|
||||
|
||||
text = ppe2.getText();
|
||||
assertContainsIgnoreCase(text, "master");
|
||||
assertContains(text, masterText);
|
||||
ppe2.close();
|
||||
String text = ppe.getText();
|
||||
assertContainsIgnoreCase(text, "master");
|
||||
assertContains(text, masterText);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -360,22 +356,21 @@ public final class TestExtractor {
|
||||
*/
|
||||
@Test
|
||||
public void testChineseText() throws IOException {
|
||||
PowerPointExtractor ppe = openExtractor("54880_chinese.ppt");
|
||||
try (final SlideShowExtractor ppe = openExtractor("54880_chinese.ppt")) {
|
||||
String text = ppe.getText();
|
||||
|
||||
String text = ppe.getText();
|
||||
// Check for the english text line
|
||||
assertContains(text, "Single byte");
|
||||
|
||||
// Check for the english text line
|
||||
assertContains(text, "Single byte");
|
||||
// Check for the english text in the mixed line
|
||||
assertContains(text, "Mix");
|
||||
|
||||
// Check for the english text in the mixed line
|
||||
assertContains(text, "Mix");
|
||||
// Check for the chinese text in the mixed line
|
||||
assertContains(text, "\u8868");
|
||||
|
||||
// Check for the chinese text in the mixed line
|
||||
assertContains(text, "\u8868");
|
||||
|
||||
// Check for the chinese only text line
|
||||
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
||||
ppe.close();
|
||||
// Check for the chinese only text line
|
||||
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -387,67 +382,59 @@ public final class TestExtractor {
|
||||
public void testDifferentPOIFS() throws IOException {
|
||||
// Open the two filesystems
|
||||
File pptFile = slTests.getFile("basic_test_ppt_file.ppt");
|
||||
InputStream is1 = new FileInputStream(pptFile);
|
||||
OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1);
|
||||
is1.close();
|
||||
NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile);
|
||||
|
||||
DirectoryNode[] files = { opoifs.getRoot(), npoifs.getRoot() };
|
||||
try (final InputStream is1 = new FileInputStream(pptFile);
|
||||
final NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile)) {
|
||||
|
||||
// Open directly
|
||||
for (DirectoryNode dir : files) {
|
||||
PowerPointExtractor extractor = new PowerPointExtractor(dir);
|
||||
assertEquals(expectText, extractor.getText());
|
||||
final OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1);
|
||||
|
||||
DirectoryNode[] files = {opoifs.getRoot(), npoifs.getRoot()};
|
||||
|
||||
// Open directly
|
||||
for (DirectoryNode dir : files) {
|
||||
try (SlideShow<?,?> ppt = SlideShowFactory.create(dir);
|
||||
SlideShowExtractor<?,?> extractor = new SlideShowExtractor(ppt)) {
|
||||
assertEquals(expectText, extractor.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Open via a HSLFSlideShow
|
||||
for (DirectoryNode dir : files) {
|
||||
HSLFSlideShowImpl slideshow = new HSLFSlideShowImpl(dir);
|
||||
PowerPointExtractor extractor = new PowerPointExtractor(slideshow);
|
||||
assertEquals(expectText, extractor.getText());
|
||||
extractor.close();
|
||||
slideshow.close();
|
||||
}
|
||||
|
||||
npoifs.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTable() throws Exception {
|
||||
PowerPointExtractor ppe1 = openExtractor("54111.ppt");
|
||||
String text1 = ppe1.getText();
|
||||
String target1 = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n"+
|
||||
"Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n"+
|
||||
"Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n"+
|
||||
"Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n"+
|
||||
"Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n"+
|
||||
"Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n";
|
||||
assertContains(text1, target1);
|
||||
ppe1.close();
|
||||
try (SlideShowExtractor ppe = openExtractor("54111.ppt")) {
|
||||
String text = ppe.getText();
|
||||
String target = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n" +
|
||||
"Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n" +
|
||||
"Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n" +
|
||||
"Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n" +
|
||||
"Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n" +
|
||||
"Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n";
|
||||
assertContains(text, target);
|
||||
}
|
||||
|
||||
PowerPointExtractor ppe2 = openExtractor("54722.ppt");
|
||||
String text2 = ppe2.getText();
|
||||
try (SlideShowExtractor ppe = openExtractor("54722.ppt")) {
|
||||
String text = ppe.getText();
|
||||
|
||||
String target2 = "this\tText\tis\twithin\ta\n" +
|
||||
"table\t1\t2\t3\t4";
|
||||
assertContains(text2, target2);
|
||||
ppe2.close();
|
||||
String target = "this\tText\tis\twithin\ta\n" +
|
||||
"table\t1\t2\t3\t4";
|
||||
assertContains(text, target);
|
||||
}
|
||||
}
|
||||
|
||||
// bug 60003
|
||||
@Test
|
||||
public void testExtractMasterSlideFooterText() throws Exception {
|
||||
PowerPointExtractor ppe = openExtractor("60003.ppt");
|
||||
ppe.setMasterByDefault(true);
|
||||
try (SlideShowExtractor ppe = openExtractor("60003.ppt")) {
|
||||
ppe.setMasterByDefault(true);
|
||||
|
||||
String text = ppe.getText();
|
||||
assertContains(text, "Prague");
|
||||
ppe.close();
|
||||
String text = ppe.getText();
|
||||
assertContains(text, "Prague");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testExtractGroupedShapeText() throws Exception {
|
||||
try (final PowerPointExtractor ppe = openExtractor("bug62092.ppt")) {
|
||||
try (final SlideShowExtractor ppe = openExtractor("bug62092.ppt")) {
|
||||
final String text = ppe.getText();
|
||||
|
||||
//this tests that we're ignoring text shapes at depth=0
|
||||
|
@ -73,6 +73,7 @@ import org.apache.poi.poifs.macros.VBAMacroReader;
|
||||
import org.apache.poi.sl.draw.DrawFactory;
|
||||
import org.apache.poi.sl.draw.DrawPaint;
|
||||
import org.apache.poi.sl.draw.DrawTextParagraph;
|
||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||
import org.apache.poi.sl.usermodel.ColorStyle;
|
||||
import org.apache.poi.sl.usermodel.PaintStyle;
|
||||
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
||||
@ -800,18 +801,18 @@ public final class TestBugs {
|
||||
String files[] = { "bug58718_008524.ppt","bug58718_008558.ppt","bug58718_349008.ppt","bug58718_008495.ppt", };
|
||||
for (String f : files) {
|
||||
File sample = HSLFTestDataSamples.getSampleFile(f);
|
||||
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath());
|
||||
assertNotNull(ex.getText());
|
||||
ex.close();
|
||||
try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
|
||||
assertNotNull(ex.getText());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void bug58733() throws IOException {
|
||||
File sample = HSLFTestDataSamples.getSampleFile("bug58733_671884.ppt");
|
||||
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath());
|
||||
assertNotNull(ex.getText());
|
||||
ex.close();
|
||||
try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
|
||||
assertNotNull(ex.getText());
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user