#62319 - Decommission XSLF-/PowerPointExtractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1829653 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
bc436fcc3d
commit
ab390ce170
@ -330,8 +330,6 @@ public class TestAllFiles {
|
|||||||
);
|
);
|
||||||
|
|
||||||
private static final Set<String> IGNORED = unmodifiableHashSet(
|
private static final Set<String> IGNORED = unmodifiableHashSet(
|
||||||
// need JDK8+ - https://bugs.openjdk.java.net/browse/JDK-8038081
|
|
||||||
"slideshow/42474-2.ppt",
|
|
||||||
// OPC handler works / XSSF handler fails
|
// OPC handler works / XSSF handler fails
|
||||||
"spreadsheet/57181.xlsm",
|
"spreadsheet/57181.xlsm",
|
||||||
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
|
"spreadsheet/61300.xls"//intentionally fuzzed -- used to cause infinite loop
|
||||||
|
@ -24,6 +24,7 @@ import java.io.FileInputStream;
|
|||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
import org.apache.poi.extractor.ExtractorFactory;
|
import org.apache.poi.extractor.ExtractorFactory;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||||
@ -53,12 +54,19 @@ public class XSLFFileHandler extends SlideShowHandler {
|
|||||||
|
|
||||||
// additionally try the other getText() methods
|
// additionally try the other getText() methods
|
||||||
|
|
||||||
try (XSLFPowerPointExtractor extractor = (XSLFPowerPointExtractor) ExtractorFactory.createExtractor(file)) {
|
try (SlideShowExtractor extractor = ExtractorFactory.createExtractor(file)) {
|
||||||
assertNotNull(extractor);
|
assertNotNull(extractor);
|
||||||
|
extractor.setSlidesByDefault(true);
|
||||||
|
extractor.setNotesByDefault(true);
|
||||||
|
extractor.setMasterByDefault(true);
|
||||||
|
|
||||||
assertNotNull(extractor.getText(true, true, true));
|
assertNotNull(extractor.getText());
|
||||||
assertEquals("With all options disabled we should not get text",
|
|
||||||
"", extractor.getText(false, false, false));
|
extractor.setSlidesByDefault(false);
|
||||||
|
extractor.setNotesByDefault(false);
|
||||||
|
extractor.setMasterByDefault(false);
|
||||||
|
|
||||||
|
assertEquals("With all options disabled we should not get text", "", extractor.getText());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,6 +105,7 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
|
|||||||
*
|
*
|
||||||
* @return the underlying POIDocument
|
* @return the underlying POIDocument
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public POIDocument getDocument() {
|
public POIDocument getDocument() {
|
||||||
return document;
|
return document;
|
||||||
}
|
}
|
||||||
|
@ -74,4 +74,9 @@ public abstract class POITextExtractor implements Closeable {
|
|||||||
fsToClose.close();
|
fsToClose.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the processed document
|
||||||
|
*/
|
||||||
|
public abstract Object getDocument();
|
||||||
}
|
}
|
||||||
|
@ -115,26 +115,23 @@ public class OLE2ExtractorFactory {
|
|||||||
return threadPreferEventExtractors.get();
|
return threadPreferEventExtractors.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException {
|
||||||
// Only ever an OLE2 one from the root of the FS
|
return (T)createExtractor(fs.getRoot());
|
||||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
|
||||||
}
|
}
|
||||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException {
|
public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException {
|
||||||
// Only ever an OLE2 one from the root of the FS
|
return (T)createExtractor(fs.getRoot());
|
||||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
|
||||||
}
|
}
|
||||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException {
|
public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException {
|
||||||
// Only ever an OLE2 one from the root of the FS
|
return (T)createExtractor(fs.getRoot());
|
||||||
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(InputStream input) throws IOException {
|
public static <T extends POITextExtractor> T createExtractor(InputStream input) throws IOException {
|
||||||
Class<?> cls = getOOXMLClass();
|
Class<?> cls = getOOXMLClass();
|
||||||
if (cls != null) {
|
if (cls != null) {
|
||||||
// Use Reflection to get us the full OOXML-enabled version
|
// Use Reflection to get us the full OOXML-enabled version
|
||||||
try {
|
try {
|
||||||
Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
|
Method m = cls.getDeclaredMethod("createExtractor", InputStream.class);
|
||||||
return (POITextExtractor)m.invoke(null, input);
|
return (T)m.invoke(null, input);
|
||||||
} catch (IllegalArgumentException iae) {
|
} catch (IllegalArgumentException iae) {
|
||||||
throw iae;
|
throw iae;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -44,8 +44,30 @@ public class DocumentFactoryHelper {
|
|||||||
* @throws IOException If an error occurs while decrypting or if the password does not match
|
* @throws IOException If an error occurs while decrypting or if the password does not match
|
||||||
*/
|
*/
|
||||||
public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
|
public static InputStream getDecryptedStream(final NPOIFSFileSystem fs, String password)
|
||||||
|
throws IOException {
|
||||||
|
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
|
||||||
|
// as well when the resulting OPCPackage is closed
|
||||||
|
return new FilterInputStream(getDecryptedStream(fs.getRoot(), password)) {
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
fs.close();
|
||||||
|
super.close();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wrap the OLE2 data of the DirectoryNode into a decrypted stream by using
|
||||||
|
* the given password.
|
||||||
|
*
|
||||||
|
* @param root The OLE2 directory node for the document
|
||||||
|
* @param password The password, null if the default password should be used
|
||||||
|
* @return A stream for reading the decrypted data
|
||||||
|
* @throws IOException If an error occurs while decrypting or if the password does not match
|
||||||
|
*/
|
||||||
|
public static InputStream getDecryptedStream(final DirectoryNode root, String password)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
EncryptionInfo info = new EncryptionInfo(fs);
|
EncryptionInfo info = new EncryptionInfo(root);
|
||||||
Decryptor d = Decryptor.getInstance(info);
|
Decryptor d = Decryptor.getInstance(info);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@ -58,21 +80,11 @@ public class DocumentFactoryHelper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (passwordCorrect) {
|
if (passwordCorrect) {
|
||||||
// wrap the stream in a FilterInputStream to close the NPOIFSFileSystem
|
return d.getDataStream(root);
|
||||||
// as well when the resulting OPCPackage is closed
|
} else if (password != null) {
|
||||||
return new FilterInputStream(d.getDataStream(fs.getRoot())) {
|
throw new EncryptedDocumentException("Password incorrect");
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
fs.close();
|
|
||||||
|
|
||||||
super.close();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
} else {
|
} else {
|
||||||
if (password != null)
|
throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
|
||||||
throw new EncryptedDocumentException("Password incorrect");
|
|
||||||
else
|
|
||||||
throw new EncryptedDocumentException("The supplied spreadsheet is protected, but no password was supplied");
|
|
||||||
}
|
}
|
||||||
} catch (GeneralSecurityException e) {
|
} catch (GeneralSecurityException e) {
|
||||||
throw new IOException(e);
|
throw new IOException(e);
|
||||||
|
@ -1,3 +1,20 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
package org.apache.poi.sl.extractor;
|
package org.apache.poi.sl.extractor;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -48,6 +65,16 @@ public class SlideShowExtractor<
|
|||||||
this.slideshow = slideshow;
|
this.slideshow = slideshow;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns opened document
|
||||||
|
*
|
||||||
|
* @return the opened document
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public final Object getDocument() {
|
||||||
|
return slideshow.getPersistDocument();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should a call to getText() return slide text? Default is yes
|
* Should a call to getText() return slide text? Default is yes
|
||||||
*/
|
*/
|
||||||
@ -219,7 +246,6 @@ public class SlideShowExtractor<
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (final P para : paraList) {
|
for (final P para : paraList) {
|
||||||
final int oldLen = sb.length();
|
|
||||||
for (final TextRun tr : para) {
|
for (final TextRun tr : para) {
|
||||||
final String str = tr.getRawText().replace("\r", "");
|
final String str = tr.getRawText().replace("\r", "");
|
||||||
final String newStr;
|
final String newStr;
|
||||||
|
@ -126,4 +126,13 @@ public interface SlideShow<
|
|||||||
* @since POI 4.0.0
|
* @since POI 4.0.0
|
||||||
*/
|
*/
|
||||||
POITextExtractor getMetadataTextExtractor();
|
POITextExtractor getMetadataTextExtractor();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the instance which handles the persisting of the slideshow,
|
||||||
|
* which is either a subclass of {@link org.apache.poi.POIDocument}
|
||||||
|
* or {@link org.apache.poi.POIXMLDocument}
|
||||||
|
*
|
||||||
|
* @since POI 4.0.0
|
||||||
|
*/
|
||||||
|
Object getPersistDocument();
|
||||||
}
|
}
|
||||||
|
@ -60,13 +60,40 @@ public class SlideShowFactory {
|
|||||||
* @throws IOException if an error occurs while reading the data
|
* @throws IOException if an error occurs while reading the data
|
||||||
*/
|
*/
|
||||||
public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
|
public static SlideShow<?,?> create(final NPOIFSFileSystem fs, String password) throws IOException {
|
||||||
DirectoryNode root = fs.getRoot();
|
return create(fs.getRoot(), password);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a SlideShow from the given DirectoryNode.
|
||||||
|
*
|
||||||
|
* @param root The {@link DirectoryNode} to start reading the document from
|
||||||
|
*
|
||||||
|
* @return The created SlideShow
|
||||||
|
*
|
||||||
|
* @throws IOException if an error occurs while reading the data
|
||||||
|
*/
|
||||||
|
public static SlideShow<?,?> create(final DirectoryNode root) throws IOException {
|
||||||
|
return create(root, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a SlideShow from the given DirectoryNode, which may
|
||||||
|
* be password protected
|
||||||
|
*
|
||||||
|
* @param root The {@link DirectoryNode} to start reading the document from
|
||||||
|
* @param password The password that should be used or null if no password is necessary.
|
||||||
|
*
|
||||||
|
* @return The created SlideShow
|
||||||
|
*
|
||||||
|
* @throws IOException if an error occurs while reading the data
|
||||||
|
*/
|
||||||
|
public static SlideShow<?,?> create(final DirectoryNode root, String password) throws IOException {
|
||||||
// Encrypted OOXML files go inside OLE2 containers, is this one?
|
// Encrypted OOXML files go inside OLE2 containers, is this one?
|
||||||
if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
if (root.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
||||||
InputStream stream = null;
|
InputStream stream = null;
|
||||||
try {
|
try {
|
||||||
stream = DocumentFactoryHelper.getDecryptedStream(fs, password);
|
stream = DocumentFactoryHelper.getDecryptedStream(root, password);
|
||||||
|
|
||||||
return createXSLFSlideShow(stream);
|
return createXSLFSlideShow(stream);
|
||||||
} finally {
|
} finally {
|
||||||
@ -82,7 +109,7 @@ public class SlideShowFactory {
|
|||||||
passwordSet = true;
|
passwordSet = true;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return createHSLFSlideShow(fs);
|
return createHSLFSlideShow(root);
|
||||||
} finally {
|
} finally {
|
||||||
if (passwordSet) {
|
if (passwordSet) {
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(null);
|
Biff8EncryptionKey.setCurrentUserPassword(null);
|
||||||
|
@ -68,6 +68,7 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
|
|||||||
*
|
*
|
||||||
* @return the opened document
|
* @return the opened document
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public final POIXMLDocument getDocument() {
|
public final POIXMLDocument getDocument() {
|
||||||
return _document;
|
return _document;
|
||||||
}
|
}
|
||||||
|
@ -51,6 +51,7 @@ import org.apache.poi.poifs.filesystem.NotOLE2FileException;
|
|||||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.apache.poi.util.NotImplemented;
|
import org.apache.poi.util.NotImplemented;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
@ -58,6 +59,7 @@ import org.apache.poi.util.POILogger;
|
|||||||
import org.apache.poi.util.Removal;
|
import org.apache.poi.util.Removal;
|
||||||
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
||||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||||
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||||
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
|
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
|
||||||
@ -127,20 +129,20 @@ public class ExtractorFactory {
|
|||||||
return OLE2ExtractorFactory.getPreferEventExtractor();
|
return OLE2ExtractorFactory.getPreferEventExtractor();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
|
public static <T extends POITextExtractor> T createExtractor(File f) throws IOException, OpenXML4JException, XmlException {
|
||||||
NPOIFSFileSystem fs = null;
|
NPOIFSFileSystem fs = null;
|
||||||
try {
|
try {
|
||||||
fs = new NPOIFSFileSystem(f);
|
fs = new NPOIFSFileSystem(f);
|
||||||
if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
if (fs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
|
||||||
return createEncryptedOOXMLExtractor(fs);
|
return (T)createEncryptedOOXMLExtractor(fs);
|
||||||
}
|
}
|
||||||
POIOLE2TextExtractor extractor = createExtractor(fs);
|
POITextExtractor extractor = createExtractor(fs);
|
||||||
extractor.setFilesystem(fs);
|
extractor.setFilesystem(fs);
|
||||||
return extractor;
|
return (T)extractor;
|
||||||
} catch (OfficeXmlFileException e) {
|
} catch (OfficeXmlFileException e) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
return (T)createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
|
||||||
} catch (NotOLE2FileException ne) {
|
} catch (NotOLE2FileException ne) {
|
||||||
// ensure file-handle release
|
// ensure file-handle release
|
||||||
IOUtils.closeQuietly(fs);
|
IOUtils.closeQuietly(fs);
|
||||||
@ -179,7 +181,7 @@ public class ExtractorFactory {
|
|||||||
* @throws XmlException If an XML parsing error occurs.
|
* @throws XmlException If an XML parsing error occurs.
|
||||||
* @throws IllegalArgumentException If no matching file type could be found.
|
* @throws IllegalArgumentException If no matching file type could be found.
|
||||||
*/
|
*/
|
||||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
public static POITextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||||
try {
|
try {
|
||||||
// Check for the normal Office core document
|
// Check for the normal Office core document
|
||||||
PackageRelationshipCollection core;
|
PackageRelationshipCollection core;
|
||||||
@ -226,13 +228,13 @@ public class ExtractorFactory {
|
|||||||
// Is it XSLF?
|
// Is it XSLF?
|
||||||
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
for (XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||||
if ( rel.getContentType().equals( contentType ) ) {
|
if ( rel.getContentType().equals( contentType ) ) {
|
||||||
return new XSLFPowerPointExtractor(pkg);
|
return new SlideShowExtractor(new XMLSlideShow(pkg));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// special handling for SlideShow-Theme-files,
|
// special handling for SlideShow-Theme-files,
|
||||||
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
if (XSLFRelation.THEME_MANAGER.getContentType().equals(contentType)) {
|
||||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
return new SlideShowExtractor(new XMLSlideShow(pkg));
|
||||||
}
|
}
|
||||||
|
|
||||||
// How about xlsb?
|
// How about xlsb?
|
||||||
@ -252,28 +254,28 @@ public class ExtractorFactory {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
public static <T extends POITextExtractor> T createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
return createExtractor(fs.getRoot());
|
||||||
}
|
}
|
||||||
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
public static <T extends POITextExtractor> T createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
return createExtractor(fs.getRoot());
|
||||||
}
|
}
|
||||||
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
public static <T extends POITextExtractor> T createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
|
||||||
return OLE2ExtractorFactory.createExtractor(fs);
|
return createExtractor(fs.getRoot());
|
||||||
}
|
}
|
||||||
|
|
||||||
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
public static <T extends POITextExtractor> T createExtractor(DirectoryNode poifsDir) throws IOException, OpenXML4JException, XmlException
|
||||||
{
|
{
|
||||||
// First, check for OOXML
|
// First, check for OOXML
|
||||||
for (String entryName : poifsDir.getEntryNames()) {
|
for (String entryName : poifsDir.getEntryNames()) {
|
||||||
if (entryName.equals("Package")) {
|
if (entryName.equals("Package")) {
|
||||||
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
|
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
|
||||||
return createExtractor(pkg);
|
return (T)createExtractor(pkg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If not, ask the OLE2 code to check, with Scratchpad if possible
|
// If not, ask the OLE2 code to check, with Scratchpad if possible
|
||||||
return OLE2ExtractorFactory.createExtractor(poifsDir);
|
return (T)OLE2ExtractorFactory.createExtractor(poifsDir);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -403,7 +405,7 @@ public class ExtractorFactory {
|
|||||||
throw new IllegalStateException("Not yet supported");
|
throw new IllegalStateException("Not yet supported");
|
||||||
}
|
}
|
||||||
|
|
||||||
private static POIXMLTextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
|
private static POITextExtractor createEncryptedOOXMLExtractor(NPOIFSFileSystem fs)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
String pass = Biff8EncryptionKey.getCurrentUserPassword();
|
||||||
if (pass == null) {
|
if (pass == null) {
|
||||||
|
@ -37,7 +37,7 @@ import org.apache.xmlbeans.XmlException;
|
|||||||
* @deprecated use {@link SlideShowExtractor}
|
* @deprecated use {@link SlideShowExtractor}
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
@Removal(version="4.2.0")
|
@Removal(version="5.0.0")
|
||||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||||
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
|
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[]{
|
||||||
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
|
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
|
||||||
|
@ -631,4 +631,9 @@ public class XMLSlideShow extends POIXMLDocument
|
|||||||
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
|
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
|
||||||
return new POIXMLPropertiesTextExtractor(this);
|
return new POIXMLPropertiesTextExtractor(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getPersistDocument() {
|
||||||
|
return this;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,20 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
package org.apache.poi.xslf.usermodel;
|
package org.apache.poi.xslf.usermodel;
|
||||||
|
|
||||||
import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;
|
import static org.apache.poi.xslf.usermodel.XSLFShape.PML_NS;
|
||||||
|
@ -182,12 +182,20 @@ implements Slide<XSLFShape,XSLFTextParagraph> {
|
|||||||
*/
|
*/
|
||||||
public XSLFCommentAuthors getCommentAuthorsPart() {
|
public XSLFCommentAuthors getCommentAuthorsPart() {
|
||||||
if(_commentAuthors == null) {
|
if(_commentAuthors == null) {
|
||||||
|
// first scan the slide relations
|
||||||
for (POIXMLDocumentPart p : getRelations()) {
|
for (POIXMLDocumentPart p : getRelations()) {
|
||||||
if (p instanceof XSLFCommentAuthors) {
|
if (p instanceof XSLFCommentAuthors) {
|
||||||
_commentAuthors = (XSLFCommentAuthors)p;
|
_commentAuthors = (XSLFCommentAuthors)p;
|
||||||
return _commentAuthors;
|
return _commentAuthors;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// then scan the presentation relations
|
||||||
|
for (POIXMLDocumentPart p : getSlideShow().getRelations()) {
|
||||||
|
if (p instanceof XSLFCommentAuthors) {
|
||||||
|
_commentAuthors = (XSLFCommentAuthors)p;
|
||||||
|
return _commentAuthors;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -120,10 +120,10 @@ public class TestHxxFEncryption {
|
|||||||
public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
|
public void newPassword(String newPass) throws IOException, OpenXML4JException, XmlException {
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||||
File f = sampleDir.getFile(file);
|
File f = sampleDir.getFile(file);
|
||||||
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
|
POITextExtractor te1 = ExtractorFactory.createExtractor(f);
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||||
POIDocument doc = te1.getDocument();
|
POIDocument doc = (POIDocument)te1.getDocument();
|
||||||
doc.write(bos);
|
doc.write(bos);
|
||||||
doc.close();
|
doc.close();
|
||||||
te1.close();
|
te1.close();
|
||||||
@ -140,25 +140,25 @@ public class TestHxxFEncryption {
|
|||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(password);
|
Biff8EncryptionKey.setCurrentUserPassword(password);
|
||||||
File f = sampleDir.getFile(file);
|
File f = sampleDir.getFile(file);
|
||||||
POIOLE2TextExtractor te1 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(f);
|
POITextExtractor te1 = ExtractorFactory.createExtractor(f);
|
||||||
// first remove encryption
|
// first remove encryption
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(null);
|
Biff8EncryptionKey.setCurrentUserPassword(null);
|
||||||
POIDocument doc = te1.getDocument();
|
POIDocument doc = (POIDocument)te1.getDocument();
|
||||||
doc.write(bos);
|
doc.write(bos);
|
||||||
doc.close();
|
doc.close();
|
||||||
te1.close();
|
te1.close();
|
||||||
// then use default setting, which is cryptoapi
|
// then use default setting, which is cryptoapi
|
||||||
String newPass = "newPass";
|
String newPass = "newPass";
|
||||||
POIOLE2TextExtractor te2 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
POITextExtractor te2 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||||
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
Biff8EncryptionKey.setCurrentUserPassword(newPass);
|
||||||
doc = te2.getDocument();
|
doc = (POIDocument)te2.getDocument();
|
||||||
bos.reset();
|
bos.reset();
|
||||||
doc.write(bos);
|
doc.write(bos);
|
||||||
doc.close();
|
doc.close();
|
||||||
te2.close();
|
te2.close();
|
||||||
// and finally update cryptoapi setting
|
// and finally update cryptoapi setting
|
||||||
POIOLE2TextExtractor te3 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
POITextExtractor te3 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||||
doc = te3.getDocument();
|
doc = (POIDocument)te3.getDocument();
|
||||||
// need to cache data (i.e. read all data) before changing the key size
|
// need to cache data (i.e. read all data) before changing the key size
|
||||||
if (doc instanceof HSLFSlideShowImpl) {
|
if (doc instanceof HSLFSlideShowImpl) {
|
||||||
HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
|
HSLFSlideShowImpl hss = (HSLFSlideShowImpl)doc;
|
||||||
@ -175,8 +175,8 @@ public class TestHxxFEncryption {
|
|||||||
doc.close();
|
doc.close();
|
||||||
te3.close();
|
te3.close();
|
||||||
// check the setting
|
// check the setting
|
||||||
POIOLE2TextExtractor te4 = (POIOLE2TextExtractor)ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
POITextExtractor te4 = ExtractorFactory.createExtractor(new ByteArrayInputStream(bos.toByteArray()));
|
||||||
doc = te4.getDocument();
|
doc = (POIDocument)te4.getDocument();
|
||||||
ei = doc.getEncryptionInfo();
|
ei = doc.getEncryptionInfo();
|
||||||
assertNotNull(ei);
|
assertNotNull(ei);
|
||||||
assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);
|
assertTrue(ei.getHeader() instanceof CryptoAPIEncryptionHeader);
|
||||||
|
@ -50,6 +50,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
|
|||||||
import org.apache.poi.openxml4j.opc.PackagePartName;
|
import org.apache.poi.openxml4j.opc.PackagePartName;
|
||||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||||
import org.apache.poi.sl.draw.DrawPaint;
|
import org.apache.poi.sl.draw.DrawPaint;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.sl.usermodel.PaintStyle;
|
import org.apache.poi.sl.usermodel.PaintStyle;
|
||||||
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
||||||
import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
|
import org.apache.poi.sl.usermodel.PaintStyle.TexturePaint;
|
||||||
@ -221,28 +222,27 @@ public class TestXSLFBugs {
|
|||||||
* rID2 -> slide3.xml
|
* rID2 -> slide3.xml
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void bug54916() throws Exception {
|
public void bug54916() throws IOException {
|
||||||
XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx");
|
try (XMLSlideShow ss = XSLFTestDataSamples.openSampleDocument("OverlappingRelations.pptx")) {
|
||||||
XSLFSlide slide;
|
XSLFSlide slide;
|
||||||
|
|
||||||
// Should find 4 slides
|
// Should find 4 slides
|
||||||
assertEquals(4, ss.getSlides().size());
|
assertEquals(4, ss.getSlides().size());
|
||||||
|
|
||||||
// Check the text, to see we got them in order
|
// Check the text, to see we got them in order
|
||||||
slide = ss.getSlides().get(0);
|
slide = ss.getSlides().get(0);
|
||||||
assertContains(getSlideText(slide), "POI cannot read this");
|
assertContains(getSlideText(ss, slide), "POI cannot read this");
|
||||||
|
|
||||||
slide = ss.getSlides().get(1);
|
slide = ss.getSlides().get(1);
|
||||||
assertContains(getSlideText(slide), "POI can read this");
|
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||||
assertContains(getSlideText(slide), "Has a relationship to another slide");
|
assertContains(getSlideText(ss, slide), "Has a relationship to another slide");
|
||||||
|
|
||||||
slide = ss.getSlides().get(2);
|
slide = ss.getSlides().get(2);
|
||||||
assertContains(getSlideText(slide), "POI can read this");
|
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||||
|
|
||||||
slide = ss.getSlides().get(3);
|
slide = ss.getSlides().get(3);
|
||||||
assertContains(getSlideText(slide), "POI can read this");
|
assertContains(getSlideText(ss, slide), "POI can read this");
|
||||||
|
}
|
||||||
ss.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -311,8 +311,15 @@ public class TestXSLFBugs {
|
|||||||
ss.close();
|
ss.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected String getSlideText(XSLFSlide slide) {
|
protected String getSlideText(XMLSlideShow ppt, XSLFSlide slide) throws IOException {
|
||||||
return XSLFPowerPointExtractor.getText(slide, true, false, false);
|
try (SlideShowExtractor extr = new SlideShowExtractor(ppt)) {
|
||||||
|
// do not auto-close the slideshow
|
||||||
|
extr.setFilesystem(null);
|
||||||
|
extr.setSlidesByDefault(true);
|
||||||
|
extr.setNotesByDefault(false);
|
||||||
|
extr.setMasterByDefault(false);
|
||||||
|
return extr.getText(slide);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -458,7 +465,7 @@ public class TestXSLFBugs {
|
|||||||
|
|
||||||
for (int i = 0; i < slideTexts.length; i++) {
|
for (int i = 0; i < slideTexts.length; i++) {
|
||||||
XSLFSlide slide = ss.getSlides().get(i);
|
XSLFSlide slide = ss.getSlides().get(i);
|
||||||
assertContains(getSlideText(slide), slideTexts[i]);
|
assertContains(getSlideText(ss, slide), slideTexts[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,16 +24,17 @@ import static org.junit.Assert.assertFalse;
|
|||||||
import static org.junit.Assert.assertNotNull;
|
import static org.junit.Assert.assertNotNull;
|
||||||
import static org.junit.Assert.assertTrue;
|
import static org.junit.Assert.assertTrue;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.POITextExtractor;
|
|
||||||
import org.apache.poi.extractor.ExtractorFactory;
|
import org.apache.poi.extractor.ExtractorFactory;
|
||||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
import org.junit.Ignore;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -44,188 +45,189 @@ public class TestXSLFPowerPointExtractor {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Get text out of the simple file
|
* Get text out of the simple file
|
||||||
* @throws XmlException
|
|
||||||
* @throws OpenXML4JException
|
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testGetSimpleText()
|
public void testGetSimpleText() throws IOException {
|
||||||
throws IOException, XmlException, OpenXML4JException {
|
try (XMLSlideShow xmlA = openPPTX("sample.pptx");
|
||||||
XMLSlideShow xmlA = openPPTX("sample.pptx");
|
SlideShowExtractor extractor = new SlideShowExtractor(xmlA)) {
|
||||||
@SuppressWarnings("resource")
|
|
||||||
OPCPackage pkg = xmlA.getPackage();
|
|
||||||
|
|
||||||
new XSLFPowerPointExtractor(xmlA).close();
|
extractor.getText();
|
||||||
new XSLFPowerPointExtractor(pkg).close();
|
|
||||||
|
|
||||||
XSLFPowerPointExtractor extractor =
|
String text = extractor.getText();
|
||||||
new XSLFPowerPointExtractor(xmlA);
|
assertTrue(text.length() > 0);
|
||||||
extractor.getText();
|
|
||||||
|
|
||||||
String text = extractor.getText();
|
// Check Basics
|
||||||
assertTrue(text.length() > 0);
|
assertStartsWith(text, "Lorem ipsum dolor sit amet\n");
|
||||||
|
assertContains(text, "amet\n\n");
|
||||||
|
|
||||||
// Check Basics
|
// Our placeholder master text
|
||||||
assertStartsWith(text, "Lorem ipsum dolor sit amet\n");
|
// This shouldn't show up in the output
|
||||||
assertContains(text, "amet\n\n");
|
// String masterText =
|
||||||
|
// "Click to edit Master title style\n" +
|
||||||
|
// "Click to edit Master subtitle style\n" +
|
||||||
|
// "\n\n\n\n\n\n" +
|
||||||
|
// "Click to edit Master title style\n" +
|
||||||
|
// "Click to edit Master text styles\n" +
|
||||||
|
// "Second level\n" +
|
||||||
|
// "Third level\n" +
|
||||||
|
// "Fourth level\n" +
|
||||||
|
// "Fifth level\n";
|
||||||
|
|
||||||
// Our placeholder master text
|
// Just slides, no notes
|
||||||
// This shouldn't show up in the output
|
extractor.setSlidesByDefault(true);
|
||||||
// String masterText =
|
extractor.setNotesByDefault(false);
|
||||||
// "Click to edit Master title style\n" +
|
extractor.setMasterByDefault(false);
|
||||||
// "Click to edit Master subtitle style\n" +
|
text = extractor.getText();
|
||||||
// "\n\n\n\n\n\n" +
|
String slideText =
|
||||||
// "Click to edit Master title style\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
// "Click to edit Master text styles\n" +
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
// "Second level\n" +
|
"\n" +
|
||||||
// "Third level\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
// "Fourth level\n" +
|
"Lorem\n" +
|
||||||
// "Fifth level\n";
|
"ipsum\n" +
|
||||||
|
"dolor\n" +
|
||||||
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n";
|
||||||
|
assertEquals(slideText, text);
|
||||||
|
|
||||||
// Just slides, no notes
|
// Just notes, no slides
|
||||||
text = extractor.getText(true, false, false);
|
extractor.setSlidesByDefault(false);
|
||||||
String slideText =
|
extractor.setNotesByDefault(true);
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
text = extractor.getText();
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
assertEquals("\n\n1\n\n\n2\n", text);
|
||||||
"\n" +
|
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
|
||||||
"Lorem\n" +
|
|
||||||
"ipsum\n" +
|
|
||||||
"dolor\n" +
|
|
||||||
"sit\n" +
|
|
||||||
"amet\n" +
|
|
||||||
"\n";
|
|
||||||
assertEquals(slideText, text);
|
|
||||||
|
|
||||||
// Just notes, no slides
|
// Both
|
||||||
text = extractor.getText(false, true);
|
extractor.setSlidesByDefault(true);
|
||||||
assertEquals("\n\n1\n\n\n2\n", text);
|
extractor.setNotesByDefault(true);
|
||||||
|
text = extractor.getText();
|
||||||
|
String bothText =
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
|
"\n\n\n1\n" +
|
||||||
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
|
"Lorem\n" +
|
||||||
|
"ipsum\n" +
|
||||||
|
"dolor\n" +
|
||||||
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n\n\n2\n";
|
||||||
|
assertEquals(bothText, text);
|
||||||
|
|
||||||
// Both
|
// With Slides and Master Text
|
||||||
text = extractor.getText(true, true, false);
|
extractor.setSlidesByDefault(true);
|
||||||
String bothText =
|
extractor.setNotesByDefault(false);
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
extractor.setMasterByDefault(true);
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
text = extractor.getText();
|
||||||
"\n\n\n1\n" +
|
String smText =
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Lorem\n" +
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
"ipsum\n" +
|
"\n" +
|
||||||
"dolor\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"sit\n" +
|
"Lorem\n" +
|
||||||
"amet\n" +
|
"ipsum\n" +
|
||||||
"\n\n\n2\n";
|
"dolor\n" +
|
||||||
assertEquals(bothText, text);
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n";
|
||||||
|
assertEquals(smText, text);
|
||||||
|
|
||||||
// With Slides and Master Text
|
// With Slides, Notes and Master Text
|
||||||
text = extractor.getText(true, false, true);
|
extractor.setSlidesByDefault(true);
|
||||||
String smText =
|
extractor.setNotesByDefault(true);
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
extractor.setMasterByDefault(true);
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
text = extractor.getText();
|
||||||
"\n" +
|
String snmText =
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"Lorem\n" +
|
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
||||||
"ipsum\n" +
|
"\n\n\n1\n" +
|
||||||
"dolor\n" +
|
"Lorem ipsum dolor sit amet\n" +
|
||||||
"sit\n" +
|
"Lorem\n" +
|
||||||
"amet\n" +
|
"ipsum\n" +
|
||||||
"\n";
|
"dolor\n" +
|
||||||
assertEquals(smText, text);
|
"sit\n" +
|
||||||
|
"amet\n" +
|
||||||
|
"\n\n\n2\n";
|
||||||
|
assertEquals(snmText, text);
|
||||||
|
|
||||||
// With Slides, Notes and Master Text
|
// Via set defaults
|
||||||
text = extractor.getText(true, true, true);
|
extractor.setSlidesByDefault(false);
|
||||||
String snmText =
|
extractor.setNotesByDefault(true);
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
text = extractor.getText();
|
||||||
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
|
assertEquals("\n\n1\n\n\n2\n", text);
|
||||||
"\n\n\n1\n" +
|
}
|
||||||
"Lorem ipsum dolor sit amet\n" +
|
|
||||||
"Lorem\n" +
|
|
||||||
"ipsum\n" +
|
|
||||||
"dolor\n" +
|
|
||||||
"sit\n" +
|
|
||||||
"amet\n" +
|
|
||||||
"\n\n\n2\n";
|
|
||||||
assertEquals(snmText, text);
|
|
||||||
|
|
||||||
// Via set defaults
|
|
||||||
extractor.setSlidesByDefault(false);
|
|
||||||
extractor.setNotesByDefault(true);
|
|
||||||
text = extractor.getText();
|
|
||||||
assertEquals("\n\n1\n\n\n2\n", text);
|
|
||||||
|
|
||||||
extractor.close();
|
|
||||||
xmlA.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
public void testGetComments() throws IOException {
|
public void testGetComments() throws IOException {
|
||||||
XMLSlideShow xml = openPPTX("45545_Comment.pptx");
|
try (XMLSlideShow xml = openPPTX("45545_Comment.pptx");
|
||||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||||
|
extractor.setCommentsByDefault(true);
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
assertTrue(text.length() > 0);
|
assertTrue(text.length() > 0);
|
||||||
|
|
||||||
// Check comments are there
|
// Check comments are there
|
||||||
assertContains(text, "testdoc");
|
assertContains(text, "testdoc");
|
||||||
assertContains(text, "test phrase");
|
assertContains(text, "test phrase");
|
||||||
|
|
||||||
// Check the authors came through too
|
// Check the authors came through too
|
||||||
assertContains(text, "XPVMWARE01");
|
assertContains(text, "XPVMWARE01");
|
||||||
|
}
|
||||||
extractor.close();
|
|
||||||
xml.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Ignore("currently slidelayouts aren't yet supported")
|
||||||
public void testGetMasterText() throws Exception {
|
public void testGetMasterText() throws Exception {
|
||||||
XMLSlideShow xml = openPPTX("WithMaster.pptx");
|
try (XMLSlideShow xml = openPPTX("WithMaster.pptx");
|
||||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||||
extractor.setSlidesByDefault(true);
|
extractor.setSlidesByDefault(true);
|
||||||
extractor.setNotesByDefault(false);
|
extractor.setNotesByDefault(false);
|
||||||
extractor.setMasterByDefault(true);
|
extractor.setMasterByDefault(true);
|
||||||
|
|
||||||
String text = extractor.getText();
|
|
||||||
assertTrue(text.length() > 0);
|
|
||||||
|
|
||||||
// Check master text is there
|
String text = extractor.getText();
|
||||||
assertContains(text, "Footer from the master slide");
|
assertTrue(text.length() > 0);
|
||||||
|
|
||||||
// Theme text shouldn't show up
|
// Check master text is there
|
||||||
// String themeText =
|
assertContains(text, "Footer from the master slide");
|
||||||
// "Theme Master Title\n" +
|
|
||||||
// "Theme Master first level\n" +
|
|
||||||
// "And the 2nd level\n" +
|
|
||||||
// "Our 3rd level goes here\n" +
|
|
||||||
// "And onto the 4th, such fun....\n" +
|
|
||||||
// "Finally is the Fifth level\n";
|
|
||||||
|
|
||||||
// Check the whole text
|
// Theme text shouldn't show up
|
||||||
String wholeText =
|
// String themeText =
|
||||||
"First page title\n" +
|
// "Theme Master Title\n" +
|
||||||
"First page subtitle\n" +
|
// "Theme Master first level\n" +
|
||||||
"This is the Master Title\n" +
|
// "And the 2nd level\n" +
|
||||||
"This text comes from the Master Slide\n" +
|
// "Our 3rd level goes here\n" +
|
||||||
"\n" +
|
// "And onto the 4th, such fun....\n" +
|
||||||
// TODO Detect we didn't have a title, and include the master one
|
// "Finally is the Fifth level\n";
|
||||||
"2nd page subtitle\n" +
|
|
||||||
"Footer from the master slide\n" +
|
|
||||||
"This is the Master Title\n" +
|
|
||||||
"This text comes from the Master Slide\n";
|
|
||||||
assertEquals(wholeText, text);
|
|
||||||
|
|
||||||
extractor.close();
|
// Check the whole text
|
||||||
xml.close();
|
String wholeText =
|
||||||
|
"First page title\n" +
|
||||||
|
"First page subtitle\n" +
|
||||||
|
"This is the Master Title\n" +
|
||||||
|
"This text comes from the Master Slide\n" +
|
||||||
|
"\n" +
|
||||||
|
// TODO Detect we didn't have a title, and include the master one
|
||||||
|
"2nd page subtitle\n" +
|
||||||
|
"Footer from the master slide\n" +
|
||||||
|
"This is the Master Title\n" +
|
||||||
|
"This text comes from the Master Slide\n";
|
||||||
|
assertEquals(wholeText, text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTable() throws Exception {
|
public void testTable() throws Exception {
|
||||||
XMLSlideShow xml = openPPTX("present1.pptx");
|
try (XMLSlideShow xml = openPPTX("present1.pptx");
|
||||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||||
|
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
assertTrue(text.length() > 0);
|
assertTrue(text.length() > 0);
|
||||||
|
|
||||||
// Check comments are there
|
// Check comments are there
|
||||||
assertContains(text, "TEST");
|
assertContains(text, "TEST");
|
||||||
|
}
|
||||||
extractor.close();
|
|
||||||
xml.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -241,74 +243,76 @@ public class TestXSLFPowerPointExtractor {
|
|||||||
};
|
};
|
||||||
for(String extension : extensions) {
|
for(String extension : extensions) {
|
||||||
String filename = "testPPT." + extension;
|
String filename = "testPPT." + extension;
|
||||||
XMLSlideShow xml = openPPTX(filename);
|
|
||||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
|
||||||
|
|
||||||
String text = extractor.getText();
|
try (XMLSlideShow xml = openPPTX(filename);
|
||||||
if (extension.equals("thmx")) {
|
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||||
// Theme file doesn't have any textual content
|
|
||||||
assertEquals(filename, 0, text.length());
|
String text = extractor.getText();
|
||||||
continue;
|
if (extension.equals("thmx")) {
|
||||||
|
// Theme file doesn't have any textual content
|
||||||
|
assertEquals(filename, 0, text.length());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(filename, text.length() > 0);
|
||||||
|
assertContains(filename, text, "Attachment Test");
|
||||||
|
assertContains(filename, text, "This is a test file data with the same content");
|
||||||
|
assertContains(filename, text, "content parsing");
|
||||||
|
assertContains(filename, text, "Different words to test against");
|
||||||
|
assertContains(filename, text, "Mystery");
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue(filename, text.length() > 0);
|
|
||||||
assertContains(filename, text, "Attachment Test");
|
|
||||||
assertContains(filename, text, "This is a test file data with the same content");
|
|
||||||
assertContains(filename, text, "content parsing");
|
|
||||||
assertContains(filename, text, "Different words to test against");
|
|
||||||
assertContains(filename, text, "Mystery");
|
|
||||||
|
|
||||||
extractor.close();
|
|
||||||
xml.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void test45541() throws Exception {
|
public void test45541() throws IOException, OpenXML4JException, XmlException {
|
||||||
// extract text from a powerpoint that has a header in the notes-element
|
// extract text from a powerpoint that has a header in the notes-element
|
||||||
POITextExtractor extr = ExtractorFactory.createExtractor(
|
final File headerFile = slTests.getFile("45541_Header.pptx");
|
||||||
slTests.getFile("45541_Header.pptx"));
|
try (final SlideShowExtractor extr = ExtractorFactory.createExtractor(headerFile)) {
|
||||||
String text = extr.getText();
|
String text = extr.getText();
|
||||||
assertNotNull(text);
|
assertNotNull(text);
|
||||||
assertFalse("Had: " + text, text.contains("testdoc"));
|
assertFalse("Had: " + text, text.contains("testdoc"));
|
||||||
|
|
||||||
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
|
extr.setSlidesByDefault(false);
|
||||||
assertContains(text, "testdoc");
|
extr.setNotesByDefault(true);
|
||||||
extr.close();
|
|
||||||
assertNotNull(text);
|
text = extr.getText();
|
||||||
|
assertContains(text, "testdoc");
|
||||||
|
assertNotNull(text);
|
||||||
|
}
|
||||||
|
|
||||||
// extract text from a powerpoint that has a footer in the master-slide
|
// extract text from a powerpoint that has a footer in the master-slide
|
||||||
extr = ExtractorFactory.createExtractor(
|
final File footerFile = slTests.getFile("45541_Footer.pptx");
|
||||||
slTests.getFile("45541_Footer.pptx"));
|
try (SlideShowExtractor extr = ExtractorFactory.createExtractor(footerFile)) {
|
||||||
text = extr.getText();
|
String text = extr.getText();
|
||||||
assertNotContained(text, "testdoc");
|
assertNotContained(text, "testdoc");
|
||||||
|
|
||||||
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
|
extr.setSlidesByDefault(false);
|
||||||
assertNotContained(text, "testdoc");
|
extr.setNotesByDefault(true);
|
||||||
|
text = extr.getText();
|
||||||
|
assertNotContained(text, "testdoc");
|
||||||
|
|
||||||
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
|
extr.setSlidesByDefault(false);
|
||||||
assertNotContained(text, "testdoc");
|
extr.setNotesByDefault(false);
|
||||||
|
extr.setMasterByDefault(true);
|
||||||
extr.close();
|
text = extr.getText();
|
||||||
|
assertNotContained(text, "testdoc");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bug54570() throws IOException {
|
public void bug54570() throws IOException {
|
||||||
XMLSlideShow xml = openPPTX("bug54570.pptx");
|
try (XMLSlideShow xml = openPPTX("bug54570.pptx");
|
||||||
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
|
SlideShowExtractor extractor = new SlideShowExtractor(xml)) {
|
||||||
String text = extractor.getText();
|
String text = extractor.getText();
|
||||||
assertNotNull(text);
|
assertNotNull(text);
|
||||||
extractor.close();
|
}
|
||||||
xml.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private XMLSlideShow openPPTX(String file) throws IOException {
|
private XMLSlideShow openPPTX(String file) throws IOException {
|
||||||
InputStream is = slTests.openResourceAsStream(file);
|
try (InputStream is = slTests.openResourceAsStream(file)) {
|
||||||
try {
|
|
||||||
return new XMLSlideShow(is);
|
return new XMLSlideShow(is);
|
||||||
} finally {
|
|
||||||
is.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,8 @@ import org.apache.poi.hwpf.extractor.WordExtractor;
|
|||||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.Entry;
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
|
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
|
* Scratchpad-specific logic for {@link OLE2ExtractorFactory} and
|
||||||
@ -65,7 +67,7 @@ public class OLE2ScratchpadExtractorFactory {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
|
if (poifsDir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT)) {
|
||||||
return new PowerPointExtractor(poifsDir);
|
return new SlideShowExtractor(SlideShowFactory.create(poifsDir));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (poifsDir.hasEntry("VisioDocument")) {
|
if (poifsDir.hasEntry("VisioDocument")) {
|
||||||
|
@ -34,6 +34,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
|||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||||
|
import org.apache.poi.util.Removal;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class can be used to extract text from a PowerPoint file. Can optionally
|
* This class can be used to extract text from a PowerPoint file. Can optionally
|
||||||
@ -43,6 +44,7 @@ import org.apache.poi.sl.usermodel.SlideShowFactory;
|
|||||||
*/
|
*/
|
||||||
@SuppressWarnings("WeakerAccess")
|
@SuppressWarnings("WeakerAccess")
|
||||||
@Deprecated
|
@Deprecated
|
||||||
|
@Removal(version="5.0.0")
|
||||||
public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
||||||
private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;
|
private final SlideShowExtractor<HSLFShape,HSLFTextParagraph> delegate;
|
||||||
|
|
||||||
|
@ -1139,4 +1139,9 @@ public final class HSLFSlideShow implements SlideShow<HSLFShape,HSLFTextParagrap
|
|||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
_hslfSlideShow.close();
|
_hslfSlideShow.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object getPersistDocument() {
|
||||||
|
return getSlideShowImpl();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,8 +19,8 @@ package org.apache.poi.hslf.usermodel;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import org.apache.poi.sl.usermodel.SlideShow;
|
|
||||||
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||||
import org.apache.poi.util.Internal;
|
import org.apache.poi.util.Internal;
|
||||||
|
|
||||||
@ -31,12 +31,20 @@ import org.apache.poi.util.Internal;
|
|||||||
@Internal
|
@Internal
|
||||||
public class HSLFSlideShowFactory extends SlideShowFactory {
|
public class HSLFSlideShowFactory extends SlideShowFactory {
|
||||||
/**
|
/**
|
||||||
* Creates a HSLFSlideShow from the given NPOIFSFileSystem
|
* Creates a HSLFSlideShow from the given NPOIFSFileSystem<p>
|
||||||
* <p>Note that in order to properly release resources the
|
* Note that in order to properly release resources the
|
||||||
* SlideShow should be closed after use.
|
* SlideShow should be closed after use.
|
||||||
*/
|
*/
|
||||||
public static SlideShow<?,?> createSlideShow(NPOIFSFileSystem fs) throws IOException {
|
public static HSLFSlideShow createSlideShow(final NPOIFSFileSystem fs) throws IOException {
|
||||||
return new HSLFSlideShow(fs);
|
return new HSLFSlideShow(fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates a HSLFSlideShow from the given DirectoryNode<p>
|
||||||
|
* Note that in order to properly release resources the
|
||||||
|
* SlideShow should be closed after use.
|
||||||
|
*/
|
||||||
|
public static HSLFSlideShow createSlideShow(final DirectoryNode root) throws IOException {
|
||||||
|
return new HSLFSlideShow(root);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -846,9 +846,13 @@ public final class HSLFSlideShowImpl extends POIDocument implements Closeable {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void close() throws IOException {
|
public void close() throws IOException {
|
||||||
NPOIFSFileSystem fs = getDirectory().getFileSystem();
|
// only close the filesystem, if we are based on the root node.
|
||||||
if (fs != null) {
|
// embedded documents/slideshows shouldn't close the parent container
|
||||||
fs.close();
|
if (getDirectory().getParent() == null) {
|
||||||
|
NPOIFSFileSystem fs = getDirectory().getFileSystem();
|
||||||
|
if (fs != null) {
|
||||||
|
fs.close();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -42,6 +42,10 @@ import org.apache.poi.poifs.filesystem.DirectoryNode;
|
|||||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
|
import org.apache.poi.sl.usermodel.ObjectShape;
|
||||||
|
import org.apache.poi.sl.usermodel.SlideShow;
|
||||||
|
import org.apache.poi.sl.usermodel.SlideShowFactory;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
@ -76,43 +80,46 @@ public final class TestExtractor {
|
|||||||
// ppe.close();
|
// ppe.close();
|
||||||
// }
|
// }
|
||||||
|
|
||||||
private PowerPointExtractor openExtractor(String fileName) throws IOException {
|
private SlideShowExtractor<?,?> openExtractor(String fileName) throws IOException {
|
||||||
InputStream is = slTests.openResourceAsStream(fileName);
|
try (InputStream is = slTests.openResourceAsStream(fileName)) {
|
||||||
try {
|
return new SlideShowExtractor(SlideShowFactory.create(is));
|
||||||
return new PowerPointExtractor(is);
|
|
||||||
} finally {
|
|
||||||
is.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReadSheetText() throws IOException {
|
public void testReadSheetText() throws IOException {
|
||||||
// Basic 2 page example
|
// Basic 2 page example
|
||||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||||
assertEquals(expectText, ppe.getText());
|
assertEquals(expectText, ppe.getText());
|
||||||
ppe.close();
|
}
|
||||||
|
|
||||||
// 1 page example with text boxes
|
// 1 page example with text boxes
|
||||||
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
|
try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
|
||||||
assertEquals(expectText2, ppe2.getText());
|
assertEquals(expectText2, ppe.getText());
|
||||||
ppe2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReadNoteText() throws IOException {
|
public void testReadNoteText() throws IOException {
|
||||||
// Basic 2 page example
|
// Basic 2 page example
|
||||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||||
String notesText = ppe.getNotes();
|
ppe.setNotesByDefault(true);
|
||||||
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
|
ppe.setSlidesByDefault(false);
|
||||||
assertEquals(expText, notesText);
|
ppe.setMasterByDefault(false);
|
||||||
ppe.close();
|
String notesText = ppe.getText();
|
||||||
|
String expText = "\nThese are the notes for page 1\n\nThese are the notes on page two, again lacking formatting\n";
|
||||||
|
assertEquals(expText, notesText);
|
||||||
|
}
|
||||||
|
|
||||||
// Other one doesn't have notes
|
// Other one doesn't have notes
|
||||||
PowerPointExtractor ppe2 = openExtractor("with_textbox.ppt");
|
try (SlideShowExtractor ppe = openExtractor("with_textbox.ppt")) {
|
||||||
notesText = ppe2.getNotes();
|
ppe.setNotesByDefault(true);
|
||||||
expText = "";
|
ppe.setSlidesByDefault(false);
|
||||||
assertEquals(expText, notesText);
|
ppe.setMasterByDefault(false);
|
||||||
ppe2.close();
|
String notesText = ppe.getText();
|
||||||
|
String expText = "";
|
||||||
|
assertEquals(expText, notesText);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
@ -126,19 +133,19 @@ public final class TestExtractor {
|
|||||||
"\nThese are the notes on page two, again lacking formatting\n"
|
"\nThese are the notes on page two, again lacking formatting\n"
|
||||||
};
|
};
|
||||||
|
|
||||||
PowerPointExtractor ppe = openExtractor("basic_test_ppt_file.ppt");
|
try (SlideShowExtractor ppe = openExtractor("basic_test_ppt_file.ppt")) {
|
||||||
ppe.setSlidesByDefault(true);
|
ppe.setSlidesByDefault(true);
|
||||||
ppe.setNotesByDefault(false);
|
ppe.setNotesByDefault(false);
|
||||||
assertEquals(slText[0] + slText[1], ppe.getText());
|
assertEquals(slText[0] + slText[1], ppe.getText());
|
||||||
|
|
||||||
ppe.setSlidesByDefault(false);
|
ppe.setSlidesByDefault(false);
|
||||||
ppe.setNotesByDefault(true);
|
ppe.setNotesByDefault(true);
|
||||||
assertEquals(ntText[0] + ntText[1], ppe.getText());
|
assertEquals(ntText[0] + ntText[1], ppe.getText());
|
||||||
|
|
||||||
ppe.setSlidesByDefault(true);
|
ppe.setSlidesByDefault(true);
|
||||||
ppe.setNotesByDefault(true);
|
ppe.setNotesByDefault(true);
|
||||||
assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
|
assertEquals(slText[0] + ntText[0] + slText[1] + ntText[1], ppe.getText());
|
||||||
ppe.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -149,45 +156,46 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testMissingCoreRecords() throws IOException {
|
public void testMissingCoreRecords() throws IOException {
|
||||||
PowerPointExtractor ppe = openExtractor("missing_core_records.ppt");
|
try (SlideShowExtractor<?,?> ppe = openExtractor("missing_core_records.ppt")) {
|
||||||
|
ppe.setSlidesByDefault(true);
|
||||||
|
ppe.setNotesByDefault(false);
|
||||||
|
String text = ppe.getText();
|
||||||
|
ppe.setSlidesByDefault(false);
|
||||||
|
ppe.setNotesByDefault(true);
|
||||||
|
String nText = ppe.getText();
|
||||||
|
|
||||||
String text = ppe.getText(true, false);
|
assertNotNull(text);
|
||||||
String nText = ppe.getNotes();
|
assertNotNull(nText);
|
||||||
|
|
||||||
assertNotNull(text);
|
// Notes record were corrupt, so don't expect any
|
||||||
assertNotNull(nText);
|
assertEquals(nText.length(), 0);
|
||||||
|
|
||||||
// Notes record were corrupt, so don't expect any
|
// Slide records were fine
|
||||||
assertEquals(nText.length(), 0);
|
assertContains(text, "Using Disease Surveillance and Response");
|
||||||
|
}
|
||||||
// Slide records were fine
|
|
||||||
assertContains(text, "Using Disease Surveillance and Response");
|
|
||||||
|
|
||||||
ppe.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractFromEmbeded() throws IOException {
|
public void testExtractFromEmbeded() throws IOException {
|
||||||
InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
|
try (final InputStream is = POIDataSamples.getSpreadSheetInstance().openResourceAsStream("excel_with_embeded.xls");
|
||||||
POIFSFileSystem fs = new POIFSFileSystem(is);
|
final POIFSFileSystem fs = new POIFSFileSystem(is)) {
|
||||||
DirectoryNode root = fs.getRoot();
|
final DirectoryNode root = fs.getRoot();
|
||||||
PowerPointExtractor ppe1 = assertExtractFromEmbedded(root, "MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n");
|
|
||||||
PowerPointExtractor ppe2 = assertExtractFromEmbedded(root, "MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n");
|
|
||||||
ppe2.close();
|
|
||||||
ppe1.close();
|
|
||||||
fs.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
private PowerPointExtractor assertExtractFromEmbedded(DirectoryNode root, String entryName, String expected)
|
|
||||||
throws IOException {
|
|
||||||
DirectoryNode dir = (DirectoryNode)root.getEntry(entryName);
|
|
||||||
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
|
|
||||||
|
|
||||||
// Check the first file
|
final String[] TEST_SET = {
|
||||||
HSLFSlideShowImpl ppt = new HSLFSlideShowImpl(dir);
|
"MBD0000A3B6", "Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
|
||||||
PowerPointExtractor ppe = new PowerPointExtractor(ppt);
|
"MBD0000A3B3", "Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n"
|
||||||
assertEquals(expected, ppe.getText(true, false));
|
};
|
||||||
return ppe;
|
|
||||||
|
for (int i=0; i<TEST_SET.length; i+=2) {
|
||||||
|
DirectoryNode dir = (DirectoryNode)root.getEntry(TEST_SET[i]);
|
||||||
|
assertTrue(dir.hasEntry(HSLFSlideShow.POWERPOINT_DOCUMENT));
|
||||||
|
|
||||||
|
try (final SlideShow<?,?> ppt = SlideShowFactory.create(dir);
|
||||||
|
final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
|
||||||
|
assertEquals(TEST_SET[i+1], ppe.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -195,32 +203,32 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testExtractFromOwnEmbeded() throws IOException {
|
public void testExtractFromOwnEmbeded() throws IOException {
|
||||||
PowerPointExtractor ppe = openExtractor("ppt_with_embeded.ppt");
|
try (SlideShowExtractor<?,?> ppe = openExtractor("ppt_with_embeded.ppt")) {
|
||||||
List<HSLFObjectShape> shapes = ppe.getOLEShapes();
|
List<? extends ObjectShape> shapes = ppe.getOLEShapes();
|
||||||
assertEquals("Expected 6 ole shapes", 6, shapes.size());
|
assertEquals("Expected 6 ole shapes", 6, shapes.size());
|
||||||
int num_ppt = 0, num_doc = 0, num_xls = 0;
|
int num_ppt = 0, num_doc = 0, num_xls = 0;
|
||||||
for (HSLFObjectShape ole : shapes) {
|
for (ObjectShape ole : shapes) {
|
||||||
String name = ole.getInstanceName();
|
String name = ((HSLFObjectShape)ole).getInstanceName();
|
||||||
InputStream data = ole.getObjectData().getInputStream();
|
InputStream data = ole.getObjectData().getInputStream();
|
||||||
if ("Worksheet".equals(name)) {
|
if ("Worksheet".equals(name)) {
|
||||||
HSSFWorkbook wb = new HSSFWorkbook(data);
|
HSSFWorkbook wb = new HSSFWorkbook(data);
|
||||||
num_xls++;
|
num_xls++;
|
||||||
wb.close();
|
wb.close();
|
||||||
} else if ("Document".equals(name)) {
|
} else if ("Document".equals(name)) {
|
||||||
HWPFDocument doc = new HWPFDocument(data);
|
HWPFDocument doc = new HWPFDocument(data);
|
||||||
num_doc++;
|
num_doc++;
|
||||||
doc.close();
|
doc.close();
|
||||||
} else if ("Presentation".equals(name)) {
|
} else if ("Presentation".equals(name)) {
|
||||||
num_ppt++;
|
num_ppt++;
|
||||||
HSLFSlideShow ppt = new HSLFSlideShow(data);
|
HSLFSlideShow ppt = new HSLFSlideShow(data);
|
||||||
ppt.close();
|
ppt.close();
|
||||||
|
}
|
||||||
|
data.close();
|
||||||
}
|
}
|
||||||
data.close();
|
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
|
||||||
|
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
|
||||||
|
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
|
||||||
}
|
}
|
||||||
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
|
|
||||||
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
|
|
||||||
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
|
|
||||||
ppe.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -228,11 +236,11 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void test52991() throws IOException {
|
public void test52991() throws IOException {
|
||||||
PowerPointExtractor ppe = openExtractor("badzip.ppt");
|
try (SlideShowExtractor<?,?> ppe = openExtractor("badzip.ppt")) {
|
||||||
for (HSLFObjectShape shape : ppe.getOLEShapes()) {
|
for (ObjectShape shape : ppe.getOLEShapes()) {
|
||||||
IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream());
|
IOUtils.copy(shape.getObjectData().getInputStream(), new ByteArrayOutputStream());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ppe.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -240,27 +248,27 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testWithComments() throws IOException {
|
public void testWithComments() throws IOException {
|
||||||
PowerPointExtractor ppe1 = openExtractor("WithComments.ppt");
|
try (final SlideShowExtractor ppe = openExtractor("WithComments.ppt")) {
|
||||||
String text = ppe1.getText();
|
String text = ppe.getText();
|
||||||
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
||||||
|
|
||||||
ppe1.setCommentsByDefault(true);
|
ppe.setCommentsByDefault(true);
|
||||||
|
|
||||||
text = ppe1.getText();
|
text = ppe.getText();
|
||||||
assertContains(text, "This is a test comment");
|
assertContains(text, "This is a test comment");
|
||||||
ppe1.close();
|
}
|
||||||
|
|
||||||
|
|
||||||
// And another file
|
// And another file
|
||||||
PowerPointExtractor ppe2 = openExtractor("45543.ppt");
|
try (SlideShowExtractor ppe = openExtractor("45543.ppt")) {
|
||||||
text = ppe2.getText();
|
String text = ppe.getText();
|
||||||
assertFalse("Comments not in by default", text.contains("testdoc"));
|
assertFalse("Comments not in by default", text.contains("testdoc"));
|
||||||
|
|
||||||
ppe2.setCommentsByDefault(true);
|
ppe.setCommentsByDefault(true);
|
||||||
|
|
||||||
text = ppe2.getText();
|
text = ppe.getText();
|
||||||
assertContains(text, "testdoc");
|
assertContains(text, "testdoc");
|
||||||
ppe2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -268,48 +276,37 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testHeaderFooter() throws IOException {
|
public void testHeaderFooter() throws IOException {
|
||||||
String text;
|
|
||||||
|
|
||||||
// With a header on the notes
|
// With a header on the notes
|
||||||
InputStream is1 = slTests.openResourceAsStream("45537_Header.ppt");
|
try (InputStream is = slTests.openResourceAsStream("45537_Header.ppt");
|
||||||
HSLFSlideShow ppt1 = new HSLFSlideShow(is1);
|
HSLFSlideShow ppt = new HSLFSlideShow(is)) {
|
||||||
is1.close();
|
|
||||||
assertNotNull(ppt1.getNotesHeadersFooters());
|
|
||||||
assertEquals("testdoc test phrase", ppt1.getNotesHeadersFooters().getHeaderText());
|
|
||||||
|
|
||||||
PowerPointExtractor ppe1 = new PowerPointExtractor(ppt1.getSlideShowImpl());
|
assertNotNull(ppt.getNotesHeadersFooters());
|
||||||
|
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getHeaderText());
|
||||||
|
|
||||||
text = ppe1.getText();
|
testHeaderFooterInner(ppt);
|
||||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
}
|
||||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
|
||||||
|
|
||||||
ppe1.setNotesByDefault(true);
|
|
||||||
text = ppe1.getText();
|
|
||||||
assertContains(text, "testdoc");
|
|
||||||
assertContains(text, "test phrase");
|
|
||||||
ppe1.close();
|
|
||||||
ppt1.close();
|
|
||||||
|
|
||||||
// And with a footer, also on notes
|
// And with a footer, also on notes
|
||||||
InputStream is2 = slTests.openResourceAsStream("45537_Footer.ppt");
|
try (final InputStream is = slTests.openResourceAsStream("45537_Footer.ppt");
|
||||||
HSLFSlideShow ppt2 = new HSLFSlideShow(is2);
|
final HSLFSlideShow ppt = new HSLFSlideShow(is)) {
|
||||||
is2.close();
|
assertNotNull(ppt.getNotesHeadersFooters());
|
||||||
|
assertEquals("testdoc test phrase", ppt.getNotesHeadersFooters().getFooterText());
|
||||||
assertNotNull(ppt2.getNotesHeadersFooters());
|
|
||||||
assertEquals("testdoc test phrase", ppt2.getNotesHeadersFooters().getFooterText());
|
|
||||||
ppt2.close();
|
|
||||||
|
|
||||||
PowerPointExtractor ppe2 = openExtractor("45537_Footer.ppt");
|
testHeaderFooterInner(ppt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
text = ppe2.getText();
|
private void testHeaderFooterInner(final HSLFSlideShow ppt) throws IOException {
|
||||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
try (final SlideShowExtractor<?,?> ppe = new SlideShowExtractor(ppt)) {
|
||||||
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
String text = ppe.getText();
|
||||||
|
assertFalse("Header shouldn't be there by default\n" + text, text.contains("testdoc"));
|
||||||
|
assertFalse("Header shouldn't be there by default\n" + text, text.contains("test phrase"));
|
||||||
|
|
||||||
ppe2.setNotesByDefault(true);
|
ppe.setNotesByDefault(true);
|
||||||
text = ppe2.getText();
|
text = ppe.getText();
|
||||||
assertContains(text, "testdoc");
|
assertContains(text, "testdoc");
|
||||||
assertContains(text, "test phrase");
|
assertContains(text, "test phrase");
|
||||||
ppe2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
@ -318,41 +315,40 @@ public final class TestExtractor {
|
|||||||
String masterTitleText = "This is the Master Title";
|
String masterTitleText = "This is the Master Title";
|
||||||
String masterRandomText = "This text comes from the Master Slide";
|
String masterRandomText = "This text comes from the Master Slide";
|
||||||
String masterFooterText = "Footer from the master slide";
|
String masterFooterText = "Footer from the master slide";
|
||||||
PowerPointExtractor ppe = openExtractor("WithMaster.ppt");
|
try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
|
||||||
ppe.setMasterByDefault(true);
|
ppe.setMasterByDefault(true);
|
||||||
|
|
||||||
String text = ppe.getText();
|
String text = ppe.getText();
|
||||||
assertContains(text, masterRandomText);
|
assertContains(text, masterRandomText);
|
||||||
assertContains(text, masterFooterText);
|
assertContains(text, masterFooterText);
|
||||||
ppe.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testMasterText() throws IOException {
|
public void testMasterText() throws IOException {
|
||||||
PowerPointExtractor ppe1 = openExtractor("master_text.ppt");
|
try (final SlideShowExtractor ppe = openExtractor("master_text.ppt")) {
|
||||||
|
// Initially not there
|
||||||
|
String text = ppe.getText();
|
||||||
|
assertFalse(text.contains("Text that I added to the master slide"));
|
||||||
|
|
||||||
// Initially not there
|
// Enable, shows up
|
||||||
String text = ppe1.getText();
|
ppe.setMasterByDefault(true);
|
||||||
assertFalse(text.contains("Text that I added to the master slide"));
|
text = ppe.getText();
|
||||||
|
assertContains(text, "Text that I added to the master slide");
|
||||||
|
|
||||||
// Enable, shows up
|
// Make sure placeholder text does not come out
|
||||||
ppe1.setMasterByDefault(true);
|
assertNotContained(text, "Click to edit Master");
|
||||||
text = ppe1.getText();
|
}
|
||||||
assertContains(text, "Text that I added to the master slide");
|
|
||||||
|
|
||||||
// Make sure placeholder text does not come out
|
|
||||||
assertNotContained(text, "Click to edit Master");
|
|
||||||
ppe1.close();
|
|
||||||
|
|
||||||
// Now with another file only containing master text
|
// Now with another file only containing master text
|
||||||
// Will always show up
|
// Will always show up
|
||||||
PowerPointExtractor ppe2 = openExtractor("WithMaster.ppt");
|
try (final SlideShowExtractor ppe = openExtractor("WithMaster.ppt")) {
|
||||||
String masterText = "Footer from the master slide";
|
String masterText = "Footer from the master slide";
|
||||||
|
|
||||||
text = ppe2.getText();
|
String text = ppe.getText();
|
||||||
assertContainsIgnoreCase(text, "master");
|
assertContainsIgnoreCase(text, "master");
|
||||||
assertContains(text, masterText);
|
assertContains(text, masterText);
|
||||||
ppe2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -360,22 +356,21 @@ public final class TestExtractor {
|
|||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testChineseText() throws IOException {
|
public void testChineseText() throws IOException {
|
||||||
PowerPointExtractor ppe = openExtractor("54880_chinese.ppt");
|
try (final SlideShowExtractor ppe = openExtractor("54880_chinese.ppt")) {
|
||||||
|
String text = ppe.getText();
|
||||||
|
|
||||||
String text = ppe.getText();
|
// Check for the english text line
|
||||||
|
assertContains(text, "Single byte");
|
||||||
|
|
||||||
// Check for the english text line
|
// Check for the english text in the mixed line
|
||||||
assertContains(text, "Single byte");
|
assertContains(text, "Mix");
|
||||||
|
|
||||||
// Check for the english text in the mixed line
|
// Check for the chinese text in the mixed line
|
||||||
assertContains(text, "Mix");
|
assertContains(text, "\u8868");
|
||||||
|
|
||||||
// Check for the chinese text in the mixed line
|
// Check for the chinese only text line
|
||||||
assertContains(text, "\u8868");
|
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
||||||
|
}
|
||||||
// Check for the chinese only text line
|
|
||||||
assertContains(text, "\uff8a\uff9d\uff76\uff78");
|
|
||||||
ppe.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -387,67 +382,59 @@ public final class TestExtractor {
|
|||||||
public void testDifferentPOIFS() throws IOException {
|
public void testDifferentPOIFS() throws IOException {
|
||||||
// Open the two filesystems
|
// Open the two filesystems
|
||||||
File pptFile = slTests.getFile("basic_test_ppt_file.ppt");
|
File pptFile = slTests.getFile("basic_test_ppt_file.ppt");
|
||||||
InputStream is1 = new FileInputStream(pptFile);
|
try (final InputStream is1 = new FileInputStream(pptFile);
|
||||||
OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1);
|
final NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile)) {
|
||||||
is1.close();
|
|
||||||
NPOIFSFileSystem npoifs = new NPOIFSFileSystem(pptFile);
|
|
||||||
|
|
||||||
DirectoryNode[] files = { opoifs.getRoot(), npoifs.getRoot() };
|
|
||||||
|
|
||||||
// Open directly
|
final OPOIFSFileSystem opoifs = new OPOIFSFileSystem(is1);
|
||||||
for (DirectoryNode dir : files) {
|
|
||||||
PowerPointExtractor extractor = new PowerPointExtractor(dir);
|
DirectoryNode[] files = {opoifs.getRoot(), npoifs.getRoot()};
|
||||||
assertEquals(expectText, extractor.getText());
|
|
||||||
|
// Open directly
|
||||||
|
for (DirectoryNode dir : files) {
|
||||||
|
try (SlideShow<?,?> ppt = SlideShowFactory.create(dir);
|
||||||
|
SlideShowExtractor<?,?> extractor = new SlideShowExtractor(ppt)) {
|
||||||
|
assertEquals(expectText, extractor.getText());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Open via a HSLFSlideShow
|
|
||||||
for (DirectoryNode dir : files) {
|
|
||||||
HSLFSlideShowImpl slideshow = new HSLFSlideShowImpl(dir);
|
|
||||||
PowerPointExtractor extractor = new PowerPointExtractor(slideshow);
|
|
||||||
assertEquals(expectText, extractor.getText());
|
|
||||||
extractor.close();
|
|
||||||
slideshow.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
npoifs.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testTable() throws Exception {
|
public void testTable() throws Exception {
|
||||||
PowerPointExtractor ppe1 = openExtractor("54111.ppt");
|
try (SlideShowExtractor ppe = openExtractor("54111.ppt")) {
|
||||||
String text1 = ppe1.getText();
|
String text = ppe.getText();
|
||||||
String target1 = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n"+
|
String target = "TH Cell 1\tTH Cell 2\tTH Cell 3\tTH Cell 4\n" +
|
||||||
"Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n"+
|
"Row 1, Cell 1\tRow 1, Cell 2\tRow 1, Cell 3\tRow 1, Cell 4\n" +
|
||||||
"Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n"+
|
"Row 2, Cell 1\tRow 2, Cell 2\tRow 2, Cell 3\tRow 2, Cell 4\n" +
|
||||||
"Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n"+
|
"Row 3, Cell 1\tRow 3, Cell 2\tRow 3, Cell 3\tRow 3, Cell 4\n" +
|
||||||
"Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n"+
|
"Row 4, Cell 1\tRow 4, Cell 2\tRow 4, Cell 3\tRow 4, Cell 4\n" +
|
||||||
"Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n";
|
"Row 5, Cell 1\tRow 5, Cell 2\tRow 5, Cell 3\tRow 5, Cell 4\n";
|
||||||
assertContains(text1, target1);
|
assertContains(text, target);
|
||||||
ppe1.close();
|
}
|
||||||
|
|
||||||
PowerPointExtractor ppe2 = openExtractor("54722.ppt");
|
try (SlideShowExtractor ppe = openExtractor("54722.ppt")) {
|
||||||
String text2 = ppe2.getText();
|
String text = ppe.getText();
|
||||||
|
|
||||||
String target2 = "this\tText\tis\twithin\ta\n" +
|
String target = "this\tText\tis\twithin\ta\n" +
|
||||||
"table\t1\t2\t3\t4";
|
"table\t1\t2\t3\t4";
|
||||||
assertContains(text2, target2);
|
assertContains(text, target);
|
||||||
ppe2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// bug 60003
|
// bug 60003
|
||||||
@Test
|
@Test
|
||||||
public void testExtractMasterSlideFooterText() throws Exception {
|
public void testExtractMasterSlideFooterText() throws Exception {
|
||||||
PowerPointExtractor ppe = openExtractor("60003.ppt");
|
try (SlideShowExtractor ppe = openExtractor("60003.ppt")) {
|
||||||
ppe.setMasterByDefault(true);
|
ppe.setMasterByDefault(true);
|
||||||
|
|
||||||
String text = ppe.getText();
|
String text = ppe.getText();
|
||||||
assertContains(text, "Prague");
|
assertContains(text, "Prague");
|
||||||
ppe.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testExtractGroupedShapeText() throws Exception {
|
public void testExtractGroupedShapeText() throws Exception {
|
||||||
try (final PowerPointExtractor ppe = openExtractor("bug62092.ppt")) {
|
try (final SlideShowExtractor ppe = openExtractor("bug62092.ppt")) {
|
||||||
final String text = ppe.getText();
|
final String text = ppe.getText();
|
||||||
|
|
||||||
//this tests that we're ignoring text shapes at depth=0
|
//this tests that we're ignoring text shapes at depth=0
|
||||||
|
@ -73,6 +73,7 @@ import org.apache.poi.poifs.macros.VBAMacroReader;
|
|||||||
import org.apache.poi.sl.draw.DrawFactory;
|
import org.apache.poi.sl.draw.DrawFactory;
|
||||||
import org.apache.poi.sl.draw.DrawPaint;
|
import org.apache.poi.sl.draw.DrawPaint;
|
||||||
import org.apache.poi.sl.draw.DrawTextParagraph;
|
import org.apache.poi.sl.draw.DrawTextParagraph;
|
||||||
|
import org.apache.poi.sl.extractor.SlideShowExtractor;
|
||||||
import org.apache.poi.sl.usermodel.ColorStyle;
|
import org.apache.poi.sl.usermodel.ColorStyle;
|
||||||
import org.apache.poi.sl.usermodel.PaintStyle;
|
import org.apache.poi.sl.usermodel.PaintStyle;
|
||||||
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
import org.apache.poi.sl.usermodel.PaintStyle.SolidPaint;
|
||||||
@ -800,18 +801,18 @@ public final class TestBugs {
|
|||||||
String files[] = { "bug58718_008524.ppt","bug58718_008558.ppt","bug58718_349008.ppt","bug58718_008495.ppt", };
|
String files[] = { "bug58718_008524.ppt","bug58718_008558.ppt","bug58718_349008.ppt","bug58718_008495.ppt", };
|
||||||
for (String f : files) {
|
for (String f : files) {
|
||||||
File sample = HSLFTestDataSamples.getSampleFile(f);
|
File sample = HSLFTestDataSamples.getSampleFile(f);
|
||||||
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath());
|
try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
|
||||||
assertNotNull(ex.getText());
|
assertNotNull(ex.getText());
|
||||||
ex.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void bug58733() throws IOException {
|
public void bug58733() throws IOException {
|
||||||
File sample = HSLFTestDataSamples.getSampleFile("bug58733_671884.ppt");
|
File sample = HSLFTestDataSamples.getSampleFile("bug58733_671884.ppt");
|
||||||
PowerPointExtractor ex = new PowerPointExtractor(sample.getAbsolutePath());
|
try (SlideShowExtractor ex = new SlideShowExtractor(SlideShowFactory.create(sample))) {
|
||||||
assertNotNull(ex.getText());
|
assertNotNull(ex.getText());
|
||||||
ex.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
Binary file not shown.
Loading…
Reference in New Issue
Block a user