#60519 - Extractor for *SSF embeddings
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1776819 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
db4bdaf29a
commit
1d9c74b1bf
@ -23,6 +23,8 @@ import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.ss.extractor.EmbeddedData;
|
||||
import org.apache.poi.ss.extractor.EmbeddedExtractor;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
@ -55,6 +57,8 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
||||
|
||||
readContent(read);
|
||||
|
||||
extractEmbedded(read);
|
||||
|
||||
modifyContent(read);
|
||||
|
||||
read.close();
|
||||
@ -92,6 +96,18 @@ public abstract class SpreadsheetHandler extends AbstractFileHandler {
|
||||
}
|
||||
}
|
||||
|
||||
private void extractEmbedded(Workbook wb) throws IOException {
|
||||
EmbeddedExtractor ee = new EmbeddedExtractor();
|
||||
|
||||
for (Sheet s : wb) {
|
||||
for (EmbeddedData ed : ee.extractAll(s)) {
|
||||
assertNotNull(ed.getFilename());
|
||||
assertNotNull(ed.getEmbeddedData());
|
||||
assertNotNull(ed.getShape());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void modifyContent(Workbook wb) {
|
||||
/* a number of files fail because of various things: udf, unimplemented functions, ...
|
||||
we would need quite a list of excludes and the large regression tests would probably
|
||||
|
@ -25,6 +25,7 @@ import org.apache.poi.ddf.*;
|
||||
import org.apache.poi.hssf.record.*;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.ss.usermodel.ObjectData;
|
||||
import org.apache.poi.util.HexDump;
|
||||
|
||||
/**
|
||||
@ -32,7 +33,7 @@ import org.apache.poi.util.HexDump;
|
||||
* <p/>
|
||||
* Right now (13 July 2012) it cannot be created from scratch
|
||||
*/
|
||||
public final class HSSFObjectData extends HSSFPicture {
|
||||
public final class HSSFObjectData extends HSSFPicture implements ObjectData {
|
||||
/**
|
||||
* Reference to the filesystem root, required for retrieving the object data.
|
||||
*/
|
||||
@ -43,20 +44,12 @@ public final class HSSFObjectData extends HSSFPicture {
|
||||
this._root = _root;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the OLE2 Class Name of the object
|
||||
*/
|
||||
@Override
|
||||
public String getOLE2ClassName() {
|
||||
return findObjectRecord().getOLEClassName();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the object data. Only call for ones that have
|
||||
* data though. See {@link #hasDirectoryEntry()}
|
||||
*
|
||||
* @return the object data as an OLE2 directory.
|
||||
* @throws IOException if there was an error reading the data.
|
||||
*/
|
||||
@Override
|
||||
public DirectoryEntry getDirectory() throws IOException {
|
||||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
|
||||
|
||||
@ -70,20 +63,12 @@ public final class HSSFObjectData extends HSSFPicture {
|
||||
throw new IOException("Stream " + streamName + " was not an OLE2 directory");
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the data portion, for an ObjectData
|
||||
* that doesn't have an associated POIFS Directory
|
||||
* Entry
|
||||
*/
|
||||
@Override
|
||||
public byte[] getObjectData() {
|
||||
return findObjectRecord().getObjectData();
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this ObjectData have an associated POIFS
|
||||
* Directory Entry?
|
||||
* (Not all do, those that don't have a data portion)
|
||||
*/
|
||||
@Override
|
||||
public boolean hasDirectoryEntry() {
|
||||
EmbeddedObjectRefSubRecord subRecord = findObjectRecord();
|
||||
|
||||
|
65
src/java/org/apache/poi/ss/usermodel/ObjectData.java
Normal file
65
src/java/org/apache/poi/ss/usermodel/ObjectData.java
Normal file
@ -0,0 +1,65 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.ss.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
|
||||
/**
|
||||
* Common interface for OLE shapes, i.e. shapes linked to embedded documents
|
||||
*
|
||||
* @since POI 3.16-beta2
|
||||
*/
|
||||
public interface ObjectData extends SimpleShape {
|
||||
/**
|
||||
* @return the data portion, for an ObjectData that doesn't have an associated POIFS Directory Entry
|
||||
*/
|
||||
byte[] getObjectData() throws IOException;
|
||||
|
||||
/**
|
||||
* @return does this ObjectData have an associated POIFS Directory Entry?
|
||||
* (Not all do, those that don't have a data portion)
|
||||
*/
|
||||
boolean hasDirectoryEntry();
|
||||
|
||||
/**
|
||||
* Gets the object data. Only call for ones that have
|
||||
* data though. See {@link #hasDirectoryEntry()}.
|
||||
* The caller has to close the corresponding POIFSFileSystem
|
||||
*
|
||||
* @return the object data as an OLE2 directory.
|
||||
* @throws IOException if there was an error reading the data.
|
||||
*/
|
||||
DirectoryEntry getDirectory() throws IOException;
|
||||
|
||||
/**
|
||||
* @return the OLE2 Class Name of the object
|
||||
*/
|
||||
String getOLE2ClassName();
|
||||
|
||||
/**
|
||||
* @return a filename suggestion - inspecting/interpreting the Directory object probably gives a better result
|
||||
*/
|
||||
String getFileName();
|
||||
|
||||
/**
|
||||
* @return the preview picture
|
||||
*/
|
||||
PictureData getPictureData();
|
||||
}
|
@ -41,6 +41,11 @@ public interface PackageRelationshipTypes {
|
||||
*/
|
||||
String CORE_PROPERTIES_ECMA376 = "http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties";
|
||||
|
||||
/**
|
||||
* Namespace of Core properties relationship type as defined in ECMA 376
|
||||
*/
|
||||
String CORE_PROPERTIES_ECMA376_NS = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
|
||||
|
||||
/**
|
||||
* Digital signature relationship type.
|
||||
*/
|
||||
|
104
src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java
Normal file
104
src/ooxml/java/org/apache/poi/ss/extractor/EmbeddedData.java
Normal file
@ -0,0 +1,104 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.ss.extractor;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Shape;
|
||||
|
||||
/**
|
||||
* A collection of embedded object informations and content
|
||||
*/
|
||||
public class EmbeddedData {
|
||||
private String filename;
|
||||
private byte[] embeddedData;
|
||||
private Shape shape;
|
||||
private String contentType = "binary/octet-stream";
|
||||
|
||||
public EmbeddedData(String filename, byte[] embeddedData, String contentType) {
|
||||
setFilename(filename);
|
||||
setEmbeddedData(embeddedData);
|
||||
setContentType(contentType);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the filename
|
||||
*/
|
||||
public String getFilename() {
|
||||
return filename;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the filename
|
||||
*
|
||||
* @param filename the filename
|
||||
*/
|
||||
public void setFilename(String filename) {
|
||||
if (filename == null) {
|
||||
this.filename = "unknown.bin";
|
||||
} else {
|
||||
this.filename = filename.replaceAll("[^/\\\\]*[/\\\\]", "").trim();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the embedded object byte array
|
||||
*/
|
||||
public byte[] getEmbeddedData() {
|
||||
return embeddedData;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the embedded object as byte array
|
||||
*
|
||||
* @param embeddedData the embedded object byte array
|
||||
*/
|
||||
public void setEmbeddedData(byte[] embeddedData) {
|
||||
this.embeddedData = (embeddedData == null) ? null : embeddedData.clone();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the shape which links to the embedded object
|
||||
*/
|
||||
public Shape getShape() {
|
||||
return shape;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the shape which links to the embedded object
|
||||
*
|
||||
* @param shape the shape
|
||||
*/
|
||||
public void setShape(Shape shape) {
|
||||
this.shape = shape;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the content-/mime-type of the embedded object, the default (if unknown) is {@code binary/octet-stream}
|
||||
*/
|
||||
public String getContentType() {
|
||||
return contentType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the content-/mime-type
|
||||
*
|
||||
* @param contentType the content-type
|
||||
*/
|
||||
public void setContentType(String contentType) {
|
||||
this.contentType = contentType;
|
||||
}
|
||||
}
|
@ -0,0 +1,353 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.ss.extractor;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
import org.apache.poi.hpsf.ClassID;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.Ole10Native;
|
||||
import org.apache.poi.poifs.filesystem.Ole10NativeException;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.Drawing;
|
||||
import org.apache.poi.ss.usermodel.ObjectData;
|
||||
import org.apache.poi.ss.usermodel.Picture;
|
||||
import org.apache.poi.ss.usermodel.PictureData;
|
||||
import org.apache.poi.ss.usermodel.Shape;
|
||||
import org.apache.poi.ss.usermodel.ShapeContainer;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LocaleUtil;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
public class EmbeddedExtractor implements Iterable<EmbeddedExtractor> {
|
||||
private static final POILogger LOG = POILogFactory.getLogger(EmbeddedExtractor.class);
|
||||
|
||||
/**
|
||||
* @return the list of known extractors, if you provide custom extractors, override this method
|
||||
*/
|
||||
@Override
|
||||
public Iterator<EmbeddedExtractor> iterator() {
|
||||
EmbeddedExtractor[] ee = {
|
||||
new Ole10Extractor(), new PdfExtractor(), new WordExtractor(), new ExcelExtractor(), new FsExtractor()
|
||||
};
|
||||
return Arrays.asList(ee).iterator();
|
||||
}
|
||||
|
||||
public EmbeddedData extractOne(DirectoryNode src) throws IOException {
|
||||
for (EmbeddedExtractor ee : this) {
|
||||
if (ee.canExtract(src)) {
|
||||
return ee.extract(src);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public EmbeddedData extractOne(Picture src) throws IOException {
|
||||
for (EmbeddedExtractor ee : this) {
|
||||
if (ee.canExtract(src)) {
|
||||
return ee.extract(src);
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public List<EmbeddedData> extractAll(Sheet sheet) throws IOException {
|
||||
Drawing<?> patriarch = sheet.getDrawingPatriarch();
|
||||
if (null == patriarch){
|
||||
return Collections.emptyList();
|
||||
}
|
||||
List<EmbeddedData> embeddings = new ArrayList<EmbeddedData>();
|
||||
extractAll(patriarch, embeddings);
|
||||
return embeddings;
|
||||
}
|
||||
|
||||
protected void extractAll(ShapeContainer<?> parent, List<EmbeddedData> embeddings) throws IOException {
|
||||
for (Shape shape : parent) {
|
||||
EmbeddedData data = null;
|
||||
if (shape instanceof ObjectData) {
|
||||
ObjectData od = (ObjectData)shape;
|
||||
try {
|
||||
if (od.hasDirectoryEntry()) {
|
||||
data = extractOne((DirectoryNode)od.getDirectory());
|
||||
} else {
|
||||
data = new EmbeddedData(od.getFileName(), od.getObjectData(), "binary/octet-stream");
|
||||
}
|
||||
} catch (Exception e) {
|
||||
LOG.log(POILogger.WARN, "Entry not found / readable - ignoring OLE embedding", e);
|
||||
}
|
||||
} else if (shape instanceof Picture) {
|
||||
data = extractOne((Picture)shape);
|
||||
} else if (shape instanceof ShapeContainer) {
|
||||
extractAll((ShapeContainer<?>)shape, embeddings);
|
||||
}
|
||||
|
||||
if (data == null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
data.setShape(shape);
|
||||
String filename = data.getFilename();
|
||||
String extension = (filename == null || filename.indexOf('.') == -1) ? ".bin" : filename.substring(filename.indexOf('.'));
|
||||
|
||||
// try to find an alternative name
|
||||
if (filename == null || "".equals(filename) || filename.startsWith("MBD") || filename.startsWith("Root Entry")) {
|
||||
filename = shape.getShapeName();
|
||||
if (filename != null) {
|
||||
filename += extension;
|
||||
}
|
||||
}
|
||||
// default to dummy name
|
||||
if (filename == null || "".equals(filename)) {
|
||||
filename = "picture_"+embeddings.size()+extension;
|
||||
}
|
||||
filename = filename.trim();
|
||||
data.setFilename(filename);
|
||||
|
||||
embeddings.add(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public boolean canExtract(DirectoryNode source) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean canExtract(Picture source) {
|
||||
return false;
|
||||
}
|
||||
|
||||
protected EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
assert(canExtract(dn));
|
||||
POIFSFileSystem dest = new POIFSFileSystem();
|
||||
copyNodes(dn, dest.getRoot());
|
||||
// start with a reasonable big size
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream(20000);
|
||||
dest.writeFilesystem(bos);
|
||||
dest.close();
|
||||
|
||||
return new EmbeddedData(dn.getName(), bos.toByteArray(), "binary/octet-stream");
|
||||
}
|
||||
|
||||
protected EmbeddedData extract(Picture source) throws IOException {
|
||||
return null;
|
||||
}
|
||||
|
||||
public static class Ole10Extractor extends EmbeddedExtractor {
|
||||
@Override
|
||||
public boolean canExtract(DirectoryNode dn) {
|
||||
ClassID clsId = dn.getStorageClsid();
|
||||
return ClassID.OLE10_PACKAGE.equals(clsId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
try {
|
||||
Ole10Native ole10 = Ole10Native.createFromEmbeddedOleObject(dn);
|
||||
return new EmbeddedData(ole10.getFileName(), ole10.getDataBuffer(), "binary/octet-stream");
|
||||
} catch (Ole10NativeException e) {
|
||||
throw new IOException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static class PdfExtractor extends EmbeddedExtractor {
|
||||
static ClassID PdfClassID = new ClassID("{B801CA65-A1FC-11D0-85AD-444553540000}");
|
||||
@Override
|
||||
public boolean canExtract(DirectoryNode dn) {
|
||||
ClassID clsId = dn.getStorageClsid();
|
||||
return (PdfClassID.equals(clsId)
|
||||
|| dn.hasEntry("CONTENTS"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
InputStream is = dn.createDocumentInputStream("CONTENTS");
|
||||
IOUtils.copy(is, bos);
|
||||
is.close();
|
||||
return new EmbeddedData(dn.getName()+".pdf", bos.toByteArray(), "application/pdf");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canExtract(Picture source) {
|
||||
PictureData pd = source.getPictureData();
|
||||
return (pd.getPictureType() == Workbook.PICTURE_TYPE_EMF);
|
||||
}
|
||||
|
||||
/**
|
||||
* Mac Office encodes embedded objects inside the picture, e.g. PDF is part of an EMF.
|
||||
* If an embedded stream is inside an EMF picture, this method extracts the payload.
|
||||
*
|
||||
* @return the embedded data in an EMF picture or null if none is found
|
||||
*/
|
||||
@Override
|
||||
protected EmbeddedData extract(Picture source) throws IOException {
|
||||
// check for emf+ embedded pdf (poor mans style :( )
|
||||
// Mac Excel 2011 embeds pdf files with this method.
|
||||
PictureData pd = source.getPictureData();
|
||||
if (pd.getPictureType() != Workbook.PICTURE_TYPE_EMF) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// TODO: investigate if this is just an EMF-hack or if other formats are also embedded in EMF
|
||||
byte pictureBytes[] = pd.getData();
|
||||
int idxStart = indexOf(pictureBytes, 0, "%PDF-".getBytes(LocaleUtil.CHARSET_1252));
|
||||
if (idxStart == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int idxEnd = indexOf(pictureBytes, idxStart, "%%EOF".getBytes(LocaleUtil.CHARSET_1252));
|
||||
if (idxEnd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
int pictureBytesLen = idxEnd-idxStart+6;
|
||||
byte[] pdfBytes = new byte[pictureBytesLen];
|
||||
System.arraycopy(pictureBytes, idxStart, pdfBytes, 0, pictureBytesLen);
|
||||
String filename = source.getShapeName().trim();
|
||||
if (!filename.toLowerCase(Locale.ROOT).endsWith(".pdf")) {
|
||||
filename += ".pdf";
|
||||
}
|
||||
return new EmbeddedData(filename, pdfBytes, "application/pdf");
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
static class WordExtractor extends EmbeddedExtractor {
|
||||
@Override
|
||||
public boolean canExtract(DirectoryNode dn) {
|
||||
ClassID clsId = dn.getStorageClsid();
|
||||
return (ClassID.WORD95.equals(clsId)
|
||||
|| ClassID.WORD97.equals(clsId)
|
||||
|| dn.hasEntry("WordDocument"));
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
EmbeddedData ed = super.extract(dn);
|
||||
ed.setFilename(dn.getName()+".doc");
|
||||
return ed;
|
||||
}
|
||||
}
|
||||
|
||||
static class ExcelExtractor extends EmbeddedExtractor {
|
||||
@Override
|
||||
public boolean canExtract(DirectoryNode dn) {
|
||||
ClassID clsId = dn.getStorageClsid();
|
||||
return (ClassID.EXCEL95.equals(clsId)
|
||||
|| ClassID.EXCEL97.equals(clsId)
|
||||
|| dn.hasEntry("Workbook") /*...*/);
|
||||
}
|
||||
|
||||
@Override
|
||||
public EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
EmbeddedData ed = super.extract(dn);
|
||||
ed.setFilename(dn.getName()+".xls");
|
||||
return ed;
|
||||
}
|
||||
}
|
||||
|
||||
static class FsExtractor extends EmbeddedExtractor {
|
||||
@Override
|
||||
public boolean canExtract(DirectoryNode dn) {
|
||||
return true;
|
||||
}
|
||||
@Override
|
||||
public EmbeddedData extract(DirectoryNode dn) throws IOException {
|
||||
EmbeddedData ed = super.extract(dn);
|
||||
ed.setFilename(dn.getName()+".ole");
|
||||
// TODO: read the content type from CombObj stream
|
||||
return ed;
|
||||
}
|
||||
}
|
||||
|
||||
protected static void copyNodes(DirectoryNode src, DirectoryNode dest) throws IOException {
|
||||
for (Entry e : src) {
|
||||
if (e instanceof DirectoryNode) {
|
||||
DirectoryNode srcDir = (DirectoryNode)e;
|
||||
DirectoryNode destDir = (DirectoryNode)dest.createDirectory(srcDir.getName());
|
||||
destDir.setStorageClsid(srcDir.getStorageClsid());
|
||||
copyNodes(srcDir, destDir);
|
||||
} else {
|
||||
InputStream is = src.createDocumentInputStream(e);
|
||||
dest.createDocument(e.getName(), is);
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Knuth-Morris-Pratt Algorithm for Pattern Matching
|
||||
* Finds the first occurrence of the pattern in the text.
|
||||
*/
|
||||
private static int indexOf(byte[] data, int offset, byte[] pattern) {
|
||||
int[] failure = computeFailure(pattern);
|
||||
|
||||
int j = 0;
|
||||
if (data.length == 0) return -1;
|
||||
|
||||
for (int i = offset; i < data.length; i++) {
|
||||
while (j > 0 && pattern[j] != data[i]) {
|
||||
j = failure[j - 1];
|
||||
}
|
||||
if (pattern[j] == data[i]) { j++; }
|
||||
if (j == pattern.length) {
|
||||
return i - pattern.length + 1;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the failure function using a boot-strapping process,
|
||||
* where the pattern is matched against itself.
|
||||
*/
|
||||
private static int[] computeFailure(byte[] pattern) {
|
||||
int[] failure = new int[pattern.length];
|
||||
|
||||
int j = 0;
|
||||
for (int i = 1; i < pattern.length; i++) {
|
||||
while (j > 0 && pattern[j] != pattern[i]) {
|
||||
j = failure[j - 1];
|
||||
}
|
||||
if (pattern[j] == pattern[i]) {
|
||||
j++;
|
||||
}
|
||||
failure[i] = j;
|
||||
}
|
||||
|
||||
return failure;
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -20,8 +20,10 @@ package org.apache.poi.xssf.usermodel;
|
||||
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
@ -32,13 +34,21 @@ import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.ss.usermodel.ClientAnchor;
|
||||
import org.apache.poi.ss.usermodel.Drawing;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.ss.util.ImageUtils;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.Units;
|
||||
import org.apache.poi.xssf.model.CommentsTable;
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.apache.xmlbeans.XmlOptions;
|
||||
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTGroupTransform2D;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPoint2D;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTPositiveSize2D;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.CTTransform2D;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTConnector;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTGraphicalObjectFrame;
|
||||
@ -53,7 +63,9 @@ import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.STEditAs;
|
||||
/**
|
||||
* Represents a SpreadsheetML drawing
|
||||
*/
|
||||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing<XSSFShape> {
|
||||
private static final POILogger LOG = POILogFactory.getLogger(XSSFDrawing.class);
|
||||
|
||||
/**
|
||||
* Root element of the SpreadsheetML Drawing part
|
||||
*/
|
||||
@ -86,7 +98,12 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
XmlOptions options = new XmlOptions(DEFAULT_XML_OPTIONS);
|
||||
//Removing root element
|
||||
options.setLoadReplaceDocumentElement(null);
|
||||
drawing = CTDrawing.Factory.parse(part.getInputStream(),options);
|
||||
InputStream is = part.getInputStream();
|
||||
try {
|
||||
drawing = CTDrawing.Factory.parse(is,options);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@ -176,6 +193,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
XSSFPicture shape = new XSSFPicture(this, ctShape);
|
||||
shape.anchor = anchor;
|
||||
shape.setPictureReference(rel);
|
||||
ctShape.getSpPr().setXfrm(createXfrm(anchor));
|
||||
|
||||
return shape;
|
||||
}
|
||||
|
||||
@ -202,6 +221,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
|
||||
XSSFGraphicFrame frame = createGraphicFrame(anchor);
|
||||
frame.setChart(chart, chartRelId);
|
||||
frame.getCTGraphicalObjectFrame().setXfrm(createXfrm(anchor));
|
||||
|
||||
return chart;
|
||||
}
|
||||
@ -241,6 +261,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
CTShape ctShape = ctAnchor.addNewSp();
|
||||
ctShape.set(XSSFSimpleShape.prototype());
|
||||
ctShape.getNvSpPr().getCNvPr().setId(shapeId);
|
||||
ctShape.getSpPr().setXfrm(createXfrm(anchor));
|
||||
XSSFSimpleShape shape = new XSSFSimpleShape(this, ctShape);
|
||||
shape.anchor = anchor;
|
||||
return shape;
|
||||
@ -278,6 +299,11 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
|
||||
CTGroupShape ctGroup = ctAnchor.addNewGrpSp();
|
||||
ctGroup.set(XSSFShapeGroup.prototype());
|
||||
CTTransform2D xfrm = createXfrm(anchor);
|
||||
CTGroupTransform2D grpXfrm =ctGroup.getGrpSpPr().getXfrm();
|
||||
grpXfrm.setOff(xfrm.getOff());
|
||||
grpXfrm.setExt(xfrm.getExt());
|
||||
grpXfrm.setChExt(xfrm.getExt());
|
||||
|
||||
XSSFShapeGroup shape = new XSSFShapeGroup(this, ctGroup);
|
||||
shape.anchor = anchor;
|
||||
@ -333,6 +359,7 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
CTTwoCellAnchor ctAnchor = createTwoCellAnchor(anchor);
|
||||
CTGraphicalObjectFrame ctGraphicFrame = ctAnchor.addNewGraphicFrame();
|
||||
ctGraphicFrame.set(XSSFGraphicFrame.prototype());
|
||||
ctGraphicFrame.setXfrm(createXfrm(anchor));
|
||||
|
||||
long frameId = numOfGraphicFrames++;
|
||||
XSSFGraphicFrame graphicFrame = new XSSFGraphicFrame(this, ctGraphicFrame);
|
||||
@ -378,39 +405,159 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
return ctAnchor;
|
||||
}
|
||||
|
||||
private CTTransform2D createXfrm(XSSFClientAnchor anchor) {
|
||||
CTTransform2D xfrm = CTTransform2D.Factory.newInstance();
|
||||
CTPoint2D off = xfrm.addNewOff();
|
||||
off.setX(anchor.getDx1());
|
||||
off.setY(anchor.getDy1());
|
||||
XSSFSheet sheet = (XSSFSheet)getParent();
|
||||
double widthPx = 0;
|
||||
for (int col=anchor.getCol1(); col<anchor.getCol2(); col++) {
|
||||
widthPx += sheet.getColumnWidthInPixels(col);
|
||||
}
|
||||
double heightPx = 0;
|
||||
for (int row=anchor.getRow1(); row<anchor.getRow2(); row++) {
|
||||
heightPx += ImageUtils.getRowHeightInPixels(sheet, row);
|
||||
}
|
||||
int width = Units.pixelToEMU((int)widthPx);
|
||||
int height = Units.pixelToEMU((int)heightPx);
|
||||
CTPositiveSize2D ext = xfrm.addNewExt();
|
||||
ext.setCx(width - anchor.getDx1() + anchor.getDx2());
|
||||
ext.setCy(height - anchor.getDy1() + anchor.getDy2());
|
||||
|
||||
// TODO: handle vflip/hflip
|
||||
return xfrm;
|
||||
}
|
||||
|
||||
private long newShapeId(){
|
||||
return drawing.sizeOfTwoCellAnchorArray() + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return list of shapes in this drawing
|
||||
*/
|
||||
public List<XSSFShape> getShapes(){
|
||||
public List<XSSFShape> getShapes(){
|
||||
List<XSSFShape> lst = new ArrayList<XSSFShape>();
|
||||
for(XmlObject obj : drawing.selectPath("./*/*")) {
|
||||
XSSFShape shape = null;
|
||||
if(obj instanceof CTPicture) shape = new XSSFPicture(this, (CTPicture)obj) ;
|
||||
else if(obj instanceof CTConnector) shape = new XSSFConnector(this, (CTConnector)obj) ;
|
||||
else if(obj instanceof CTShape) shape = new XSSFSimpleShape(this, (CTShape)obj) ;
|
||||
else if(obj instanceof CTGraphicalObjectFrame) shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
|
||||
else if(obj instanceof CTGroupShape) shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
|
||||
|
||||
if(shape != null){
|
||||
shape.anchor = getAnchorFromParent(obj);
|
||||
lst.add(shape);
|
||||
XmlCursor cur = drawing.newCursor();
|
||||
try {
|
||||
if (cur.toFirstChild()) {
|
||||
addShapes(cur, lst);
|
||||
}
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
return lst;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return list of shapes in this shape group
|
||||
*/
|
||||
public List<XSSFShape> getShapes(XSSFShapeGroup groupshape){
|
||||
List<XSSFShape> lst = new ArrayList<XSSFShape>();
|
||||
XmlCursor cur = groupshape.getCTGroupShape().newCursor();
|
||||
try {
|
||||
addShapes(cur, lst);
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
return lst;
|
||||
}
|
||||
|
||||
private void addShapes(XmlCursor cur, List<XSSFShape> lst) {
|
||||
try {
|
||||
do {
|
||||
cur.push();
|
||||
if (cur.toFirstChild()) {
|
||||
do {
|
||||
XmlObject obj = cur.getObject();
|
||||
|
||||
XSSFShape shape;
|
||||
if (obj instanceof CTMarker) {
|
||||
// ignore anchor elements
|
||||
continue;
|
||||
} else if (obj instanceof CTPicture) {
|
||||
shape = new XSSFPicture(this, (CTPicture)obj) ;
|
||||
} else if(obj instanceof CTConnector) {
|
||||
shape = new XSSFConnector(this, (CTConnector)obj) ;
|
||||
} else if(obj instanceof CTShape) {
|
||||
shape = hasOleLink(obj)
|
||||
? new XSSFObjectData(this, (CTShape)obj)
|
||||
: new XSSFSimpleShape(this, (CTShape)obj) ;
|
||||
} else if(obj instanceof CTGraphicalObjectFrame) {
|
||||
shape = new XSSFGraphicFrame(this, (CTGraphicalObjectFrame)obj) ;
|
||||
} else if(obj instanceof CTGroupShape) {
|
||||
shape = new XSSFShapeGroup(this, (CTGroupShape)obj) ;
|
||||
} else if(obj instanceof XmlAnyTypeImpl) {
|
||||
LOG.log(POILogger.WARN, "trying to parse AlternateContent, "
|
||||
+ "this unlinks the returned Shapes from the underlying xml content, "
|
||||
+ "so those shapes can't be used to modify the drawing, "
|
||||
+ "i.e. modifications will be ignored!");
|
||||
|
||||
// XmlAnyTypeImpl is returned for AlternateContent parts, which might contain a CTDrawing
|
||||
cur.push();
|
||||
cur.toFirstChild();
|
||||
XmlCursor cur2 = null;
|
||||
try {
|
||||
// need to parse AlternateContent again, otherwise the child elements aren't typed,
|
||||
// but also XmlAnyTypes
|
||||
CTDrawing alterWS = CTDrawing.Factory.parse(cur.newXMLStreamReader());
|
||||
cur2 = alterWS.newCursor();
|
||||
if (cur2.toFirstChild()) {
|
||||
addShapes(cur2, lst);
|
||||
}
|
||||
} catch (XmlException e) {
|
||||
LOG.log(POILogger.WARN, "unable to parse CTDrawing in alternate content.", e);
|
||||
} finally {
|
||||
if (cur2 != null) {
|
||||
cur2.dispose();
|
||||
}
|
||||
cur.pop();
|
||||
}
|
||||
continue;
|
||||
} else {
|
||||
// ignore anything else
|
||||
continue;
|
||||
}
|
||||
|
||||
assert(shape != null);
|
||||
shape.anchor = getAnchorFromParent(obj);
|
||||
lst.add(shape);
|
||||
|
||||
} while (cur.toNextSibling());
|
||||
}
|
||||
cur.pop();
|
||||
} while (cur.toNextSibling());
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
private boolean hasOleLink(XmlObject shape) {
|
||||
QName uriName = new QName(null, "uri");
|
||||
String xquery = "declare namespace a='"+XSSFRelation.NS_DRAWINGML+"' .//a:extLst/a:ext";
|
||||
XmlCursor cur = shape.newCursor();
|
||||
cur.selectPath(xquery);
|
||||
try {
|
||||
while (cur.toNextSelection()) {
|
||||
String uri = cur.getAttributeText(uriName);
|
||||
if ("{63B3BB69-23CF-44E3-9099-C40C66FF867C}".equals(uri)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private XSSFAnchor getAnchorFromParent(XmlObject obj){
|
||||
XSSFAnchor anchor = null;
|
||||
|
||||
XmlObject parentXbean = null;
|
||||
XmlCursor cursor = obj.newCursor();
|
||||
if(cursor.toParent()) parentXbean = cursor.getObject();
|
||||
if(cursor.toParent()) {
|
||||
parentXbean = cursor.getObject();
|
||||
}
|
||||
cursor.dispose();
|
||||
if(parentXbean != null){
|
||||
if (parentXbean instanceof CTTwoCellAnchor) {
|
||||
@ -424,4 +571,8 @@ public final class XSSFDrawing extends POIXMLDocumentPart implements Drawing {
|
||||
return anchor;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<XSSFShape> iterator() {
|
||||
return getShapes().iterator();
|
||||
}
|
||||
}
|
||||
|
169
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
Normal file
169
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFObjectData.java
Normal file
@ -0,0 +1,169 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.usermodel;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.ss.usermodel.ObjectData;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTShape;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTOleObject;
|
||||
|
||||
/**
|
||||
* Represents binary object (i.e. OLE) data stored in the file. Eg. A GIF, JPEG etc...
|
||||
*/
|
||||
public class XSSFObjectData extends XSSFSimpleShape implements ObjectData {
|
||||
private static final POILogger LOG = POILogFactory.getLogger(XSSFObjectData.class);
|
||||
|
||||
/**
|
||||
* A default instance of CTShape used for creating new shapes.
|
||||
*/
|
||||
private static CTShape prototype = null;
|
||||
|
||||
private CTOleObject oleObject;
|
||||
|
||||
protected XSSFObjectData(XSSFDrawing drawing, CTShape ctShape) {
|
||||
super(drawing, ctShape);
|
||||
}
|
||||
|
||||
/**
|
||||
* Prototype with the default structure of a new auto-shape.
|
||||
*/
|
||||
protected static CTShape prototype() {
|
||||
if(prototype == null) {
|
||||
prototype = XSSFSimpleShape.prototype();
|
||||
}
|
||||
return prototype;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getOLE2ClassName() {
|
||||
return getOleObject().getProgId();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the CTOleObject associated with the shape
|
||||
*/
|
||||
public CTOleObject getOleObject() {
|
||||
if (oleObject == null) {
|
||||
long shapeId = getCTShape().getNvSpPr().getCNvPr().getId();
|
||||
oleObject = getSheet().readOleObject(shapeId);
|
||||
if (oleObject == null) {
|
||||
throw new POIXMLException("Ole object not found in sheet container - it's probably a control element");
|
||||
}
|
||||
}
|
||||
return oleObject;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getObjectData() throws IOException {
|
||||
InputStream is = getObjectPart().getInputStream();
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
IOUtils.copy(is, bos);
|
||||
is.close();
|
||||
return bos.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the package part of the object data
|
||||
*/
|
||||
public PackagePart getObjectPart() {
|
||||
if (!getOleObject().isSetId()) {
|
||||
throw new POIXMLException("Invalid ole object found in sheet container");
|
||||
}
|
||||
POIXMLDocumentPart pdp = getSheet().getRelationById(getOleObject().getId());
|
||||
return (pdp == null) ? null : pdp.getPackagePart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasDirectoryEntry() {
|
||||
InputStream is = null;
|
||||
try {
|
||||
is = getObjectPart().getInputStream();
|
||||
|
||||
// If clearly doesn't do mark/reset, wrap up
|
||||
if (! is.markSupported()) {
|
||||
is = new PushbackInputStream(is, 8);
|
||||
}
|
||||
|
||||
// Ensure that there is at least some data there
|
||||
byte[] header8 = IOUtils.peekFirst8Bytes(is);
|
||||
|
||||
// Try to create
|
||||
return NPOIFSFileSystem.hasPOIFSHeader(header8);
|
||||
} catch (IOException e) {
|
||||
LOG.log(POILogger.WARN, "can't determine if directory entry exists", e);
|
||||
return false;
|
||||
} finally {
|
||||
IOUtils.closeQuietly(is);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("resource")
|
||||
public DirectoryEntry getDirectory() throws IOException {
|
||||
InputStream is = null;
|
||||
try {
|
||||
is = getObjectPart().getInputStream();
|
||||
return new POIFSFileSystem(is).getRoot();
|
||||
} finally {
|
||||
IOUtils.closeQuietly(is);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The filename of the embedded image
|
||||
*/
|
||||
@Override
|
||||
public String getFileName() {
|
||||
return getObjectPart().getPartName().getName();
|
||||
}
|
||||
|
||||
protected XSSFSheet getSheet() {
|
||||
return (XSSFSheet)getDrawing().getParent();
|
||||
}
|
||||
|
||||
@Override
|
||||
public XSSFPictureData getPictureData() {
|
||||
XmlCursor cur = getOleObject().newCursor();
|
||||
try {
|
||||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
|
||||
String blipId = cur.getAttributeText(new QName(PackageRelationshipTypes.CORE_PROPERTIES_ECMA376_NS, "id"));
|
||||
return (XSSFPictureData)getDrawing().getRelationById(blipId);
|
||||
}
|
||||
return null;
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
}
|
||||
}
|
@ -40,6 +40,8 @@ import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import javax.xml.namespace.QName;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.apache.poi.POIXMLException;
|
||||
@ -86,7 +88,9 @@ import org.apache.poi.xssf.usermodel.XSSFPivotTable.PivotTableReferenceConfigura
|
||||
import org.apache.poi.xssf.usermodel.helpers.ColumnHelper;
|
||||
import org.apache.poi.xssf.usermodel.helpers.XSSFIgnoredErrorHelper;
|
||||
import org.apache.poi.xssf.usermodel.helpers.XSSFRowShifter;
|
||||
import org.apache.xmlbeans.XmlCursor;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
import org.apache.xmlbeans.XmlOptions;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*;
|
||||
|
||||
@ -4371,4 +4375,64 @@ public class XSSFSheet extends POIXMLDocumentPart implements Sheet {
|
||||
CTIgnoredError ctIgnoredError = ctIgnoredErrors.addNewIgnoredError();
|
||||
XSSFIgnoredErrorHelper.addIgnoredErrors(ctIgnoredError, ref, ignoredErrorTypes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine the OleObject which links shapes with embedded resources
|
||||
*
|
||||
* @param shapeId the shape id
|
||||
* @return the CTOleObject of the shape
|
||||
*/
|
||||
protected CTOleObject readOleObject(long shapeId) {
|
||||
if (!getCTWorksheet().isSetOleObjects()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// we use a XmlCursor here to handle oleObject with-/out AlternateContent wrappers
|
||||
String xquery = "declare namespace p='"+XSSFRelation.NS_SPREADSHEETML+"' .//p:oleObject";
|
||||
XmlCursor cur = getCTWorksheet().getOleObjects().newCursor();
|
||||
try {
|
||||
cur.selectPath(xquery);
|
||||
CTOleObject coo = null;
|
||||
while (cur.toNextSelection()) {
|
||||
String sId = cur.getAttributeText(new QName(null, "shapeId"));
|
||||
if (sId == null || Long.parseLong(sId) != shapeId) {
|
||||
continue;
|
||||
}
|
||||
|
||||
XmlObject xObj = cur.getObject();
|
||||
if (xObj instanceof CTOleObject) {
|
||||
// the unusual case ...
|
||||
coo = (CTOleObject)xObj;
|
||||
} else {
|
||||
XMLStreamReader reader = cur.newXMLStreamReader();
|
||||
try {
|
||||
CTOleObjects coos = CTOleObjects.Factory.parse(reader);
|
||||
if (coos.sizeOfOleObjectArray() == 0) {
|
||||
continue;
|
||||
}
|
||||
coo = coos.getOleObjectArray(0);
|
||||
} catch (XmlException e) {
|
||||
logger.log(POILogger.INFO, "can't parse CTOleObjects", e);
|
||||
} finally {
|
||||
try {
|
||||
reader.close();
|
||||
} catch (XMLStreamException e) {
|
||||
logger.log(POILogger.INFO, "can't close reader", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// there are choice and fallback OleObject ... we prefer the one having the objectPr element,
|
||||
// which is in the choice element
|
||||
if (cur.toChild(XSSFRelation.NS_SPREADSHEETML, "objectPr")) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (coo == null) ? null : coo;
|
||||
} finally {
|
||||
cur.dispose();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,115 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.ss.extractor;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.bind.DatatypeConverter;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.ss.usermodel.WorkbookFactory;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestEmbeddedExtractor {
|
||||
private static final POIDataSamples samples = POIDataSamples.getSpreadSheetInstance();
|
||||
|
||||
@Test
|
||||
public void extractPDFfromEMF() throws Exception {
|
||||
InputStream fis = samples.openResourceAsStream("Basic_Expense_Template_2011.xls");
|
||||
Workbook wb = WorkbookFactory.create(fis);
|
||||
fis.close();
|
||||
|
||||
EmbeddedExtractor ee = new EmbeddedExtractor();
|
||||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
|
||||
for (Sheet s : wb) {
|
||||
edList.addAll(ee.extractAll(s));
|
||||
}
|
||||
wb.close();
|
||||
|
||||
assertEquals(2, edList.size());
|
||||
|
||||
String filename1 = "Sample.pdf";
|
||||
EmbeddedData ed0 = edList.get(0);
|
||||
assertEquals(filename1, ed0.getFilename());
|
||||
assertEquals(filename1, ed0.getShape().getShapeName().trim());
|
||||
assertEquals("uNplB1QpYug+LWappiTh0w==", md5hash(ed0.getEmbeddedData()));
|
||||
|
||||
String filename2 = "kalastuslupa_jiyjhnj_yuiyuiyuio_uyte_sldfsdfsdf_sfsdfsdf_sfsssfsf_sdfsdfsdfsdf_sdfsdfsdf.pdf";
|
||||
EmbeddedData ed1 = edList.get(1);
|
||||
assertEquals(filename2, ed1.getFilename());
|
||||
assertEquals(filename2, ed1.getShape().getShapeName().trim());
|
||||
assertEquals("QjLuAZ+cd7KbhVz4sj+QdA==", md5hash(ed1.getEmbeddedData()));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void extractFromXSSF() throws IOException, EncryptedDocumentException, InvalidFormatException {
|
||||
InputStream fis = samples.openResourceAsStream("58325_db.xlsx");
|
||||
Workbook wb = WorkbookFactory.create(fis);
|
||||
fis.close();
|
||||
|
||||
EmbeddedExtractor ee = new EmbeddedExtractor();
|
||||
List<EmbeddedData> edList = new ArrayList<EmbeddedData>();
|
||||
for (Sheet s : wb) {
|
||||
edList.addAll(ee.extractAll(s));
|
||||
}
|
||||
wb.close();
|
||||
|
||||
assertEquals(4, edList.size());
|
||||
EmbeddedData ed0 = edList.get(0);
|
||||
assertEquals("Object 1.pdf", ed0.getFilename());
|
||||
assertEquals("Object 1", ed0.getShape().getShapeName().trim());
|
||||
assertEquals("Oyys6UtQU1gbHYBYqA4NFA==", md5hash(ed0.getEmbeddedData()));
|
||||
|
||||
EmbeddedData ed1 = edList.get(1);
|
||||
assertEquals("Object 2.pdf", ed1.getFilename());
|
||||
assertEquals("Object 2", ed1.getShape().getShapeName().trim());
|
||||
assertEquals("xLScPUS0XH+5CTZ2A3neNw==", md5hash(ed1.getEmbeddedData()));
|
||||
|
||||
EmbeddedData ed2 = edList.get(2);
|
||||
assertEquals("Object 3.pdf", ed2.getFilename());
|
||||
assertEquals("Object 3", ed2.getShape().getShapeName().trim());
|
||||
assertEquals("rX4klZqJAeM5npb54Gi2+Q==", md5hash(ed2.getEmbeddedData()));
|
||||
|
||||
EmbeddedData ed3 = edList.get(3);
|
||||
assertEquals("Microsoft_Excel_Worksheet1.xlsx", ed3.getFilename());
|
||||
assertEquals("Object 1", ed3.getShape().getShapeName().trim());
|
||||
assertEquals("4m4N8ji2tjpEGPQuw2YwGA==", md5hash(ed3.getEmbeddedData()));
|
||||
}
|
||||
|
||||
public static String md5hash(byte[] input) {
|
||||
try {
|
||||
MessageDigest md = MessageDigest.getInstance("MD5");
|
||||
byte hash[] = md.digest(input);
|
||||
return DatatypeConverter.printBase64Binary(hash);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
// doesn't happen
|
||||
return "";
|
||||
}
|
||||
}
|
||||
}
|
BIN
test-data/spreadsheet/Basic_Expense_Template_2011.xls
Normal file
BIN
test-data/spreadsheet/Basic_Expense_Template_2011.xls
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user