From 829cdf330fe21e7b07e51ae80c7004c2847fe367 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 23 May 2008 15:05:12 +0000 Subject: [PATCH] Patch from Yury from bug #45018 - Support for fetching embedded documents from within an OOXML file git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@659564 13f79535-47bb-0310-9956-ffa450edef68 --- build.xml | 1 + src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../java/org/apache/poi/POIXMLDocument.java | 22 +++++++++++++++++++ .../org/apache/poi/POIXMLTextExtractor.java | 7 ++++++ .../poi/xwpf/extractor/XWPFWordExtractor.java | 2 +- 6 files changed, 33 insertions(+), 1 deletion(-) diff --git a/build.xml b/build.xml index 121cbbd2d..e1cb26c35 100644 --- a/build.xml +++ b/build.xml @@ -562,6 +562,7 @@ under the License. + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 5a90a0463..f26c6271b 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 45018 - Support for fetching embedded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data Text Extraction support for the new OOXML files (.xlsx, .docx and .pptx) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 6709b3271..4fc778a5f 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 45018 - Support for fetching embedded documents from within an OOXML file Port support for setting a policy on missing / blank cells when fetching, to XSSF too Common text extraction factory, which returns the correct POITextExtractor for the supplied data Text Extraction support for the new OOXML files (.xlsx, .docx and 
.pptx) diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 9fa4789db..7be637275 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -19,6 +19,8 @@ package org.apache.poi; import java.io.IOException; import java.io.InputStream; import java.io.PushbackInputStream; +import java.util.LinkedList; +import java.util.List; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.IOUtils; @@ -39,6 +41,8 @@ public abstract class POIXMLDocument { public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; + public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; + /** The OPC Package */ private Package pkg; @@ -50,6 +54,10 @@ public abstract class POIXMLDocument { */ private POIXMLProperties properties; + /** + * The embedded OLE2 files in the OPC package + */ + private List embedds; protected POIXMLDocument() {} @@ -62,6 +70,12 @@ public abstract class POIXMLDocument { // Get core part this.corePart = this.pkg.getPart(coreDocRelationship); + + // Get any embedded OLE2 documents + this.embedds = new LinkedList(); + for(PackageRelationship rel : corePart.getRelationshipsByType(OLE_OBJECT_REL_TYPE)) { + embedds.add(getTargetPart(rel)); + } } catch (OpenXML4JException e) { throw new IOException(e.toString()); } @@ -190,4 +204,12 @@ public abstract class POIXMLDocument { } return properties; } + + /** + * Get the document's embedded files. 
+ */ + public List getAllEmbedds() throws OpenXML4JException + { + return embedds; + } } diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index ae8514c27..8df75d949 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -47,4 +47,11 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { public ExtendedProperties getExtendedProperties() throws IOException, OpenXML4JException, XmlException { return document.getProperties().getExtendedProperties(); } + + /** + * Returns opened document + */ + public POIXMLDocument getDocument(){ + return document; + } } diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java index 8ca4f0349..64c8e3f78 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -58,7 +58,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { public static void main(String[] args) throws Exception { if(args.length < 1) { System.err.println("Use:"); - System.err.println(" HXFWordExtractor "); + System.err.println(" HXFWordExtractor "); System.exit(1); } POIXMLTextExtractor extractor =