From cc84a72aac6e841b6ba7c308d57175a3e638d5b6 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 26 Dec 2007 17:47:27 +0000 Subject: [PATCH] A quick play with OOXML parsing. Uses XmlBeans and OpenXml4J to get at the data. Expect the API to change rapidly in the near future as we discover what works and what doesn't! git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@606923 13f79535-47bb-0310-9956-ffa450edef68 --- .../ooxml-src/org/apache/poi/HXFDocument.java | 89 +++++++++++++++++++ .../org/apache/poi/POIXMLDocument.java | 27 ++++++ .../org/apache/poi/hssf/HSSFXML.java | 51 +++++++++++ .../org/apache/poi/hssf/TestHSSFXML.java | 68 ++++++++++++++ 4 files changed, 235 insertions(+) create mode 100644 src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java create mode 100644 src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java create mode 100644 src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java create mode 100644 src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java diff --git a/src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java new file mode 100644 index 000000000..427e377fa --- /dev/null +++ b/src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java @@ -0,0 +1,89 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.io.SAXReader;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackageAccess;
+import org.openxml4j.opc.PackagePart;
+
+/**
+ * Parent class of the low level interface to
+ *  all POI XML (OOXML) implementations.
+ * Normal users should probably deal with classes that
+ *  extend {@link POIXMLDocument}, unless they really
+ *  do need to get low level access to the files.
+ *
+ * WARNING - APIs expected to change rapidly
+ */
+public abstract class HXFDocument {
+    /**
+     * File package/container.
+     */
+    protected Package container;
+    /**
+     * The Package Part for our base document
+     */
+    protected PackagePart basePart;
+    /**
+     * The base document of this instance, eg Workbook for
+     *  xlsx
+     */
+    protected Document baseDocument;
+    /** Finds the single part of the given content type in the package and parses it into {@link #baseDocument}; throws OpenXML4JException if there is not exactly one such part or it cannot be read. */
+    protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException {
+        this.container = container;
+
+        // Find the base document; exactly one part with this content type is expected
+        ArrayList<PackagePart> baseParts =
+            container.getPartsByContentType(baseContentType);
+        if(baseParts.size() != 1) {
+            throw new OpenXML4JException("Expecting one entry with content type of " + baseContentType + ", but found " + baseParts.size());
+        }
+        basePart = baseParts.get(0);
+
+        // And load it up, keeping the underlying failure as the cause of what we rethrow
+        try {
+            SAXReader reader = new SAXReader();
+            baseDocument = reader.read(basePart.getInputStream());
+        } catch (DocumentException e) {
+            throw (OpenXML4JException) new OpenXML4JException(e.getMessage()).initCause(e);
+        } catch (IOException ioe) {
+            throw (OpenXML4JException) new OpenXML4JException(ioe.getMessage()).initCause(ioe);
+        }
+    }
+    /** Opens the file at the given path as a package, requesting {@link PackageAccess#READ_WRITE} access. */
+    public static Package openPackage(File f) throws InvalidFormatException {
+        return Package.open(f.toString(), PackageAccess.READ_WRITE);
+    }
+
+    /**
+     * Get the package container.
+     * @return The package associated to this document.
+     */
+    public Package getPackage() {
+        return container;
+    }
+}
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java
new file mode 100644
index 000000000..a070e9f08
--- /dev/null
+++ b/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java
@@ -0,0 +1,27 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. 
You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+/**
+ * Parent class of all UserModel POI XML (ooxml)
+ *  implementations.
+ * Intended to provide a similar function to {@link POIDocument},
+ *  for the XML based classes. No members are declared yet.
+ */
+public abstract class POIXMLDocument {
+    // TODO: nothing is implemented here yet - this class is currently an empty placeholder
+}
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
new file mode 100644
index 000000000..20707aac8
--- /dev/null
+++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
@@ -0,0 +1,51 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements. See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License. You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. 
+==================================================================== */
+package org.apache.poi.hssf;
+
+import java.io.IOException;
+
+import org.apache.poi.HXFDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+
+/**
+ * Experimental class to do low level processing
+ *  of xlsx files.
+ *
+ * WARNING - APIs expected to change rapidly
+ */
+public class HSSFXML extends HXFDocument {
+    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"; // used to locate the base part
+    public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
+    public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
+
+    private final WorkbookDocument workbookDoc; // assigned once, in the constructor
+    /** Locates the part of type {@link #MAIN_CONTENT_TYPE} via the parent constructor, then parses that same part into a WorkbookDocument. */
+    public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
+        super(container, MAIN_CONTENT_TYPE);
+
+        workbookDoc =
+            WorkbookDocument.Factory.parse(basePart.getInputStream());
+    }
+    /** Returns the CTWorkbook held by the workbook document parsed in the constructor. */
+    public CTWorkbook getWorkbook() {
+        return workbookDoc.getWorkbook();
+    }
+}
diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java
new file mode 100644
index 000000000..1013d4f10
--- /dev/null
+++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java
@@ -0,0 +1,68 @@
+package org.apache.poi.hssf;
+
+import java.io.File;
+
+import org.apache.poi.HXFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+public class TestHSSFXML extends TestCase {
+    /**
+     * Uses the old style schemas.microsoft.com schema uri
+     */
+    private File sampleFileBeta;
+    /**
+     * Uses the new style schemas.openxmlformats.org schema uri
+     */
+    private File sampleFile;
+
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        sampleFile = new File(
+            System.getProperty("HSSF.testdata.path") +
+            File.separator + "sample.xlsx"
+        );
+        sampleFileBeta = new File(
+            System.getProperty("HSSF.testdata.path") +
+            File.separator + "sample-beta.xlsx"
+        );
+    }
+
+    public void testContainsMainContentType() throws Exception {
+        Package pack = HXFDocument.openPackage(sampleFile);
+
+        boolean found = false;
+        for(PackagePart part : pack.getParts()) {
+            if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) {
+                found = true;
+                break; // one match is all the assertion needs
+            }
+        }
+        assertTrue(found);
+    }
+
+    public void testOpen() throws Exception {
+        HXFDocument.openPackage(sampleFile);
+        HXFDocument.openPackage(sampleFileBeta);
+
+        HSSFXML xml;
+
+        // With an old-style uri, as found in a file produced
+        //  with the office 2007 beta, will fail, as we don't
+        //  translate things
+        try {
+            xml = new HSSFXML(
+                HXFDocument.openPackage(sampleFileBeta)
+            );
+            fail("Opening the office 2007 beta sample should have thrown");
+        } catch(Exception expected) { /* expected - old-style uris are not translated */ }
+
+        // With the finalised uri, should be fine
+        xml = new HSSFXML(
+            HXFDocument.openPackage(sampleFile)
+        );
+    }
+}
\ No newline at end of file