A few small updates to HSSFXML, and start on HWPFXML

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@607554 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-12-30 16:21:22 +00:00
parent 1f1575e1be
commit 40235d1b5e
7 changed files with 273 additions and 2 deletions

View File

@ -34,6 +34,11 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;
* Experimental class to do low level processing * Experimental class to do low level processing
* of xlsx files. * of xlsx files.
* *
* If you are using these low level classes, then you
* will almost certainly need to refer to the OOXML
* specifications from
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
*
* WARNING - APIs expected to change rapidly * WARNING - APIs expected to change rapidly
*/ */
public class HSSFXML extends HXFDocument { public class HSSFXML extends HXFDocument {

View File

@ -26,7 +26,6 @@ import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package; import org.openxml4j.opc.Package;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;

View File

@ -0,0 +1,92 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf;
import java.io.IOException;
import java.io.InputStream;

import org.apache.poi.hxf.HXFDocument;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
/**
 * Experimental class to do low level processing
 *  of docx files.
 *
 * If you are using these low level classes, then you
 *  will almost certainly need to refer to the OOXML
 *  specifications from
 *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
 *
 * WARNING - APIs expected to change rapidly
 */
public class HWPFXML extends HXFDocument {
    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
    public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
    public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
    public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
    public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";

    /** The parsed main document part */
    private DocumentDocument wordDoc;

    /**
     * Hands the package to the parent class, asking for the
     *  part with the main wordprocessing content type, and
     *  then parses that base part.
     * @param container The package holding the docx file
     */
    public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
        super(container, MAIN_CONTENT_TYPE);

        // Always release the part's stream, even if parsing fails
        InputStream in = basePart.getInputStream();
        try {
            wordDoc = DocumentDocument.Factory.parse(in);
        } finally {
            in.close();
        }
    }

    /**
     * Returns the low level document base object
     */
    public CTDocument1 getDocument() {
        return wordDoc.getDocument();
    }

    /**
     * Returns the low level body of the document
     */
    public CTBody getDocumentBody() {
        return getDocument().getBody();
    }

    /**
     * Returns the styles object used
     * @throws IllegalStateException if the related parts can't be
     *  looked up, or if there isn't exactly one styles part
     */
    public CTStyles getStyle() throws XmlException, IOException {
        PackagePart[] parts;
        try {
            parts = getRelatedByType(STYLES_RELATION_TYPE);
        } catch(InvalidFormatException e) {
            throw new IllegalStateException(e);
        }
        if(parts.length != 1) {
            throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
        }

        // Always release the part's stream, even if parsing fails
        InputStream in = parts[0].getInputStream();
        try {
            return StylesDocument.Factory.parse(in).getStyles();
        } finally {
            in.close();
        }
    }
}

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLDocument;
import org.apache.xmlbeans.XmlException;
import org.dom4j.Document; import org.dom4j.Document;
import org.dom4j.DocumentException; import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader; import org.dom4j.io.SAXReader;
@ -31,7 +32,10 @@ import org.openxml4j.opc.PackageAccess;
import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackagePartName; import org.openxml4j.opc.PackagePartName;
import org.openxml4j.opc.PackageRelationship; import org.openxml4j.opc.PackageRelationship;
import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.PackagingURIHelper; import org.openxml4j.opc.PackagingURIHelper;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
/** /**
* Parent class of the low level interface to * Parent class of the low level interface to
@ -40,6 +44,11 @@ import org.openxml4j.opc.PackagingURIHelper;
* extends {@link POIXMLDocument}, unless they really * extends {@link POIXMLDocument}, unless they really
* do need to get low level access to the files. * do need to get low level access to the files.
* *
* If you are using these low level classes, then you
* will almost certainly need to refer to the OOXML
* specifications from
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
*
* WARNING - APIs expected to change rapidly * WARNING - APIs expected to change rapidly
*/ */
public abstract class HXFDocument { public abstract class HXFDocument {
@ -81,14 +90,22 @@ public abstract class HXFDocument {
/** /**
* Retrieves the PackagePart for the given relation * Retrieves the PackagePart for the given relation
* id. This will normally come from a r:id attribute * id. This will normally come from a r:id attribute
* on part of the base document. * on part of the base document.
* @param partId The r:id pointing to the other PackagePart * @param partId The r:id pointing to the other PackagePart
*/ */
protected PackagePart getRelatedPackagePart(String partId) { protected PackagePart getRelatedPackagePart(String partId) {
PackageRelationship rel = PackageRelationship rel =
basePart.getRelationship(partId); basePart.getRelationship(partId);
return getPackagePart(rel);
}
/**
* Retrieves the PackagePart for the given Relationship
* object. Normally you'll want to go via a content type
* or r:id to get one of those.
*/
protected PackagePart getPackagePart(PackageRelationship rel) {
PackagePartName relName; PackagePartName relName;
try { try {
relName = PackagingURIHelper.createPartName(rel.getTargetURI()); relName = PackagingURIHelper.createPartName(rel.getTargetURI());
@ -103,6 +120,24 @@ public abstract class HXFDocument {
return part; return part;
} }
/**
 * Collects the PackageParts that the base document
 *  points to through relationships of a single type.
 * The supplied value is handed straight to the base
 *  part's getRelationshipsByType lookup.
 * @param contentType The type string used to select the relationships
 * @return One PackagePart for each matching relationship
 */
protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
    PackageRelationshipCollection matches =
        basePart.getRelationshipsByType(contentType);

    // One slot per relationship, filled in iteration order
    PackagePart[] related = new PackagePart[matches.size()];
    int next = 0;
    for (PackageRelationship match : matches) {
        related[next++] = getPackagePart(match);
    }
    return related;
}
/** /**
* Get the package container. * Get the package container.
* @return The package associated to this document. * @return The package associated to this document.
@ -111,6 +146,26 @@ public abstract class HXFDocument {
return container; return container;
} }
/**
 * Get the document properties (extended ooxml properties)
 * @return The parsed extended properties, or null if the
 *  package has no extended properties relationship
 * @throws IllegalStateException if the package has more than
 *  one extended properties relationship
 */
public CTProperties getDocumentProperties() throws OpenXML4JException, XmlException, IOException {
    PackageRelationshipCollection docProps =
        container.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
    if(docProps.size() == 0) {
        return null;
    }
    if(docProps.size() > 1) {
        throw new IllegalStateException("Found " + docProps.size() + " relations for the extended properties, should only ever be one!");
    }

    PackageRelationship rel = docProps.getRelationship(0);
    PackagePart propsPart = getPackagePart(rel);

    // Always release the part's stream, even if parsing fails
    java.io.InputStream in = propsPart.getInputStream();
    try {
        return PropertiesDocument.Factory.parse(in).getProperties();
    } finally {
        in.close();
    }
}
/** /**
* Returns an opened OOXML Package for the supplied File * Returns an opened OOXML Package for the supplied File
* @param f File to open * @param f File to open

View File

@ -102,6 +102,7 @@ public class HXFLister {
disp.println(indent+"Relationship:"); disp.println(indent+"Relationship:");
disp.println(indent+"\tFrom: "+ rel.getSourceURI()); disp.println(indent+"\tFrom: "+ rel.getSourceURI());
disp.println(indent+"\tTo: " + rel.getTargetURI()); disp.println(indent+"\tTo: " + rel.getTargetURI());
disp.println(indent+"\tID: " + rel.getId());
disp.println(indent+"\tMode: " + rel.getTargetMode()); disp.println(indent+"\tMode: " + rel.getTargetMode());
disp.println(indent+"\tType: " + rel.getRelationshipType()); disp.println(indent+"\tType: " + rel.getRelationshipType());
} }

View File

@ -1,3 +1,19 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hssf; package org.apache.poi.hssf;
import java.io.File; import java.io.File;
@ -93,4 +109,15 @@ public class TestHSSFXML extends TestCase {
assertNotNull(xml.getSheet(sheets[1])); assertNotNull(xml.getSheet(sheets[1]));
assertNotNull(xml.getSheet(sheets[2])); assertNotNull(xml.getSheet(sheets[2]));
} }
/**
 * Opens the sample workbook and checks a few of the
 *  extended document properties
 */
public void testMetadataBasics() throws Exception {
    HSSFXML workbook = new HSSFXML(HXFDocument.openPackage(sampleFile));

    // The properties part must be present
    assertNotNull(workbook.getDocumentProperties());

    // Application name, then the character and line counts
    assertEquals("Microsoft Excel", workbook.getDocumentProperties().getApplication());
    assertEquals(0, workbook.getDocumentProperties().getCharacters());
    assertEquals(0, workbook.getDocumentProperties().getLines());
}
} }

View File

@ -0,0 +1,92 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf;
import java.io.File;
import org.apache.poi.hssf.HSSFXML;
import org.apache.poi.hxf.HXFDocument;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart;
import junit.framework.TestCase;
/**
 * Tests for the low level docx support in HWPFXML, run
 *  against the sample files found under the directory
 *  given by the HWPF.testdata.path system property.
 */
public class TestHWPFXML extends TestCase {
    private File sampleFile;
    private File complexFile;

    protected void setUp() throws Exception {
        super.setUp();

        sampleFile = testDataFile("sample.docx");
        complexFile = testDataFile("IllustrativeCases.docx");
    }

    /**
     * Builds the path to a file in the HWPF test data directory
     */
    private static File testDataFile(String fileName) {
        return new File(
                System.getProperty("HWPF.testdata.path") +
                File.separator + fileName
        );
    }

    /**
     * The sample file must hold a part with the main
     *  wordprocessing content type
     */
    public void testContainsMainContentType() throws Exception {
        Package pack = HXFDocument.openPackage(sampleFile);

        boolean found = false;
        for(PackagePart part : pack.getParts()) {
            // Non short-circuit, so every part's content type is checked
            found |= part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE);
            System.out.println(part);
        }
        assertTrue(found);
    }

    /**
     * Both files must open as plain packages, and
     *  then as HWPFXML documents with their key parts
     */
    public void testOpen() throws Exception {
        HXFDocument.openPackage(sampleFile);
        HXFDocument.openPackage(complexFile);

        // Simple file
        assertHasKeyParts(new HWPFXML(
                HXFDocument.openPackage(sampleFile)
        ));

        // Complex file
        assertHasKeyParts(new HWPFXML(
                HXFDocument.openPackage(complexFile)
        ));
    }

    /**
     * Checks the document, its body and its styles are all there
     */
    private static void assertHasKeyParts(HWPFXML doc) throws Exception {
        assertNotNull(doc.getDocument());
        assertNotNull(doc.getDocumentBody());
        assertNotNull(doc.getStyle());
    }

    /**
     * Checks a few of the extended document properties
     *  of the sample file
     */
    public void testMetadataBasics() throws Exception {
        HWPFXML doc = new HWPFXML(HXFDocument.openPackage(sampleFile));

        assertNotNull(doc.getDocumentProperties());

        assertEquals("Microsoft Office Word", doc.getDocumentProperties().getApplication());
        assertEquals(1315, doc.getDocumentProperties().getCharacters());
        assertEquals(10, doc.getDocumentProperties().getLines());
    }
}