A few small updates to HSSFXML, and start on HWPFXML
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@607554 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1f1575e1be
commit
40235d1b5e
@ -34,6 +34,11 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;
|
|||||||
* Experimental class to do low level processing
|
* Experimental class to do low level processing
|
||||||
* of xlsx files.
|
* of xlsx files.
|
||||||
*
|
*
|
||||||
|
* If you are using these low level classes, then you
|
||||||
|
* will almost certainly need to refer to the OOXML
|
||||||
|
* specifications from
|
||||||
|
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
|
||||||
|
*
|
||||||
* WARNING - APIs expected to change rapidly
|
* WARNING - APIs expected to change rapidly
|
||||||
*/
|
*/
|
||||||
public class HSSFXML extends HXFDocument {
|
public class HSSFXML extends HXFDocument {
|
||||||
|
@ -26,7 +26,6 @@ import org.apache.xmlbeans.XmlException;
|
|||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula;
|
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
|
||||||
|
92
src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java
Normal file
92
src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hwpf;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.poi.hxf.HXFDocument;
|
||||||
|
import org.apache.xmlbeans.XmlException;
|
||||||
|
import org.openxml4j.exceptions.InvalidFormatException;
|
||||||
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
|
import org.openxml4j.opc.Package;
|
||||||
|
import org.openxml4j.opc.PackagePart;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
|
||||||
|
import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Experimental class to do low level processing
|
||||||
|
* of docx files.
|
||||||
|
*
|
||||||
|
* If you are using these low level classes, then you
|
||||||
|
* will almost certainly need to refer to the OOXML
|
||||||
|
* specifications from
|
||||||
|
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
|
||||||
|
*
|
||||||
|
* WARNING - APIs expected to change rapidly
|
||||||
|
*/
|
||||||
|
public class HWPFXML extends HXFDocument {
|
||||||
|
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
|
||||||
|
public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
|
||||||
|
public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
|
||||||
|
public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
|
||||||
|
public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
|
||||||
|
|
||||||
|
private DocumentDocument wordDoc;
|
||||||
|
|
||||||
|
public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||||
|
super(container, MAIN_CONTENT_TYPE);
|
||||||
|
|
||||||
|
wordDoc =
|
||||||
|
DocumentDocument.Factory.parse(basePart.getInputStream());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the low level document base object
|
||||||
|
*/
|
||||||
|
public CTDocument1 getDocument() {
|
||||||
|
return wordDoc.getDocument();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the low level body of the document
|
||||||
|
*/
|
||||||
|
public CTBody getDocumentBody() {
|
||||||
|
return getDocument().getBody();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the styles object used
|
||||||
|
*/
|
||||||
|
public CTStyles getStyle() throws XmlException, IOException {
|
||||||
|
PackagePart[] parts;
|
||||||
|
try {
|
||||||
|
parts = getRelatedByType(STYLES_RELATION_TYPE);
|
||||||
|
} catch(InvalidFormatException e) {
|
||||||
|
throw new IllegalStateException(e);
|
||||||
|
}
|
||||||
|
if(parts.length != 1) {
|
||||||
|
throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
|
||||||
|
}
|
||||||
|
|
||||||
|
StylesDocument sd =
|
||||||
|
StylesDocument.Factory.parse(parts[0].getInputStream());
|
||||||
|
return sd.getStyles();
|
||||||
|
}
|
||||||
|
}
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLDocument;
|
import org.apache.poi.POIXMLDocument;
|
||||||
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.dom4j.Document;
|
import org.dom4j.Document;
|
||||||
import org.dom4j.DocumentException;
|
import org.dom4j.DocumentException;
|
||||||
import org.dom4j.io.SAXReader;
|
import org.dom4j.io.SAXReader;
|
||||||
@ -31,7 +32,10 @@ import org.openxml4j.opc.PackageAccess;
|
|||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
import org.openxml4j.opc.PackagePartName;
|
import org.openxml4j.opc.PackagePartName;
|
||||||
import org.openxml4j.opc.PackageRelationship;
|
import org.openxml4j.opc.PackageRelationship;
|
||||||
|
import org.openxml4j.opc.PackageRelationshipCollection;
|
||||||
import org.openxml4j.opc.PackagingURIHelper;
|
import org.openxml4j.opc.PackagingURIHelper;
|
||||||
|
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
|
||||||
|
import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parent class of the low level interface to
|
* Parent class of the low level interface to
|
||||||
@ -40,6 +44,11 @@ import org.openxml4j.opc.PackagingURIHelper;
|
|||||||
* extends {@link POIXMLDocument}, unless they really
|
* extends {@link POIXMLDocument}, unless they really
|
||||||
* do need to get low level access to the files.
|
* do need to get low level access to the files.
|
||||||
*
|
*
|
||||||
|
* If you are using these low level classes, then you
|
||||||
|
* will almost certainly need to refer to the OOXML
|
||||||
|
* specifications from
|
||||||
|
* http://www.ecma-international.org/publications/standards/Ecma-376.htm
|
||||||
|
*
|
||||||
* WARNING - APIs expected to change rapidly
|
* WARNING - APIs expected to change rapidly
|
||||||
*/
|
*/
|
||||||
public abstract class HXFDocument {
|
public abstract class HXFDocument {
|
||||||
@ -81,14 +90,22 @@ public abstract class HXFDocument {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the PackagePart for the given relation
|
* Retrieves the PackagePart for the given relation
|
||||||
* id. This will normally come from a r:id attribute
|
* id. This will normally come from a r:id attribute
|
||||||
* on part of the base document.
|
* on part of the base document.
|
||||||
* @param partId The r:id pointing to the other PackagePart
|
* @param partId The r:id pointing to the other PackagePart
|
||||||
*/
|
*/
|
||||||
protected PackagePart getRelatedPackagePart(String partId) {
|
protected PackagePart getRelatedPackagePart(String partId) {
|
||||||
PackageRelationship rel =
|
PackageRelationship rel =
|
||||||
basePart.getRelationship(partId);
|
basePart.getRelationship(partId);
|
||||||
|
return getPackagePart(rel);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the PackagePart for the given Relationship
|
||||||
|
* object. Normally you'll want to go via a content type
|
||||||
|
* or r:id to get one of those.
|
||||||
|
*/
|
||||||
|
protected PackagePart getPackagePart(PackageRelationship rel) {
|
||||||
PackagePartName relName;
|
PackagePartName relName;
|
||||||
try {
|
try {
|
||||||
relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
||||||
@ -103,6 +120,24 @@ public abstract class HXFDocument {
|
|||||||
return part;
|
return part;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves all the PackageParts which are defined as
|
||||||
|
* relationships of the base document with the
|
||||||
|
* specified relationship type.
|
||||||
|
*/
|
||||||
|
protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
|
||||||
|
PackageRelationshipCollection partsC =
|
||||||
|
basePart.getRelationshipsByType(contentType);
|
||||||
|
|
||||||
|
PackagePart[] parts = new PackagePart[partsC.size()];
|
||||||
|
int count = 0;
|
||||||
|
for (PackageRelationship rel : partsC) {
|
||||||
|
parts[count] = getPackagePart(rel);
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return parts;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the package container.
|
* Get the package container.
|
||||||
* @return The package associated to this document.
|
* @return The package associated to this document.
|
||||||
@ -111,6 +146,26 @@ public abstract class HXFDocument {
|
|||||||
return container;
|
return container;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the document properties (extended ooxml properties)
|
||||||
|
*/
|
||||||
|
public CTProperties getDocumentProperties() throws OpenXML4JException, XmlException, IOException {
|
||||||
|
PackageRelationshipCollection docProps =
|
||||||
|
container.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
|
||||||
|
if(docProps.size() == 0) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if(docProps.size() > 1) {
|
||||||
|
throw new IllegalStateException("Found " + docProps.size() + " relations for the extended properties, should only ever be one!");
|
||||||
|
}
|
||||||
|
PackageRelationship rel = docProps.getRelationship(0);
|
||||||
|
PackagePart propsPart = getPackagePart(rel);
|
||||||
|
|
||||||
|
PropertiesDocument props = PropertiesDocument.Factory.parse(
|
||||||
|
propsPart.getInputStream());
|
||||||
|
return props.getProperties();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns an opened OOXML Package for the supplied File
|
* Returns an opened OOXML Package for the supplied File
|
||||||
* @param f File to open
|
* @param f File to open
|
||||||
|
@ -102,6 +102,7 @@ public class HXFLister {
|
|||||||
disp.println(indent+"Relationship:");
|
disp.println(indent+"Relationship:");
|
||||||
disp.println(indent+"\tFrom: "+ rel.getSourceURI());
|
disp.println(indent+"\tFrom: "+ rel.getSourceURI());
|
||||||
disp.println(indent+"\tTo: " + rel.getTargetURI());
|
disp.println(indent+"\tTo: " + rel.getTargetURI());
|
||||||
|
disp.println(indent+"\tID: " + rel.getId());
|
||||||
disp.println(indent+"\tMode: " + rel.getTargetMode());
|
disp.println(indent+"\tMode: " + rel.getTargetMode());
|
||||||
disp.println(indent+"\tType: " + rel.getRelationshipType());
|
disp.println(indent+"\tType: " + rel.getRelationshipType());
|
||||||
}
|
}
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
package org.apache.poi.hssf;
|
package org.apache.poi.hssf;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
@ -93,4 +109,15 @@ public class TestHSSFXML extends TestCase {
|
|||||||
assertNotNull(xml.getSheet(sheets[1]));
|
assertNotNull(xml.getSheet(sheets[1]));
|
||||||
assertNotNull(xml.getSheet(sheets[2]));
|
assertNotNull(xml.getSheet(sheets[2]));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testMetadataBasics() throws Exception {
|
||||||
|
HSSFXML xml = new HSSFXML(
|
||||||
|
HXFDocument.openPackage(sampleFile)
|
||||||
|
);
|
||||||
|
assertNotNull(xml.getDocumentProperties());
|
||||||
|
|
||||||
|
assertEquals("Microsoft Excel", xml.getDocumentProperties().getApplication());
|
||||||
|
assertEquals(0, xml.getDocumentProperties().getCharacters());
|
||||||
|
assertEquals(0, xml.getDocumentProperties().getLines());
|
||||||
|
}
|
||||||
}
|
}
|
@ -0,0 +1,92 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hwpf;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.HSSFXML;
|
||||||
|
import org.apache.poi.hxf.HXFDocument;
|
||||||
|
import org.openxml4j.opc.Package;
|
||||||
|
import org.openxml4j.opc.PackagePart;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class TestHWPFXML extends TestCase {
|
||||||
|
private File sampleFile;
|
||||||
|
private File complexFile;
|
||||||
|
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
super.setUp();
|
||||||
|
|
||||||
|
sampleFile = new File(
|
||||||
|
System.getProperty("HWPF.testdata.path") +
|
||||||
|
File.separator + "sample.docx"
|
||||||
|
);
|
||||||
|
complexFile = new File(
|
||||||
|
System.getProperty("HWPF.testdata.path") +
|
||||||
|
File.separator + "IllustrativeCases.docx"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testContainsMainContentType() throws Exception {
|
||||||
|
Package pack = HXFDocument.openPackage(sampleFile);
|
||||||
|
|
||||||
|
boolean found = false;
|
||||||
|
for(PackagePart part : pack.getParts()) {
|
||||||
|
if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
|
||||||
|
found = true;
|
||||||
|
}
|
||||||
|
System.out.println(part);
|
||||||
|
}
|
||||||
|
assertTrue(found);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testOpen() throws Exception {
|
||||||
|
HXFDocument.openPackage(sampleFile);
|
||||||
|
HXFDocument.openPackage(complexFile);
|
||||||
|
|
||||||
|
HWPFXML xml;
|
||||||
|
|
||||||
|
// Simple file
|
||||||
|
xml = new HWPFXML(
|
||||||
|
HXFDocument.openPackage(sampleFile)
|
||||||
|
);
|
||||||
|
// Check it has key parts
|
||||||
|
assertNotNull(xml.getDocument());
|
||||||
|
assertNotNull(xml.getDocumentBody());
|
||||||
|
assertNotNull(xml.getStyle());
|
||||||
|
|
||||||
|
// Complex file
|
||||||
|
xml = new HWPFXML(
|
||||||
|
HXFDocument.openPackage(complexFile)
|
||||||
|
);
|
||||||
|
assertNotNull(xml.getDocument());
|
||||||
|
assertNotNull(xml.getDocumentBody());
|
||||||
|
assertNotNull(xml.getStyle());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testMetadataBasics() throws Exception {
|
||||||
|
HWPFXML xml = new HWPFXML(
|
||||||
|
HXFDocument.openPackage(sampleFile)
|
||||||
|
);
|
||||||
|
assertNotNull(xml.getDocumentProperties());
|
||||||
|
|
||||||
|
assertEquals("Microsoft Office Word", xml.getDocumentProperties().getApplication());
|
||||||
|
assertEquals(1315, xml.getDocumentProperties().getCharacters());
|
||||||
|
assertEquals(10, xml.getDocumentProperties().getLines());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user