Custom XML import features; tests and implementation

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@799258 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Paolo Mottadelli 2009-07-30 13:24:57 +00:00
parent c37ef4c7ca
commit 37db132d93
8 changed files with 262 additions and 3 deletions

View File

@ -65,7 +65,7 @@ import org.xml.sax.SAXException;
* The output XML Schema must respect these limitations:
*
* <ul>
* <li> all mandatory elements and attributes must be mapped </li>
* <li> all mandatory elements and attributes must be mapped (enable validation to check this)</li>
*
* <li> no &lt;any&gt; in complex type/element declaration </li>
* <li> no &lt;anyAttribute&gt; attributes declaration </li>

View File

@ -0,0 +1,209 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import java.util.List;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xssf.model.Table;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFMap;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.helpers.XSSFSingleXmlCell;
import org.apache.poi.xssf.usermodel.helpers.XSSFXmlColumnPr;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.sun.org.apache.xml.internal.utils.PrefixResolver;
import com.sun.org.apache.xml.internal.utils.PrefixResolverDefault;
/**
 * Imports data from an external XML document into an XLSX workbook, according to
 * one of the custom XML mappings defined in that workbook.
 *
 * <p>The input XML must respect the following limitations:</p>
 *
 * <ul>
 * <li>the input XML must be valid according to the XML Schema used in the mapping</li>
 * <li>denormalized table mapping is not supported (see Office Open XML Part 4: chapter 3.5.1.7)</li>
 * <li>all the namespaces used in the document must be declared in the root node</li>
 * </ul>
 *
 * @author Roberto Manicardi
 */
public class XSSFImportFromXML {

    private static final POILogger logger = POILogFactory.getLogger(XSSFImportFromXML.class);

    /** The custom XML mapping that drives the import. */
    private final XSSFMap map;

    /**
     * @param map the custom XML mapping whose single cells and tables will receive the imported data
     */
    public XSSFImportFromXML(XSSFMap map){
        this.map = map;
    }

    /**
     * Imports an XML into the XLSX using the Custom XML mapping defined.
     *
     * <p>Single mapped cells whose XPath matches no node in the input are left untouched.</p>
     *
     * @param xmlInputString the XML to import
     * @throws SAXException raised if error occurs during XML parsing, including a failure to
     *         set up the parser or to read the input (the original exception is kept as the cause)
     * @throws XPathExpressionException raised if error occurs during XML navigation
     */
    public void importFromXML(String xmlInputString) throws SAXException, XPathExpressionException{
        Document doc = parseDocument(xmlInputString);
        XPath xpath = createXPath(doc);

        importSingleXmlCells(doc, xpath);
        importTables(doc, xpath);
    }

    /** Parses the (trimmed) input string into a namespace-aware DOM document. */
    private static Document parseDocument(String xmlInputString) throws SAXException {
        try {
            // NOTE(review): the parser is created with default settings; if the XML can come from an
            // untrusted source, consider disabling DTDs / external entities here (XXE hardening).
            DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
            factory.setNamespaceAware(true);
            DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.parse(new InputSource(new StringReader(xmlInputString.trim())));
        } catch (IOException e) {
            // Declared by DocumentBuilder.parse; report it instead of silently importing nothing
            throw new SAXException("Unable to read the XML input", e);
        } catch (ParserConfigurationException e) {
            // Thrown by DocumentBuilderFactory; report it instead of silently importing nothing
            throw new SAXException("Unable to create the XML parser", e);
        }
    }

    /**
     * Creates an XPath evaluator whose prefixes are resolved against the namespace
     * declarations visible on the root element of the given document.
     */
    private static XPath createXPath(Document doc) {
        XPath xpath = XPathFactory.newInstance().newXPath();

        // Setting namespace context to XPath
        // Assuming that the namespace prefix in the mapping xpath is the same as the one used in the document
        final Node root = doc.getDocumentElement();
        NamespaceContext ctx = new NamespaceContext() {
            public String getNamespaceURI(String prefix) {
                // The "xml" prefix is bound by definition and is never declared in the document
                if (javax.xml.XMLConstants.XML_NS_PREFIX.equals(prefix)) {
                    return javax.xml.XMLConstants.XML_NS_URI;
                }
                return root.lookupNamespaceURI(prefix);
            }

            // Dummy implementation - not used!
            public Iterator getPrefixes(String val) {
                return null;
            }

            // Dummy implementation - not used!
            public String getPrefix(String uri) {
                return null;
            }
        };
        xpath.setNamespaceContext(ctx);
        return xpath;
    }

    /** Copies the text of every node mapped to a single cell into that cell. */
    private void importSingleXmlCells(Document doc, XPath xpath) throws XPathExpressionException {
        List<XSSFSingleXmlCell> singleXmlCells = map.getRelatedSingleXMLCell();

        for (XSSFSingleXmlCell singleXmlCell : singleXmlCells) {
            String xpathString = singleXmlCell.getXpath();
            Node result = (Node) xpath.evaluate(xpathString, doc, XPathConstants.NODE);

            if (result == null) {
                // evaluate() yields null when nothing matches (e.g. an optional element is absent)
                logger.log(POILogger.DEBUG, "No node found with xpath " + xpathString + " : cell left unchanged");
                continue;
            }

            String textContent = result.getTextContent();
            logger.log(POILogger.DEBUG, "Extracting with xpath " + xpathString + " : value is '" + textContent + "'");

            XSSFCell cell = singleXmlCell.getReferencedCell();
            logger.log(POILogger.DEBUG, "Setting '" + textContent + "' to cell " + cell.getColumnIndex() + "-" + cell.getRowIndex() + " in sheet " + cell.getSheet().getSheetName());
            cell.setCellValue(textContent);
        }
    }

    /** Fills every mapped table, one sheet row per node matched by the table's common XPath. */
    private void importTables(Document doc, XPath xpath) throws XPathExpressionException {
        List<Table> tables = map.getRelatedTables();

        for (Table table : tables) {
            String commonXPath = table.getCommonXpath();
            NodeList result = (NodeList) xpath.evaluate(commonXPath, doc, XPathConstants.NODESET);

            int rowOffset = table.getStartCellReference().getRow() + 1; // the first row contains the table header
            // presumably column ids are 1-based, hence the -1 -- TODO confirm
            int columnOffset = table.getStartCellReference().getCol() - 1;

            for (int i = 0; i < result.getLength(); i++) {
                // TODO: implement support for denormalized XMLs (see Office Open XML Part 4: chapter 3.5.1.7)
                for (XSSFXmlColumnPr xmlColumnPr : table.getXmlColumnPrs()) {
                    int localColumnId = (int) xmlColumnPr.getId();
                    int rowId = rowOffset + i;
                    int columnId = columnOffset + localColumnId;

                    // Drop the first step of the local XPath, keeping everything from its second '/' on
                    String localXPath = xmlColumnPr.getLocalXPath();
                    localXPath = localXPath.substring(localXPath.substring(1).indexOf('/') + 1);

                    // Build an XPath to select the right node (assuming that the commonXPath != "/")
                    String nodeXPath = commonXPath + "[" + (i + 1) + "]" + localXPath;

                    // TODO: convert the data to the cell format
                    String value = (String) xpath.evaluate(nodeXPath, result.item(i), XPathConstants.STRING);
                    logger.log(POILogger.DEBUG, "Extracting with xpath " + nodeXPath + " : value is '" + value + "'");

                    XSSFRow row = table.getXSSFSheet().getRow(rowId);
                    if (row == null) {
                        row = table.getXSSFSheet().createRow(rowId);
                    }

                    XSSFCell cell = row.getCell(columnId);
                    if (cell == null) {
                        cell = row.createCell(columnId);
                    }

                    logger.log(POILogger.DEBUG, "Setting '" + value + "' to cell " + cell.getColumnIndex() + "-" + cell.getRowIndex() + " in sheet " + table.getXSSFSheet().getSheetName());
                    cell.setCellValue(value.trim());
                }
            }
        }
    }
}

View File

@ -23,7 +23,6 @@ import java.io.OutputStream;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Iterator;
import org.apache.poi.POIXMLDocumentPart;
@ -123,6 +122,19 @@ public class MapInfo extends POIXMLDocumentPart {
return maps.get(id);
}
/**
 * Looks up a mapping by the name stored in its underlying CTMap.
 * Mappings without a name never match. If several mappings carry the
 * requested name, the one visited last while iterating is returned.
 *
 * @param name the mapping name to search for
 * @return the matching XSSFMap, or null if no mapping has that name
 */
public XSSFMap getXSSFMapByName(String name){
    XSSFMap found = null;

    for (XSSFMap candidate : maps.values()) {
        String candidateName = candidate.getCtMap().getName();
        if (candidateName == null || !candidateName.equals(name)) {
            continue;
        }
        // keep scanning: a later match overrides an earlier one
        found = candidate;
    }

    return found;
}
/**
*
* @return all the mappings configured in this document

View File

@ -1297,4 +1297,13 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook, Iterable<X
public Collection<XSSFMap> getCustomXMLMappings(){
    // With a MapInfo part available, delegate to it for the configured mappings
    if (mapInfo != null) {
        return mapInfo.getAllXSSFMaps();
    }
    // No MapInfo: hand back a fresh, empty collection rather than null
    return new ArrayList<XSSFMap>();
}
/**
 * Exposes the helper used to query the custom XML mappings defined in this workbook.
 *
 * @return the MapInfo held by this workbook; may be null when none is set
 */
public MapInfo getMapInfo(){
    return this.mapInfo;
}
}

View File

@ -48,7 +48,7 @@ public class XSSFSingleXmlCell {
}
/**
* Gets the XSSFCell referenced by the R attribute
* Gets the XSSFCell referenced by the R attribute, or creates a new one if the cell doesn't exist
* @return the referenced XSSFCell, null if the cell reference is invalid
*/
public XSSFCell getReferencedCell(){
@ -58,7 +58,14 @@ public class XSSFSingleXmlCell {
CellReference cellReference = new CellReference(singleXmlCell.getR());
XSSFRow row = parent.getXSSFSheet().getRow(cellReference.getRow());
if(row==null){
row = parent.getXSSFSheet().createRow(cellReference.getRow());
}
cell = row.getCell(cellReference.getCol());
if(cell==null){
cell = row.createCell(cellReference.getCol());
}
return cell;

View File

@ -47,6 +47,23 @@ public class XSSFXmlColumnPr {
return ctXmlColumnPr.getMapId();
}
/**
 * @return the XPath stored in the underlying xmlColumnPr element
 */
public String getXPath(){
    String xpath = ctXmlColumnPr.getXpath();
    return xpath;
}
/**
 * Reads the id of the table column this object belongs to
 * (see Office Open XML Part 4: chapter 3.5.1.3).
 *
 * @return the unique identifier of this column
 */
public long getId(){
    long columnId = ctTableColumn.getId();
    return columnId;
}
/**
* If the XPath is, for example, /Node1/Node2/Node3 and /Node1/Node2 is the common XPath for the table, the local XPath is /Node3
*
* @return the local XPath
*/
public String getLocalXPath(){
String localXPath = "";
int numberOfCommonXPathAxis = table.getCommonXpath().split("/").length-1;
@ -59,7 +76,12 @@ public class XSSFXmlColumnPr {
}
/**
 * @return the XML data type declared on the underlying xmlColumnPr element
 */
public Enum getXmlDataType() {
    Enum dataType = ctXmlColumnPr.getXmlDataType();
    return dataType;
}
}