2008-03-30 13:28:29 -04:00
|
|
|
/* ====================================================================
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.xssf.eventusermodel;
|
|
|
|
|
2015-11-21 14:37:26 -05:00
|
|
|
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
|
|
|
|
|
2008-03-30 13:28:29 -04:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
2009-03-18 14:54:01 -04:00
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.HashMap;
|
|
|
|
import java.util.Iterator;
|
2013-08-08 10:04:07 -04:00
|
|
|
import java.util.LinkedList;
|
|
|
|
import java.util.List;
|
2009-03-18 14:54:01 -04:00
|
|
|
import java.util.Map;
|
2008-03-30 13:28:29 -04:00
|
|
|
|
2008-10-01 12:14:39 -04:00
|
|
|
import org.apache.poi.POIXMLException;
|
2009-01-29 07:44:31 -05:00
|
|
|
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
|
|
|
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
2009-03-18 14:54:01 -04:00
|
|
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
2009-01-29 07:44:31 -05:00
|
|
|
import org.apache.poi.openxml4j.opc.PackagePart;
|
|
|
|
import org.apache.poi.openxml4j.opc.PackagePartName;
|
|
|
|
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
2010-11-22 10:16:45 -05:00
|
|
|
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
2009-01-29 07:44:31 -05:00
|
|
|
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
|
|
|
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
2017-01-13 15:55:04 -05:00
|
|
|
import org.apache.poi.util.POILogFactory;
|
|
|
|
import org.apache.poi.util.POILogger;
|
2010-11-22 10:16:45 -05:00
|
|
|
import org.apache.poi.xssf.model.CommentsTable;
|
2009-03-18 14:54:01 -04:00
|
|
|
import org.apache.poi.xssf.model.SharedStringsTable;
|
|
|
|
import org.apache.poi.xssf.model.StylesTable;
|
2011-10-05 17:05:00 -04:00
|
|
|
import org.apache.poi.xssf.model.ThemesTable;
|
2013-08-08 10:04:07 -04:00
|
|
|
import org.apache.poi.xssf.usermodel.XSSFDrawing;
|
2009-03-18 14:54:01 -04:00
|
|
|
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
2013-08-08 10:04:07 -04:00
|
|
|
import org.apache.poi.xssf.usermodel.XSSFShape;
|
2009-03-18 14:54:01 -04:00
|
|
|
import org.apache.xmlbeans.XmlException;
|
|
|
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
2008-10-01 12:14:39 -04:00
|
|
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
|
|
|
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
|
2008-03-30 13:28:29 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* This class makes it easy to get at individual parts
|
|
|
|
* of an OOXML .xlsx file, suitable for low memory sax
|
|
|
|
* parsing or similar.
|
|
|
|
* It makes up the core part of the EventUserModel support
|
|
|
|
* for XSSF.
|
|
|
|
*/
|
|
|
|
public class XSSFReader {
|
2017-01-13 15:55:04 -05:00
|
|
|
|
|
|
|
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
|
|
|
|
|
2009-03-18 14:54:01 -04:00
|
|
|
private OPCPackage pkg;
|
2008-10-01 12:14:39 -04:00
|
|
|
private PackagePart workbookPart;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Creates a new XSSFReader, for the given package
|
|
|
|
*/
|
2009-03-18 14:54:01 -04:00
|
|
|
public XSSFReader(OPCPackage pkg) throws IOException, OpenXML4JException {
|
2008-10-01 12:14:39 -04:00
|
|
|
this.pkg = pkg;
|
|
|
|
|
2008-03-30 13:28:29 -04:00
|
|
|
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
|
|
|
|
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
|
2008-10-01 12:14:39 -04:00
|
|
|
|
2016-02-27 05:23:46 -05:00
|
|
|
// strict OOXML likely not fully supported, see #57699
|
|
|
|
// this code is similar to POIXMLDocumentPart.getPartFromOPCPackage(), but I could not combine it
|
|
|
|
// easily due to different return values
|
|
|
|
if(coreDocRelationship == null) {
|
|
|
|
if (this.pkg.getRelationshipsByType(
|
|
|
|
PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0) != null) {
|
|
|
|
throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
|
|
|
|
}
|
|
|
|
|
|
|
|
throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
|
|
|
|
}
|
|
|
|
|
2008-03-30 13:28:29 -04:00
|
|
|
// Get the part that holds the workbook
|
|
|
|
workbookPart = this.pkg.getPart(coreDocRelationship);
|
2008-10-01 12:14:39 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Opens up the Shared Strings Table, parses it, and
|
|
|
|
* returns a handy object for working with
|
|
|
|
* shared strings.
|
|
|
|
*/
|
|
|
|
public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
|
2008-10-19 08:54:40 -04:00
|
|
|
ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.SHARED_STRINGS.getContentType());
|
2016-12-02 19:49:08 -05:00
|
|
|
return parts.size() == 0 ? null : new SharedStringsTable(parts.get(0));
|
2008-10-01 12:14:39 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Opens up the Styles Table, parses it, and
|
|
|
|
* returns a handy object for working with cell styles
|
|
|
|
*/
|
|
|
|
public StylesTable getStylesTable() throws IOException, InvalidFormatException {
|
2008-10-19 08:54:40 -04:00
|
|
|
ArrayList<PackagePart> parts = pkg.getPartsByContentType( XSSFRelation.STYLES.getContentType());
|
2011-10-05 17:05:00 -04:00
|
|
|
if(parts.size() == 0) return null;
|
|
|
|
|
|
|
|
// Create the Styles Table, and associate the Themes if present
|
2016-12-02 19:49:08 -05:00
|
|
|
StylesTable styles = new StylesTable(parts.get(0));
|
2011-10-05 17:05:00 -04:00
|
|
|
parts = pkg.getPartsByContentType( XSSFRelation.THEME.getContentType());
|
|
|
|
if(parts.size() != 0) {
|
2016-12-02 19:49:08 -05:00
|
|
|
styles.setTheme(new ThemesTable(parts.get(0)));
|
2011-10-05 17:05:00 -04:00
|
|
|
}
|
|
|
|
return styles;
|
2008-10-01 12:14:39 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns an InputStream to read the contents of the
|
|
|
|
* shared strings table.
|
|
|
|
*/
|
|
|
|
public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
|
|
|
|
return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns an InputStream to read the contents of the
|
|
|
|
* styles table.
|
|
|
|
*/
|
|
|
|
public InputStream getStylesData() throws IOException, InvalidFormatException {
|
|
|
|
return XSSFRelation.STYLES.getContents(workbookPart);
|
|
|
|
}
|
|
|
|
|
2011-10-05 17:05:00 -04:00
|
|
|
/**
|
|
|
|
* Returns an InputStream to read the contents of the
|
|
|
|
* themes table.
|
|
|
|
*/
|
|
|
|
public InputStream getThemesData() throws IOException, InvalidFormatException {
|
|
|
|
return XSSFRelation.THEME.getContents(workbookPart);
|
|
|
|
}
|
|
|
|
|
2008-10-01 12:14:39 -04:00
|
|
|
/**
|
|
|
|
* Returns an InputStream to read the contents of the
|
|
|
|
* main Workbook, which contains key overall data for
|
|
|
|
* the file, including sheet definitions.
|
|
|
|
*/
|
|
|
|
public InputStream getWorkbookData() throws IOException, InvalidFormatException {
|
|
|
|
return workbookPart.getInputStream();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns an InputStream to read the contents of the
|
|
|
|
* specified Sheet.
|
|
|
|
* @param relId The relationId of the sheet, from a r:id on the workbook
|
|
|
|
*/
|
|
|
|
public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
|
2008-03-30 13:28:29 -04:00
|
|
|
PackageRelationship rel = workbookPart.getRelationship(relId);
|
|
|
|
if(rel == null) {
|
2008-10-01 12:14:39 -04:00
|
|
|
throw new IllegalArgumentException("No Sheet found with r:id " + relId);
|
2008-03-30 13:28:29 -04:00
|
|
|
}
|
2008-10-01 12:14:39 -04:00
|
|
|
|
2008-03-30 13:28:29 -04:00
|
|
|
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
|
|
|
PackagePart sheet = pkg.getPart(relName);
|
|
|
|
if(sheet == null) {
|
2008-10-01 12:14:39 -04:00
|
|
|
throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
|
2008-03-30 13:28:29 -04:00
|
|
|
}
|
|
|
|
return sheet.getInputStream();
|
2008-10-01 12:14:39 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns an Iterator which will let you get at all the
|
|
|
|
* different Sheets in turn.
|
|
|
|
* Each sheet's InputStream is only opened when fetched
|
|
|
|
* from the Iterator. It's up to you to close the
|
|
|
|
* InputStreams when done with each one.
|
|
|
|
*/
|
|
|
|
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
|
|
|
|
return new SheetIterator(workbookPart);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Iterator over sheet data.
|
|
|
|
*/
|
|
|
|
public static class SheetIterator implements Iterator<InputStream> {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Maps relId and the corresponding PackagePart
|
|
|
|
*/
|
2016-10-19 04:38:19 -04:00
|
|
|
private final Map<String, PackagePart> sheetMap;
|
2008-10-01 12:14:39 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Current CTSheet bean
|
|
|
|
*/
|
|
|
|
private CTSheet ctSheet;
|
2010-11-22 10:16:45 -05:00
|
|
|
|
2008-10-01 12:14:39 -04:00
|
|
|
/**
|
|
|
|
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
|
|
|
|
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
|
|
|
|
* i.e. as they are stored in the underlying package
|
|
|
|
*/
|
2016-10-19 04:38:19 -04:00
|
|
|
private final Iterator<CTSheet> sheetIterator;
|
2008-10-01 12:14:39 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Construct a new SheetIterator
|
|
|
|
*
|
|
|
|
* @param wb package part holding workbook.xml
|
|
|
|
*/
|
|
|
|
private SheetIterator(PackagePart wb) throws IOException {
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
|
|
|
|
*/
|
|
|
|
try {
|
|
|
|
//step 1. Map sheet's relationship Id and the corresponding PackagePart
|
|
|
|
sheetMap = new HashMap<String, PackagePart>();
|
2016-10-19 04:38:19 -04:00
|
|
|
OPCPackage pkg = wb.getPackage();
|
|
|
|
String REL_WORKSHEET = XSSFRelation.WORKSHEET.getRelation();
|
|
|
|
String REL_CHARTSHEET = XSSFRelation.CHARTSHEET.getRelation();
|
2008-10-01 12:14:39 -04:00
|
|
|
for(PackageRelationship rel : wb.getRelationships()){
|
2016-10-19 04:38:19 -04:00
|
|
|
String relType = rel.getRelationshipType();
|
|
|
|
if (relType.equals(REL_WORKSHEET) || relType.equals(REL_CHARTSHEET)) {
|
2008-10-01 12:14:39 -04:00
|
|
|
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
2016-10-19 04:38:19 -04:00
|
|
|
sheetMap.put(rel.getId(), pkg.getPart(relName));
|
2008-10-01 12:14:39 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
|
|
|
|
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
|
2015-11-21 14:37:26 -05:00
|
|
|
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
|
2016-10-20 14:45:03 -04:00
|
|
|
List<CTSheet> validSheets = new ArrayList<CTSheet>();
|
|
|
|
for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
|
|
|
|
//if there's no relationship id, silently skip the sheet
|
2016-11-11 18:22:43 -05:00
|
|
|
String sheetId = ctSheet.getId();
|
|
|
|
if (sheetId != null && sheetId.length() > 0) {
|
2016-10-20 14:45:03 -04:00
|
|
|
validSheets.add(ctSheet);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
sheetIterator = validSheets.iterator();
|
2008-10-01 12:14:39 -04:00
|
|
|
} catch (InvalidFormatException e){
|
|
|
|
throw new POIXMLException(e);
|
|
|
|
} catch (XmlException e){
|
|
|
|
throw new POIXMLException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns <tt>true</tt> if the iteration has more elements.
|
|
|
|
*
|
|
|
|
* @return <tt>true</tt> if the iterator has more elements.
|
|
|
|
*/
|
2016-10-19 04:38:19 -04:00
|
|
|
@Override
|
2008-10-01 12:14:39 -04:00
|
|
|
public boolean hasNext() {
|
|
|
|
return sheetIterator.hasNext();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns input stream of the next sheet in the iteration
|
|
|
|
*
|
|
|
|
* @return input stream of the next sheet in the iteration
|
|
|
|
*/
|
2016-10-19 04:38:19 -04:00
|
|
|
@Override
|
2008-10-01 12:14:39 -04:00
|
|
|
public InputStream next() {
|
|
|
|
ctSheet = sheetIterator.next();
|
|
|
|
|
|
|
|
String sheetId = ctSheet.getId();
|
|
|
|
try {
|
|
|
|
PackagePart sheetPkg = sheetMap.get(sheetId);
|
|
|
|
return sheetPkg.getInputStream();
|
|
|
|
} catch(IOException e) {
|
|
|
|
throw new POIXMLException(e);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns name of the current sheet
|
|
|
|
*
|
|
|
|
* @return name of the current sheet
|
|
|
|
*/
|
|
|
|
public String getSheetName() {
|
|
|
|
return ctSheet.getName();
|
|
|
|
}
|
2010-11-22 10:16:45 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns the comments associated with this sheet,
|
|
|
|
* or null if there aren't any
|
|
|
|
*/
|
|
|
|
public CommentsTable getSheetComments() {
|
|
|
|
PackagePart sheetPkg = getSheetPart();
|
|
|
|
|
|
|
|
// Do we have a comments relationship? (Only ever one if so)
|
|
|
|
try {
|
|
|
|
PackageRelationshipCollection commentsList =
|
|
|
|
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
|
|
|
|
if(commentsList.size() > 0) {
|
|
|
|
PackageRelationship comments = commentsList.getRelationship(0);
|
|
|
|
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
|
|
|
|
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
|
2016-12-02 19:49:08 -05:00
|
|
|
return new CommentsTable(commentsPart);
|
2010-11-22 10:16:45 -05:00
|
|
|
}
|
|
|
|
} catch (InvalidFormatException e) {
|
|
|
|
return null;
|
|
|
|
} catch (IOException e) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2013-08-08 10:04:07 -04:00
|
|
|
/**
|
|
|
|
* Returns the shapes associated with this sheet,
|
|
|
|
* an empty list or null if there is an exception
|
|
|
|
*/
|
|
|
|
public List<XSSFShape> getShapes() {
|
|
|
|
PackagePart sheetPkg = getSheetPart();
|
|
|
|
List<XSSFShape> shapes= new LinkedList<XSSFShape>();
|
|
|
|
// Do we have a comments relationship? (Only ever one if so)
|
|
|
|
try {
|
|
|
|
PackageRelationshipCollection drawingsList = sheetPkg.getRelationshipsByType(XSSFRelation.DRAWINGS.getRelation());
|
|
|
|
for (int i = 0; i < drawingsList.size(); i++){
|
|
|
|
PackageRelationship drawings = drawingsList.getRelationship(i);
|
|
|
|
PackagePartName drawingsName = PackagingURIHelper.createPartName(drawings.getTargetURI());
|
|
|
|
PackagePart drawingsPart = sheetPkg.getPackage().getPart(drawingsName);
|
2017-01-13 15:55:04 -05:00
|
|
|
if (drawingsPart == null) {
|
|
|
|
//parts can go missing; Excel ignores them silently -- TIKA-2134
|
|
|
|
LOGGER.log(POILogger.WARN, "Missing drawing: "+drawingsName +". Skipping it.");
|
|
|
|
continue;
|
|
|
|
}
|
2016-12-02 19:49:08 -05:00
|
|
|
XSSFDrawing drawing = new XSSFDrawing(drawingsPart);
|
2013-08-08 10:04:07 -04:00
|
|
|
for (XSSFShape shape : drawing.getShapes()){
|
|
|
|
shapes.add(shape);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (XmlException e){
|
|
|
|
return null;
|
|
|
|
} catch (InvalidFormatException e) {
|
|
|
|
return null;
|
|
|
|
} catch (IOException e) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return shapes;
|
|
|
|
}
|
|
|
|
|
2010-11-22 10:16:45 -05:00
|
|
|
public PackagePart getSheetPart() {
|
|
|
|
String sheetId = ctSheet.getId();
|
|
|
|
return sheetMap.get(sheetId);
|
|
|
|
}
|
2008-10-01 12:14:39 -04:00
|
|
|
|
2010-11-22 10:16:45 -05:00
|
|
|
/**
|
|
|
|
* We're read only, so remove isn't supported
|
|
|
|
*/
|
2016-10-19 04:38:19 -04:00
|
|
|
@Override
|
2008-10-01 12:14:39 -04:00
|
|
|
public void remove() {
|
|
|
|
throw new IllegalStateException("Not supported");
|
|
|
|
}
|
|
|
|
}
|
2008-03-30 13:28:29 -04:00
|
|
|
}
|