The iterator over sheet data in XSSFReader now returns sheets in logical order, i.e. as they are defined in workbook.xml (previously they were returned in physical order, i.e. as they were stored in the relationship table)

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@700821 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2008-10-01 16:14:39 +00:00
parent baa56c4fc8
commit 0afe659835
3 changed files with 197 additions and 108 deletions

View File

@ -18,20 +18,24 @@ package org.apache.poi.xssf.eventusermodel;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Iterator; import java.util.*;
import org.apache.poi.xssf.model.SharedStringsTable; import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.POIXMLException;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.InvalidFormatException; import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package; import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackagePartName; import org.openxml4j.opc.PackagePartName;
import org.openxml4j.opc.PackageRelationship; import org.openxml4j.opc.PackageRelationship;
import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.PackageRelationshipTypes; import org.openxml4j.opc.PackageRelationshipTypes;
import org.openxml4j.opc.PackagingURIHelper; import org.openxml4j.opc.PackagingURIHelper;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
/** /**
* This class makes it easy to get at individual parts * This class makes it easy to get at individual parts
@ -41,127 +45,186 @@ import org.openxml4j.opc.PackagingURIHelper;
* for XSSF. * for XSSF.
*/ */
public class XSSFReader { public class XSSFReader {
private Package pkg; private Package pkg;
private PackagePart workbookPart; private PackagePart workbookPart;
/** /**
* Creates a new XSSFReader, for the given package * Creates a new XSSFReader, for the given package
*/ */
public XSSFReader(Package pkg) throws IOException, OpenXML4JException { public XSSFReader(Package pkg) throws IOException, OpenXML4JException {
this.pkg = pkg; this.pkg = pkg;
PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType( PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
// Get the part that holds the workbook // Get the part that holds the workbook
workbookPart = this.pkg.getPart(coreDocRelationship); workbookPart = this.pkg.getPart(coreDocRelationship);
} }
/** /**
* Opens up the Shared Strings Table, parses it, and * Opens up the Shared Strings Table, parses it, and
* returns a handy object for working with * returns a handy object for working with
* shared strings. * shared strings.
*/ */
public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException { public SharedStringsTable getSharedStringsTable() throws IOException, InvalidFormatException {
return new SharedStringsTable(getSharedStringsData()); return new SharedStringsTable(getSharedStringsData());
} }
/** /**
* Opens up the Styles Table, parses it, and * Opens up the Styles Table, parses it, and
* returns a handy object for working with cell styles * returns a handy object for working with cell styles
*/ */
public StylesTable getStylesTable() throws IOException, InvalidFormatException { public StylesTable getStylesTable() throws IOException, InvalidFormatException {
return new StylesTable(getStylesData()); return new StylesTable(getStylesData());
} }
/** /**
* Returns an InputStream to read the contents of the * Returns an InputStream to read the contents of the
* shared strings table. * shared strings table.
*/ */
public InputStream getSharedStringsData() throws IOException, InvalidFormatException { public InputStream getSharedStringsData() throws IOException, InvalidFormatException {
return XSSFRelation.SHARED_STRINGS.getContents(workbookPart); return XSSFRelation.SHARED_STRINGS.getContents(workbookPart);
} }
/** /**
* Returns an InputStream to read the contents of the * Returns an InputStream to read the contents of the
* styles table. * styles table.
*/ */
public InputStream getStylesData() throws IOException, InvalidFormatException { public InputStream getStylesData() throws IOException, InvalidFormatException {
return XSSFRelation.STYLES.getContents(workbookPart); return XSSFRelation.STYLES.getContents(workbookPart);
} }
/** /**
* Returns an InputStream to read the contents of the * Returns an InputStream to read the contents of the
* main Workbook, which contains key overall data for * main Workbook, which contains key overall data for
* the file, including sheet definitions. * the file, including sheet definitions.
*/ */
public InputStream getWorkbookData() throws IOException, InvalidFormatException { public InputStream getWorkbookData() throws IOException, InvalidFormatException {
return workbookPart.getInputStream(); return workbookPart.getInputStream();
} }
/** /**
* Returns an InputStream to read the contents of the * Returns an InputStream to read the contents of the
* specified Sheet. * specified Sheet.
* @param relId The relationId of the sheet, from a r:id on the workbook * @param relId The relationId of the sheet, from a r:id on the workbook
*/ */
public InputStream getSheet(String relId) throws IOException, InvalidFormatException { public InputStream getSheet(String relId) throws IOException, InvalidFormatException {
PackageRelationship rel = workbookPart.getRelationship(relId); PackageRelationship rel = workbookPart.getRelationship(relId);
if(rel == null) { if(rel == null) {
throw new IllegalArgumentException("No Sheet found with r:id " + relId); throw new IllegalArgumentException("No Sheet found with r:id " + relId);
} }
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI()); PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
PackagePart sheet = pkg.getPart(relName); PackagePart sheet = pkg.getPart(relName);
if(sheet == null) { if(sheet == null) {
throw new IllegalArgumentException("No data found for Sheet with r:id " + relId); throw new IllegalArgumentException("No data found for Sheet with r:id " + relId);
} }
return sheet.getInputStream(); return sheet.getInputStream();
} }
/** /**
* Returns an Iterator which will let you get at all the * Returns an Iterator which will let you get at all the
* different Sheets in turn. * different Sheets in turn.
* Each sheet's InputStream is only opened when fetched * Each sheet's InputStream is only opened when fetched
* from the Iterator. It's up to you to close the * from the Iterator. It's up to you to close the
* InputStreams when done with each one. * InputStreams when done with each one.
*/ */
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException { public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
return new SheetDataIterator(); return new SheetIterator(workbookPart);
} }
private class SheetDataIterator implements Iterator<InputStream> { /**
private Iterator<PackageRelationship> sheetRels; * Iterator over sheet data.
private SheetDataIterator() throws IOException, InvalidFormatException { */
// Find all the sheets public static class SheetIterator implements Iterator<InputStream> {
PackageRelationshipCollection sheets =
workbookPart.getRelationshipsByType(
XSSFRelation.WORKSHEET.getRelation()
);
sheetRels = sheets.iterator();
}
public boolean hasNext() { /**
return sheetRels.hasNext(); * Maps relId and the corresponding PackagePart
} */
private Map<String, PackagePart> sheetMap;
public InputStream next() { /**
PackageRelationship sheet = sheetRels.next(); * Current CTSheet bean
try { */
PackagePartName relName = PackagingURIHelper.createPartName(sheet.getTargetURI()); private CTSheet ctSheet;
PackagePart sheetPkg = pkg.getPart(relName);
return sheetPkg.getInputStream();
} catch(IOException e) {
throw new RuntimeException(e);
} catch(InvalidFormatException ife) {
throw new RuntimeException(ife);
}
}
public void remove() { /**
throw new IllegalStateException("Not supported"); * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
} * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
} * i.e. as they are stored in the underlying package
*/
private Iterator<CTSheet> sheetIterator;
/**
* Construct a new SheetIterator
*
* @param wb package part holding workbook.xml
*/
private SheetIterator(PackagePart wb) throws IOException {
/**
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
*/
try {
//step 1. Map sheet's relationship Id and the corresponding PackagePart
sheetMap = new HashMap<String, PackagePart>();
for(PackageRelationship rel : wb.getRelationships()){
if(rel.getRelationshipType().equals(XSSFRelation.WORKSHEET.getRelation())){
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
sheetMap.put(rel.getId(), wb.getPackage().getPart(relName));
}
}
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream()).getWorkbook();
sheetIterator = Arrays.asList(wbBean.getSheets().getSheetArray()).iterator();
} catch (InvalidFormatException e){
throw new POIXMLException(e);
} catch (XmlException e){
throw new POIXMLException(e);
}
}
/**
 * Tells whether another sheet is available, by asking the underlying
 * iterator over CTSheet elements.
 *
 * @return <tt>true</tt> if a subsequent call to <tt>next()</tt> can return another sheet
 */
public boolean hasNext() {
    final boolean moreSheets = sheetIterator.hasNext();
    return moreSheets;
}
/**
 * Advances to the next sheet and returns an input stream over its data.
 * The sheet becomes the current one, so <tt>getSheetName()</tt> reports its name.
 * The caller is responsible for closing the returned stream.
 *
 * @return input stream of the next sheet in the iteration
 * @throws POIXMLException if the sheet's relationship id does not resolve to a
 *         package part, or if the part's stream cannot be opened
 */
public InputStream next() {
    ctSheet = sheetIterator.next();

    String sheetId = ctSheet.getId();
    PackagePart sheetPkg = sheetMap.get(sheetId);
    if (sheetPkg == null) {
        // Either workbook.xml references an r:id that has no worksheet relationship,
        // or the relationship points at a part that is missing from the package
        throw new POIXMLException("No data found for Sheet with r:id " + sheetId);
    }
    try {
        return sheetPkg.getInputStream();
    } catch (IOException e) {
        throw new POIXMLException(e);
    }
}
/**
 * Returns the name of the current sheet, i.e. the sheet whose data stream
 * was returned by the most recent call to <tt>next()</tt>.
 *
 * @return name of the current sheet
 * @throws IllegalStateException if <tt>next()</tt> has not been called yet
 */
public String getSheetName() {
    if (ctSheet == null) {
        // There is no current sheet until the iteration has been advanced at least once
        throw new IllegalStateException("next() must be called before getSheetName()");
    }
    return ctSheet.getName();
}
/**
 * Removal of sheets through this iterator is not supported.
 *
 * @throws IllegalStateException always
 */
public void remove() {
    final String reason = "Not supported";
    throw new IllegalStateException(reason);
}
}
} }

View File

@ -109,4 +109,30 @@ public class TestXSSFReader extends TestCase {
} }
assertEquals(3, count); assertEquals(3, count);
} }
/**
 * Check that the sheet iterator returns sheets in the logical order
 * (as they are defined in the workbook.xml)
 */
public void testOrderOfSheets() throws Exception {
    File f = new File(dirName, "reordered_sheets.xlsx");
    Package pkg = Package.open(f.toString());

    XSSFReader r = new XSSFReader(pkg);

    // Sheet names in the order in which the iterator is expected to return them
    String[] sheetNames = {"Sheet4", "Sheet2", "Sheet3", "Sheet1"};
    XSSFReader.SheetIterator it = (XSSFReader.SheetIterator)r.getSheetsData();

    int count = 0;
    while(it.hasNext()) {
        InputStream inp = it.next();
        assertNotNull(inp);
        inp.close();

        // Fail with a clear assertion, not an ArrayIndexOutOfBoundsException,
        // if the iterator yields more sheets than expected
        assertTrue("Unexpected extra sheet: " + it.getSheetName(), count < sheetNames.length);
        assertEquals(sheetNames[count], it.getSheetName());
        count++;
    }
    assertEquals(sheetNames.length, count);
}
} }

Binary file not shown.