60826 -- add initial support for streaming reading of xlsb files.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2017-03-16 18:37:13 +00:00
parent 6fc050a162
commit 730f394261
37 changed files with 2867 additions and 43 deletions

View File

@ -56,6 +56,7 @@ import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xssf.usermodel.XSSFRelation;
@ -244,6 +245,13 @@ public class ExtractorFactory {
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
}
// How about xlsb?
for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
if (rel.getContentType().equals(contentType)) {
return new XSSFBEventBasedExcelExtractor(pkg);
}
}
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
} catch (IOException e) {

View File

@ -19,7 +19,9 @@ package org.apache.poi.xssf;
import org.apache.poi.UnsupportedFileFormatException;
/**
* We don't support .xlsb files, sorry
* We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}.
* As of POI 3.15-beta3, we do support streaming reading of xlsb files
* via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader}
*/
public class XLSBUnsupportedException extends UnsupportedFileFormatException {
private static final long serialVersionUID = 7849681804154571175L;

View File

@ -0,0 +1,71 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
/**
* This class encapsulates what the spec calls a "Cell" object.
* I added "Header" to clarify that this does not contain the contents
* of the cell, only the column number, the style id and the phonetic boolean
*/
@Internal
class XSSFBCellHeader {
    /**
     * Size of the cell header in bytes. {@link #parse} consumes a 4-byte column
     * number followed by a 3-byte style index; the remaining byte is not read
     * here (presumably the flags byte holding the phonetic bit -- TODO confirm).
     */
    public static final int length = 8;

    private int rowNum;
    private int colNum;
    private int styleIdx;
    private boolean showPhonetic;

    /**
     * Reads a cell header from the raw record data and stores the result in
     * the supplied, reusable {@code cell} buffer.
     *
     * @param data raw data
     * @param offset offset at which to start reading the record
     * @param currentRow 0-based current row count
     * @param cell cell buffer to update
     */
    public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) {
        long colNum = LittleEndian.getUInt(data, offset);
        // the 24-bit style index immediately follows the 4-byte column number
        int styleIdx = XSSFBUtils.get24BitInt(data, offset + LittleEndian.INT_SIZE);
        //TODO: range checking
        boolean showPhonetic = false;//TODO: fill this out
        cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic);
    }

    /**
     * Overwrites every field of this header so the instance can be reused
     * for the next cell.
     *
     * @param rowNum 0-based row number
     * @param colNum 0-based column number
     * @param styleIdx style index read from the record
     * @param showPhonetic phonetic flag
     */
    public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) {
        this.rowNum = rowNum;
        this.colNum = colNum;
        this.styleIdx = styleIdx;
        this.showPhonetic = showPhonetic;
    }

    /**
     * @return the column number stored by the last {@link #reset}
     */
    int getColNum() {
        return colNum;
    }

    /**
     * @return the cell address as column letters followed by the 1-based row number
     */
    String formatAddressAsString() {
        return CellReference.convertNumToColString(colNum)+(rowNum+1);
    }

    /**
     * @return the style index stored by the last {@link #reset}
     */
    int getStyleIdx() {
        return styleIdx;
    }
}

View File

@ -0,0 +1,54 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
@Internal
class XSSFBCellRange {
    /** Number of bytes occupied by a range: four consecutive 4-byte integers. */
    public final static int length = 4* LittleEndian.INT_SIZE;

    int firstRow;
    int lastRow;
    int firstCol;
    int lastCol;

    /**
     * Reads a cell range (first row, last row, first column, last column, in
     * that order) from {@code data}. No bounds checking is done on the array.
     *
     * @param data raw bytes
     * @param offset position in {@code data} of the first of the four integers
     * @param cellRange instance to overwrite; a fresh one is allocated when this is null
     * @return the populated (and still mutable) range
     */
    public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) {
        XSSFBCellRange target = (cellRange != null) ? cellRange : new XSSFBCellRange();
        int pos = offset;
        target.firstRow = readInt(data, pos);
        pos += LittleEndian.INT_SIZE;
        target.lastRow = readInt(data, pos);
        pos += LittleEndian.INT_SIZE;
        target.firstCol = readInt(data, pos);
        pos += LittleEndian.INT_SIZE;
        target.lastCol = readInt(data, pos);
        return target;
    }

    /** Reads one unsigned 4-byte value at {@code pos} and converts it to an int. */
    private static int readInt(byte[] data, int pos) {
        return XSSFBUtils.castToInt(LittleEndian.getUInt(data, pos));
    }
}

View File

@ -0,0 +1,112 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.ss.usermodel.ClientAnchor;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.XSSFComment;
@Internal
class XSSFBComment extends XSSFComment {
    /** Message used by every mutator except {@link #setVisible(boolean)}, which appends a period. */
    private static final String READ_ONLY = "XSSFBComment is read only";

    private final CellAddress cellAddress;
    private final String author;
    private final XSSFBRichTextString comment;
    private boolean visible = true;

    /**
     * Builds a read-only comment. The superclass constructor is given null
     * for all three of its arguments; every accessor below is answered from
     * the fields of this class instead.
     *
     * @param cellAddress address of the commented cell
     * @param author author name
     * @param comment plain text of the comment
     */
    XSSFBComment(CellAddress cellAddress, String author, String comment) {
        super(null, null, null);
        this.cellAddress = cellAddress;
        this.author = author;
        this.comment = new XSSFBRichTextString(comment);
    }

    // ---- accessors -------------------------------------------------------

    @Override
    public boolean isVisible() {
        return visible;
    }

    @Override
    public CellAddress getAddress() {
        return cellAddress;
    }

    @Override
    public int getRow() {
        return cellAddress.getRow();
    }

    @Override
    public int getColumn() {
        return cellAddress.getColumn();
    }

    @Override
    public String getAuthor() {
        return author;
    }

    @Override
    public XSSFBRichTextString getString() {
        return comment;
    }

    /**
     * @return always null; no anchor is kept for this comment
     */
    @Override
    public ClientAnchor getClientAnchor() {
        return null;
    }

    // ---- mutators: all rejected with IllegalArgumentException --------------

    @Override
    public void setVisible(boolean visible) {
        throw new IllegalArgumentException(READ_ONLY + ".");
    }

    @Override
    public void setAddress(CellAddress addr) {
        throw new IllegalArgumentException(READ_ONLY);
    }

    @Override
    public void setAddress(int row, int col) {
        throw new IllegalArgumentException(READ_ONLY);
    }

    @Override
    public void setRow(int row) {
        throw new IllegalArgumentException(READ_ONLY);
    }

    @Override
    public void setColumn(int col) {
        throw new IllegalArgumentException(READ_ONLY);
    }

    @Override
    public void setAuthor(String author) {
        throw new IllegalArgumentException(READ_ONLY);
    }

    @Override
    public void setString(RichTextString string) {
        throw new IllegalArgumentException(READ_ONLY);
    }
}

View File

@ -0,0 +1,113 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.TreeMap;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
@Internal
public class XSSFBCommentsTable extends XSSFBParser {
    //comments keyed by cell address, ordered by row and then by column
    private final Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());
    private final Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>();
    private final List<String> authors = new ArrayList<String>();

    //these are all used only during parsing, and they are mutable!
    private int authorId = -1;
    private CellAddress cellAddress = null;
    private XSSFBCellRange cellRange = null;
    private String comment = null;
    private final StringBuilder authorBuffer = new StringBuilder();

    /**
     * Parses the whole comments stream and then fills the address queue from
     * the sorted comment keys.
     *
     * @param is stream of the binary comments part
     * @throws IOException if reading the stream fails
     */
    public XSSFBCommentsTable(InputStream is) throws IOException {
        super(is);
        parse();
        commentAddresses.addAll(comments.keySet());
    }

    /**
     * Collects authors and assembles one comment per
     * BrtBeginComment / BrtCommentText / BrtEndComment sequence.
     * Record types other than the four handled here are ignored.
     *
     * @param id record id
     * @param data record payload
     */
    @Override
    public void handleRecord(int id, byte[] data) throws XSSFBParseException {
        XSSFBRecordType recordType = XSSFBRecordType.lookup(id);
        switch (recordType) {
            case BrtBeginComment:
                //author index first, then the cell range the comment is attached to
                authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data));
                cellRange = XSSFBCellRange.parse(data, LittleEndian.INT_SIZE, cellRange);
                //for strict parsing; confirm that firstRow==lastRow and firstCol==lastCol (2.4.28)
                cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol);
                break;
            case BrtCommentText:
                XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0);
                comment = xssfbRichStr.getString();
                break;
            case BrtEndComment:
                comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment));
                authorId = -1;
                cellAddress = null;
                //reset the text as well, so that a following comment that has no
                //BrtCommentText record cannot inherit this comment's text
                comment = null;
                break;
            case BrtCommentAuthor:
                authorBuffer.setLength(0);
                XSSFBUtils.readXLWideString(data, 0, authorBuffer);
                authors.add(authorBuffer.toString());
                break;
            default:
                break;
        }
    }

    /**
     * @return the queue of comment addresses filled by the constructor;
     * this is the internal, mutable queue, not a copy
     */
    public Queue<CellAddress> getAddresses() {
        return commentAddresses;
    }

    /**
     * @param cellAddress address to look up
     * @return the comment stored for that address, or null if the address is
     * null or has no comment
     */
    public XSSFBComment get(CellAddress cellAddress) {
        if (cellAddress == null) {
            return null;
        }
        return comments.get(cellAddress);
    }

    /** Orders addresses by row first and by column second. */
    private final static class CellAddressComparator implements Comparator<CellAddress> {
        @Override
        public int compare(CellAddress o1, CellAddress o2) {
            if (o1.getRow() < o2.getRow()) {
                return -1;
            } else if (o1.getRow() > o2.getRow()) {
                return 1;
            }
            if (o1.getColumn() < o2.getColumn()) {
                return -1;
            } else if (o1.getColumn() > o2.getColumn()) {
                return 1;
            }
            return 0;
        }
    }
}

View File

@ -0,0 +1,75 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
@Internal
class XSSFBHeaderFooter {
    private final String headerFooterTypeLabel;
    private final boolean isHeader;
    private String rawString;
    private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper();

    /**
     * @param headerFooterTypeLabel label identifying which header or footer this is
     * @param isHeader true for a header, false for a footer
     */
    XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) {
        this.headerFooterTypeLabel = headerFooterTypeLabel;
        this.isHeader = isHeader;
    }

    String getHeaderFooterTypeLabel() {
        return headerFooterTypeLabel;
    }

    boolean isHeader() {
        return isHeader;
    }

    /**
     * @return the string exactly as it was handed to {@link #setRawString(String)}
     */
    String getRawString() {
        return rawString;
    }

    void setRawString(String rawString) {
        this.rawString = rawString;
    }

    /**
     * Extracts the left, center and right sections of the raw string and
     * joins the non-empty ones, in that order, with a single space.
     *
     * @return the joined sections; empty if none of them has content
     */
    String getString() {
        String[] sections = {
            headerFooterHelper.getLeftSection(rawString),
            headerFooterHelper.getCenterSection(rawString),
            headerFooterHelper.getRightSection(rawString)
        };
        StringBuilder joined = new StringBuilder();
        for (String section : sections) {
            if (section == null || section.length() == 0) {
                continue;
            }
            if (joined.length() > 0) {
                joined.append(" ");
            }
            joined.append(section);
        }
        return joined.toString();
    }
}

View File

@ -0,0 +1,87 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal;
@Internal
class XSSFBHeaderFooters {
    /**
     * Number of leading bytes skipped before the first string.
     * NOTE(review): presumably the 2-byte flags field (odd/even, first page,
     * scale-with-doc, align-margins) -- it is not decoded here; confirm against the spec.
     */
    private static final int STRINGS_OFFSET = 2;

    private XSSFBHeaderFooter header;
    private XSSFBHeaderFooter footer;
    private XSSFBHeaderFooter headerEven;
    private XSSFBHeaderFooter footerEven;
    private XSSFBHeaderFooter headerFirst;
    private XSSFBHeaderFooter footerFirst;

    /**
     * Builds the six header/footer holders and fills them from consecutive
     * nullable wide strings in {@code data}, in the order: header, footer,
     * even header, even footer, first header, first footer.
     *
     * @param data raw record payload
     * @return the populated holder; an entry whose string was not read keeps a null raw string
     */
    public static XSSFBHeaderFooters parse(byte[] data) {
        XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters();
        xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true);
        xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false);
        xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true);
        xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false);
        xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true);
        xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false);

        XSSFBHeaderFooter[] inRecordOrder = {
            xssfbHeaderFooter.header,
            xssfbHeaderFooter.footer,
            xssfbHeaderFooter.headerEven,
            xssfbHeaderFooter.footerEven,
            xssfbHeaderFooter.headerFirst,
            xssfbHeaderFooter.footerFirst
        };
        int offset = STRINGS_OFFSET;
        for (XSSFBHeaderFooter headerFooter : inRecordOrder) {
            offset += readHeaderFooter(data, offset, headerFooter);
        }
        return xssfbHeaderFooter;
    }

    /**
     * Reads one nullable wide string at {@code offset} into {@code headerFooter}.
     *
     * @return the number of bytes consumed, or 0 (leaving the raw string
     * untouched) when there is not enough data left
     */
    private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) {
        //NOTE(review): ">=" also skips a string whose 4-byte length prefix ends
        //exactly at the end of data -- confirm whether ">" was intended
        if (offset + 4 >= data.length) {
            return 0;
        }
        StringBuilder sb = new StringBuilder();
        int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb);
        headerFooter.setRawString(sb.toString());
        return bytesRead;
    }

    public XSSFBHeaderFooter getHeader() {
        return header;
    }

    public XSSFBHeaderFooter getFooter() {
        return footer;
    }

    public XSSFBHeaderFooter getHeaderEven() {
        return headerEven;
    }

    public XSSFBHeaderFooter getFooterEven() {
        return footerEven;
    }

    public XSSFBHeaderFooter getHeaderFirst() {
        return headerFirst;
    }

    public XSSFBHeaderFooter getFooterFirst() {
        return footerFirst;
    }
}

View File

@ -0,0 +1,181 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.ss.util.CellRangeUtil;
import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.XSSFRelation;
@Internal
public class XSSFBHyperlinksTable {
    //only BrtHLink records are handed to the scraper; everything else is skipped
    private final static BitSet RECORDS = new BitSet();
    static {
        RECORDS.set(XSSFBRecordType.BrtHLink.getId());
    }

    private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>();

    //cache the relId to hyperlink url from the sheet's .rels
    private final Map<String, String> relIdToHyperlink = new HashMap<String, String>();

    /**
     * Loads the hyperlink targets from the sheet's relationships and then
     * scans the sheet part for hyperlink records. The sheet stream opened
     * here is closed before the constructor returns, whether or not parsing
     * succeeds.
     *
     * @param sheetPart the binary sheet part
     * @throws IOException if the sheet stream cannot be opened, read or closed
     */
    public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException {
        //load the urls from the sheet .rels
        loadUrlsFromSheetRels(sheetPart);
        //now load the hyperlinks from the bottom of the sheet
        InputStream sheetStream = sheetPart.getInputStream();
        try {
            HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetStream);
            scraper.parse();
        } finally {
            sheetStream.close();
        }
    }

    /**
     *
     * @return a map of the hyperlinks. The key is the top left cell address in their CellRange
     */
    public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() {
        Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap =
                new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator());
        for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) {
            CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(),
                    hyperlinkRecord.getCellRangeAddress().getFirstColumn());
            List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress);
            if (list == null) {
                list = new ArrayList<XSSFHyperlinkRecord>();
                hyperlinkMap.put(cellAddress, list);
            }
            list.add(hyperlinkRecord);
        }
        return hyperlinkMap;
    }

    /**
     *
     * @param cellAddress cell address to find
     * @return null if not a hyperlink
     */
    public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) {
        List<XSSFHyperlinkRecord> overlapping = null;
        //treat the single cell as a 1x1 range so it can be intersected with each record's range
        CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(),
                cellAddress.getRow(),
                cellAddress.getColumn(),
                cellAddress.getColumn());
        for (XSSFHyperlinkRecord record : hyperlinkRecords) {
            if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) {
                if (overlapping == null) {
                    overlapping = new ArrayList<XSSFHyperlinkRecord>();
                }
                overlapping.add(record);
            }
        }
        return overlapping;
    }

    /**
     * Fills the relId-to-url cache from the sheet's hyperlink relationships.
     * If the relationships cannot be read, the cache is left with whatever
     * was loaded so far.
     */
    private void loadUrlsFromSheetRels(PackagePart sheetPart) {
        try {
            for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
                relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString());
            }
        } catch (InvalidFormatException e) {
            //swallow
        }
    }

    /** Parser that turns every BrtHLink record of a sheet into a hyperlink record. */
    private class HyperlinkSheetScraper extends XSSFBParser {
        //both buffers are reused from record to record
        private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange();
        private final StringBuilder xlWideStringBuffer = new StringBuilder();

        HyperlinkSheetScraper(InputStream is) {
            super(is, RECORDS);
        }

        /**
         * Reads, in order: the cell range, the relationship id, the location,
         * the tooltip and the display string. When the location is empty it
         * is replaced by the url cached for the relationship id.
         */
        @Override
        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
            if (recordType != XSSFBRecordType.BrtHLink.getId()) {
                return;
            }
            int offset = 0;
            hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange);
            offset += XSSFBCellRange.length;

            xlWideStringBuffer.setLength(0);
            offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer);
            String relId = xlWideStringBuffer.toString();

            xlWideStringBuffer.setLength(0);
            offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
            String location = xlWideStringBuffer.toString();

            xlWideStringBuffer.setLength(0);
            offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
            String toolTip = xlWideStringBuffer.toString();

            xlWideStringBuffer.setLength(0);
            XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
            String display = xlWideStringBuffer.toString();

            CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol);

            String url = relIdToHyperlink.get(relId);
            if (location == null || location.length() == 0) {
                location = url;
            }

            hyperlinkRecords.add(
                    new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display)
            );
        }
    }

    /** Orders addresses by row first and by column second. */
    private static class TopLeftCellAddressComparator implements Comparator<CellAddress> {
        @Override
        public int compare(CellAddress o1, CellAddress o2) {
            if (o1.getRow() < o2.getRow()) {
                return -1;
            } else if (o1.getRow() > o2.getRow()) {
                return 1;
            }
            if (o1.getColumn() < o2.getColumn()) {
                return -1;
            } else if (o1.getColumn() > o2.getColumn()) {
                return 1;
            }
            return 0;
        }
    }
}

View File

@ -0,0 +1,28 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
/**
 * Unchecked exception raised when an xssfb (binary xlsx) stream cannot be parsed.
 */
public class XSSFBParseException extends RuntimeException {

    /**
     * @param message description of what went wrong
     */
    public XSSFBParseException(String message) {
        super(message);
    }
}

View File

@ -0,0 +1,105 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.BitSet;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianInputStream;
/**
* Experimental parser for Microsoft's ooxml xssfb format.
* Not thread safe, obviously. Need to create a new one
* for each thread.
*/
@Internal
public abstract class XSSFBParser {
    private final LittleEndianInputStream is;
    //ids of the records to pass to handleRecord; null means "handle every record"
    private final BitSet records;

    /**
     * Creates a parser that hands every record to {@link #handleRecord(int, byte[])}.
     *
     * @param is stream to parse
     */
    public XSSFBParser(InputStream is) {
        this.is = new LittleEndianInputStream(is);
        records = null;
    }

    /**
     * Creates a parser that only hands over records whose id is set in
     * {@code bitSet}; the payload of every other record is skipped.
     *
     * @param is stream to parse
     * @param bitSet record ids to handle
     */
    XSSFBParser(InputStream is, BitSet bitSet) {
        this.is = new LittleEndianInputStream(is);
        records = bitSet;
    }

    /**
     * Reads records until the stream is exhausted.
     *
     * @throws IOException if reading from the stream fails
     */
    public void parse() throws IOException {
        while (true) {
            int bInt = is.read();
            if (bInt == -1) {
                return;
            }
            readNext((byte) bInt);
        }
    }

    /**
     * Decodes one record: a one- or two-byte id, a length of up to four
     * bytes with seven payload bits each, and then the payload, which is
     * either handed to the handler or skipped.
     *
     * @param b1 first byte of the record id, already read by {@link #parse()}
     */
    private void readNext(byte b1) throws IOException {
        int recordId = 0;

        //if highest bit == 1
        if ((b1 >> 7 & 1) == 1) {
            byte b2 = is.readByte();
            b1 &= ~(1<<7); //unset highest bit
            b2 &= ~(1<<7); //unset highest bit (if it exists?)
            recordId = (128*(int)b2)+(int)b1;
        } else {
            recordId = (int)b1;
        }

        long recordLength = 0;
        int i = 0;
        boolean halt = false;
        while (i < 4 && ! halt) {
            byte b = is.readByte();
            halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue
            b &= ~(1<<7);
            recordLength += (int)b << (i*7); //multiply by 128^i
            i++;
        }

        if (records == null || records.get(recordId)) {
            //add sanity check for length?
            byte[] buff = new byte[(int) recordLength];
            is.readFully(buff);
            handleRecord(recordId, buff);
        } else {
            skipFully(recordLength);
        }
    }

    /**
     * Skips exactly {@code toSkip} bytes. A single {@code skip} call is
     * allowed to skip fewer bytes than requested without the stream being
     * at its end, so keep going until everything is skipped and only report
     * a failure once a read confirms the end of the stream.
     *
     * @throws XSSFBParseException if the stream ends before {@code toSkip} bytes were skipped
     */
    private void skipFully(long toSkip) throws IOException {
        long remaining = toSkip;
        while (remaining > 0) {
            long skipped = is.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
                continue;
            }
            //no progress: probe with a one-byte read to tell "short skip" from end of stream
            if (is.read() == -1) {
                throw new XSSFBParseException("End of file reached before expected.\t"+
                        "Tried to skip "+toSkip + ", but only skipped "+(toSkip - remaining));
            }
            remaining--;
        }
    }

    //It hurts, hurts, hurts to create a new byte array for every record.
    //However, on a large Excel spreadsheet, this parser was 1/3 faster than
    //the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf).
    //The code is far cleaner to have the parser read all
    //of the data rather than having every component promise that it read
    //the correct amount.
    /**
     * Called once for every record that is not skipped.
     *
     * @param recordType numeric id of the record
     * @param data complete payload of the record, in a freshly allocated array
     */
    abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException;
}

View File

@ -0,0 +1,92 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal;
@Internal
public enum XSSFBRecordType {

    BrtCellBlank(1),
    BrtCellRk(2),
    BrtCellError(3),
    BrtCellBool(4),
    BrtCellReal(5),
    BrtCellSt(6),
    BrtCellIsst(7),
    BrtFmlaString(8),
    BrtFmlaNum(9),
    BrtFmlaBool(10),
    BrtFmlaError(11),
    BrtRowHdr(0),
    BrtCellRString(62),
    BrtBeginSheet(129),
    BrtWsProp(147),
    BrtWsDim(148),
    BrtColInfo(60),
    BrtBeginSheetData(145),
    BrtEndSheetData(146),
    BrtHLink(494),
    BrtBeginHeaderFooter(479),

    //comments
    BrtBeginCommentAuthors(630),
    BrtEndCommentAuthors(631),
    BrtCommentAuthor(632),
    BrtBeginComment(635),
    BrtCommentText(637),
    BrtEndComment(636),

    //styles table
    BrtXf(47),
    BrtFmt(44),
    BrtBeginFmts(615),
    BrtEndFmts(616),
    BrtBeginCellXFs(617),
    BrtEndCellXFs(618),
    BrtBeginCellStyleXFS(626),
    BrtEndCellStyleXFS(627),

    //stored strings table
    BrtSstItem(19), //stored strings items
    BrtBeginSst(159), //stored strings begin sst
    BrtEndSst(160), //stored strings end sst
    BrtBundleSh(156), //defines worksheet in wb part

    Unimplemented(-1);

    //id -> constant, built once so that lookup() does not have to copy and
    //scan values() for every record; fully qualified names are used so that
    //no additional imports are needed
    private static final java.util.Map<Integer, XSSFBRecordType> TYPES_BY_ID =
            new java.util.HashMap<Integer, XSSFBRecordType>();
    static {
        for (XSSFBRecordType type : values()) {
            //first declaration wins, matching a front-to-back scan of values()
            if (!TYPES_BY_ID.containsKey(type.id)) {
                TYPES_BY_ID.put(type.id, type);
            }
        }
    }

    private final int id;

    XSSFBRecordType(int id) {
        this.id = id;
    }

    /**
     * @return the numeric record id of this type
     */
    public int getId() {
        return id;
    }

    /**
     * Finds the record type for a numeric record id.
     *
     * @param id record id
     * @return the matching constant, or {@link #Unimplemented} if no constant has that id
     */
    public static XSSFBRecordType lookup(int id) {
        XSSFBRecordType type = TYPES_BY_ID.get(id);
        return type == null ? Unimplemented : type;
    }
}

View File

@ -0,0 +1,85 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.POIXMLRelation;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.Internal;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation}
* because of conflicts with regular ooxml relations.
* If we failed to break this into a separate class, in the cases of SharedStrings and Styles,
* 2 parts would exist, and &quot;Packages shall not contain equivalent part names...&quot;
* <p>
* Also, we need to avoid the possibility of breaking the marshalling process for xml.
*/
@Internal
public class XSSFBRelation extends POIXMLRelation {
    private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class);

    /** Relation describing the binary shared strings part, default name {@code /xl/sharedStrings.bin}. */
    static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation(
            "application/vnd.ms-excel.sharedStrings",
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings",
            "/xl/sharedStrings.bin",
            null
    );

    /** Relation describing the binary styles part, default name {@code /xl/styles.bin}. */
    public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation(
            "application/vnd.ms-excel.styles",
            PackageRelationshipTypes.STYLE_PART,
            "/xl/styles.bin",
            null
    );

    private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
        super(type, rel, defaultName, cls);
    }

    /**
     * Fetches the InputStream to read the contents, based
     * on the specified core part, for which we are defined
     * as a suitable relationship.
     * <p>
     * Only the first relationship of this relation's type is followed.
     *
     * @param corePart part whose relationships are searched
     * @return stream over the related part, or <code>null</code> (after logging a warning)
     *  if the core part has no such relationship or the relationship's target part
     *  is not present in the package. The caller is responsible for closing the stream.
     * @throws IOException if the stream cannot be opened
     * @throws InvalidFormatException if the relationship target is not a valid part name
     */
    public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
        PackageRelationshipCollection prc =
                corePart.getRelationshipsByType(getRelation());
        Iterator<PackageRelationship> it = prc.iterator();
        if (it.hasNext()) {
            PackageRelationship rel = it.next();
            PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
            PackagePart part = corePart.getPackage().getPart(relName);
            if (part != null) {
                return part.getInputStream();
            }
            // a dangling relationship: report it the same way as a missing relationship
            // instead of failing with a NullPointerException
            log.log(POILogger.WARN, "Relationship target " + relName.getName() + " is missing from the package");
            return null;
        }
        log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
        return null;
    }
}

View File

@ -0,0 +1,47 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal;
@Internal
class XSSFBRichStr {

    /**
     * Builds a rich string from a record payload.
     * <p>
     * The byte at <code>offset</code> is a flags byte that is currently not interpreted;
     * the text itself is read as a wide string starting at <code>offset+1</code>.
     * The phonetic string of the result is always empty.
     *
     * @param bytes record payload
     * @param offset position of the flags byte within <code>bytes</code>
     * @return the rich string holding the decoded text
     * @throws XSSFBParseException if the wide string cannot be read
     */
    public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException {
        StringBuilder sb = new StringBuilder();
        XSSFBUtils.readXLWideString(bytes, offset + 1, sb);
        //TODO: parse formatting runs and phonetic strings.
        return new XSSFBRichStr(sb.toString(), "");
    }

    private final String string;
    private final String phoneticString;

    XSSFBRichStr(String string, String phoneticString) {
        this.string = string;
        this.phoneticString = phoneticString;
    }

    /**
     * @return the text of this string
     */
    public String getString() {
        return string;
    }
}

View File

@ -0,0 +1,80 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
 * Thin String-backed stand-in for {@link XSSFRichTextString} so that plain text can be
 * used in a Comment.
 * <p>
 * Only {@link #getString()} and {@link #length()} report real data. Every formatting
 * operation is a no-op and the formatting-run queries always answer 0.
 */
@Internal
class XSSFBRichTextString extends XSSFRichTextString {

    private final String string;

    XSSFBRichTextString(String string) {
        this.string = string;
    }

    // ---- the text itself ----

    /** @return the wrapped text, exactly as passed to the constructor */
    @Override
    public String getString() {
        return string;
    }

    /** @return number of chars in the wrapped text */
    @Override
    public int length() {
        return string.length();
    }

    // ---- formatting queries: no formatting is tracked ----

    /** @return always 0 */
    @Override
    public int numFormattingRuns() {
        return 0;
    }

    /** @return always 0 */
    @Override
    public int getIndexOfFormattingRun(int index) {
        return 0;
    }

    // ---- formatting mutators: deliberately ignored ----

    /** Ignored. */
    @Override
    public void applyFont(int startIndex, int endIndex, short fontIndex) {
    }

    /** Ignored. */
    @Override
    public void applyFont(int startIndex, int endIndex, Font font) {
    }

    /** Ignored. */
    @Override
    public void applyFont(Font font) {
    }

    /** Ignored. */
    @Override
    public void applyFont(short fontIndex) {
    }

    /** Ignored. */
    @Override
    public void clearFormatting() {
    }
}

View File

@ -0,0 +1,137 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.xml.sax.SAXException;
/**
 * Read-only shared strings table of an xlsb package. The whole binary part is parsed
 * eagerly in the constructor and the text of every string item is kept in memory.
 */
@Internal
public class XSSFBSharedStringsTable {

    /**
     * An integer representing the total count of strings in the workbook. This count does not
     * include any numbers, it counts only the total of text strings in the workbook.
     */
    private int count;

    /**
     * An integer representing the total count of unique strings in the Shared String Table.
     * A string is unique even if it is a copy of another string, but has different formatting applied
     * at the character level.
     */
    private int uniqueCount;

    /**
     * The shared strings table.
     */
    private List<String> strings = new ArrayList<String>();

    /**
     * Reads the first part of the package that has the binary shared-strings content type.
     * If the package has no such part the table stays empty.
     *
     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
     * @throws IOException If reading the data from the package fails.
     * @throws SAXException never thrown by the binary reader; kept in the signature for compatibility.
     */
    public XSSFBSharedStringsTable(OPCPackage pkg)
            throws IOException, SAXException {
        List<PackagePart> parts =
                pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType());

        // Some workbooks have no shared strings table.
        if (!parts.isEmpty()) {
            PackagePart sstPart = parts.get(0);
            readFrom(sstPart.getInputStream());
        }
    }

    /**
     * Like POIXMLDocumentPart constructor: reads the table from the given part.
     *
     * @param part the binary shared-strings part
     * @throws IOException If reading the data from the part fails.
     * @throws SAXException never thrown by the binary reader; kept in the signature for compatibility.
     */
    XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException {
        readFrom(part.getInputStream());
    }

    /**
     * Parses all records of the stream into this table and then closes the stream.
     * The stream is closed even when parsing fails, so the part's stream is not leaked.
     */
    private void readFrom(InputStream inputStream) throws IOException {
        try {
            SSTBinaryReader reader = new SSTBinaryReader(inputStream);
            reader.parse();
        } finally {
            inputStream.close();
        }
    }

    /**
     * @return the live internal list of strings, in the order their records were read
     */
    public List<String> getItems() {
        return strings;
    }

    /**
     * @param i zero-based index into the table
     * @return the string stored at that index
     * @throws IndexOutOfBoundsException if <code>i</code> is outside the table
     */
    public String getEntryAt(int i) {
        return strings.get(i);
    }

    /**
     * Return an integer representing the total count of strings in the workbook. This count does not
     * include any numbers, it counts only the total of text strings in the workbook.
     *
     * @return the total count of strings in the workbook
     */
    public int getCount() {
        return this.count;
    }

    /**
     * Returns an integer representing the total count of unique strings in the Shared String Table.
     * A string is unique even if it is a copy of another string, but has different formatting applied
     * at the character level.
     *
     * @return the total count of unique strings in the workbook
     */
    public int getUniqueCount() {
        return this.uniqueCount;
    }

    /**
     * Record handler that fills the enclosing table: string items are appended to
     * {@link #strings}, the begin-table record supplies both counts. Other records are ignored.
     */
    private class SSTBinaryReader extends XSSFBParser {

        SSTBinaryReader(InputStream is) {
            super(is);
        }

        @Override
        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
            XSSFBRecordType type = XSSFBRecordType.lookup(recordType);

            switch (type) {
                case BrtSstItem:
                    XSSFBRichStr rstr = XSSFBRichStr.build(data, 0);
                    strings.add(rstr.getString());
                    break;
                case BrtBeginSst:
                    // two unsigned 32-bit little-endian counters: total count, then unique count
                    count = (int) LittleEndian.getUInt(data, 0);
                    uniqueCount = (int) LittleEndian.getUInt(data, 4);
                    break;
                default:
                    break;
            }
        }
    }
}

View File

@ -0,0 +1,329 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.InputStream;
import java.util.Queue;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
 * Streaming handler for the records of one binary worksheet part. Cell, row and
 * header/footer records are translated into calls on a
 * {@link XSSFSheetXMLHandler.SheetContentsHandler}; comments on cells that have no
 * cell record of their own are reported as cells with a <code>null</code> value.
 */
@Internal
public class XSSFBSheetHandler extends XSSFBParser {

    /** Sentinel for {@link #checkMissedComments(int)}: flush every comment still queued. */
    private final static int CHECK_ALL_ROWS = -1;

    private final XSSFBSharedStringsTable stringsTable;
    private final XSSFSheetXMLHandler.SheetContentsHandler handler;
    private final XSSFBStylesTable styles;
    private final XSSFBCommentsTable comments;
    private final DataFormatter dataFormatter;
    private final boolean formulasNotResults;//TODO: implement this

    private int lastEndedRow = -1;
    private int lastStartedRow = -1;
    private int currentRow = 0;
    /** Scratch buffer for RK decoding; only bytes 4-7 are ever written, bytes 0-3 stay zero. */
    private byte[] rkBuffer = new byte[8];
    private XSSFBCellRange hyperlinkCellRange = null;
    private StringBuilder xlWideStringBuffer = new StringBuilder();

    /** Reused for every cell record; holds the header of the cell currently being handled. */
    private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader();

    /**
     * @param is stream over the binary worksheet part
     * @param styles styles table used to resolve number format strings
     * @param comments comments of the sheet, may be <code>null</code>
     * @param strings shared strings table used to resolve shared-string cells
     * @param sheetContentsHandler receiver of the row/cell/header-footer callbacks
     * @param dataFormatter formatter applied to numeric cell values
     * @param formulasNotResults stored but not yet used
     */
    public XSSFBSheetHandler(InputStream is,
                             XSSFBStylesTable styles,
                             XSSFBCommentsTable comments,
                             XSSFBSharedStringsTable strings,
                             XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
                             DataFormatter dataFormatter,
                             boolean formulasNotResults) {
        super(is);
        this.styles = styles;
        this.comments = comments;
        this.stringsTable = strings;
        this.handler = sheetContentsHandler;
        this.dataFormatter = dataFormatter;
        this.formulasNotResults = formulasNotResults;
    }

    /**
     * Dispatches one record to its handler. Record types without a case are ignored.
     *
     * @throws XSSFBParseException if a row header carries a row number beyond the allowed range
     */
    @Override
    public void handleRecord(int id, byte[] data) throws XSSFBParseException {
        XSSFBRecordType type = XSSFBRecordType.lookup(id);

        switch(type) {
            case BrtRowHdr:
                long rw = LittleEndian.getUInt(data, 0);
                if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec?
                    throw new XSSFBParseException("Row number beyond allowable range: "+rw);
                }
                currentRow = (int)rw;
                checkMissedComments(currentRow);
                startRow(currentRow);
                break;
            case BrtCellIsst:
                handleBrtCellIsst(data);
                break;
            case BrtCellSt: //TODO: needs test
                handleCellSt(data);
                break;
            case BrtCellRk:
                handleCellRk(data);
                break;
            case BrtCellReal:
                handleCellReal(data);
                break;
            case BrtCellBool:
                handleBoolean(data);
                break;
            case BrtCellError:
                handleCellError(data);
                break;
            case BrtCellBlank:
                beforeCellValue(data);//read cell info and check for missing comments
                break;
            case BrtFmlaString:
                handleFmlaString(data);
                break;
            case BrtFmlaNum:
                handleFmlaNum(data);
                break;
            case BrtFmlaError:
                handleFmlaError(data);
                break;
            //TODO: All the PCDI and PCDIA
            case BrtEndSheetData:
                checkMissedComments(CHECK_ALL_ROWS);
                endRow(lastStartedRow);
                break;
            case BrtBeginHeaderFooter:
                handleHeaderFooter(data);
                break;
            default:
                break;
        }
    }

    /** Parses the cell header into {@link #cellBuffer} and reports comments on skipped cells of this row. */
    private void beforeCellValue(byte[] data) {
        XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer);
        checkMissedComments(currentRow, cellBuffer.getColNum());
    }

    /** Reports the current cell, together with its comment if there is one, to the handler. */
    private void handleCellValue(String formattedValue) {
        CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum());
        XSSFBComment comment = null;
        if (comments != null) {
            comment = comments.get(cellAddress);
        }
        handler.cell(cellAddress.formatAsString(), formattedValue, comment);
    }

    /** Formats a numeric value with the number format of the current cell's style. */
    private String formatVal(double val) {
        int styleIdx = cellBuffer.getStyleIdx();
        String formatString = styles.getNumberFormatString(styleIdx);
        return dataFormatter.formatRawCellContents(val, styleIdx, formatString);
    }

    /** Reads the wide string that follows the cell header and reports it as the cell value. */
    private void handleWideStringValue(byte[] data) {
        xlWideStringBuffer.setLength(0);
        XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
        handleCellValue(xlWideStringBuffer.toString());
    }

    private void handleFmlaNum(byte[] data) {
        beforeCellValue(data);
        //xNum
        double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
        handleCellValue(formatVal(val));
    }

    private void handleCellSt(byte[] data) {
        beforeCellValue(data);
        handleWideStringValue(data);
    }

    private void handleFmlaString(byte[] data) {
        beforeCellValue(data);
        handleWideStringValue(data);
    }

    private void handleCellError(byte[] data) {
        beforeCellValue(data);
        //TODO, read byte to figure out the type of error
        handleCellValue("ERROR");
    }

    private void handleFmlaError(byte[] data) {
        beforeCellValue(data);
        //TODO, read byte to figure out the type of error
        handleCellValue("ERROR");
    }

    private void handleBoolean(byte[] data) {
        beforeCellValue(data);
        String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE";
        handleCellValue(formattedVal);
    }

    private void handleCellReal(byte[] data) {
        beforeCellValue(data);
        //xNum
        double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
        handleCellValue(formatVal(val));
    }

    private void handleCellRk(byte[] data) {
        beforeCellValue(data);
        double val = rkNumber(data, XSSFBCellHeader.length);
        handleCellValue(formatVal(val));
    }

    /** Resolves the shared string index that follows the cell header and reports the string. */
    private void handleBrtCellIsst(byte[] data) {
        beforeCellValue(data);
        long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length);
        //check for out of range, buffer overflow

        XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx));
        handleCellValue(rtss.getString());
    }

    /** Reports all six header/footer variants; blank ones are skipped by {@link #outputHeaderFooter}. */
    private void handleHeaderFooter(byte[] data) {
        XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data);
        outputHeaderFooter(headerFooter.getHeader());
        outputHeaderFooter(headerFooter.getFooter());
        outputHeaderFooter(headerFooter.getHeaderEven());
        outputHeaderFooter(headerFooter.getFooterEven());
        outputHeaderFooter(headerFooter.getHeaderFirst());
        outputHeaderFooter(headerFooter.getFooterFirst());
    }

    private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) {
        String text = headerFooter.getString();
        if (text != null && text.trim().length() > 0) {
            handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel());
        }
    }

    /**
     * Called before a cell of <code>currentRow</code> is reported. Comments queued for earlier
     * columns of the same row are reported as empty cells; a comment queued for exactly this cell
     * is dequeued here, because the cell callback delivers it.
     */
    private void checkMissedComments(int currentRow, int colNum) {
        if (comments == null) {
            return;
        }
        Queue<CellAddress> queue = comments.getAddresses();
        while (!queue.isEmpty()) {
            CellAddress cellAddress = queue.peek();
            if (cellAddress.getRow() != currentRow) {
                // A later row is handled when that row is reached. An earlier row is not expected
                // here (row headers flush them); stop instead of looping forever on an entry
                // that none of the column branches below would ever dequeue.
                return;
            }
            if (cellAddress.getColumn() < colNum) {
                cellAddress = queue.remove();
                dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
            } else if (cellAddress.getColumn() == colNum) {
                queue.remove();
                return;
            } else {
                return;
            }
        }
    }

    /**
     * Reports, as empty cells, all queued comments on rows before <code>currentRow</code>,
     * or every queued comment when <code>currentRow</code> is {@link #CHECK_ALL_ROWS}.
     * A row is started for each distinct row encountered.
     */
    private void checkMissedComments(int currentRow) {
        if (comments == null) {
            return;
        }
        Queue<CellAddress> queue = comments.getAddresses();
        int lastInterpolatedRow = -1;
        while (!queue.isEmpty()) {
            CellAddress cellAddress = queue.peek();
            if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) {
                cellAddress = queue.remove();
                if (cellAddress.getRow() != lastInterpolatedRow) {
                    startRow(cellAddress.getRow());
                }
                dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
                lastInterpolatedRow = cellAddress.getRow();
            } else {
                break;
            }
        }
    }

    /** Starts a row unless it is already the started one; first ends the previous row if it is still open. */
    private void startRow(int row) {
        if (row == lastStartedRow) {
            return;
        }

        if (lastStartedRow != lastEndedRow) {
            endRow(lastStartedRow);
        }
        handler.startRow(row);
        lastStartedRow = row;
    }

    /** Ends a row unless it was already the last one ended. */
    private void endRow(int row) {
        if (lastEndedRow == row) {
            return;
        }
        handler.endRow(row);
        lastEndedRow = row;
    }

    private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) {
        handler.cell(cellAddress.formatAsString(), null, comment);
    }

    /**
     * Decodes the 4-byte RK number starting at <code>offset</code>.
     * <p>
     * The two lowest bits of the first byte are flags: bit 0 = divide the result by 100,
     * bit 1 = the remaining 30 bits are a signed integer rather than the upper bits of a double.
     */
    private double rkNumber(byte[] data, int offset) {
        //see 2.5.122 for this abomination
        byte b0 = data[offset];
        boolean numDivBy100 = ((b0 & 1) == 1); // else as is
        boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer

        //clear the two flag bits (the lowest 2 bits) and stage the value in the upper half of the buffer
        rkBuffer[4] = (byte) (b0 & ~3);
        for (int i = 1; i < 4; i++) {
            rkBuffer[i+4] = data[offset+i];
        }

        double d;
        if (floatingPoint) {
            //bytes 0-3 are zero, bytes 4-7 are the most significant 32 bits of the double
            d = LittleEndian.getDouble(rkBuffer);
        } else {
            //the integer lives in bytes 4-7 (bytes 0-3 are always zero);
            //arithmetic shift drops the flag bits and keeps the sign of the 30-bit value
            int rawInt = LittleEndian.getInt(rkBuffer, 4);
            d = rawInt >> 2;
        }
        return numDivBy100 ? d/100 : d;
    }

    /**
     * You need to implement this to handle the results
     *  of the sheet parsing.
     */
    public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler {
        /**
         * A cell, with the given formatted value (may be null),
         * a url (may be null), a toolTip (may be null)
         *  and possibly a comment (may be null), was encountered */
        void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment);
    }
}

View File

@ -0,0 +1,101 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.poi.POIXMLException;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.util.Internal;
/**
 * Read-only styles table of an xlsb package. The stream is parsed in the constructor;
 * only the custom number formats and the number format id of each cell XF are retained.
 */
@Internal
public class XSSFBStylesTable extends XSSFBParser {

    /** Custom number formats, keyed by format id. */
    private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>();
    /** Number format id of each cell XF record, in record order (position = style index). */
    private final List<Short> styleIds = new ArrayList<Short>();

    private boolean inCellXFS = false;
    private boolean inFmts = false;

    /**
     * @param is stream over the binary styles part; it is parsed completely but not closed here
     * @throws IOException if reading the stream fails
     */
    public XSSFBStylesTable(InputStream is) throws IOException {
        super(is);
        parse();
    }

    /**
     * @param idx zero-based style index, i.e. position of the cell XF record
     * @return the custom format string registered for that style's format id, otherwise
     *  whatever {@link BuiltinFormats#getBuiltinFormat(int)} returns for the id
     * @throws IndexOutOfBoundsException if no cell XF record exists at <code>idx</code>
     */
    String getNumberFormatString(int idx) {
        // index with the int directly: narrowing to short turned indices above 32767 negative
        short formatId = styleIds.get(idx);
        String custom = numberFormats.get(formatId);
        if (custom != null) {
            return custom;
        }
        return BuiltinFormats.getBuiltinFormat(formatId);
    }

    /**
     * Collects format records seen between the begin/end formats markers and XF records
     * seen between the begin/end cell-XFs markers; everything else is ignored.
     */
    @Override
    public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
        XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
        switch (type) {
            case BrtBeginCellXFs:
                inCellXFS = true;
                break;
            case BrtEndCellXFs:
                inCellXFS = false;
                break;
            case BrtXf:
                if (inCellXFS) {
                    handleBrtXFInCellXF(data);
                }
                break;
            case BrtBeginFmts:
                inFmts = true;
                break;
            case BrtEndFmts:
                inFmts = false;
                break;
            case BrtFmt:
                if (inFmts) {
                    handleFormat(data);
                }
                break;
            default:
                break;
        }
    }

    /** Registers a custom number format: 2-byte format id followed by the format string. */
    private void handleFormat(byte[] data) {
        int ifmt = readFormatId(data, 0);
        StringBuilder sb = new StringBuilder();
        XSSFBUtils.readXLWideString(data, 2, sb);
        String fmt = sb.toString();
        numberFormats.put((short)ifmt, fmt);
    }

    /** Records the number format id (numFmtId in xml terms) of one cell XF record. */
    private void handleBrtXFInCellXF(byte[] data) {
        int ifmtOffset = 2;
        //int ifmtLength = 2;
        int ifmt = readFormatId(data, ifmtOffset);
        styleIds.add((short)ifmt);
    }

    /**
     * Reads a format id as an unsigned 16-bit little-endian value. Both bytes are used:
     * reading only the low byte would truncate ids of 256 and above onto unrelated ids.
     *
     * @throws POIXMLException if the id does not fit the short it is stored as
     */
    private static int readFormatId(byte[] data, int offset) {
        int ifmt = (data[offset] & 0xFF) | ((data[offset + 1] & 0xFF) << 8);
        if (ifmt > Short.MAX_VALUE) {
            throw new POIXMLException("Format id must be a short");
        }
        return ifmt;
    }
}

View File

@ -0,0 +1,108 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import java.nio.charset.Charset;
import org.apache.poi.POIXMLException;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
/**
 * Static helpers for decoding primitive values and strings out of xlsb record payloads.
 */
@Internal
public class XSSFBUtils {

    /** Charset of wide strings; looked up once rather than on every string read. */
    private static final Charset UTF_16LE = Charset.forName("UTF-16LE");

    /** Length value that marks a null XLNullableWideString. */
    private static final long NULL_WIDE_STRING = 0xFFFFFFFFL;

    /**
     * Reads an XLNullableWideString.
     * @param data data from which to read
     * @param offset in data from which to start
     * @param sb buffer to which to write.  You must setLength(0) before calling!
     * @return number of bytes read, including the 4-byte length field; 0 if the string is null,
     *  in which case nothing is appended
     * @throws XSSFBParseException if there was an exception during reading
     */
    static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
        long numChars = LittleEndian.getUInt(data, offset);
        if (numChars == NULL_WIDE_STRING) { //this means null value (2.5.166), do not read any bytes!!!
            return 0;
        }
        //computed as long: 2*(int)numChars could overflow and slip past the bounds check
        long numBytes = 2L * numChars;
        offset += 4;
        if (offset + numBytes > data.length) {
            throw new XSSFBParseException("trying to read beyond data length:" +
             "offset="+offset+", numBytes="+numBytes+", data.length="+data.length);
        }
        sb.append(new String(data, offset, (int) numBytes, UTF_16LE));
        return (int) numBytes + 4;
    }

    /**
     * Reads an XLWideString: a 4-byte unsigned character count followed by that many
     * UTF-16LE chars.
     * @param data data from which to read
     * @param offset in data from which to start
     * @param sb buffer to which to write.  You must setLength(0) before calling!
     * @return number of bytes read, including the 4-byte length field
     * @throws XSSFBParseException if there was an exception while trying to read the string
     */
    public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
        long numChars = LittleEndian.getUInt(data, offset);
        //computed as long: 2*(int)numChars could overflow and slip past the bounds check
        long numBytes = 2L * numChars;
        offset += 4;
        if (offset + numBytes > data.length) {
            throw new XSSFBParseException("trying to read beyond data length");
        }
        sb.append(new String(data, offset, (int) numBytes, UTF_16LE));
        return (int) numBytes + 4;
    }

    /**
     * @param val value to narrow
     * @return <code>val</code> as an int
     * @throws POIXMLException if <code>val</code> is outside the int range
     */
    static int castToInt(long val) {
        if (val <= Integer.MAX_VALUE && val >= Integer.MIN_VALUE) {
            return (int)val;
        }
        throw new POIXMLException("val ("+val+") can't be cast to int");
    }

    /**
     * @param val value to narrow
     * @return <code>val</code> as a short
     * @throws POIXMLException if <code>val</code> is outside the short range
     */
    static short castToShort(int val) {
        if (val <= Short.MAX_VALUE && val >= Short.MIN_VALUE) {
            return (short)val;
        }
        throw new POIXMLException("val ("+val+") can't be cast to short");
    }

    /**
     * Reads three bytes as an unsigned little-endian integer.
     * @param data data from which to read
     * @param offset position of the least significant byte
     * @return value in the range 0 to 0xFFFFFF
     */
    //TODO: move to LittleEndian?
    static int get24BitInt( byte[] data, int offset) {
        int i = offset;
        int b0 = data[i++] & 0xFF;
        int b1 = data[i++] & 0xFF;
        int b2 = data[i] & 0xFF;
        return ( b2 << 16 ) + ( b1 << 8 ) + b0;
    }
}

View File

@ -0,0 +1,117 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.util.Internal;
/**
 * Read-only record holding what is known about one hyperlink.
 * In OOXML land, this information has to be merged
 * from 1) the sheet's .rels to get the url and 2) from the
 * hyperlink section that follows the sheet data.
 *
 * The {@link #display} is often empty and should be filled from
 * the contents of the anchor cell.
 */
@Internal
public class XSSFHyperlinkRecord {

    private final CellRangeAddress cellRangeAddress;
    private final String relId;
    private String location;
    private String toolTip;
    private String display;

    XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
        this.cellRangeAddress = cellRangeAddress;
        this.relId = relId;
        this.location = location;
        this.toolTip = toolTip;
        this.display = display;
    }

    // ---- package-private mutators ----

    void setLocation(String location) {
        this.location = location;
    }

    void setToolTip(String toolTip) {
        this.toolTip = toolTip;
    }

    void setDisplay(String display) {
        this.display = display;
    }

    // ---- accessors ----

    CellRangeAddress getCellRangeAddress() {
        return cellRangeAddress;
    }

    public String getRelId() {
        return relId;
    }

    public String getLocation() {
        return location;
    }

    public String getToolTip() {
        return toolTip;
    }

    public String getDisplay() {
        return display;
    }

    /**
     * Two records are equal when they are of the same class and all five
     * properties are equal; <code>null</code> only equals <code>null</code>.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        XSSFHyperlinkRecord other = (XSSFHyperlinkRecord) o;
        return nullSafeEquals(cellRangeAddress, other.cellRangeAddress)
                && nullSafeEquals(relId, other.relId)
                && nullSafeEquals(location, other.location)
                && nullSafeEquals(toolTip, other.toolTip)
                && nullSafeEquals(display, other.display);
    }

    /** Hash over the same five properties as {@link #equals(Object)}; <code>null</code> contributes 0. */
    @Override
    public int hashCode() {
        int hash = nullSafeHash(cellRangeAddress);
        hash = 31 * hash + nullSafeHash(relId);
        hash = 31 * hash + nullSafeHash(location);
        hash = 31 * hash + nullSafeHash(toolTip);
        hash = 31 * hash + nullSafeHash(display);
        return hash;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("XSSFHyperlinkRecord{");
        sb.append("cellRangeAddress=").append(cellRangeAddress);
        sb.append(", relId='").append(relId).append('\'');
        sb.append(", location='").append(location).append('\'');
        sb.append(", toolTip='").append(toolTip).append('\'');
        sb.append(", display='").append(display).append('\'');
        sb.append('}');
        return sb.toString();
    }

    private static boolean nullSafeEquals(Object a, Object b) {
        return (a == null) ? (b == null) : a.equals(b);
    }

    private static int nullSafeHash(Object o) {
        return (o == null) ? 0 : o.hashCode();
    }
}

View File

@ -0,0 +1,44 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<!--
====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
====================================================================
-->
<html>
<head>
</head>
<body bgcolor="white">
<p>The org.apache.poi.xssf.binary package includes necessary underlying components
for streaming/read-only processing of xlsb files.
</p>
<p>
POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read them
in a streaming fashion with XSSFBReader in the org.apache.poi.xssf.eventusermodel package.
</p>
<p>
This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes
have been marked @Internal and the API is subject to change.
</p>
<h2>Related Documentation</h2>
For overviews, tutorials, examples, guides, and tool documentation, please see:
<ul>
<li><a href="http://poi.apache.org">Apache POI Project</a>
</ul>
</body>
</html>

View File

@ -0,0 +1,172 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.xssf.binary.XSSFBCommentsTable;
import org.apache.poi.xssf.binary.XSSFBParseException;
import org.apache.poi.xssf.binary.XSSFBParser;
import org.apache.poi.xssf.binary.XSSFBRecordType;
import org.apache.poi.xssf.binary.XSSFBRelation;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.binary.XSSFBUtils;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.usermodel.XSSFRelation;
/**
 * Reader for xlsb files.
 * <p>
 * Builds on {@link XSSFReader}, but reads the list of sheets from the binary
 * workbook part and exposes the binary styles and comments tables.
 */
public class XSSFBReader extends XSSFReader {

    /**
     * Creates a new XSSFBReader, for the given package
     *
     * @param pkg opc package
     */
    public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
        super(pkg);
    }

    /**
     * Returns an Iterator which will let you get at all the
     * different Sheets in turn.
     * Each sheet's InputStream is only opened when fetched
     * from the Iterator. It's up to you to close the
     * InputStreams when done with each one.
     */
    @Override
    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
        return new SheetIterator(workbookPart);
    }

    /**
     * Builds the styles table from the first package part that has the
     * binary styles content type.
     *
     * @return the styles table, or <code>null</code> if the package has no such part
     * @throws IOException if the styles part cannot be opened or read
     */
    public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
        List<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
        if (parts.isEmpty()) {
            return null;
        }
        // NOTE(review): the stream is handed to XSSFBStylesTable and not closed here;
        // confirm that the table fully consumes it in its constructor before adding a close.
        return new XSSFBStylesTable(parts.get(0).getInputStream());
    }

    /**
     * Iterator over the sheets of an xlsb package; sheet references are read
     * from the binary workbook part.
     */
    public static class SheetIterator extends XSSFReader.SheetIterator {

        /**
         * Construct a new SheetIterator
         *
         * @param wb package part holding the workbook
         */
        private SheetIterator(PackagePart wb) throws IOException {
            super(wb);
        }

        /**
         * Reads the sheet references from the records of the workbook part.
         *
         * @param wb package part holding the workbook
         * @return iterator over the sheet references that carry a relationship id
         * @throws IOException if the workbook part cannot be opened or read
         */
        @Override
        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
            InputStream wbStream = wb.getInputStream();
            try {
                SheetRefLoader sheetRefLoader = new SheetRefLoader(wbStream);
                sheetRefLoader.parse();
                return sheetRefLoader.getSheets().iterator();
            } finally {
                // everything needed has been copied into the loader's list by now
                wbStream.close();
            }
        }

        /**
         * Not supported by XSSFBReader's SheetIterator.
         * Please use {@link #getXSSFBSheetComments()} instead.
         * @return nothing, always throws IllegalArgumentException!
         */
        @Override
        public CommentsTable getSheetComments() {
            throw new IllegalArgumentException("Please use getXSSFBSheetComments");
        }

        /**
         * Loads the binary comments table related to the current sheet.
         * Failures are deliberately not propagated: any problem resolving or
         * reading the comments part results in <code>null</code>.
         *
         * @return the comments table, or <code>null</code> if the sheet has no comments
         *         relationship, the relationship has no usable target, or the part
         *         could not be read
         */
        public XSSFBCommentsTable getXSSFBSheetComments() {
            PackagePart sheetPkg = getSheetPart();

            // Do we have a comments relationship? (Only ever one if so)
            try {
                PackageRelationshipCollection commentsList =
                        sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
                if (commentsList.size() > 0) {
                    PackageRelationship comments = commentsList.getRelationship(0);
                    if (comments == null || comments.getTargetURI() == null) {
                        return null;
                    }
                    PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
                    PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
                    if (commentsPart == null) {
                        // relationship points at a part that is not in the package
                        return null;
                    }
                    // NOTE(review): the stream is not closed here; confirm that
                    // XSSFBCommentsTable consumes it in its constructor.
                    return new XSSFBCommentsTable(commentsPart.getInputStream());
                }
            } catch (InvalidFormatException e) {
                return null;
            } catch (IOException e) {
                return null;
            }
            return null;
        }
    }

    /**
     * Collects one {@link XSSFSheetRef} per BrtBundleSh record found in the
     * workbook stream, in the order the records are encountered.
     */
    private static class SheetRefLoader extends XSSFBParser {
        private final List<XSSFSheetRef> sheets = new ArrayList<XSSFSheetRef>();

        private SheetRefLoader(InputStream is) {
            super(is);
        }

        @Override
        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
            if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
                addWorksheet(data);
            }
        }

        /**
         * Decodes a BrtBundleSh record and stores the sheet if it has a
         * non-blank relationship id.
         *
         * @param data the record payload
         * @throws XSSFBParseException if the tab id is outside 1..0xFFFF
         */
        private void addWorksheet(byte[] data) {
            //the first int is the sheet state #2.5.142; it is not needed here, so skip it
            int offset = LittleEndian.INT_SIZE;

            long iTabID = LittleEndian.getUInt(data, offset);
            offset += LittleEndian.INT_SIZE;
            //according to #2.4.304
            if (iTabID < 1 || iTabID > 0x0000FFFFL) {
                throw new XSSFBParseException("table id out of range: "+iTabID);
            }

            StringBuilder sb = new StringBuilder();
            offset += XSSFBUtils.readXLWideString(data, offset, sb);
            String relId = sb.toString();

            sb.setLength(0);
            XSSFBUtils.readXLWideString(data, offset, sb);
            String name = sb.toString();

            if (relId.trim().length() > 0) {
                sheets.add(new XSSFSheetRef(relId, name));
            }
        }

        List<XSSFSheetRef> getSheets() {
            return sheets;
        }
    }
}

View File

@ -16,15 +16,16 @@
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.poi.POIXMLException;
@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSSFDrawing;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
* This class makes it easy to get at individual parts
@ -62,8 +66,8 @@ public class XSSFReader {
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
private OPCPackage pkg;
private PackagePart workbookPart;
protected OPCPackage pkg;
protected PackagePart workbookPart;
/**
* Creates a new XSSFReader, for the given package
@ -194,23 +198,23 @@ public class XSSFReader {
private final Map<String, PackagePart> sheetMap;
/**
* Current CTSheet bean
* Current sheet reference
*/
private CTSheet ctSheet;
XSSFSheetRef xssfSheetRef;
/**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
* i.e. as they are stored in the underlying package
*/
private final Iterator<CTSheet> sheetIterator;
final Iterator<XSSFSheetRef> sheetIterator;
/**
* Construct a new SheetIterator
*
* @param wb package part holding workbook.xml
*/
private SheetIterator(PackagePart wb) throws IOException {
SheetIterator(PackagePart wb) throws IOException {
/**
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
@ -228,25 +232,44 @@ public class XSSFReader {
sheetMap.put(rel.getId(), pkg.getPart(relName));
}
}
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
List<CTSheet> validSheets = new ArrayList<CTSheet>();
for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
//if there's no relationship id, silently skip the sheet
String sheetId = ctSheet.getId();
if (sheetId != null && sheetId.length() > 0) {
validSheets.add(ctSheet);
}
}
sheetIterator = validSheets.iterator();
//step 2. Read array of CTSheet elements, wrap it in a LinkedList
//and construct an iterator
sheetIterator = createSheetIteratorFromWB(wb);
} catch (InvalidFormatException e){
throw new POIXMLException(e);
} catch (XmlException e){
throw new POIXMLException(e);
}
}
/**
 * Reads the sheet references from the workbook part with a SAX parser and
 * returns them in document order, skipping sheets without a relationship id.
 *
 * @param wb package part holding workbook.xml
 * @return iterator over the sheet references that have a non-empty id
 * @throws IOException if the workbook part cannot be opened or read
 * @throws POIXMLException if the XML reader cannot be created or the XML cannot be parsed
 */
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
    XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
    XMLReader xmlReader;
    try {
        xmlReader = SAXHelper.newXMLReader();
    } catch (ParserConfigurationException e) {
        throw new POIXMLException(e);
    } catch (SAXException e) {
        throw new POIXMLException(e);
    }
    xmlReader.setContentHandler(xmlSheetRefReader);

    InputStream wbStream = wb.getInputStream();
    try {
        xmlReader.parse(new InputSource(wbStream));
    } catch (SAXException e) {
        throw new POIXMLException(e);
    } finally {
        // the handler has collected everything it needs; do not leak the part stream
        wbStream.close();
    }

    List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
    for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
        //if there's no relationship id, silently skip the sheet
        String sheetId = xssfSheetRef.getId();
        if (sheetId != null && sheetId.length() > 0) {
            validSheets.add(xssfSheetRef);
        }
    }
    return validSheets.iterator();
}
/**
* Returns <tt>true</tt> if the iteration has more elements.
*
@ -264,9 +287,9 @@ public class XSSFReader {
*/
@Override
public InputStream next() {
ctSheet = sheetIterator.next();
xssfSheetRef = sheetIterator.next();
String sheetId = ctSheet.getId();
String sheetId = xssfSheetRef.getId();
try {
PackagePart sheetPkg = sheetMap.get(sheetId);
return sheetPkg.getInputStream();
@ -281,7 +304,7 @@ public class XSSFReader {
* @return name of the current sheet
*/
public String getSheetName() {
return ctSheet.getName();
return xssfSheetRef.getName();
}
/**
@ -344,7 +367,7 @@ public class XSSFReader {
}
public PackagePart getSheetPart() {
String sheetId = ctSheet.getId();
String sheetId = xssfSheetRef.getId();
return sheetMap.get(sheetId);
}
@ -356,4 +379,52 @@ public class XSSFReader {
throw new IllegalStateException("Not supported");
}
}
/**
 * Immutable pairing of a sheet's relationship id with its name.
 */
protected static final class XSSFSheetRef {
    //do we need to store sheetId, too?
    private final String name;
    private final String id;

    /**
     * @param id   relationship id of the sheet, may be <code>null</code>
     * @param name name of the sheet, may be <code>null</code>
     */
    public XSSFSheetRef(String id, String name) {
        this.name = name;
        this.id = id;
    }

    /**
     * @return the sheet name passed to the constructor
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the relationship id passed to the constructor
     */
    public String getId() {
        return this.id;
    }
}
/**
 * SAX handler that scrapes sheet reference info and order from workbook.xml.
 * Exactly one {@link XSSFSheetRef} is recorded per <code>sheet</code> element;
 * its id and/or name is <code>null</code> if the matching attribute is absent.
 */
private static class XMLSheetRefReader extends DefaultHandler {
    private final static String SHEET = "sheet";
    private final static String ID = "id";
    private final static String NAME = "name";

    private final List<XSSFSheetRef> sheetRefs = new ArrayList<XSSFSheetRef>();

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
        if (!localName.toLowerCase(Locale.US).equals(SHEET)) {
            return;
        }
        String name = null;
        String id = null;
        for (int i = 0; i < attrs.getLength(); i++) {
            String attrName = attrs.getLocalName(i).toLowerCase(Locale.US);
            if (attrName.equals(NAME)) {
                name = attrs.getValue(i);
            } else if (attrName.equals(ID)) {
                id = attrs.getValue(i);
            }
        }
        // add once per element, after all attributes were seen; adding inside the
        // loop produced one entry per attribute and could repeat the same sheet
        sheetRefs.add(new XSSFSheetRef(id, name));
    }

    /**
     * @return read-only view of the collected sheet references, in document order
     */
    List<XSSFSheetRef> getSheetRefs() {
        return Collections.unmodifiableList(sheetRefs);
    }
}
}

View File

@ -0,0 +1,160 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.extractor;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBCommentsTable;
import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.xmlbeans.XmlException;
import org.xml.sax.SAXException;
/**
 * Implementation of a text extractor for xlsb Excel
 * files that uses SAX-like binary parsing.
 */
public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
        implements org.apache.poi.ss.extractor.ExcelExtractor {

    public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
            XSSFRelation.XLSB_BINARY_WORKBOOK
    };

    private boolean handleHyperlinksInCells = false;

    public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
        super(path);
    }

    public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
        super(container);
    }

    /**
     * Command line entry point: prints the extracted text of the given file to stdout.
     *
     * @param args a single argument, the path of the xlsb file
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Use:");
            System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
            System.exit(1);
        }
        POIXMLTextExtractor extractor =
                new XSSFBEventBasedExcelExtractor(args[0]);
        try {
            System.out.println(extractor.getText());
        } finally {
            // release the package even if extraction fails
            extractor.close();
        }
    }

    /**
     * Sets whether {@link #getText()} builds a hyperlinks table for each sheet.
     *
     * @param handleHyperlinksInCells <code>true</code> to build the table per sheet
     */
    public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
        this.handleHyperlinksInCells = handleHyperlinksInCells;
    }

    /**
     * Should we return the formula itself, and not
     * the result it produces? Default is false
     * This is currently unsupported for xssfb
     *
     * @throws IllegalArgumentException always
     */
    @Override
    public void setFormulasNotResults(boolean formulasNotResults) {
        throw new IllegalArgumentException("Not currently supported");
    }

    /**
     * Processes the given sheet
     *
     * @param sheetContentsExtractor handler that receives the sheet's rows and cells
     * @param styles styles table handed to the sheet handler
     * @param comments comments table handed to the sheet handler, may be <code>null</code>
     * @param strings shared strings table handed to the sheet handler
     * @param sheetInputStream stream of the sheet part; it is not closed by this method
     */
    public void processSheet(
            SheetContentsHandler sheetContentsExtractor,
            XSSFBStylesTable styles,
            XSSFBCommentsTable comments,
            XSSFBSharedStringsTable strings,
            InputStream sheetInputStream)
            throws IOException, SAXException {

        // use the default formatter unless a locale was configured
        DataFormatter formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);

        XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
                sheetInputStream,
                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
        );
        xssfbSheetHandler.parse();
    }

    /**
     * Processes the file and returns the text
     *
     * @return the extracted text, or <code>null</code> if an IOException, SAXException
     *         or OpenXML4JException occurred (the exception is printed to stderr)
     */
    @Override
    public String getText() {
        try {
            XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
            XSSFBReader xssfbReader = new XSSFBReader(container);
            XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
            XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();

            StringBuffer text = new StringBuffer();
            SheetTextExtractor sheetExtractor = new SheetTextExtractor();
            XSSFBHyperlinksTable hyperlinksTable = null;
            while (iter.hasNext()) {
                InputStream stream = iter.next();
                try {
                    if (includeSheetNames) {
                        text.append(iter.getSheetName());
                        text.append('\n');
                    }
                    if (handleHyperlinksInCells) {
                        // NOTE(review): the table is built but not yet passed on to the sheet handler
                        hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
                    }
                    XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
                    processSheet(sheetExtractor, styles, comments, strings, stream);
                    if (includeHeadersFooters) {
                        sheetExtractor.appendHeaderText(text);
                    }
                    sheetExtractor.appendCellText(text);
                    if (includeTextBoxes) {
                        processShapes(iter.getShapes(), text);
                    }
                    if (includeHeadersFooters) {
                        sheetExtractor.appendFooterText(text);
                    }
                    sheetExtractor.reset();
                } finally {
                    // close the sheet stream even when processing the sheet fails
                    stream.close();
                }
            }

            return text.toString();
        } catch (IOException e) {
            System.err.println(e);
            return null;
        } catch (SAXException se) {
            System.err.println(se);
            return null;
        } catch (OpenXML4JException o4je) {
            System.err.println(o4je);
            return null;
        }
    }
}

View File

@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
private OPCPackage container;
OPCPackage container;
private POIXMLProperties properties;
private Locale locale;
private boolean includeTextBoxes = true;
private boolean includeSheetNames = true;
private boolean includeCellComments = false;
private boolean includeHeadersFooters = true;
private boolean formulasNotResults = false;
Locale locale;
boolean includeTextBoxes = true;
boolean includeSheetNames = true;
boolean includeCellComments = false;
boolean includeHeadersFooters = true;
boolean formulasNotResults = false;
private boolean concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
}
private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
void processShapes(List<XSSFShape> shapes, StringBuffer text) {
if (shapes == null){
return;
}
@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
private void appendHeaderText(StringBuffer buffer) {
void appendHeaderText(StringBuffer buffer) {
appendHeaderFooterText(buffer, "firstHeader");
appendHeaderFooterText(buffer, "oddHeader");
appendHeaderFooterText(buffer, "evenHeader");
@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
private void appendFooterText(StringBuffer buffer) {
void appendFooterText(StringBuffer buffer) {
// append the text for each footer type in the same order
// they are appended in XSSFExcelExtractor
appendHeaderFooterText(buffer, "firstFooter");
@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Append the cell contents we have collected.
*/
private void appendCellText(StringBuffer buffer) {
void appendCellText(StringBuffer buffer) {
checkMaxTextSize(buffer, output.toString());
buffer.append(output);
}
@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
/**
* Reset this <code>SheetTextExtractor</code> for the next sheet.
*/
private void reset() {
void reset() {
output.setLength(0);
firstCellOfRow = true;
if (headerFooterMap != null) {

View File

@ -68,6 +68,7 @@ public class TestExtractorFactory {
private static File xlsxStrict;
private static File xltx;
private static File xlsEmb;
private static File xlsb;
private static File doc;
private static File doc6;
@ -108,6 +109,7 @@ public class TestExtractorFactory {
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
xltx = getFileAndCheck(ssTests, "test.xltx");
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
doc = getFileAndCheck(wpTests, "SampleDoc.doc");
@ -172,6 +174,13 @@ public class TestExtractorFactory {
);
extractor.close();
extractor = ExtractorFactory.createExtractor(xlsb);
assertTrue(
extractor.getText().contains("test")
);
extractor.close();
extractor = ExtractorFactory.createExtractor(xltx);
assertTrue(
extractor.getText().contains("test")

View File

@ -0,0 +1,56 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import static org.junit.Assert.assertEquals;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.junit.Test;
/**
 * Tests reading the shared strings part of an xlsb file.
 */
public class TestXSSFBSharedStringsTable {
    private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();

    @Test
    public void testBasic() throws Exception {
        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
        try {
            List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
            assertEquals(1, parts.size());

            XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
            List<String> strings = rtbl.getItems();
            assertEquals(49, strings.size());

            assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
            assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
            assertEquals(55, rtbl.getCount());
            assertEquals(49, rtbl.getUniqueCount());

            //TODO: add in tests for phonetic runs
        } finally {
            // nothing was modified: release the package without saving
            pkg.revert();
        }
    }
}

View File

@ -0,0 +1,54 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.binary;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.util.List;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.junit.Test;
/**
 * Tests reading the hyperlinks of the first sheet of an xlsb file.
 */
public class TestXSSFBSheetHyperlinkManager {
    private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();

    @Test
    public void testBasic() throws Exception {
        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
        try {
            XSSFBReader reader = new XSSFBReader(pkg);
            XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
            // advance to the first sheet; its stream is not needed here, so close it right away
            it.next().close();
            XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
            List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
            assertNotNull(records);
            assertEquals(1, records.size());
            XSSFHyperlinkRecord record = records.get(0);
            assertEquals("http://tika.apache.org/", record.getLocation());
            assertEquals("rId2", record.getRelId());
        } finally {
            // nothing was modified: release the package without saving
            pkg.revert();
        }
    }
}

View File

@ -0,0 +1,224 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.junit.Test;
/**
 * Tests for {@link XSSFBReader}: every sheet of a sample file is rendered to a
 * simple pseudo-XML string by {@link TestSheetHandler} and checked for expected snippets.
 */
public class TestXSSFBReader {

    private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();

    @Test
    public void testBasic() throws Exception {
        List<String> sheetTexts = getSheets("testVarious.xlsb");

        assertEquals(1, sheetTexts.size());
        String xsxml = sheetTexts.get(0);
        assertContains("This is a string", xsxml);
        assertContains("<td ref=\"B2\">13</td>", xsxml);
        assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
        assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml);
        assertContains("<td ref=\"B5\">20%</td>", xsxml);
        assertContains("<td ref=\"B6\">13.12</td>", xsxml);
        assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
        assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);

        assertContains("46/1963", xsxml);//custom format 1
        assertContains("3/128", xsxml);//custom format 2

        assertContains("<tr num=\"7>\n" +
                "\t<td ref=\"A8\">longer int</td>\n" +
                "\t<td ref=\"B8\">1.23457E+15</td>\n" +
                "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
                "test comment2</span></td>\n" +
                "</tr num=\"7>", xsxml);

        assertContains("<tr num=\"34>\n" +
                "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
                "comment6 actually in cell</span></td>\n" +
                "</tr num=\"34>", xsxml);

        assertContains("<tr num=\"64>\n" +
                "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
                "comment7 end of file</span></td>\n" +
                "</tr num=\"64>", xsxml);

        assertContains("<tr num=\"65>\n" +
                "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
                "comment8 end of file</span></td>\n" +
                "</tr num=\"65>", xsxml);

        assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
        assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
        assertContains(
                "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
                xsxml);
        assertContains(
                "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
                xsxml);
        assertContains(
                "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
                xsxml);
        assertContains(
                "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
                xsxml);
    }

    @Test
    public void testComments() throws Exception {
        List<String> sheetTexts = getSheets("comments.xlsb");
        String xsxml = sheetTexts.get(0);

        assertContains(
                "<tr num=\"0>\n" +
                        "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
                        "\t<td ref=\"B1\">row1</td>\n" +
                        "</tr num=\"0>", xsxml);
        assertContains(
                "<tr num=\"1>\n" +
                        "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
                        "comment row2 (index1)</span></td>\n" +
                        "</tr num=\"1>",
                xsxml);
        assertContains("<tr num=\"2>\n" +
                "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
                "\t<td ref=\"B3\">row3</td>\n", xsxml);

        assertContains("<tr num=\"3>\n" +
                "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
                "\t<td ref=\"B4\">row4</td>\n" +
                "</tr num=\"3></sheet>", xsxml);
    }

    @Test
    public void testDate() throws Exception {
        List<String> sheets = getSheets("date.xlsb");
        assertEquals(1, sheets.size());
        assertContains("1/12/13", sheets.get(0));
    }

    /**
     * Renders every sheet of the given sample file with a {@link TestSheetHandler}.
     *
     * @param testFileName name of the spreadsheet sample to open
     * @return one rendered string per sheet, in iteration order
     */
    private List<String> getSheets(String testFileName) throws Exception {
        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
        try {
            List<String> sheetTexts = new ArrayList<String>();
            XSSFBReader r = new XSSFBReader(pkg);

            // load the styles once and reuse them for every sheet
            XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
            assertNotNull(xssfbStylesTable);
            XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
            XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();

            while (it.hasNext()) {
                InputStream is = it.next();
                try {
                    String name = it.getSheetName();
                    TestSheetHandler testSheetHandler = new TestSheetHandler();
                    testSheetHandler.startSheet(name);
                    XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
                            xssfbStylesTable,
                            it.getXSSFBSheetComments(),
                            sst, testSheetHandler,
                            new DataFormatter(),
                            false);
                    sheetHandler.parse();
                    testSheetHandler.endSheet();
                    sheetTexts.add(testSheetHandler.toString());
                } finally {
                    is.close();
                }
            }
            return sheetTexts;
        } finally {
            // nothing was modified: release the package without saving
            pkg.revert();
        }
    }

    //This converts all [\r\n\t]+ to " "
    private void assertContains(String needle, String haystack) {
        needle = needle.replaceAll("[\r\n\t]+", " ");
        haystack = haystack.replaceAll("[\r\n\t]+", " ");
        if (!haystack.contains(needle)) {
            fail("couldn't find >"+needle+"< in: "+haystack );
        }
    }

    /**
     * Records the callbacks of the sheet handler as a pseudo-XML string.
     * Static: it needs no state from the enclosing test instance.
     */
    private static class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
        private final StringBuilder sb = new StringBuilder();

        public void startSheet(String sheetName) {
            sb.append("<sheet name=\"").append(sheetName).append(">");
        }

        public void endSheet(){
            sb.append("</sheet>");
        }

        @Override
        public void startRow(int rowNum) {
            sb.append("\n<tr num=\"").append(rowNum).append(">");
        }

        @Override
        public void endRow(int rowNum) {
            sb.append("\n</tr num=\"").append(rowNum).append(">");
        }

        @Override
        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
            formattedValue = (formattedValue == null) ? "" : formattedValue;
            if (comment == null) {
                sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
            } else {
                sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
                        .append(formattedValue)
                        .append("<span type=\"comment\" author=\"")
                        .append(comment.getAuthor()).append("\">")
                        .append(comment.getString().toString().trim()).append("</span>")
                        .append("</td>");
            }
        }

        @Override
        public void headerFooter(String text, boolean isHeader, String tagName) {
            if (isHeader) {
                sb.append("<header tagName=\"").append(tagName).append("\">").append(text).append("</header>");
            } else {
                sb.append("<footer tagName=\"").append(tagName).append("\">").append(text).append("</footer>");
            }
        }

        @Override
        public String toString() {
            return sb.toString();
        }
    }
}

View File

@ -0,0 +1,102 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.extractor;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.junit.Test;
/**
 * Tests for {@link XSSFBEventBasedExcelExtractor}
 */
public class TestXSSFBEventBasedExcelExtractor {

    /**
     * Creates an extractor over the named sample file.
     *
     * @param sampleName name of the sample file, as understood by
     *                   {@link XSSFTestDataSamples#openSamplePackage(String)}
     * @return a new extractor; the caller is responsible for closing it
     * @throws Exception if the sample cannot be opened or the extractor cannot be created
     */
    protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
        return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
                openSamplePackage(sampleName));
    }

    /**
     * Get text out of the simple file, first with sheet names and then without.
     *
     * @throws Exception if the sample cannot be opened or extraction fails
     */
    @Test
    public void testGetSimpleText() throws Exception {
        // a very simple file
        XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
        // Close the extractor even when an assertion fails, as testShapes does.
        try {
            extractor.setIncludeCellComments(true);
            // The first result is discarded; presumably this checks that
            // getText() can be invoked repeatedly on the same extractor.
            extractor.getText();

            String text = extractor.getText();
            assertTrue(text.length() > 0);

            // Check sheet names
            assertTrue(text.startsWith("Sheet1"));
            assertTrue(text.endsWith("Sheet3\n"));

            // Now without sheet names: only the cell text is expected
            extractor.setIncludeSheetNames(false);
            text = extractor.getText();

            String chunk1 =
                    "Lorem\t111\n" +
                    "ipsum\t222\n" +
                    "dolor\t333\n" +
                    "sit\t444\n" +
                    "amet\t555\n" +
                    "consectetuer\t666\n" +
                    "adipiscing\t777\n" +
                    "elit\t888\n" +
                    "Nunc\t999\n";
            String chunk2 =
                    "The quick brown fox jumps over the lazy dog\n" +
                    "hello, xssf hello, xssf\n" +
                    "hello, xssf hello, xssf\n" +
                    "hello, xssf hello, xssf\n" +
                    "hello, xssf hello, xssf\n";
            assertEquals(
                    chunk1 +
                    "at\t4995\n" +
                    chunk2
                    , text);
        } finally {
            extractor.close();
        }
    }

    /**
     * Test that the text of a text box is part of the extracted text.
     *
     * @throws Exception if the sample cannot be opened or extraction fails
     */
    @Test
    public void testShapes() throws Exception {
        XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
        try {
            String text = ooxmlExtractor.getText();
            assertTrue(text.contains("Line 1"));
            assertTrue(text.contains("Line 2"));
            assertTrue(text.contains("Line 3"));
        } finally {
            ooxmlExtractor.close();
        }
    }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.