60826 -- add initial support for streaming reading of xlsb files.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787228 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6fc050a162
commit
730f394261
@ -56,6 +56,7 @@ import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
|
||||
import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
|
||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
|
||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
@ -244,6 +245,13 @@ public class ExtractorFactory {
|
||||
return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg));
|
||||
}
|
||||
|
||||
// How about xlsb?
|
||||
for (XSSFRelation rel : XSSFBEventBasedExcelExtractor.SUPPORTED_TYPES) {
|
||||
if (rel.getContentType().equals(contentType)) {
|
||||
return new XSSFBEventBasedExcelExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
|
||||
|
||||
} catch (IOException e) {
|
||||
|
@ -19,7 +19,9 @@ package org.apache.poi.xssf;
|
||||
import org.apache.poi.UnsupportedFileFormatException;
|
||||
|
||||
/**
|
||||
* We don't support .xlsb files, sorry
|
||||
* We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}.
|
||||
* As of POI 3.15-beta3, we do support streaming reading of xlsb files
|
||||
* via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader}
|
||||
*/
|
||||
public class XLSBUnsupportedException extends UnsupportedFileFormatException {
|
||||
private static final long serialVersionUID = 7849681804154571175L;
|
||||
|
@ -0,0 +1,71 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.ss.util.CellReference;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* This class encapsulates what the spec calls a "Cell" object.
|
||||
* I added "Header" to clarify that this does not contain the contents
|
||||
* of the cell, only the column number, the style id and the phonetic boolean
|
||||
*/
|
||||
@Internal
|
||||
class XSSFBCellHeader {
|
||||
public static int length = 8;
|
||||
|
||||
/**
|
||||
*
|
||||
* @param data raw data
|
||||
* @param offset offset at which to start reading the record
|
||||
* @param currentRow 0-based current row count
|
||||
* @param cell cell buffer to update
|
||||
*/
|
||||
public static void parse(byte[] data, int offset, int currentRow, XSSFBCellHeader cell) {
|
||||
long colNum = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
|
||||
int styleIdx = XSSFBUtils.get24BitInt(data, offset); offset += 3;
|
||||
//TODO: range checking
|
||||
boolean showPhonetic = false;//TODO: fill this out
|
||||
cell.reset(currentRow, (int)colNum, styleIdx, showPhonetic);
|
||||
}
|
||||
|
||||
private int rowNum;
|
||||
private int colNum;
|
||||
private int styleIdx;
|
||||
private boolean showPhonetic;
|
||||
|
||||
public void reset(int rowNum, int colNum, int styleIdx, boolean showPhonetic) {
|
||||
this.rowNum = rowNum;
|
||||
this.colNum = colNum;
|
||||
this.styleIdx = styleIdx;
|
||||
this.showPhonetic = showPhonetic;
|
||||
}
|
||||
|
||||
int getColNum() {
|
||||
return colNum;
|
||||
}
|
||||
|
||||
String formatAddressAsString() {
|
||||
return CellReference.convertNumToColString(colNum)+(rowNum+1);
|
||||
}
|
||||
|
||||
int getStyleIdx() {
|
||||
return styleIdx;
|
||||
}
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
@Internal
|
||||
class XSSFBCellRange {
|
||||
|
||||
public final static int length = 4* LittleEndian.INT_SIZE;
|
||||
/**
|
||||
* Parses an RfX cell range from the data starting at the offset.
|
||||
* This performs no range checking.
|
||||
* @param data raw bytes
|
||||
* @param offset offset at which to start reading from data
|
||||
* @param cellRange to overwrite. If null, a new cellRange will be created.
|
||||
* @return a mutable cell range.
|
||||
*/
|
||||
public static XSSFBCellRange parse(byte[] data, int offset, XSSFBCellRange cellRange) {
|
||||
if (cellRange == null) {
|
||||
cellRange = new XSSFBCellRange();
|
||||
}
|
||||
cellRange.firstRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
|
||||
cellRange.lastRow = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
|
||||
cellRange.firstCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset)); offset += LittleEndian.INT_SIZE;
|
||||
cellRange.lastCol = XSSFBUtils.castToInt(LittleEndian.getUInt(data, offset));
|
||||
|
||||
return cellRange;
|
||||
}
|
||||
|
||||
int firstRow;
|
||||
int lastRow;
|
||||
int firstCol;
|
||||
int lastCol;
|
||||
|
||||
|
||||
}
|
112
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java
Normal file
112
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBComment.java
Normal file
@ -0,0 +1,112 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
|
||||
import org.apache.poi.ss.usermodel.ClientAnchor;
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
|
||||
@Internal
|
||||
class XSSFBComment extends XSSFComment {
|
||||
|
||||
private final CellAddress cellAddress;
|
||||
private final String author;
|
||||
private final XSSFBRichTextString comment;
|
||||
private boolean visible = true;
|
||||
|
||||
XSSFBComment(CellAddress cellAddress, String author, String comment) {
|
||||
super(null, null, null);
|
||||
this.cellAddress = cellAddress;
|
||||
this.author = author;
|
||||
this.comment = new XSSFBRichTextString(comment);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setVisible(boolean visible) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only.");
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isVisible() {
|
||||
return visible;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CellAddress getAddress() {
|
||||
return cellAddress;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setAddress(CellAddress addr) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setAddress(int row, int col) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getRow() {
|
||||
return cellAddress.getRow();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setRow(int row) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getColumn() {
|
||||
return cellAddress.getColumn();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setColumn(int col) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAuthor() {
|
||||
return author;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setAuthor(String author) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
}
|
||||
|
||||
@Override
|
||||
public XSSFBRichTextString getString() {
|
||||
return comment;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setString(RichTextString string) {
|
||||
throw new IllegalArgumentException("XSSFBComment is read only");
|
||||
}
|
||||
|
||||
@Override
|
||||
public ClientAnchor getClientAnchor() {
|
||||
return null;
|
||||
}
|
||||
}
|
@ -0,0 +1,113 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Queue;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
@Internal
|
||||
public class XSSFBCommentsTable extends XSSFBParser {
|
||||
|
||||
private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//String is the cellAddress A1
|
||||
private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>();
|
||||
private List<String> authors = new ArrayList<String>();
|
||||
|
||||
//these are all used only during parsing, and they are mutable!
|
||||
private int authorId = -1;
|
||||
private CellAddress cellAddress = null;
|
||||
private XSSFBCellRange cellRange = null;
|
||||
private String comment = null;
|
||||
private StringBuilder authorBuffer = new StringBuilder();
|
||||
|
||||
|
||||
public XSSFBCommentsTable(InputStream is) throws IOException {
|
||||
super(is);
|
||||
parse();
|
||||
commentAddresses.addAll(comments.keySet());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int id, byte[] data) throws XSSFBParseException {
|
||||
XSSFBRecordType recordType = XSSFBRecordType.lookup(id);
|
||||
switch (recordType) {
|
||||
case BrtBeginComment:
|
||||
int offset = 0;
|
||||
authorId = XSSFBUtils.castToInt(LittleEndian.getUInt(data)); offset += LittleEndian.INT_SIZE;
|
||||
cellRange = XSSFBCellRange.parse(data, offset, cellRange);
|
||||
offset+= XSSFBCellRange.length;
|
||||
//for strict parsing; confirm that firstRow==lastRow and firstCol==colLats (2.4.28)
|
||||
cellAddress = new CellAddress(cellRange.firstRow, cellRange.firstCol);
|
||||
break;
|
||||
case BrtCommentText:
|
||||
XSSFBRichStr xssfbRichStr = XSSFBRichStr.build(data, 0);
|
||||
comment = xssfbRichStr.getString();
|
||||
break;
|
||||
case BrtEndComment:
|
||||
comments.put(cellAddress, new XSSFBComment(cellAddress, authors.get(authorId), comment));
|
||||
authorId = -1;
|
||||
cellAddress = null;
|
||||
break;
|
||||
case BrtCommentAuthor:
|
||||
authorBuffer.setLength(0);
|
||||
XSSFBUtils.readXLWideString(data, 0, authorBuffer);
|
||||
authors.add(authorBuffer.toString());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public Queue<CellAddress> getAddresses() {
|
||||
return commentAddresses;
|
||||
}
|
||||
|
||||
public XSSFBComment get(CellAddress cellAddress) {
|
||||
if (cellAddress == null) {
|
||||
return null;
|
||||
}
|
||||
return comments.get(cellAddress);
|
||||
}
|
||||
|
||||
private final static class CellAddressComparator implements Comparator<CellAddress> {
|
||||
|
||||
@Override
|
||||
public int compare(CellAddress o1, CellAddress o2) {
|
||||
if (o1.getRow() < o2.getRow()) {
|
||||
return -1;
|
||||
} else if (o1.getRow() > o2.getRow()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getColumn() < o2.getColumn()) {
|
||||
return -1;
|
||||
} else if (o1.getColumn() > o2.getColumn()) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,75 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
|
||||
|
||||
@Internal
|
||||
class XSSFBHeaderFooter {
|
||||
private final String headerFooterTypeLabel;
|
||||
private final boolean isHeader;
|
||||
private String rawString;
|
||||
private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper();
|
||||
|
||||
|
||||
XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) {
|
||||
this.headerFooterTypeLabel = headerFooterTypeLabel;
|
||||
this.isHeader = isHeader;
|
||||
}
|
||||
|
||||
String getHeaderFooterTypeLabel() {
|
||||
return headerFooterTypeLabel;
|
||||
}
|
||||
|
||||
String getRawString() {
|
||||
return rawString;
|
||||
}
|
||||
|
||||
String getString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
String left = headerFooterHelper.getLeftSection(rawString);
|
||||
String center = headerFooterHelper.getCenterSection(rawString);
|
||||
String right = headerFooterHelper.getRightSection(rawString);
|
||||
if (left != null && left.length() > 0) {
|
||||
sb.append(left);
|
||||
}
|
||||
if (center != null && center.length() > 0) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(" ");
|
||||
}
|
||||
sb.append(center);
|
||||
}
|
||||
if (right != null && right.length() > 0) {
|
||||
if (sb.length() > 0) {
|
||||
sb.append(" ");
|
||||
}
|
||||
sb.append(right);
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
void setRawString(String rawString) {
|
||||
this.rawString = rawString;
|
||||
}
|
||||
|
||||
boolean isHeader() {
|
||||
return isHeader;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,87 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
@Internal
|
||||
class XSSFBHeaderFooters {
|
||||
|
||||
public static XSSFBHeaderFooters parse(byte[] data) {
|
||||
boolean diffOddEven = false;
|
||||
boolean diffFirst = false;
|
||||
boolean scaleWDoc = false;
|
||||
boolean alignMargins = false;
|
||||
|
||||
int offset = 2;
|
||||
XSSFBHeaderFooters xssfbHeaderFooter = new XSSFBHeaderFooters();
|
||||
xssfbHeaderFooter.header = new XSSFBHeaderFooter("header", true);
|
||||
xssfbHeaderFooter.footer = new XSSFBHeaderFooter("footer", false);
|
||||
xssfbHeaderFooter.headerEven = new XSSFBHeaderFooter("evenHeader", true);
|
||||
xssfbHeaderFooter.footerEven = new XSSFBHeaderFooter("evenFooter", false);
|
||||
xssfbHeaderFooter.headerFirst = new XSSFBHeaderFooter("firstHeader", true);
|
||||
xssfbHeaderFooter.footerFirst = new XSSFBHeaderFooter("firstFooter", false);
|
||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.header);
|
||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footer);
|
||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerEven);
|
||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.footerEven);
|
||||
offset += readHeaderFooter(data, offset, xssfbHeaderFooter.headerFirst);
|
||||
readHeaderFooter(data, offset, xssfbHeaderFooter.footerFirst);
|
||||
return xssfbHeaderFooter;
|
||||
}
|
||||
|
||||
private static int readHeaderFooter(byte[] data, int offset, XSSFBHeaderFooter headerFooter) {
|
||||
if (offset + 4 >= data.length) {
|
||||
return 0;
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int bytesRead = XSSFBUtils.readXLNullableWideString(data, offset, sb);
|
||||
headerFooter.setRawString(sb.toString());
|
||||
return bytesRead;
|
||||
}
|
||||
|
||||
private XSSFBHeaderFooter header;
|
||||
private XSSFBHeaderFooter footer;
|
||||
private XSSFBHeaderFooter headerEven;
|
||||
private XSSFBHeaderFooter footerEven;
|
||||
private XSSFBHeaderFooter headerFirst;
|
||||
private XSSFBHeaderFooter footerFirst;
|
||||
|
||||
public XSSFBHeaderFooter getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public XSSFBHeaderFooter getFooter() {
|
||||
return footer;
|
||||
}
|
||||
|
||||
public XSSFBHeaderFooter getHeaderEven() {
|
||||
return headerEven;
|
||||
}
|
||||
|
||||
public XSSFBHeaderFooter getFooterEven() {
|
||||
return footerEven;
|
||||
}
|
||||
|
||||
public XSSFBHeaderFooter getHeaderFirst() {
|
||||
return headerFirst;
|
||||
}
|
||||
|
||||
public XSSFBHeaderFooter getFooterFirst() {
|
||||
return footerFirst;
|
||||
}
|
||||
}
|
@ -0,0 +1,181 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.BitSet;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.ss.util.CellRangeAddress;
|
||||
import org.apache.poi.ss.util.CellRangeUtil;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
|
||||
@Internal
|
||||
public class XSSFBHyperlinksTable {
|
||||
|
||||
private final static BitSet RECORDS = new BitSet();
|
||||
|
||||
|
||||
static {
|
||||
RECORDS.set(XSSFBRecordType.BrtHLink.getId());
|
||||
}
|
||||
|
||||
|
||||
private final List<XSSFHyperlinkRecord> hyperlinkRecords = new ArrayList<XSSFHyperlinkRecord>();
|
||||
|
||||
//cache the relId to hyperlink url from the sheet's .rels
|
||||
private Map<String, String> relIdToHyperlink = new HashMap<String, String>();
|
||||
|
||||
public XSSFBHyperlinksTable(PackagePart sheetPart) throws IOException {
|
||||
//load the urls from the sheet .rels
|
||||
loadUrlsFromSheetRels(sheetPart);
|
||||
//now load the hyperlinks from the bottom of the sheet
|
||||
HyperlinkSheetScraper scraper = new HyperlinkSheetScraper(sheetPart.getInputStream());
|
||||
scraper.parse();
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return a map of the hyperlinks. The key is the top left cell address in their CellRange
|
||||
*/
|
||||
public Map<CellAddress, List<XSSFHyperlinkRecord>> getHyperLinks() {
|
||||
Map<CellAddress, List<XSSFHyperlinkRecord>> hyperlinkMap =
|
||||
new TreeMap<CellAddress, List<XSSFHyperlinkRecord>>(new TopLeftCellAddressComparator());
|
||||
for (XSSFHyperlinkRecord hyperlinkRecord : hyperlinkRecords) {
|
||||
CellAddress cellAddress = new CellAddress(hyperlinkRecord.getCellRangeAddress().getFirstRow(),
|
||||
hyperlinkRecord.getCellRangeAddress().getFirstColumn());
|
||||
List<XSSFHyperlinkRecord> list = hyperlinkMap.get(cellAddress);
|
||||
if (list == null) {
|
||||
list = new ArrayList<XSSFHyperlinkRecord>();
|
||||
}
|
||||
list.add(hyperlinkRecord);
|
||||
hyperlinkMap.put(cellAddress, list);
|
||||
}
|
||||
return hyperlinkMap;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @param cellAddress cell address to find
|
||||
* @return null if not a hyperlink
|
||||
*/
|
||||
public List<XSSFHyperlinkRecord> findHyperlinkRecord(CellAddress cellAddress) {
|
||||
List<XSSFHyperlinkRecord> overlapping = null;
|
||||
CellRangeAddress targetCellRangeAddress = new CellRangeAddress(cellAddress.getRow(),
|
||||
cellAddress.getRow(),
|
||||
cellAddress.getColumn(),
|
||||
cellAddress.getColumn());
|
||||
for (XSSFHyperlinkRecord record : hyperlinkRecords) {
|
||||
if (CellRangeUtil.intersect(targetCellRangeAddress, record.getCellRangeAddress()) != CellRangeUtil.NO_INTERSECTION) {
|
||||
if (overlapping == null) {
|
||||
overlapping = new ArrayList<XSSFHyperlinkRecord>();
|
||||
}
|
||||
overlapping.add(record);
|
||||
}
|
||||
}
|
||||
return overlapping;
|
||||
}
|
||||
|
||||
private void loadUrlsFromSheetRels(PackagePart sheetPart) {
|
||||
try {
|
||||
for (PackageRelationship rel : sheetPart.getRelationshipsByType(XSSFRelation.SHEET_HYPERLINKS.getRelation())) {
|
||||
relIdToHyperlink.put(rel.getId(), rel.getTargetURI().toString());
|
||||
}
|
||||
} catch (InvalidFormatException e) {
|
||||
//swallow
|
||||
}
|
||||
}
|
||||
|
||||
private class HyperlinkSheetScraper extends XSSFBParser {
|
||||
|
||||
private XSSFBCellRange hyperlinkCellRange = new XSSFBCellRange();
|
||||
private final StringBuilder xlWideStringBuffer = new StringBuilder();
|
||||
|
||||
HyperlinkSheetScraper(InputStream is) {
|
||||
super(is, RECORDS);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
|
||||
if (recordType != XSSFBRecordType.BrtHLink.getId()) {
|
||||
return;
|
||||
}
|
||||
int offset = 0;
|
||||
String relId = "";
|
||||
String location = "";
|
||||
String toolTip = "";
|
||||
String display = "";
|
||||
|
||||
hyperlinkCellRange = XSSFBCellRange.parse(data, offset, hyperlinkCellRange);
|
||||
offset += XSSFBCellRange.length;
|
||||
xlWideStringBuffer.setLength(0);
|
||||
offset += XSSFBUtils.readXLNullableWideString(data, offset, xlWideStringBuffer);
|
||||
relId = xlWideStringBuffer.toString();
|
||||
xlWideStringBuffer.setLength(0);
|
||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
|
||||
location = xlWideStringBuffer.toString();
|
||||
xlWideStringBuffer.setLength(0);
|
||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
|
||||
toolTip = xlWideStringBuffer.toString();
|
||||
xlWideStringBuffer.setLength(0);
|
||||
offset += XSSFBUtils.readXLWideString(data, offset, xlWideStringBuffer);
|
||||
display = xlWideStringBuffer.toString();
|
||||
CellRangeAddress cellRangeAddress = new CellRangeAddress(hyperlinkCellRange.firstRow, hyperlinkCellRange.lastRow, hyperlinkCellRange.firstCol, hyperlinkCellRange.lastCol);
|
||||
|
||||
String url = relIdToHyperlink.get(relId);
|
||||
if (location == null || location.length() == 0) {
|
||||
location = url;
|
||||
}
|
||||
|
||||
hyperlinkRecords.add(
|
||||
new XSSFHyperlinkRecord(cellRangeAddress, relId, location, toolTip, display)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
private static class TopLeftCellAddressComparator implements Comparator<CellAddress> {
|
||||
|
||||
@Override
|
||||
public int compare(CellAddress o1, CellAddress o2) {
|
||||
if (o1.getRow() < o2.getRow()) {
|
||||
return -1;
|
||||
} else if (o1.getRow() > o2.getRow()) {
|
||||
return 1;
|
||||
}
|
||||
if (o1.getColumn() < o2.getColumn()) {
|
||||
return -1;
|
||||
} else if (o1.getColumn() > o2.getColumn()) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
/**
 * Parse exception while reading an xssfb
 */
public class XSSFBParseException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * @param msg description of what went wrong
     */
    public XSSFBParseException(String msg) {
        super(msg);
    }

    /**
     * @param msg description of what went wrong
     * @param cause the underlying failure, kept so the original stack trace is not lost
     */
    public XSSFBParseException(String msg, Throwable cause) {
        super(msg, cause);
    }
}
|
105
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java
Normal file
105
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBParser.java
Normal file
@ -0,0 +1,105 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.BitSet;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndianInputStream;
|
||||
|
||||
/**
|
||||
* Experimental parser for Microsoft's ooxml xssfb format.
|
||||
* Not thread safe, obviously. Need to create a new one
|
||||
* for each thread.
|
||||
*/
|
||||
@Internal
|
||||
public abstract class XSSFBParser {
|
||||
|
||||
private final LittleEndianInputStream is;
|
||||
private final BitSet records;
|
||||
|
||||
public XSSFBParser(InputStream is) {
|
||||
this.is = new LittleEndianInputStream(is);
|
||||
records = null;
|
||||
}
|
||||
|
||||
XSSFBParser(InputStream is, BitSet bitSet) {
|
||||
this.is = new LittleEndianInputStream(is);
|
||||
records = bitSet;
|
||||
}
|
||||
|
||||
public void parse() throws IOException {
|
||||
|
||||
while (true) {
|
||||
int bInt = is.read();
|
||||
if (bInt == -1) {
|
||||
return;
|
||||
}
|
||||
readNext((byte) bInt);
|
||||
}
|
||||
}
|
||||
|
||||
private void readNext(byte b1) throws IOException {
|
||||
int recordId = 0;
|
||||
|
||||
//if highest bit == 1
|
||||
if ((b1 >> 7 & 1) == 1) {
|
||||
byte b2 = is.readByte();
|
||||
b1 &= ~(1<<7); //unset highest bit
|
||||
b2 &= ~(1<<7); //unset highest bit (if it exists?)
|
||||
recordId = (128*(int)b2)+(int)b1;
|
||||
} else {
|
||||
recordId = (int)b1;
|
||||
}
|
||||
|
||||
long recordLength = 0;
|
||||
int i = 0;
|
||||
boolean halt = false;
|
||||
while (i < 4 && ! halt) {
|
||||
byte b = is.readByte();
|
||||
halt = (b >> 7 & 1) == 0; //if highest bit !=1 then continue
|
||||
b &= ~(1<<7);
|
||||
recordLength += (int)b << (i*7); //multiply by 128^i
|
||||
i++;
|
||||
|
||||
}
|
||||
if (records == null || records.get(recordId)) {
|
||||
//add sanity check for length?
|
||||
byte[] buff = new byte[(int) recordLength];
|
||||
is.readFully(buff);
|
||||
handleRecord(recordId, buff);
|
||||
} else {
|
||||
long length = is.skip(recordLength);
|
||||
if (length != recordLength) {
|
||||
throw new XSSFBParseException("End of file reached before expected.\t"+
|
||||
"Tried to skip "+recordLength + ", but only skipped "+length);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//It hurts, hurts, hurts to create a new byte array for every record.
|
||||
//However, on a large Excel spreadsheet, this parser was 1/3 faster than
|
||||
//the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf.
|
||||
//The code is far cleaner to have the parser read all
|
||||
//of the data rather than having every component promise that it read
|
||||
//the correct amount.
|
||||
abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException;
|
||||
|
||||
}
|
@ -0,0 +1,92 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
@Internal
|
||||
/**
 * The xlsb record types this code knows about, each paired with the numeric
 * record id used in the binary stream.  Ids without a constant resolve to
 * {@link #Unimplemented} via {@link #lookup(int)}.
 */
public enum XSSFBRecordType {

    BrtCellBlank(1),
    BrtCellRk(2),
    BrtCellError(3),
    BrtCellBool(4),
    BrtCellReal(5),
    BrtCellSt(6),
    BrtCellIsst(7),
    BrtFmlaString(8),
    BrtFmlaNum(9),
    BrtFmlaBool(10),
    BrtFmlaError(11),
    BrtRowHdr(0),
    BrtCellRString(62),
    BrtBeginSheet(129),
    BrtWsProp(147),
    BrtWsDim(148),
    BrtColInfo(60),
    BrtBeginSheetData(145),
    BrtEndSheetData(146),
    BrtHLink(494),
    BrtBeginHeaderFooter(479),

    //comments
    BrtBeginCommentAuthors(630),
    BrtEndCommentAuthors(631),
    BrtCommentAuthor(632),
    BrtBeginComment(635),
    BrtCommentText(637),
    BrtEndComment(636),
    //styles table
    BrtXf(47),
    BrtFmt(44),
    BrtBeginFmts(615),
    BrtEndFmts(616),
    BrtBeginCellXFs(617),
    BrtEndCellXFs(618),
    BrtBeginCellStyleXFS(626),
    BrtEndCellStyleXFS(627),

    //stored strings table
    BrtSstItem(19), //stored strings items
    BrtBeginSst(159), //stored strings begin sst
    BrtEndSst(160), //stored strings end sst

    BrtBundleSh(156), //defines worksheet in wb part
    Unimplemented(-1);

    /**
     * Id-indexed table built once at class initialisation, so that
     * {@link #lookup(int)} neither clones {@code values()} nor scans it for
     * every record that is parsed.  Slots without a constant stay {@code null}.
     */
    private static final XSSFBRecordType[] BY_ID;

    static {
        XSSFBRecordType[] all = values();
        int maxId = -1;
        for (XSSFBRecordType t : all) {
            maxId = Math.max(maxId, t.id);
        }
        BY_ID = new XSSFBRecordType[maxId + 1];
        for (XSSFBRecordType t : all) {
            // negative ids (Unimplemented) are not indexable; on a duplicate id the
            // first declared constant wins, as with a linear scan in declaration order
            if (t.id >= 0 && BY_ID[t.id] == null) {
                BY_ID[t.id] = t;
            }
        }
    }

    private final int id;

    XSSFBRecordType(int id) {
        this.id = id;
    }

    /**
     * @return the numeric record id of this type; {@code -1} for {@link #Unimplemented}
     */
    public int getId() {
        return id;
    }

    /**
     * Resolves a numeric record id to its constant.
     *
     * @param id record id as read from the stream
     * @return the matching type, or {@link #Unimplemented} if there is none
     */
    public static XSSFBRecordType lookup(int id) {
        if (id < 0 || id >= BY_ID.length) {
            return Unimplemented;
        }
        XSSFBRecordType type = BY_ID[id];
        return (type == null) ? Unimplemented : type;
    }

}
|
85
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java
Normal file
85
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRelation.java
Normal file
@ -0,0 +1,85 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.apache.poi.POIXMLRelation;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackagePartName;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* Need to have this mirror class of {@link org.apache.poi.xssf.usermodel.XSSFRelation}
|
||||
* because of conflicts with regular ooxml relations.
|
||||
* If we failed to break this into a separate class, in the cases of SharedStrings and Styles,
|
||||
* 2 parts would exist, and "Packages shall not contain equivalent part names..."
|
||||
* <p>
|
||||
* Also, we need to avoid the possibility of breaking the marshalling process for xml.
|
||||
*/
|
||||
@Internal
|
||||
public class XSSFBRelation extends POIXMLRelation {
|
||||
private static final POILogger log = POILogFactory.getLogger(XSSFBRelation.class);
|
||||
|
||||
static final XSSFBRelation SHARED_STRINGS_BINARY = new XSSFBRelation(
|
||||
"application/vnd.ms-excel.sharedStrings",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings",
|
||||
"/xl/sharedStrings.bin",
|
||||
null
|
||||
);
|
||||
|
||||
public static final XSSFBRelation STYLES_BINARY = new XSSFBRelation(
|
||||
"application/vnd.ms-excel.styles",
|
||||
PackageRelationshipTypes.STYLE_PART,
|
||||
"/xl/styles.bin",
|
||||
null
|
||||
);
|
||||
|
||||
private XSSFBRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
||||
super(type, rel, defaultName, cls);
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches the InputStream to read the contents, based
|
||||
* of the specified core part, for which we are defined
|
||||
* as a suitable relationship
|
||||
*/
|
||||
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
|
||||
PackageRelationshipCollection prc =
|
||||
corePart.getRelationshipsByType(getRelation());
|
||||
Iterator<PackageRelationship> it = prc.iterator();
|
||||
if (it.hasNext()) {
|
||||
PackageRelationship rel = it.next();
|
||||
PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
|
||||
PackagePart part = corePart.getPackage().getPart(relName);
|
||||
return part.getInputStream();
|
||||
}
|
||||
log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
|
||||
return null;
|
||||
}
|
||||
}
|
47
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java
Normal file
47
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBRichStr.java
Normal file
@ -0,0 +1,47 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
@Internal
|
||||
class XSSFBRichStr {
|
||||
|
||||
public static XSSFBRichStr build(byte[] bytes, int offset) throws XSSFBParseException {
|
||||
byte first = bytes[offset];
|
||||
boolean dwSizeStrRunExists = (first >> 7 & 1) == 1;//first bit == 1?
|
||||
boolean phoneticExists = (first >> 6 & 1) == 1;//second bit == 1?
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
int read = XSSFBUtils.readXLWideString(bytes, offset+1, sb);
|
||||
//TODO: parse phonetic strings.
|
||||
return new XSSFBRichStr(sb.toString(), "");
|
||||
}
|
||||
|
||||
private final String string;
|
||||
private final String phoneticString;
|
||||
|
||||
XSSFBRichStr(String string, String phoneticString) {
|
||||
this.string = string;
|
||||
this.phoneticString = phoneticString;
|
||||
}
|
||||
|
||||
public String getString() {
|
||||
return string;
|
||||
}
|
||||
}
|
@ -0,0 +1,80 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.ss.usermodel.Font;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
|
||||
/**
|
||||
* Wrapper class around String so that we can use it in Comment.
|
||||
* Nothing has been implemented yet except for {@link #getString()}.
|
||||
*/
|
||||
@Internal
|
||||
class XSSFBRichTextString extends XSSFRichTextString {
|
||||
private final String string;
|
||||
|
||||
XSSFBRichTextString(String string) {
|
||||
this.string = string;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void applyFont(int startIndex, int endIndex, short fontIndex) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void applyFont(int startIndex, int endIndex, Font font) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void applyFont(Font font) {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clearFormatting() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getString() {
|
||||
return string;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return string.length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int numFormattingRuns() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getIndexOfFormattingRun(int index) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void applyFont(short fontIndex) {
|
||||
|
||||
}
|
||||
}
|
@ -0,0 +1,137 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
@Internal
|
||||
public class XSSFBSharedStringsTable {
|
||||
|
||||
/**
|
||||
* An integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*/
|
||||
private int count;
|
||||
|
||||
/**
|
||||
* An integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*/
|
||||
private int uniqueCount;
|
||||
|
||||
/**
|
||||
* The shared strings table.
|
||||
*/
|
||||
private List<String> strings = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
|
||||
* @throws IOException If reading the data from the package fails.
|
||||
* @throws SAXException if parsing the XML data fails.
|
||||
*/
|
||||
public XSSFBSharedStringsTable(OPCPackage pkg)
|
||||
throws IOException, SAXException {
|
||||
ArrayList<PackagePart> parts =
|
||||
pkg.getPartsByContentType(XSSFBRelation.SHARED_STRINGS_BINARY.getContentType());
|
||||
|
||||
// Some workbooks have no shared strings table.
|
||||
if (parts.size() > 0) {
|
||||
PackagePart sstPart = parts.get(0);
|
||||
|
||||
readFrom(sstPart.getInputStream());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Like POIXMLDocumentPart constructor
|
||||
*
|
||||
* @since POI 3.14-Beta3
|
||||
*/
|
||||
XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException {
|
||||
readFrom(part.getInputStream());
|
||||
}
|
||||
|
||||
private void readFrom(InputStream inputStream) throws IOException {
|
||||
SSTBinaryReader reader = new SSTBinaryReader(inputStream);
|
||||
reader.parse();
|
||||
}
|
||||
|
||||
public List<String> getItems() {
|
||||
return strings;
|
||||
}
|
||||
|
||||
public String getEntryAt(int i) {
|
||||
return strings.get(i);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
public int getCount() {
|
||||
return this.count;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
public int getUniqueCount() {
|
||||
return this.uniqueCount;
|
||||
}
|
||||
|
||||
private class SSTBinaryReader extends XSSFBParser {
|
||||
|
||||
SSTBinaryReader(InputStream is) {
|
||||
super(is);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
|
||||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
|
||||
|
||||
switch (type) {
|
||||
case BrtSstItem:
|
||||
XSSFBRichStr rstr = XSSFBRichStr.build(data, 0);
|
||||
strings.add(rstr.getString());
|
||||
break;
|
||||
case BrtBeginSst:
|
||||
count = (int) LittleEndian.getUInt(data,0);
|
||||
uniqueCount = (int) LittleEndian.getUInt(data, 4);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
329
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java
Normal file
329
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java
Normal file
@ -0,0 +1,329 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.Queue;
|
||||
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
|
||||
@Internal
|
||||
public class XSSFBSheetHandler extends XSSFBParser {
|
||||
|
||||
private final static int CHECK_ALL_ROWS = -1;
|
||||
|
||||
private final XSSFBSharedStringsTable stringsTable;
|
||||
private final XSSFSheetXMLHandler.SheetContentsHandler handler;
|
||||
private final XSSFBStylesTable styles;
|
||||
private final XSSFBCommentsTable comments;
|
||||
private final DataFormatter dataFormatter;
|
||||
private final boolean formulasNotResults;//TODO: implement this
|
||||
|
||||
private int lastEndedRow = -1;
|
||||
private int lastStartedRow = -1;
|
||||
private int currentRow = 0;
|
||||
private byte[] rkBuffer = new byte[8];
|
||||
private XSSFBCellRange hyperlinkCellRange = null;
|
||||
private StringBuilder xlWideStringBuffer = new StringBuilder();
|
||||
|
||||
private final XSSFBCellHeader cellBuffer = new XSSFBCellHeader();
|
||||
public XSSFBSheetHandler(InputStream is,
|
||||
XSSFBStylesTable styles,
|
||||
XSSFBCommentsTable comments,
|
||||
XSSFBSharedStringsTable strings,
|
||||
XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
|
||||
DataFormatter dataFormatter,
|
||||
boolean formulasNotResults) {
|
||||
super(is);
|
||||
this.styles = styles;
|
||||
this.comments = comments;
|
||||
this.stringsTable = strings;
|
||||
this.handler = sheetContentsHandler;
|
||||
this.dataFormatter = dataFormatter;
|
||||
this.formulasNotResults = formulasNotResults;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int id, byte[] data) throws XSSFBParseException {
|
||||
XSSFBRecordType type = XSSFBRecordType.lookup(id);
|
||||
|
||||
switch(type) {
|
||||
case BrtRowHdr:
|
||||
long rw = LittleEndian.getUInt(data, 0);
|
||||
if (rw > 0x00100000L) {//could make sure this is larger than currentRow, according to spec?
|
||||
throw new XSSFBParseException("Row number beyond allowable range: "+rw);
|
||||
}
|
||||
currentRow = (int)rw;
|
||||
checkMissedComments(currentRow);
|
||||
startRow(currentRow);
|
||||
break;
|
||||
case BrtCellIsst:
|
||||
handleBrtCellIsst(data);
|
||||
break;
|
||||
case BrtCellSt: //TODO: needs test
|
||||
handleCellSt(data);
|
||||
break;
|
||||
case BrtCellRk:
|
||||
handleCellRk(data);
|
||||
break;
|
||||
case BrtCellReal:
|
||||
handleCellReal(data);
|
||||
break;
|
||||
case BrtCellBool:
|
||||
handleBoolean(data);
|
||||
break;
|
||||
case BrtCellError:
|
||||
handleCellError(data);
|
||||
break;
|
||||
case BrtCellBlank:
|
||||
beforeCellValue(data);//read cell info and check for missing comments
|
||||
break;
|
||||
case BrtFmlaString:
|
||||
handleFmlaString(data);
|
||||
break;
|
||||
case BrtFmlaNum:
|
||||
handleFmlaNum(data);
|
||||
break;
|
||||
case BrtFmlaError:
|
||||
handleFmlaError(data);
|
||||
break;
|
||||
//TODO: All the PCDI and PCDIA
|
||||
case BrtEndSheetData:
|
||||
checkMissedComments(CHECK_ALL_ROWS);
|
||||
endRow(lastStartedRow);
|
||||
break;
|
||||
case BrtBeginHeaderFooter:
|
||||
handleHeaderFooter(data);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void beforeCellValue(byte[] data) {
|
||||
XSSFBCellHeader.parse(data, 0, currentRow, cellBuffer);
|
||||
checkMissedComments(currentRow, cellBuffer.getColNum());
|
||||
}
|
||||
|
||||
private void handleCellValue(String formattedValue) {
|
||||
CellAddress cellAddress = new CellAddress(currentRow, cellBuffer.getColNum());
|
||||
XSSFBComment comment = null;
|
||||
if (comments != null) {
|
||||
comment = comments.get(cellAddress);
|
||||
}
|
||||
handler.cell(cellAddress.formatAsString(), formattedValue, comment);
|
||||
}
|
||||
|
||||
private void handleFmlaNum(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
//xNum
|
||||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
|
||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
|
||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
|
||||
handleCellValue(formattedVal);
|
||||
}
|
||||
|
||||
private void handleCellSt(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
xlWideStringBuffer.setLength(0);
|
||||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
|
||||
handleCellValue(xlWideStringBuffer.toString());
|
||||
}
|
||||
|
||||
private void handleFmlaString(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
xlWideStringBuffer.setLength(0);
|
||||
XSSFBUtils.readXLWideString(data, XSSFBCellHeader.length, xlWideStringBuffer);
|
||||
handleCellValue(xlWideStringBuffer.toString());
|
||||
}
|
||||
|
||||
private void handleCellError(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
//TODO, read byte to figure out the type of error
|
||||
handleCellValue("ERROR");
|
||||
}
|
||||
|
||||
private void handleFmlaError(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
//TODO, read byte to figure out the type of error
|
||||
handleCellValue("ERROR");
|
||||
}
|
||||
|
||||
private void handleBoolean(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
String formattedVal = (data[XSSFBCellHeader.length] == 1) ? "TRUE" : "FALSE";
|
||||
handleCellValue(formattedVal);
|
||||
}
|
||||
|
||||
private void handleCellReal(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
//xNum
|
||||
double val = LittleEndian.getDouble(data, XSSFBCellHeader.length);
|
||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
|
||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
|
||||
handleCellValue(formattedVal);
|
||||
}
|
||||
|
||||
private void handleCellRk(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
double val = rkNumber(data, XSSFBCellHeader.length);
|
||||
String formatString = styles.getNumberFormatString(cellBuffer.getStyleIdx());
|
||||
String formattedVal = dataFormatter.formatRawCellContents(val, cellBuffer.getStyleIdx(), formatString);
|
||||
handleCellValue(formattedVal);
|
||||
}
|
||||
|
||||
private void handleBrtCellIsst(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
long idx = LittleEndian.getUInt(data, XSSFBCellHeader.length);
|
||||
//check for out of range, buffer overflow
|
||||
|
||||
XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt((int)idx));
|
||||
handleCellValue(rtss.getString());
|
||||
}
|
||||
|
||||
|
||||
private void handleHeaderFooter(byte[] data) {
|
||||
XSSFBHeaderFooters headerFooter = XSSFBHeaderFooters.parse(data);
|
||||
outputHeaderFooter(headerFooter.getHeader());
|
||||
outputHeaderFooter(headerFooter.getFooter());
|
||||
outputHeaderFooter(headerFooter.getHeaderEven());
|
||||
outputHeaderFooter(headerFooter.getFooterEven());
|
||||
outputHeaderFooter(headerFooter.getHeaderFirst());
|
||||
outputHeaderFooter(headerFooter.getFooterFirst());
|
||||
}
|
||||
|
||||
private void outputHeaderFooter(XSSFBHeaderFooter headerFooter) {
|
||||
String text = headerFooter.getString();
|
||||
if (text != null && text.trim().length() > 0) {
|
||||
handler.headerFooter(text, headerFooter.isHeader(), headerFooter.getHeaderFooterTypeLabel());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
//at start of next cell or end of row, return the cellAddress if it equals currentRow and col
|
||||
private void checkMissedComments(int currentRow, int colNum) {
|
||||
if (comments == null) {
|
||||
return;
|
||||
}
|
||||
Queue<CellAddress> queue = comments.getAddresses();
|
||||
while (queue.size() > 0) {
|
||||
CellAddress cellAddress = queue.peek();
|
||||
if (cellAddress.getRow() == currentRow && cellAddress.getColumn() < colNum) {
|
||||
cellAddress = queue.remove();
|
||||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
|
||||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() == colNum) {
|
||||
queue.remove();
|
||||
return;
|
||||
} else if (cellAddress.getRow() == currentRow && cellAddress.getColumn() > colNum) {
|
||||
return;
|
||||
} else if (cellAddress.getRow() > currentRow) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//check for anything from rows before
|
||||
private void checkMissedComments(int currentRow) {
|
||||
if (comments == null) {
|
||||
return;
|
||||
}
|
||||
Queue<CellAddress> queue = comments.getAddresses();
|
||||
int lastInterpolatedRow = -1;
|
||||
while (queue.size() > 0) {
|
||||
CellAddress cellAddress = queue.peek();
|
||||
if (currentRow == CHECK_ALL_ROWS || cellAddress.getRow() < currentRow) {
|
||||
cellAddress = queue.remove();
|
||||
if (cellAddress.getRow() != lastInterpolatedRow) {
|
||||
startRow(cellAddress.getRow());
|
||||
}
|
||||
dumpEmptyCellComment(cellAddress, comments.get(cellAddress));
|
||||
lastInterpolatedRow = cellAddress.getRow();
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void startRow(int row) {
|
||||
if (row == lastStartedRow) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (lastStartedRow != lastEndedRow) {
|
||||
endRow(lastStartedRow);
|
||||
}
|
||||
handler.startRow(row);
|
||||
lastStartedRow = row;
|
||||
}
|
||||
|
||||
private void endRow(int row) {
|
||||
if (lastEndedRow == row) {
|
||||
return;
|
||||
}
|
||||
handler.endRow(row);
|
||||
lastEndedRow = row;
|
||||
}
|
||||
|
||||
private void dumpEmptyCellComment(CellAddress cellAddress, XSSFBComment comment) {
|
||||
handler.cell(cellAddress.formatAsString(), null, comment);
|
||||
}
|
||||
|
||||
private double rkNumber(byte[] data, int offset) {
|
||||
//see 2.5.122 for this abomination
|
||||
byte b0 = data[offset];
|
||||
String s = Integer.toString(b0, 2);
|
||||
boolean numDivBy100 = ((b0 & 1) == 1); // else as is
|
||||
boolean floatingPoint = ((b0 >> 1 & 1) == 0); // else signed integer
|
||||
|
||||
//unset highest 2 bits
|
||||
b0 &= ~1;
|
||||
b0 &= ~(1<<1);
|
||||
|
||||
rkBuffer[4] = b0;
|
||||
for (int i = 1; i < 4; i++) {
|
||||
rkBuffer[i+4] = data[offset+i];
|
||||
}
|
||||
double d = 0.0;
|
||||
if (floatingPoint) {
|
||||
d = LittleEndian.getDouble(rkBuffer);
|
||||
} else {
|
||||
d = LittleEndian.getInt(rkBuffer);
|
||||
}
|
||||
d = (numDivBy100) ? d/100 : d;
|
||||
return d;
|
||||
}
|
||||
|
||||
/**
|
||||
* You need to implement this to handle the results
|
||||
* of the sheet parsing.
|
||||
*/
|
||||
public interface SheetContentsHandler extends XSSFSheetXMLHandler.SheetContentsHandler {
|
||||
/**
|
||||
* A cell, with the given formatted value (may be null),
|
||||
* a url (may be null), a toolTip (may be null)
|
||||
* and possibly a comment (may be null), was encountered */
|
||||
void hyperlinkCell(String cellReference, String formattedValue, String url, String toolTip, XSSFComment comment);
|
||||
}
|
||||
}
|
101
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java
Normal file
101
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBStylesTable.java
Normal file
@ -0,0 +1,101 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
|
||||
import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
@Internal
|
||||
public class XSSFBStylesTable extends XSSFBParser {
|
||||
|
||||
private final SortedMap<Short, String> numberFormats = new TreeMap<Short,String>();
|
||||
private final List<Short> styleIds = new ArrayList<Short>();
|
||||
|
||||
private boolean inCellXFS = false;
|
||||
private boolean inFmts = false;
|
||||
public XSSFBStylesTable(InputStream is) throws IOException {
|
||||
super(is);
|
||||
parse();
|
||||
}
|
||||
|
||||
String getNumberFormatString(int idx) {
|
||||
if (numberFormats.containsKey(styleIds.get((short)idx))) {
|
||||
return numberFormats.get(styleIds.get((short)idx));
|
||||
}
|
||||
|
||||
return BuiltinFormats.getBuiltinFormat(styleIds.get((short)idx));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
|
||||
XSSFBRecordType type = XSSFBRecordType.lookup(recordType);
|
||||
switch (type) {
|
||||
case BrtBeginCellXFs:
|
||||
inCellXFS = true;
|
||||
break;
|
||||
case BrtEndCellXFs:
|
||||
inCellXFS = false;
|
||||
break;
|
||||
case BrtXf:
|
||||
if (inCellXFS) {
|
||||
handleBrtXFInCellXF(data);
|
||||
}
|
||||
break;
|
||||
case BrtBeginFmts:
|
||||
inFmts = true;
|
||||
break;
|
||||
case BrtEndFmts:
|
||||
inFmts = false;
|
||||
break;
|
||||
case BrtFmt:
|
||||
if (inFmts) {
|
||||
handleFormat(data);
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void handleFormat(byte[] data) {
|
||||
int ifmt = data[0] & 0xFF;
|
||||
if (ifmt > Short.MAX_VALUE) {
|
||||
throw new POIXMLException("Format id must be a short");
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
XSSFBUtils.readXLWideString(data, 2, sb);
|
||||
String fmt = sb.toString();
|
||||
numberFormats.put((short)ifmt, fmt);
|
||||
}
|
||||
|
||||
private void handleBrtXFInCellXF(byte[] data) {
|
||||
int ifmtOffset = 2;
|
||||
//int ifmtLength = 2;
|
||||
|
||||
//numFmtId in xml terms
|
||||
int ifmt = data[ifmtOffset] & 0xFF;//the second byte is ignored
|
||||
styleIds.add((short)ifmt);
|
||||
}
|
||||
}
|
108
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java
Normal file
108
src/ooxml/java/org/apache/poi/xssf/binary/XSSFBUtils.java
Normal file
@ -0,0 +1,108 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
import org.apache.poi.POIXMLException;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
@Internal
|
||||
public class XSSFBUtils {
|
||||
|
||||
/**
|
||||
* Reads an XLNullableWideString.
|
||||
* @param data data from which to read
|
||||
* @param offset in data from which to start
|
||||
* @param sb buffer to which to write. You must setLength(0) before calling!
|
||||
* @return number of bytes read
|
||||
* @throws XSSFBParseException if there was an exception during reading
|
||||
*/
|
||||
static int readXLNullableWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
|
||||
long numChars = LittleEndian.getUInt(data, offset);
|
||||
if (numChars < 0) {
|
||||
throw new XSSFBParseException("too few chars to read");
|
||||
} else if (numChars == 0xFFFFFFFFL) { //this means null value (2.5.166), do not read any bytes!!!
|
||||
return 0;
|
||||
} else if (numChars > 0xFFFFFFFFL) {
|
||||
throw new XSSFBParseException("too many chars to read");
|
||||
}
|
||||
|
||||
int numBytes = 2*(int)numChars;
|
||||
offset += 4;
|
||||
if (offset+numBytes > data.length) {
|
||||
throw new XSSFBParseException("trying to read beyond data length:" +
|
||||
"offset="+offset+", numBytes="+numBytes+", data.length="+data.length);
|
||||
}
|
||||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
|
||||
numBytes+=4;
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reads an XLNullableWideString.
|
||||
* @param data data from which to read
|
||||
* @param offset in data from which to start
|
||||
* @param sb buffer to which to write. You must setLength(0) before calling!
|
||||
* @return number of bytes read
|
||||
* @throws XSSFBParseException if there was an exception while trying to read the string
|
||||
*/
|
||||
public static int readXLWideString(byte[] data, int offset, StringBuilder sb) throws XSSFBParseException {
|
||||
long numChars = LittleEndian.getUInt(data, offset);
|
||||
if (numChars < 0) {
|
||||
throw new XSSFBParseException("too few chars to read");
|
||||
} else if (numChars > 0xFFFFFFFFL) {
|
||||
throw new XSSFBParseException("too many chars to read");
|
||||
}
|
||||
int numBytes = 2*(int)numChars;
|
||||
offset += 4;
|
||||
if (offset+numBytes > data.length) {
|
||||
throw new XSSFBParseException("trying to read beyond data length");
|
||||
}
|
||||
sb.append(new String(data, offset, numBytes, Charset.forName("UTF-16LE")));
|
||||
numBytes+=4;
|
||||
return numBytes;
|
||||
}
|
||||
|
||||
static int castToInt(long val) {
|
||||
if (val < Integer.MAX_VALUE && val > Integer.MIN_VALUE) {
|
||||
return (int)val;
|
||||
}
|
||||
throw new POIXMLException("val ("+val+") can't be cast to int");
|
||||
}
|
||||
|
||||
static short castToShort(int val) {
|
||||
if (val < Short.MAX_VALUE && val > Short.MIN_VALUE) {
|
||||
return (short)val;
|
||||
}
|
||||
throw new POIXMLException("val ("+val+") can't be cast to short");
|
||||
|
||||
}
|
||||
|
||||
//TODO: move to LittleEndian?
|
||||
static int get24BitInt( byte[] data, int offset) {
|
||||
int i = offset;
|
||||
int b0 = data[i++] & 0xFF;
|
||||
int b1 = data[i++] & 0xFF;
|
||||
int b2 = data[i] & 0xFF;
|
||||
return ( b2 << 16 ) + ( b1 << 8 ) + b0;
|
||||
}
|
||||
}
|
@ -0,0 +1,117 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import org.apache.poi.ss.util.CellRangeAddress;
|
||||
import org.apache.poi.util.Internal;
|
||||
|
||||
/**
|
||||
* This is a read only record that maintains information about
|
||||
* a hyperlink. In OOXML land, this information has to be merged
|
||||
* from 1) the sheet's .rels to get the url and 2) from after the
|
||||
* sheet data in they hyperlink section.
|
||||
*
|
||||
* The {@link #display} is often empty and should be filled from
|
||||
* the contents of the anchor cell.
|
||||
*
|
||||
*/
|
||||
@Internal
|
||||
public class XSSFHyperlinkRecord {
|
||||
|
||||
private final CellRangeAddress cellRangeAddress;
|
||||
private final String relId;
|
||||
private String location;
|
||||
private String toolTip;
|
||||
private String display;
|
||||
|
||||
XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
|
||||
this.cellRangeAddress = cellRangeAddress;
|
||||
this.relId = relId;
|
||||
this.location = location;
|
||||
this.toolTip = toolTip;
|
||||
this.display = display;
|
||||
}
|
||||
|
||||
void setLocation(String location) {
|
||||
this.location = location;
|
||||
}
|
||||
|
||||
void setToolTip(String toolTip) {
|
||||
this.toolTip = toolTip;
|
||||
}
|
||||
|
||||
void setDisplay(String display) {
|
||||
this.display = display;
|
||||
}
|
||||
|
||||
CellRangeAddress getCellRangeAddress() {
|
||||
return cellRangeAddress;
|
||||
}
|
||||
|
||||
public String getRelId() {
|
||||
return relId;
|
||||
}
|
||||
|
||||
public String getLocation() {
|
||||
return location;
|
||||
}
|
||||
|
||||
public String getToolTip() {
|
||||
return toolTip;
|
||||
}
|
||||
|
||||
public String getDisplay() {
|
||||
return display;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
XSSFHyperlinkRecord that = (XSSFHyperlinkRecord) o;
|
||||
|
||||
if (cellRangeAddress != null ? !cellRangeAddress.equals(that.cellRangeAddress) : that.cellRangeAddress != null)
|
||||
return false;
|
||||
if (relId != null ? !relId.equals(that.relId) : that.relId != null) return false;
|
||||
if (location != null ? !location.equals(that.location) : that.location != null) return false;
|
||||
if (toolTip != null ? !toolTip.equals(that.toolTip) : that.toolTip != null) return false;
|
||||
return display != null ? display.equals(that.display) : that.display == null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = cellRangeAddress != null ? cellRangeAddress.hashCode() : 0;
|
||||
result = 31 * result + (relId != null ? relId.hashCode() : 0);
|
||||
result = 31 * result + (location != null ? location.hashCode() : 0);
|
||||
result = 31 * result + (toolTip != null ? toolTip.hashCode() : 0);
|
||||
result = 31 * result + (display != null ? display.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "XSSFHyperlinkRecord{" +
|
||||
"cellRangeAddress=" + cellRangeAddress +
|
||||
", relId='" + relId + '\'' +
|
||||
", location='" + location + '\'' +
|
||||
", toolTip='" + toolTip + '\'' +
|
||||
", display='" + display + '\'' +
|
||||
'}';
|
||||
}
|
||||
}
|
44
src/ooxml/java/org/apache/poi/xssf/binary/package.html
Normal file
44
src/ooxml/java/org/apache/poi/xssf/binary/package.html
Normal file
@ -0,0 +1,44 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
|
||||
<!--
|
||||
====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
====================================================================
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
</head>
|
||||
<body bgcolor="white">
|
||||
|
||||
<p>The org.apache.poi.xssf.binary package includes necessary underlying components
|
||||
for streaming/read-only processing of xlsb files.
|
||||
</p>
|
||||
<p>
|
||||
POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader
|
||||
in o.a.p.xssf.eventusermodel.
|
||||
</p>
|
||||
<p>
|
||||
This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes
|
||||
have been marked @Internal and the API is subject to change.
|
||||
</p>
|
||||
<h2>Related Documentation</h2>
|
||||
|
||||
For overviews, tutorials, examples, guides, and tool documentation, please see:
|
||||
<ul>
|
||||
<li><a href="http://poi.apache.org">Apache POI Project</a>
|
||||
</ul>
|
||||
|
||||
</body>
|
||||
</html>
|
@ -0,0 +1,172 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.eventusermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackagePartName;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.xssf.binary.XSSFBCommentsTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBParseException;
|
||||
import org.apache.poi.xssf.binary.XSSFBParser;
|
||||
import org.apache.poi.xssf.binary.XSSFBRecordType;
|
||||
import org.apache.poi.xssf.binary.XSSFBRelation;
|
||||
import org.apache.poi.xssf.binary.XSSFBStylesTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBUtils;
|
||||
import org.apache.poi.xssf.model.CommentsTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
|
||||
/**
|
||||
* Reader for xlsb files.
|
||||
*/
|
||||
public class XSSFBReader extends XSSFReader {
|
||||
/**
|
||||
* Creates a new XSSFReader, for the given package
|
||||
*
|
||||
* @param pkg opc package
|
||||
*/
|
||||
public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
|
||||
super(pkg);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an Iterator which will let you get at all the
|
||||
* different Sheets in turn.
|
||||
* Each sheet's InputStream is only opened when fetched
|
||||
* from the Iterator. It's up to you to close the
|
||||
* InputStreams when done with each one.
|
||||
*/
|
||||
@Override
|
||||
public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
|
||||
return new SheetIterator(workbookPart);
|
||||
}
|
||||
|
||||
public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
|
||||
ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
|
||||
if(parts.size() == 0) return null;
|
||||
|
||||
// Create the Styles Table, and associate the Themes if present
|
||||
return new XSSFBStylesTable(parts.get(0).getInputStream());
|
||||
|
||||
}
|
||||
|
||||
|
||||
public static class SheetIterator extends XSSFReader.SheetIterator {
|
||||
|
||||
/**
|
||||
* Construct a new SheetIterator
|
||||
*
|
||||
* @param wb package part holding workbook.xml
|
||||
*/
|
||||
private SheetIterator(PackagePart wb) throws IOException {
|
||||
super(wb);
|
||||
}
|
||||
|
||||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
|
||||
SheetRefLoader sheetRefLoader = new SheetRefLoader(wb.getInputStream());
|
||||
sheetRefLoader.parse();
|
||||
return sheetRefLoader.getSheets().iterator();
|
||||
}
|
||||
|
||||
/**
|
||||
* Not supported by XSSFBReader's SheetIterator.
|
||||
* Please use {@link #getXSSFBSheetComments()} instead.
|
||||
* @return nothing, always throws IllegalArgumentException!
|
||||
*/
|
||||
@Override
|
||||
public CommentsTable getSheetComments() {
|
||||
throw new IllegalArgumentException("Please use getXSSFBSheetComments");
|
||||
}
|
||||
|
||||
public XSSFBCommentsTable getXSSFBSheetComments() {
|
||||
PackagePart sheetPkg = getSheetPart();
|
||||
|
||||
// Do we have a comments relationship? (Only ever one if so)
|
||||
try {
|
||||
PackageRelationshipCollection commentsList =
|
||||
sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
|
||||
if (commentsList.size() > 0) {
|
||||
PackageRelationship comments = commentsList.getRelationship(0);
|
||||
if (comments == null || comments.getTargetURI() == null) {
|
||||
return null;
|
||||
}
|
||||
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
|
||||
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
|
||||
return new XSSFBCommentsTable(commentsPart.getInputStream());
|
||||
}
|
||||
} catch (InvalidFormatException e) {
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static class SheetRefLoader extends XSSFBParser {
|
||||
List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>();
|
||||
|
||||
private SheetRefLoader(InputStream is) {
|
||||
super(is);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
|
||||
if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
|
||||
addWorksheet(data);
|
||||
}
|
||||
}
|
||||
|
||||
private void addWorksheet(byte[] data) {
|
||||
int offset = 0;
|
||||
//this is the sheet state #2.5.142
|
||||
long hsShtat = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
|
||||
|
||||
long iTabID = LittleEndian.getUInt(data, offset); offset += LittleEndian.INT_SIZE;
|
||||
//according to #2.4.304
|
||||
if (iTabID < 1 || iTabID > 0x0000FFFFL) {
|
||||
throw new XSSFBParseException("table id out of range: "+iTabID);
|
||||
}
|
||||
StringBuilder sb = new StringBuilder();
|
||||
offset += XSSFBUtils.readXLWideString(data, offset, sb);
|
||||
String relId = sb.toString();
|
||||
sb.setLength(0);
|
||||
XSSFBUtils.readXLWideString(data, offset, sb);
|
||||
String name = sb.toString();
|
||||
if (relId != null && relId.trim().length() > 0) {
|
||||
sheets.add(new XSSFSheetRef(relId, name));
|
||||
}
|
||||
}
|
||||
|
||||
List<XSSFSheetRef> getSheets() {
|
||||
return sheets;
|
||||
}
|
||||
}
|
||||
}
|
@ -16,15 +16,16 @@
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.eventusermodel;
|
||||
|
||||
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.POIXMLException;
|
||||
@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.util.SAXHelper;
|
||||
import org.apache.poi.xssf.model.CommentsTable;
|
||||
import org.apache.poi.xssf.model.SharedStringsTable;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSSFDrawing;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.apache.poi.xssf.usermodel.XSSFShape;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
import org.xml.sax.XMLReader;
|
||||
import org.xml.sax.helpers.DefaultHandler;
|
||||
|
||||
/**
|
||||
* This class makes it easy to get at individual parts
|
||||
@ -62,8 +66,8 @@ public class XSSFReader {
|
||||
|
||||
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
|
||||
|
||||
private OPCPackage pkg;
|
||||
private PackagePart workbookPart;
|
||||
protected OPCPackage pkg;
|
||||
protected PackagePart workbookPart;
|
||||
|
||||
/**
|
||||
* Creates a new XSSFReader, for the given package
|
||||
@ -194,23 +198,23 @@ public class XSSFReader {
|
||||
private final Map<String, PackagePart> sheetMap;
|
||||
|
||||
/**
|
||||
* Current CTSheet bean
|
||||
* Current sheet reference
|
||||
*/
|
||||
private CTSheet ctSheet;
|
||||
|
||||
XSSFSheetRef xssfSheetRef;
|
||||
|
||||
/**
|
||||
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
|
||||
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
|
||||
* i.e. as they are stored in the underlying package
|
||||
*/
|
||||
private final Iterator<CTSheet> sheetIterator;
|
||||
final Iterator<XSSFSheetRef> sheetIterator;
|
||||
|
||||
/**
|
||||
* Construct a new SheetIterator
|
||||
*
|
||||
* @param wb package part holding workbook.xml
|
||||
*/
|
||||
private SheetIterator(PackagePart wb) throws IOException {
|
||||
SheetIterator(PackagePart wb) throws IOException {
|
||||
|
||||
/**
|
||||
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
|
||||
@ -228,25 +232,44 @@ public class XSSFReader {
|
||||
sheetMap.put(rel.getId(), pkg.getPart(relName));
|
||||
}
|
||||
}
|
||||
//step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
|
||||
//Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
|
||||
CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
|
||||
List<CTSheet> validSheets = new ArrayList<CTSheet>();
|
||||
for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
|
||||
//if there's no relationship id, silently skip the sheet
|
||||
String sheetId = ctSheet.getId();
|
||||
if (sheetId != null && sheetId.length() > 0) {
|
||||
validSheets.add(ctSheet);
|
||||
}
|
||||
}
|
||||
sheetIterator = validSheets.iterator();
|
||||
//step 2. Read array of CTSheet elements, wrap it in a LinkedList
|
||||
//and construct an iterator
|
||||
sheetIterator = createSheetIteratorFromWB(wb);
|
||||
} catch (InvalidFormatException e){
|
||||
throw new POIXMLException(e);
|
||||
} catch (XmlException e){
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
}
|
||||
|
||||
Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
|
||||
|
||||
XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
|
||||
XMLReader xmlReader = null;
|
||||
try {
|
||||
xmlReader = SAXHelper.newXMLReader();
|
||||
} catch (ParserConfigurationException e) {
|
||||
throw new POIXMLException(e);
|
||||
} catch (SAXException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
xmlReader.setContentHandler(xmlSheetRefReader);
|
||||
try {
|
||||
xmlReader.parse(new InputSource(wb.getInputStream()));
|
||||
} catch (SAXException e) {
|
||||
throw new POIXMLException(e);
|
||||
}
|
||||
|
||||
List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
|
||||
for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
|
||||
//if there's no relationship id, silently skip the sheet
|
||||
String sheetId = xssfSheetRef.getId();
|
||||
if (sheetId != null && sheetId.length() > 0) {
|
||||
validSheets.add(xssfSheetRef);
|
||||
}
|
||||
}
|
||||
return validSheets.iterator();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> if the iteration has more elements.
|
||||
*
|
||||
@ -264,9 +287,9 @@ public class XSSFReader {
|
||||
*/
|
||||
@Override
|
||||
public InputStream next() {
|
||||
ctSheet = sheetIterator.next();
|
||||
xssfSheetRef = sheetIterator.next();
|
||||
|
||||
String sheetId = ctSheet.getId();
|
||||
String sheetId = xssfSheetRef.getId();
|
||||
try {
|
||||
PackagePart sheetPkg = sheetMap.get(sheetId);
|
||||
return sheetPkg.getInputStream();
|
||||
@ -281,7 +304,7 @@ public class XSSFReader {
|
||||
* @return name of the current sheet
|
||||
*/
|
||||
public String getSheetName() {
|
||||
return ctSheet.getName();
|
||||
return xssfSheetRef.getName();
|
||||
}
|
||||
|
||||
/**
|
||||
@ -344,7 +367,7 @@ public class XSSFReader {
|
||||
}
|
||||
|
||||
public PackagePart getSheetPart() {
|
||||
String sheetId = ctSheet.getId();
|
||||
String sheetId = xssfSheetRef.getId();
|
||||
return sheetMap.get(sheetId);
|
||||
}
|
||||
|
||||
@ -356,4 +379,52 @@ public class XSSFReader {
|
||||
throw new IllegalStateException("Not supported");
|
||||
}
|
||||
}
|
||||
|
||||
protected final static class XSSFSheetRef {
|
||||
//do we need to store sheetId, too?
|
||||
private final String id;
|
||||
private final String name;
|
||||
|
||||
public XSSFSheetRef(String id, String name) {
|
||||
this.id = id;
|
||||
this.name = name;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return id;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
}
|
||||
|
||||
//scrapes sheet reference info and order from workbook.xml
|
||||
private static class XMLSheetRefReader extends DefaultHandler {
|
||||
private final static String SHEET = "sheet";
|
||||
private final static String ID = "id";
|
||||
private final static String NAME = "name";
|
||||
|
||||
private final List<XSSFSheetRef> sheetRefs = new LinkedList();
|
||||
|
||||
@Override
|
||||
public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
|
||||
if (localName.toLowerCase(Locale.US).equals(SHEET)) {
|
||||
String name = null;
|
||||
String id = null;
|
||||
for (int i = 0; i < attrs.getLength(); i++) {
|
||||
if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(NAME)) {
|
||||
name = attrs.getValue(i);
|
||||
} else if (attrs.getLocalName(i).toLowerCase(Locale.US).equals(ID)) {
|
||||
id = attrs.getValue(i);
|
||||
}
|
||||
sheetRefs.add(new XSSFSheetRef(id, name));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
List<XSSFSheetRef> getSheetRefs() {
|
||||
return Collections.unmodifiableList(sheetRefs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,160 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.extractor;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.xssf.binary.XSSFBCommentsTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
|
||||
import org.apache.poi.xssf.binary.XSSFBStylesTable;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* Implementation of a text extractor or xlsb Excel
|
||||
* files that uses SAX-like binary parsing.
|
||||
*/
|
||||
public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
|
||||
implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||
|
||||
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
|
||||
XSSFRelation.XLSB_BINARY_WORKBOOK
|
||||
};
|
||||
|
||||
private boolean handleHyperlinksInCells = false;
|
||||
|
||||
/**
 * Creates an extractor for the file at the given path;
 * the path is handed to the superclass constructor unchanged.
 *
 * @param path path of the file to extract text from
 */
public XSSFBEventBasedExcelExtractor(String path)
        throws XmlException, OpenXML4JException, IOException {
    super(path);
}
|
||||
|
||||
/**
 * Creates an extractor for the given package;
 * the package is handed to the superclass constructor unchanged.
 *
 * @param container package to extract text from
 */
public XSSFBEventBasedExcelExtractor(OPCPackage container)
        throws XmlException, OpenXML4JException, IOException {
    super(container);
}
|
||||
|
||||
public static void main(String[] args) throws Exception {
|
||||
if (args.length < 1) {
|
||||
System.err.println("Use:");
|
||||
System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
|
||||
System.exit(1);
|
||||
}
|
||||
POIXMLTextExtractor extractor =
|
||||
new XSSFBEventBasedExcelExtractor(args[0]);
|
||||
System.out.println(extractor.getText());
|
||||
extractor.close();
|
||||
}
|
||||
|
||||
public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
|
||||
this.handleHyperlinksInCells = handleHyperlinksInCells;
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we return the formula itself, and not
|
||||
* the result it produces? Default is false
|
||||
* This is currently unsupported for xssfb
|
||||
*/
|
||||
@Override
|
||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||
throw new IllegalArgumentException("Not currently supported");
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the given sheet
|
||||
*/
|
||||
public void processSheet(
|
||||
SheetContentsHandler sheetContentsExtractor,
|
||||
XSSFBStylesTable styles,
|
||||
XSSFBCommentsTable comments,
|
||||
XSSFBSharedStringsTable strings,
|
||||
InputStream sheetInputStream)
|
||||
throws IOException, SAXException {
|
||||
|
||||
DataFormatter formatter;
|
||||
if (locale == null) {
|
||||
formatter = new DataFormatter();
|
||||
} else {
|
||||
formatter = new DataFormatter(locale);
|
||||
}
|
||||
|
||||
XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
|
||||
sheetInputStream,
|
||||
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
|
||||
);
|
||||
xssfbSheetHandler.parse();
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the file and returns the text
|
||||
*/
|
||||
public String getText() {
|
||||
try {
|
||||
XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
|
||||
XSSFBReader xssfbReader = new XSSFBReader(container);
|
||||
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
|
||||
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
|
||||
|
||||
StringBuffer text = new StringBuffer();
|
||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
||||
XSSFBHyperlinksTable hyperlinksTable = null;
|
||||
while (iter.hasNext()) {
|
||||
InputStream stream = iter.next();
|
||||
if (includeSheetNames) {
|
||||
text.append(iter.getSheetName());
|
||||
text.append('\n');
|
||||
}
|
||||
if (handleHyperlinksInCells) {
|
||||
hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
|
||||
}
|
||||
XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
|
||||
processSheet(sheetExtractor, styles, comments, strings, stream);
|
||||
if (includeHeadersFooters) {
|
||||
sheetExtractor.appendHeaderText(text);
|
||||
}
|
||||
sheetExtractor.appendCellText(text);
|
||||
if (includeTextBoxes) {
|
||||
processShapes(iter.getShapes(), text);
|
||||
}
|
||||
if (includeHeadersFooters) {
|
||||
sheetExtractor.appendFooterText(text);
|
||||
}
|
||||
sheetExtractor.reset();
|
||||
stream.close();
|
||||
}
|
||||
|
||||
return text.toString();
|
||||
} catch (IOException e) {
|
||||
System.err.println(e);
|
||||
return null;
|
||||
} catch (SAXException se) {
|
||||
System.err.println(se);
|
||||
return null;
|
||||
} catch (OpenXML4JException o4je) {
|
||||
System.err.println(o4je);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
|
||||
*/
|
||||
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||
private OPCPackage container;
|
||||
OPCPackage container;
|
||||
private POIXMLProperties properties;
|
||||
|
||||
private Locale locale;
|
||||
private boolean includeTextBoxes = true;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean includeCellComments = false;
|
||||
private boolean includeHeadersFooters = true;
|
||||
private boolean formulasNotResults = false;
|
||||
Locale locale;
|
||||
boolean includeTextBoxes = true;
|
||||
boolean includeSheetNames = true;
|
||||
boolean includeCellComments = false;
|
||||
boolean includeHeadersFooters = true;
|
||||
boolean formulasNotResults = false;
|
||||
private boolean concatenatePhoneticRuns = true;
|
||||
|
||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||
@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
}
|
||||
}
|
||||
|
||||
private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
|
||||
void processShapes(List<XSSFShape> shapes, StringBuffer text) {
|
||||
if (shapes == null){
|
||||
return;
|
||||
}
|
||||
@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
* @see XSSFExcelExtractor#getText()
|
||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||
*/
|
||||
private void appendHeaderText(StringBuffer buffer) {
|
||||
void appendHeaderText(StringBuffer buffer) {
|
||||
appendHeaderFooterText(buffer, "firstHeader");
|
||||
appendHeaderFooterText(buffer, "oddHeader");
|
||||
appendHeaderFooterText(buffer, "evenHeader");
|
||||
@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
* @see XSSFExcelExtractor#getText()
|
||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||
*/
|
||||
private void appendFooterText(StringBuffer buffer) {
|
||||
void appendFooterText(StringBuffer buffer) {
|
||||
// append the text for each footer type in the same order
|
||||
// they are appended in XSSFExcelExtractor
|
||||
appendHeaderFooterText(buffer, "firstFooter");
|
||||
@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
/**
|
||||
* Append the cell contents we have collected.
|
||||
*/
|
||||
private void appendCellText(StringBuffer buffer) {
|
||||
void appendCellText(StringBuffer buffer) {
|
||||
checkMaxTextSize(buffer, output.toString());
|
||||
buffer.append(output);
|
||||
}
|
||||
@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
/**
|
||||
* Reset this <code>SheetTextExtractor</code> for the next sheet.
|
||||
*/
|
||||
private void reset() {
|
||||
void reset() {
|
||||
output.setLength(0);
|
||||
firstCellOfRow = true;
|
||||
if (headerFooterMap != null) {
|
||||
|
@ -68,6 +68,7 @@ public class TestExtractorFactory {
|
||||
private static File xlsxStrict;
|
||||
private static File xltx;
|
||||
private static File xlsEmb;
|
||||
private static File xlsb;
|
||||
|
||||
private static File doc;
|
||||
private static File doc6;
|
||||
@ -108,6 +109,7 @@ public class TestExtractorFactory {
|
||||
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
|
||||
xltx = getFileAndCheck(ssTests, "test.xltx");
|
||||
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
|
||||
xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
|
||||
|
||||
POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
|
||||
doc = getFileAndCheck(wpTests, "SampleDoc.doc");
|
||||
@ -172,6 +174,13 @@ public class TestExtractorFactory {
|
||||
);
|
||||
extractor.close();
|
||||
|
||||
extractor = ExtractorFactory.createExtractor(xlsb);
|
||||
assertTrue(
|
||||
extractor.getText().contains("test")
|
||||
);
|
||||
extractor.close();
|
||||
|
||||
|
||||
extractor = ExtractorFactory.createExtractor(xltx);
|
||||
assertTrue(
|
||||
extractor.getText().contains("test")
|
||||
|
@ -0,0 +1,56 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestXSSFBSharedStringsTable {
|
||||
|
||||
|
||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
|
||||
|
||||
@Test
|
||||
public void testBasic() throws Exception {
|
||||
|
||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
|
||||
List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
|
||||
assertEquals(1, parts.size());
|
||||
|
||||
XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
|
||||
List<String> strings = rtbl.getItems();
|
||||
assertEquals(49, strings.size());
|
||||
|
||||
assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
|
||||
assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
|
||||
assertEquals(55, rtbl.getCount());
|
||||
assertEquals(49, rtbl.getUniqueCount());
|
||||
|
||||
//TODO: add in tests for phonetic runs
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,54 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.binary;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestXSSFBSheetHyperlinkManager {
|
||||
|
||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
|
||||
|
||||
@Test
|
||||
public void testBasic() throws Exception {
|
||||
|
||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
|
||||
XSSFBReader reader = new XSSFBReader(pkg);
|
||||
XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
|
||||
it.next();
|
||||
XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
|
||||
List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
|
||||
assertNotNull(records);
|
||||
assertEquals(1, records.size());
|
||||
XSSFHyperlinkRecord record = records.get(0);
|
||||
assertEquals("http://tika.apache.org/", record.getLocation());
|
||||
assertEquals("rId2", record.getRelId());
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,224 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.eventusermodel;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
import static org.junit.Assert.fail;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
|
||||
import org.apache.poi.xssf.binary.XSSFBSheetHandler;
|
||||
import org.apache.poi.xssf.binary.XSSFBStylesTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
import org.junit.Test;
|
||||
|
||||
public class TestXSSFBReader {
|
||||
|
||||
private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
|
||||
|
||||
@Test
|
||||
public void testBasic() throws Exception {
|
||||
List<String> sheetTexts = getSheets("testVarious.xlsb");
|
||||
|
||||
assertEquals(1, sheetTexts.size());
|
||||
String xsxml = sheetTexts.get(0);
|
||||
assertContains("This is a string", xsxml);
|
||||
assertContains("<td ref=\"B2\">13</td>", xsxml);
|
||||
assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
|
||||
assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml);
|
||||
assertContains("<td ref=\"B5\">20%</td>", xsxml);
|
||||
assertContains("<td ref=\"B6\">13.12</td>", xsxml);
|
||||
assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
|
||||
assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);
|
||||
|
||||
assertContains("46/1963", xsxml);//custom format 1
|
||||
assertContains("3/128", xsxml);//custom format 2
|
||||
|
||||
assertContains("<tr num=\"7>\n" +
|
||||
"\t<td ref=\"A8\">longer int</td>\n" +
|
||||
"\t<td ref=\"B8\">1.23457E+15</td>\n" +
|
||||
"\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
|
||||
"test comment2</span></td>\n" +
|
||||
"</tr num=\"7>", xsxml);
|
||||
|
||||
assertContains("<tr num=\"34>\n" +
|
||||
"\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
|
||||
"comment6 actually in cell</span></td>\n" +
|
||||
"</tr num=\"34>", xsxml);
|
||||
|
||||
assertContains("<tr num=\"64>\n" +
|
||||
"\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
|
||||
"comment7 end of file</span></td>\n" +
|
||||
"</tr num=\"64>", xsxml);
|
||||
|
||||
assertContains("<tr num=\"65>\n" +
|
||||
"\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
|
||||
"comment8 end of file</span></td>\n" +
|
||||
"</tr num=\"65>", xsxml);
|
||||
|
||||
assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
|
||||
assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
|
||||
assertContains(
|
||||
"<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
|
||||
xsxml);
|
||||
assertContains(
|
||||
"<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
|
||||
xsxml);
|
||||
assertContains(
|
||||
"<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
|
||||
xsxml);
|
||||
assertContains(
|
||||
"<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
|
||||
xsxml);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testComments() throws Exception {
|
||||
List<String> sheetTexts = getSheets("comments.xlsb");
|
||||
String xsxml = sheetTexts.get(0);
|
||||
assertContains(
|
||||
"<tr num=\"0>\n" +
|
||||
"\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
|
||||
"\t<td ref=\"B1\">row1</td>\n" +
|
||||
"</tr num=\"0>", xsxml);
|
||||
assertContains(
|
||||
"<tr num=\"1>\n" +
|
||||
"\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
|
||||
"comment row2 (index1)</span></td>\n" +
|
||||
"</tr num=\"1>",
|
||||
xsxml);
|
||||
assertContains("<tr num=\"2>\n" +
|
||||
"\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
|
||||
"\t<td ref=\"B3\">row3</td>\n", xsxml);
|
||||
|
||||
assertContains("<tr num=\"3>\n" +
|
||||
"\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
|
||||
"\t<td ref=\"B4\">row4</td>\n" +
|
||||
"</tr num=\"3></sheet>", xsxml);
|
||||
|
||||
}
|
||||
|
||||
private List<String> getSheets(String testFileName) throws Exception {
|
||||
OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
|
||||
List<String> sheetTexts = new ArrayList<String>();
|
||||
XSSFBReader r = new XSSFBReader(pkg);
|
||||
|
||||
// assertNotNull(r.getWorkbookData());
|
||||
// assertNotNull(r.getSharedStringsData());
|
||||
assertNotNull(r.getXSSFBStylesTable());
|
||||
XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
|
||||
XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
|
||||
XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData();
|
||||
|
||||
while (it.hasNext()) {
|
||||
InputStream is = it.next();
|
||||
String name = it.getSheetName();
|
||||
TestSheetHandler testSheetHandler = new TestSheetHandler();
|
||||
testSheetHandler.startSheet(name);
|
||||
XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
|
||||
xssfbStylesTable,
|
||||
it.getXSSFBSheetComments(),
|
||||
sst, testSheetHandler,
|
||||
new DataFormatter(),
|
||||
false);
|
||||
sheetHandler.parse();
|
||||
testSheetHandler.endSheet();
|
||||
sheetTexts.add(testSheetHandler.toString());
|
||||
}
|
||||
return sheetTexts;
|
||||
|
||||
}
|
||||
|
||||
//This converts all [\r\n\t]+ to " "
|
||||
private void assertContains(String needle, String haystack) {
|
||||
needle = needle.replaceAll("[\r\n\t]+", " ");
|
||||
haystack = haystack.replaceAll("[\r\n\t]+", " ");
|
||||
if (haystack.indexOf(needle) < 0) {
|
||||
fail("couldn't find >"+needle+"< in: "+haystack );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testDate() throws Exception {
|
||||
List<String> sheets = getSheets("date.xlsb");
|
||||
assertEquals(1, sheets.size());
|
||||
assertContains("1/12/13", sheets.get(0));
|
||||
|
||||
}
|
||||
|
||||
|
||||
private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
|
||||
private final StringBuilder sb = new StringBuilder();
|
||||
|
||||
public void startSheet(String sheetName) {
|
||||
sb.append("<sheet name=\"").append(sheetName).append(">");
|
||||
}
|
||||
|
||||
public void endSheet(){
|
||||
sb.append("</sheet>");
|
||||
}
|
||||
@Override
|
||||
public void startRow(int rowNum) {
|
||||
sb.append("\n<tr num=\"").append(rowNum).append(">");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void endRow(int rowNum) {
|
||||
sb.append("\n</tr num=\"").append(rowNum).append(">");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void cell(String cellReference, String formattedValue, XSSFComment comment) {
|
||||
formattedValue = (formattedValue == null) ? "" : formattedValue;
|
||||
if (comment == null) {
|
||||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
|
||||
} else {
|
||||
sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
|
||||
.append(formattedValue)
|
||||
.append("<span type=\"comment\" author=\"")
|
||||
.append(comment.getAuthor()).append("\">")
|
||||
.append(comment.getString().toString().trim()).append("</span>")
|
||||
.append("</td>");
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void headerFooter(String text, boolean isHeader, String tagName) {
|
||||
if (isHeader) {
|
||||
sb.append("<header tagName=\""+tagName+"\">"+text+"</header>");
|
||||
} else {
|
||||
sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>");
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,102 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.extractor;
|
||||
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
import org.apache.poi.xssf.XSSFTestDataSamples;
|
||||
import org.junit.Test;
|
||||
|
||||
/**
|
||||
* Tests for {@link XSSFBEventBasedExcelExtractor}
|
||||
*/
|
||||
public class TestXSSFBEventBasedExcelExtractor {
|
||||
|
||||
|
||||
protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
|
||||
return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
|
||||
openSamplePackage(sampleName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get text out of the simple file
|
||||
*/
|
||||
@Test
|
||||
public void testGetSimpleText() throws Exception {
|
||||
// a very simple file
|
||||
XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
|
||||
extractor.setIncludeCellComments(true);
|
||||
extractor.getText();
|
||||
|
||||
String text = extractor.getText();
|
||||
assertTrue(text.length() > 0);
|
||||
|
||||
// Check sheet names
|
||||
assertTrue(text.startsWith("Sheet1"));
|
||||
assertTrue(text.endsWith("Sheet3\n"));
|
||||
|
||||
// Now without, will have text
|
||||
extractor.setIncludeSheetNames(false);
|
||||
text = extractor.getText();
|
||||
String CHUNK1 =
|
||||
"Lorem\t111\n" +
|
||||
"ipsum\t222\n" +
|
||||
"dolor\t333\n" +
|
||||
"sit\t444\n" +
|
||||
"amet\t555\n" +
|
||||
"consectetuer\t666\n" +
|
||||
"adipiscing\t777\n" +
|
||||
"elit\t888\n" +
|
||||
"Nunc\t999\n";
|
||||
String CHUNK2 =
|
||||
"The quick brown fox jumps over the lazy dog\n" +
|
||||
"hello, xssf hello, xssf\n" +
|
||||
"hello, xssf hello, xssf\n" +
|
||||
"hello, xssf hello, xssf\n" +
|
||||
"hello, xssf hello, xssf\n";
|
||||
assertEquals(
|
||||
CHUNK1 +
|
||||
"at\t4995\n" +
|
||||
CHUNK2
|
||||
, text);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Test text extraction from text box using getShapes()
|
||||
*
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testShapes() throws Exception {
|
||||
XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
|
||||
|
||||
try {
|
||||
String text = ooxmlExtractor.getText();
|
||||
|
||||
assertTrue(text.indexOf("Line 1") > -1);
|
||||
assertTrue(text.indexOf("Line 2") > -1);
|
||||
assertTrue(text.indexOf("Line 3") > -1);
|
||||
} finally {
|
||||
ooxmlExtractor.close();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
BIN
test-data/spreadsheet/51519.xlsb
Normal file
BIN
test-data/spreadsheet/51519.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/WithTextBox.xlsb
Normal file
BIN
test-data/spreadsheet/WithTextBox.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/comments.xlsb
Normal file
BIN
test-data/spreadsheet/comments.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/date.xlsb
Normal file
BIN
test-data/spreadsheet/date.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/hyperlink.xlsb
Normal file
BIN
test-data/spreadsheet/hyperlink.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/sample.xlsb
Normal file
BIN
test-data/spreadsheet/sample.xlsb
Normal file
Binary file not shown.
BIN
test-data/spreadsheet/testVarious.xlsb
Normal file
BIN
test-data/spreadsheet/testVarious.xlsb
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user