60826 -- clean up. Many, many thanks to Javen O'Neal's code review!

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1787320 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2017-03-17 10:09:59 +00:00
parent 60b4624cf7
commit b52126f7f3
36 changed files with 346 additions and 243 deletions

View File

@ -16,11 +16,26 @@
==================================================================== */ ==================================================================== */
package org.apache.poi; package org.apache.poi;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/** /**
* Represents a descriptor of a OOXML relation. * Represents a descriptor of a OOXML relation.
*/ */
public abstract class POIXMLRelation { public abstract class POIXMLRelation {
private static final POILogger log = POILogFactory.getLogger(POIXMLRelation.class);
/** /**
* Describes the content stored in a part. * Describes the content stored in a part.
*/ */
@ -131,4 +146,25 @@ public abstract class POIXMLRelation {
public Class<? extends POIXMLDocumentPart> getRelationClass(){ public Class<? extends POIXMLDocumentPart> getRelationClass(){
return _cls; return _cls;
} }
/**
 * Fetches the InputStream to read the contents, based
 * on the specified core part, for which we are defined
 * as a suitable relationship.
 * <p>
 * Only the first relationship of this relation's type is consulted.
 *
 * @param corePart the part whose relationships are searched
 * @return the input stream of the first related part, or {@code null}
 *         (after a warning is logged) if the core part has no relationship
 *         of this type or the relationship's target part cannot be found
 * @throws IOException declared by this method; propagated from opening the part's stream
 * @throws InvalidFormatException declared by this method; propagated from resolving
 *         the relationships or the target part name
 *
 * @since 3.16-beta3
 */
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
    PackageRelationshipCollection prc =
            corePart.getRelationshipsByType(getRelation());
    Iterator<PackageRelationship> it = prc.iterator();
    if (it.hasNext()) {
        PackageRelationship rel = it.next();
        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
        PackagePart part = corePart.getPackage().getPart(relName);
        // Guard against a relationship whose target part is missing from the
        // package: fall through to the "not found" path instead of failing
        // with a NullPointerException.
        if (part != null) {
            return part.getInputStream();
        }
    }
    log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
    return null;
}
} }

View File

@ -20,7 +20,7 @@ import org.apache.poi.UnsupportedFileFormatException;
/** /**
* We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}. * We don't support .xlsb for read and write via {@link org.apache.poi.xssf.usermodel.XSSFWorkbook}.
* As of POI 3.15-beta3, we do support streaming reading of xlsb files * As of POI 3.16-beta3, we do support streaming reading of xlsb files
* via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader} * via {@link org.apache.poi.xssf.eventusermodel.XSSFBReader}
*/ */
public class XLSBUnsupportedException extends UnsupportedFileFormatException { public class XLSBUnsupportedException extends UnsupportedFileFormatException {

View File

@ -17,7 +17,6 @@
package org.apache.poi.xssf.binary; package org.apache.poi.xssf.binary;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
@ -25,6 +24,8 @@ import org.apache.poi.util.LittleEndian;
* This class encapsulates what the spec calls a "Cell" object. * This class encapsulates what the spec calls a "Cell" object.
* I added "Header" to clarify that this does not contain the contents * I added "Header" to clarify that this does not contain the contents
* of the cell, only the column number, the style id and the phonetic boolean * of the cell, only the column number, the style id and the phonetic boolean
*
* @since 3.16-beta3
*/ */
@Internal @Internal
class XSSFBCellHeader { class XSSFBCellHeader {
@ -61,10 +62,6 @@ class XSSFBCellHeader {
return colNum; return colNum;
} }
/**
 * Builds this cell's address by appending {@code rowNum + 1} to the
 * column string that {@code CellReference.convertNumToColString}
 * produces for {@code colNum}.
 *
 * @return the column string followed by the incremented row number
 */
String formatAddressAsString() {
    String columnLabel = CellReference.convertNumToColString(colNum);
    return columnLabel + (rowNum + 1);
}
int getStyleIdx() { int getStyleIdx() {
return styleIdx; return styleIdx;
} }

View File

@ -21,10 +21,14 @@ package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/**
* @since 3.16-beta3
*/
@Internal @Internal
class XSSFBCellRange { class XSSFBCellRange {
//TODO: Convert this to generate an AreaReference
public final static int length = 4* LittleEndian.INT_SIZE; public static final int length = 4* LittleEndian.INT_SIZE;
/** /**
* Parses an RfX cell range from the data starting at the offset. * Parses an RfX cell range from the data starting at the offset.
* This performs no range checking. * This performs no range checking.

View File

@ -24,6 +24,9 @@ import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFComment;
/**
* @since 3.16-beta3
*/
@Internal @Internal
class XSSFBComment extends XSSFComment { class XSSFBComment extends XSSFComment {

View File

@ -20,7 +20,6 @@ package org.apache.poi.xssf.binary;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -31,10 +30,13 @@ import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBCommentsTable extends XSSFBParser { public class XSSFBCommentsTable extends XSSFBParser {
private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>(new CellAddressComparator());//String is the cellAddress A1 private Map<CellAddress, XSSFBComment> comments = new TreeMap<CellAddress, XSSFBComment>();
private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>(); private Queue<CellAddress> commentAddresses = new LinkedList<CellAddress>();
private List<String> authors = new ArrayList<String>(); private List<String> authors = new ArrayList<String>();
@ -92,22 +94,4 @@ public class XSSFBCommentsTable extends XSSFBParser {
} }
return comments.get(cellAddress); return comments.get(cellAddress);
} }
/**
 * Orders cell addresses by row first and then by column,
 * both ascending.
 */
private final static class CellAddressComparator implements Comparator<CellAddress> {
    @Override
    public int compare(CellAddress o1, CellAddress o2) {
        int byRow = order(o1.getRow(), o2.getRow());
        if (byRow != 0) {
            return byRow;
        }
        // Rows are equal, so the column decides.
        return order(o1.getColumn(), o2.getColumn());
    }

    /** Three-way comparison of two ints yielding -1, 0 or 1. */
    private static int order(int left, int right) {
        if (left == right) {
            return 0;
        }
        return left < right ? -1 : 1;
    }
}
} }

View File

@ -20,12 +20,17 @@ package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper; import org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper;
/**
* @since 3.16-beta3
*/
@Internal @Internal
class XSSFBHeaderFooter { class XSSFBHeaderFooter {
private static final HeaderFooterHelper HEADER_FOOTER_HELPER = new HeaderFooterHelper();
private final String headerFooterTypeLabel; private final String headerFooterTypeLabel;
private final boolean isHeader; private final boolean isHeader;
private String rawString; private String rawString;
private HeaderFooterHelper headerFooterHelper = new HeaderFooterHelper();
XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) { XSSFBHeaderFooter(String headerFooterTypeLabel, boolean isHeader) {
@ -43,9 +48,9 @@ class XSSFBHeaderFooter {
String getString() { String getString() {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
String left = headerFooterHelper.getLeftSection(rawString); String left = HEADER_FOOTER_HELPER.getLeftSection(rawString);
String center = headerFooterHelper.getCenterSection(rawString); String center = HEADER_FOOTER_HELPER.getCenterSection(rawString);
String right = headerFooterHelper.getRightSection(rawString); String right = HEADER_FOOTER_HELPER.getRightSection(rawString);
if (left != null && left.length() > 0) { if (left != null && left.length() > 0) {
sb.append(left); sb.append(left);
} }

View File

@ -18,10 +18,14 @@ package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
/**
* @since 3.16-beta3
*/
@Internal @Internal
class XSSFBHeaderFooters { class XSSFBHeaderFooters {
public static XSSFBHeaderFooters parse(byte[] data) { public static XSSFBHeaderFooters parse(byte[] data) {
//parse these at some point.
boolean diffOddEven = false; boolean diffOddEven = false;
boolean diffFirst = false; boolean diffFirst = false;
boolean scaleWDoc = false; boolean scaleWDoc = false;

View File

@ -36,10 +36,13 @@ import org.apache.poi.ss.util.CellRangeUtil;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.poi.xssf.usermodel.XSSFRelation;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBHyperlinksTable { public class XSSFBHyperlinksTable {
private final static BitSet RECORDS = new BitSet(); private static final BitSet RECORDS = new BitSet();
static { static {

View File

@ -18,7 +18,9 @@
package org.apache.poi.xssf.binary; package org.apache.poi.xssf.binary;
/** /**
* Parse exception while reading an xssfb * Parse exception while reading an xssfb.
*
* @since 3.16-beta3
*/ */
public class XSSFBParseException extends RuntimeException { public class XSSFBParseException extends RuntimeException {

View File

@ -25,9 +25,11 @@ import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianInputStream; import org.apache.poi.util.LittleEndianInputStream;
/** /**
* Experimental parser for Microsoft's ooxml xssfb format. * Experimental parser for Microsoft's ooxml xlsb format.
* Not thread safe, obviously. Need to create a new one * Not thread safe, obviously. Need to create a new one
* for each thread. * for each thread.
*
* @since 3.16-beta3
*/ */
@Internal @Internal
public abstract class XSSFBParser { public abstract class XSSFBParser {
@ -40,6 +42,11 @@ public abstract class XSSFBParser {
records = null; records = null;
} }
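/**
* Creates a parser that reads from the given stream and reports only selected records.
*
* @param is the input stream to parse
* @param bitSet call {@link #handleRecord(int, byte[])} only on those records in this bitSet
*/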
XSSFBParser(InputStream is, BitSet bitSet) { XSSFBParser(InputStream is, BitSet bitSet) {
this.is = new LittleEndianInputStream(is); this.is = new LittleEndianInputStream(is);
records = bitSet; records = bitSet;
@ -98,7 +105,7 @@ public abstract class XSSFBParser {
//However, on a large Excel spreadsheet, this parser was 1/3 faster than //However, on a large Excel spreadsheet, this parser was 1/3 faster than
//the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf. //the ooxml sax parser (5 seconds for xssfb and 7.5 seconds for xssf.
//The code is far cleaner to have the parser read all //The code is far cleaner to have the parser read all
//of the data rather than having every component promise that it read //of the data rather than having every component promise that it will read
//the correct amount. //the correct amount.
abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException; abstract public void handleRecord(int recordType, byte[] data) throws XSSFBParseException;

View File

@ -17,8 +17,14 @@
package org.apache.poi.xssf.binary; package org.apache.poi.xssf.binary;
import java.util.HashMap;
import java.util.Map;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public enum XSSFBRecordType { public enum XSSFBRecordType {
@ -69,6 +75,14 @@ public enum XSSFBRecordType {
BrtBundleSh(156), //defines worksheet in wb part BrtBundleSh(156), //defines worksheet in wb part
Unimplemented(-1); Unimplemented(-1);
// Lookup table from a numeric record id to its enum constant, so that an id
// can be resolved with a single map access instead of scanning values().
private static final Map<Integer, XSSFBRecordType> TYPE_MAP =
new HashMap<Integer, XSSFBRecordType>();
static {
// Register every constant (Unimplemented included) under its own id.
for (XSSFBRecordType type : XSSFBRecordType.values()) {
TYPE_MAP.put(type.getId(), type);
}
}
private final int id; private final int id;
@ -81,12 +95,11 @@ public enum XSSFBRecordType {
} }
public static XSSFBRecordType lookup(int id) { public static XSSFBRecordType lookup(int id) {
for (XSSFBRecordType r : XSSFBRecordType.values()) { XSSFBRecordType type = TYPE_MAP.get(id);
if (r.id == id) { if (type == null) {
return r; return Unimplemented;
}
} }
return Unimplemented; return type;
} }
} }

View File

@ -17,19 +17,9 @@
package org.apache.poi.xssf.binary; package org.apache.poi.xssf.binary;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.POIXMLRelation; import org.apache.poi.POIXMLRelation;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
@ -64,22 +54,4 @@ public class XSSFBRelation extends POIXMLRelation {
super(type, rel, defaultName, cls); super(type, rel, defaultName, cls);
} }
/**
 * Fetches the InputStream to read the contents, based
 * on the specified core part, for which we are defined
 * as a suitable relationship.
 * <p>
 * Only the first relationship of this relation's type is consulted.
 *
 * @param corePart the part whose relationships are searched
 * @return the input stream of the first related part, or {@code null}
 *         (after a warning is logged) if the core part has no relationship
 *         of this type or the relationship's target part cannot be found
 * @throws IOException declared by this method; propagated from opening the part's stream
 * @throws InvalidFormatException declared by this method; propagated from resolving
 *         the relationships or the target part name
 */
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
    PackageRelationshipCollection prc =
            corePart.getRelationshipsByType(getRelation());
    Iterator<PackageRelationship> it = prc.iterator();
    if (it.hasNext()) {
        PackageRelationship rel = it.next();
        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
        PackagePart part = corePart.getPackage().getPart(relName);
        // Guard against a relationship whose target part is missing from the
        // package: fall through to the "not found" path instead of failing
        // with a NullPointerException.
        if (part != null) {
            return part.getInputStream();
        }
    }
    log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
    return null;
}
} }

View File

@ -19,6 +19,9 @@ package org.apache.poi.xssf.binary;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
/**
* @since 3.16-beta3
*/
@Internal @Internal
class XSSFBRichStr { class XSSFBRichStr {

View File

@ -19,11 +19,14 @@ package org.apache.poi.xssf.binary;
import org.apache.poi.ss.usermodel.Font; import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.NotImplemented;
import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/** /**
* Wrapper class around String so that we can use it in Comment. * Wrapper class around String so that we can use it in Comment.
* Nothing has been implemented yet except for {@link #getString()}. * Nothing has been implemented yet except for {@link #getString()}.
*
* @since 3.16-beta3
*/ */
@Internal @Internal
class XSSFBRichTextString extends XSSFRichTextString { class XSSFBRichTextString extends XSSFRichTextString {
@ -34,21 +37,25 @@ class XSSFBRichTextString extends XSSFRichTextString {
} }
@Override @Override
@NotImplemented
public void applyFont(int startIndex, int endIndex, short fontIndex) { public void applyFont(int startIndex, int endIndex, short fontIndex) {
} }
@Override @Override
@NotImplemented
public void applyFont(int startIndex, int endIndex, Font font) { public void applyFont(int startIndex, int endIndex, Font font) {
} }
@Override @Override
@NotImplemented
public void applyFont(Font font) { public void applyFont(Font font) {
} }
@Override @Override
@NotImplemented
public void clearFormatting() { public void clearFormatting() {
} }
@ -64,16 +71,19 @@ class XSSFBRichTextString extends XSSFRichTextString {
} }
@Override @Override
@NotImplemented
public int numFormattingRuns() { public int numFormattingRuns() {
return 0; return 0;
} }
@Override @Override
@NotImplemented
public int getIndexOfFormattingRun(int index) { public int getIndexOfFormattingRun(int index) {
return 0; return 0;
} }
@Override @Override
@NotImplemented
public void applyFont(short fontIndex) { public void applyFont(short fontIndex) {
} }

View File

@ -28,6 +28,9 @@ import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.xml.sax.SAXException; import org.xml.sax.SAXException;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBSharedStringsTable { public class XSSFBSharedStringsTable {
@ -69,8 +72,6 @@ public class XSSFBSharedStringsTable {
/** /**
* Like POIXMLDocumentPart constructor * Like POIXMLDocumentPart constructor
*
* @since POI 3.14-Beta3
*/ */
XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException { XSSFBSharedStringsTable(PackagePart part) throws IOException, SAXException {
readFrom(part.getInputStream()); readFrom(part.getInputStream());
@ -81,8 +82,14 @@ public class XSSFBSharedStringsTable {
reader.parse(); reader.parse();
} }
/**
*
* @return a defensive copy of strings
*/
public List<String> getItems() { public List<String> getItems() {
return strings; List<String> ret = new ArrayList<String>(strings.size());
ret.addAll(strings);
return ret;
} }
public String getEntryAt(int i) { public String getEntryAt(int i) {

View File

@ -29,10 +29,13 @@ import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBSheetHandler extends XSSFBParser { public class XSSFBSheetHandler extends XSSFBParser {
private final static int CHECK_ALL_ROWS = -1; private static final int CHECK_ALL_ROWS = -1;
private final XSSFBSharedStringsTable stringsTable; private final XSSFBSharedStringsTable stringsTable;
private final XSSFSheetXMLHandler.SheetContentsHandler handler; private final XSSFSheetXMLHandler.SheetContentsHandler handler;

View File

@ -28,6 +28,12 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
/**
* This is a very thin shim to gather number formats from styles.bin
* files.
*
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBStylesTable extends XSSFBParser { public class XSSFBStylesTable extends XSSFBParser {

View File

@ -24,6 +24,9 @@ import org.apache.poi.POIXMLException;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/**
* @since 3.16-beta3
*/
@Internal @Internal
public class XSSFBUtils { public class XSSFBUtils {

View File

@ -29,6 +29,7 @@ import org.apache.poi.util.Internal;
* The {@link #display} is often empty and should be filled from * The {@link #display} is often empty and should be filled from
* the contents of the anchor cell. * the contents of the anchor cell.
* *
* @since 3.16-beta3
*/ */
@Internal @Internal
public class XSSFHyperlinkRecord { public class XSSFHyperlinkRecord {

View File

@ -27,17 +27,17 @@ for streaming/read-only processing of xlsb files.
</p> </p>
<p> <p>
POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader
in o.a.p.xssf.eventusermodel. in org.apache.poi.xssf.eventusermodel.
</p> </p>
<p> <p>
This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes This feature was added in poi-3.16-beta3 and should be considered experimental. Most classes
have been marked @Internal and the API is subject to change. have been marked @Internal and the API is subject to change.
</p> </p>
<h2>Related Documentation</h2> <h2>Related Documentation</h2>
For overviews, tutorials, examples, guides, and tool documentation, please see: For overviews, tutorials, examples, guides, and tool documentation, please see:
<ul> <ul>
<li><a href="http://poi.apache.org">Apache POI Project</a> <li><a href="https://poi.apache.org">Apache POI Project</a>
</ul> </ul>
</body> </body>

View File

@ -44,6 +44,8 @@ import org.apache.poi.xssf.usermodel.XSSFRelation;
/** /**
* Reader for xlsb files. * Reader for xlsb files.
*
* @since 3.16-beta3
*/ */
public class XSSFBReader extends XSSFReader { public class XSSFBReader extends XSSFReader {
/** /**

View File

@ -380,7 +380,7 @@ public class XSSFReader {
} }
} }
protected final static class XSSFSheetRef { protected static final class XSSFSheetRef {
//do we need to store sheetId, too? //do we need to store sheetId, too?
private final String id; private final String id;
private final String name; private final String name;
@ -401,9 +401,9 @@ public class XSSFReader {
//scrapes sheet reference info and order from workbook.xml //scrapes sheet reference info and order from workbook.xml
private static class XMLSheetRefReader extends DefaultHandler { private static class XMLSheetRefReader extends DefaultHandler {
private final static String SHEET = "sheet"; private static final String SHEET = "sheet";
private final static String ID = "id"; private static final String ID = "id";
private final static String NAME = "name"; private static final String NAME = "name";
private final List<XSSFSheetRef> sheetRefs = new LinkedList(); private final List<XSSFSheetRef> sheetRefs = new LinkedList();

View File

@ -23,6 +23,8 @@ import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xssf.binary.XSSFBCommentsTable; import org.apache.poi.xssf.binary.XSSFBCommentsTable;
import org.apache.poi.xssf.binary.XSSFBHyperlinksTable; import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
import org.apache.poi.xssf.binary.XSSFBSharedStringsTable; import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
@ -37,11 +39,15 @@ import org.xml.sax.SAXException;
/** /**
* Implementation of a text extractor or xlsb Excel * Implementation of a text extractor or xlsb Excel
* files that uses SAX-like binary parsing. * files that uses SAX-like binary parsing.
*
* @since 3.16-beta3
*/ */
public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor { implements org.apache.poi.ss.extractor.ExcelExtractor {
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] { private static final POILogger LOGGER = POILogFactory.getLogger(XSSFBEventBasedExcelExtractor.class);
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[]{
XSSFRelation.XLSB_BINARY_WORKBOOK XSSFRelation.XLSB_BINARY_WORKBOOK
}; };
@ -93,15 +99,15 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
throws IOException, SAXException { throws IOException, SAXException {
DataFormatter formatter; DataFormatter formatter;
if (locale == null) { if (getLocale() == null) {
formatter = new DataFormatter(); formatter = new DataFormatter();
} else { } else {
formatter = new DataFormatter(locale); formatter = new DataFormatter(getLocale());
} }
XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler( XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
sheetInputStream, sheetInputStream,
styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults styles, comments, strings, sheetContentsExtractor, formatter, getFormulasNotResults()
); );
xssfbSheetHandler.parse(); xssfbSheetHandler.parse();
} }
@ -111,8 +117,8 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
*/ */
public String getText() { public String getText() {
try { try {
XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container); XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(getPackage());
XSSFBReader xssfbReader = new XSSFBReader(container); XSSFBReader xssfbReader = new XSSFBReader(getPackage());
XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable(); XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData(); XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
@ -121,23 +127,23 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
XSSFBHyperlinksTable hyperlinksTable = null; XSSFBHyperlinksTable hyperlinksTable = null;
while (iter.hasNext()) { while (iter.hasNext()) {
InputStream stream = iter.next(); InputStream stream = iter.next();
if (includeSheetNames) { if (getIncludeSheetNames()) {
text.append(iter.getSheetName()); text.append(iter.getSheetName());
text.append('\n'); text.append('\n');
} }
if (handleHyperlinksInCells) { if (handleHyperlinksInCells) {
hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart()); hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
} }
XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null; XSSFBCommentsTable comments = getIncludeCellComments() ? iter.getXSSFBSheetComments() : null;
processSheet(sheetExtractor, styles, comments, strings, stream); processSheet(sheetExtractor, styles, comments, strings, stream);
if (includeHeadersFooters) { if (getIncludeHeadersFooters()) {
sheetExtractor.appendHeaderText(text); sheetExtractor.appendHeaderText(text);
} }
sheetExtractor.appendCellText(text); sheetExtractor.appendCellText(text);
if (includeTextBoxes) { if (getIncludeTextBoxes()) {
processShapes(iter.getShapes(), text); processShapes(iter.getShapes(), text);
} }
if (includeHeadersFooters) { if (getIncludeHeadersFooters()) {
sheetExtractor.appendFooterText(text); sheetExtractor.appendFooterText(text);
} }
sheetExtractor.reset(); sheetExtractor.reset();
@ -146,13 +152,13 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
return text.toString(); return text.toString();
} catch (IOException e) { } catch (IOException e) {
System.err.println(e); LOGGER.log(POILogger.WARN, e);
return null; return null;
} catch (SAXException se) { } catch (SAXException se) {
System.err.println(se); LOGGER.log(POILogger.WARN, se);
return null; return null;
} catch (OpenXML4JException o4je) { } catch (OpenXML4JException o4je) {
System.err.println(o4je); LOGGER.log(POILogger.WARN, o4je);
return null; return null;
} }
} }

View File

@ -32,6 +32,8 @@ import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.DataFormatter; import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.SAXHelper; import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable; import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFReader;
@ -54,15 +56,18 @@ import org.xml.sax.XMLReader;
*/ */
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor { implements org.apache.poi.ss.extractor.ExcelExtractor {
OPCPackage container;
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFEventBasedExcelExtractor.class);
private OPCPackage container;
private POIXMLProperties properties; private POIXMLProperties properties;
Locale locale; private Locale locale;
boolean includeTextBoxes = true; private boolean includeTextBoxes = true;
boolean includeSheetNames = true; private boolean includeSheetNames = true;
boolean includeCellComments = false; private boolean includeCellComments = false;
boolean includeHeadersFooters = true; private boolean includeHeadersFooters = true;
boolean formulasNotResults = false; private boolean formulasNotResults = false;
private boolean concatenatePhoneticRuns = true; private boolean concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@ -93,6 +98,18 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public void setIncludeSheetNames(boolean includeSheetNames) { public void setIncludeSheetNames(boolean includeSheetNames) {
this.includeSheetNames = includeSheetNames; this.includeSheetNames = includeSheetNames;
} }
/**
* Reports whether sheet names are to be included.
* Returns the current value of the flag assigned by
* {@link #setIncludeSheetNames(boolean)}; the field is initialised to true.
*
* @return whether to include sheet names
*
* @since 3.16-beta3
*/
public boolean getIncludeSheetNames() {
return includeSheetNames;
}
/** /**
* Should we return the formula itself, and not * Should we return the formula itself, and not
* the result it produces? Default is false * the result it produces? Default is false
@ -100,12 +117,33 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
public void setFormulasNotResults(boolean formulasNotResults) { public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults; this.formulasNotResults = formulasNotResults;
} }
/**
* Reports whether the formula itself should be returned rather than
* the result it produces.
* Returns the current value of the flag assigned by
* {@link #setFormulasNotResults(boolean)}; the field is initialised to false.
*
* @return whether to include formulas but not results
*
* @since 3.16-beta3
*/
public boolean getFormulasNotResults() {
return formulasNotResults;
}
/** /**
* Should headers and footers be included? Default is true * Should headers and footers be included? Default is true
*/ */
public void setIncludeHeadersFooters(boolean includeHeadersFooters) { public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
this.includeHeadersFooters = includeHeadersFooters; this.includeHeadersFooters = includeHeadersFooters;
} }
/**
* Reports whether headers and footers are to be included.
* Returns the current value of the flag assigned by
* {@link #setIncludeHeadersFooters(boolean)}; the field is initialised to true.
*
* @return whether or not to include headers and footers
*
* @since 3.16-beta3
*/
public boolean getIncludeHeadersFooters() {
return includeHeadersFooters;
}
/** /**
* Should text from textboxes be included? Default is true * Should text from textboxes be included? Default is true
*/ */
@ -113,6 +151,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
this.includeTextBoxes = includeTextBoxes; this.includeTextBoxes = includeTextBoxes;
} }
/**
* Reports whether text from textboxes is to be included.
* Returns the current value of the {@code includeTextBoxes} flag;
* the field is initialised to true.
*
* @return whether or not to extract textboxes
*
* @since 3.16-beta3
*/
public boolean getIncludeTextBoxes() {
return includeTextBoxes;
}
/** /**
* Should cell comments be included? Default is false * Should cell comments be included? Default is false
*/ */
@ -120,6 +167,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
this.includeCellComments = includeCellComments; this.includeCellComments = includeCellComments;
} }
/**
* Returns the current value of the {@code includeCellComments} setting.
*
* @return whether cell comments should be included
*
* @since 3.16-beta3
*/
public boolean getIncludeCellComments() {
return includeCellComments;
}
/** /**
* Concatenate text from &lt;rPh&gt; text elements in SharedStringsTable * Concatenate text from &lt;rPh&gt; text elements in SharedStringsTable
* Default is true; * Default is true;
@ -132,6 +187,14 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
this.locale = locale; this.locale = locale;
} }
/**
* Returns the value currently held in the {@code locale} field.
*
* @return the locale as last assigned to this object
*
* @since 3.16-beta3
*/
public Locale getLocale() {
return locale;
}
/** /**
* Returns the opened OPCPackage container. * Returns the opened OPCPackage container.
*/ */
@ -162,6 +225,8 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
return properties.getCustomProperties(); return properties.getCustomProperties();
} }
/** /**
* Processes the given sheet * Processes the given sheet
*/ */
@ -229,13 +294,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
return text.toString(); return text.toString();
} catch(IOException e) { } catch(IOException e) {
System.err.println(e); LOGGER.log(POILogger.WARN, e);
return null; return null;
} catch(SAXException se) { } catch(SAXException se) {
System.err.println(se); LOGGER.log(POILogger.WARN, se);
return null; return null;
} catch(OpenXML4JException o4je) { } catch(OpenXML4JException o4je) {
System.err.println(o4je); LOGGER.log(POILogger.WARN, o4je);
return null; return null;
} }
} }

View File

@ -71,7 +71,7 @@ public class XSSFImportFromXML {
private final XSSFMap _map; private final XSSFMap _map;
private final static POILogger logger = POILogFactory.getLogger(XSSFImportFromXML.class); private static final POILogger logger = POILogFactory.getLogger(XSSFImportFromXML.class);
public XSSFImportFromXML(XSSFMap map) { public XSSFImportFromXML(XSSFMap map) {
_map = map; _map = map;

View File

@ -85,7 +85,7 @@ public class SharedStringsTable extends POIXMLDocumentPart {
private SstDocument _sstDoc; private SstDocument _sstDoc;
private final static XmlOptions options = new XmlOptions(); private static final XmlOptions options = new XmlOptions();
static { static {
options.put( XmlOptions.SAVE_INNER ); options.put( XmlOptions.SAVE_INNER );
options.put( XmlOptions.SAVE_AGGRESSIVE_NAMESPACES ); options.put( XmlOptions.SAVE_AGGRESSIVE_NAMESPACES );

View File

@ -1069,8 +1069,8 @@ public class SXSSFCell implements Cell {
static abstract class Property static abstract class Property
{ {
final static int COMMENT=1; static final int COMMENT=1;
final static int HYPERLINK=2; static final int HYPERLINK=2;
Object _value; Object _value;
Property _next; Property _next;
public Property(Object value) public Property(Object value)

View File

@ -34,7 +34,7 @@ import org.apache.poi.xssf.usermodel.BaseXSSFFormulaEvaluator;
* lookup cells within the current Window. * lookup cells within the current Window.
*/ */
public final class SXSSFFormulaEvaluator extends BaseXSSFFormulaEvaluator { public final class SXSSFFormulaEvaluator extends BaseXSSFFormulaEvaluator {
private final static POILogger logger = POILogFactory.getLogger(SXSSFFormulaEvaluator.class); private static final POILogger logger = POILogFactory.getLogger(SXSSFFormulaEvaluator.class);
private SXSSFWorkbook wb; private SXSSFWorkbook wb;

View File

@ -59,19 +59,19 @@ public final class XSSFName implements Name {
/** /**
* A built-in defined name that specifies the workbook's print area * A built-in defined name that specifies the workbook's print area
*/ */
public final static String BUILTIN_PRINT_AREA = "_xlnm.Print_Area"; public static final String BUILTIN_PRINT_AREA = "_xlnm.Print_Area";
/** /**
* A built-in defined name that specifies the row(s) or column(s) to repeat * A built-in defined name that specifies the row(s) or column(s) to repeat
* at the top of each printed page. * at the top of each printed page.
*/ */
public final static String BUILTIN_PRINT_TITLE = "_xlnm.Print_Titles"; public static final String BUILTIN_PRINT_TITLE = "_xlnm.Print_Titles";
/** /**
* A built-in defined name that refers to a range containing the criteria values * A built-in defined name that refers to a range containing the criteria values
* to be used in applying an advanced filter to a range of data * to be used in applying an advanced filter to a range of data
*/ */
public final static String BUILTIN_CRITERIA = "_xlnm.Criteria:"; public static final String BUILTIN_CRITERIA = "_xlnm.Criteria:";
/** /**
@ -79,7 +79,7 @@ public final class XSSFName implements Name {
* output values resulting from applying an advanced filter criteria to a source * output values resulting from applying an advanced filter criteria to a source
* range * range
*/ */
public final static String BUILTIN_EXTRACT = "_xlnm.Extract:"; public static final String BUILTIN_EXTRACT = "_xlnm.Extract:";
/** /**
* Can be one of the following * Can be one of the following
@ -88,22 +88,22 @@ public final class XSSFName implements Name {
* <li> This defined name refers to a range to which an AutoFilter has been * <li> This defined name refers to a range to which an AutoFilter has been
* applied * applied
*/ */
public final static String BUILTIN_FILTER_DB = "_xlnm._FilterDatabase"; public static final String BUILTIN_FILTER_DB = "_xlnm._FilterDatabase";
/** /**
* A built-in defined name that refers to a consolidation area * A built-in defined name that refers to a consolidation area
*/ */
public final static String BUILTIN_CONSOLIDATE_AREA = "_xlnm.Consolidate_Area"; public static final String BUILTIN_CONSOLIDATE_AREA = "_xlnm.Consolidate_Area";
/** /**
* A built-in defined name that specifies that the range is from a database data source * A built-in defined name that specifies that the range is from a database data source
*/ */
public final static String BUILTIN_DATABASE = "_xlnm.Database"; public static final String BUILTIN_DATABASE = "_xlnm.Database";
/** /**
* A built-in defined name that refers to a sheet title. * A built-in defined name that refers to a sheet title.
*/ */
public final static String BUILTIN_SHEET_TITLE = "_xlnm.Sheet_Title"; public static final String BUILTIN_SHEET_TITLE = "_xlnm.Sheet_Title";
private XSSFWorkbook _workbook; private XSSFWorkbook _workbook;
private CTDefinedName _ctName; private CTDefinedName _ctName;

View File

@ -18,6 +18,7 @@ package org.apache.poi.xssf.usermodel;
import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS; import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
import javax.xml.namespace.QName;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.OutputStream; import java.io.OutputStream;
@ -25,8 +26,6 @@ import java.util.ArrayList;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import javax.xml.namespace.QName;
import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Cell;
@ -40,13 +39,32 @@ import org.apache.poi.util.Beta;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.xmlbeans.XmlException; import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlOptions; import org.apache.xmlbeans.XmlOptions;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.*; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCacheSource;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTColFields;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTDataField;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTDataFields;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTField;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTItems;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTLocation;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageField;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPageFields;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotCacheDefinition;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotField;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotFields;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotTableDefinition;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTPivotTableStyle;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRowFields;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheetSource;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STAxis;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STDataConsolidateFunction;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STItemType;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.STSourceType;
public class XSSFPivotTable extends POIXMLDocumentPart { public class XSSFPivotTable extends POIXMLDocumentPart {
protected final static short CREATED_VERSION = 3; protected static final short CREATED_VERSION = 3;
protected final static short MIN_REFRESHABLE_VERSION = 3; protected static final short MIN_REFRESHABLE_VERSION = 3;
protected final static short UPDATED_VERSION = 3; protected static final short UPDATED_VERSION = 3;
private CTPivotTableDefinition pivotTableDefinition; private CTPivotTableDefinition pivotTableDefinition;
private XSSFPivotCacheDefinition pivotCacheDefinition; private XSSFPivotCacheDefinition pivotCacheDefinition;

View File

@ -16,24 +16,13 @@
==================================================================== */ ==================================================================== */
package org.apache.poi.xssf.usermodel; package org.apache.poi.xssf.usermodel;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.POIXMLRelation; import org.apache.poi.POIXMLRelation;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xssf.model.CalculationChain; import org.apache.poi.xssf.model.CalculationChain;
import org.apache.poi.xssf.model.CommentsTable; import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.ExternalLinksTable; import org.apache.poi.xssf.model.ExternalLinksTable;
@ -49,8 +38,6 @@ import org.apache.poi.xssf.model.ThemesTable;
*/ */
public final class XSSFRelation extends POIXMLRelation { public final class XSSFRelation extends POIXMLRelation {
private static final POILogger log = POILogFactory.getLogger(XSSFRelation.class);
/** /**
* A map to lookup POIXMLRelation by its relation type * A map to lookup POIXMLRelation by its relation type
*/ */
@ -368,25 +355,6 @@ public final class XSSFRelation extends POIXMLRelation {
_table.put(rel, this); _table.put(rel, this);
} }
/**
 * Fetches the InputStream to read the contents, based
 * off the specified core part, for which we are defined
 * as a suitable relationship.
 *
 * @param corePart the part whose relationships of this relation's type are searched
 * @return the input stream of the part targeted by the first matching
 *  relationship, or {@code null} (after logging a warning) when there is
 *  no such relationship or its target part is not present in the package
 * @throws IOException propagated from the underlying package calls
 * @throws InvalidFormatException propagated from the underlying package calls
 */
public InputStream getContents(PackagePart corePart) throws IOException, InvalidFormatException {
    PackageRelationshipCollection prc =
        corePart.getRelationshipsByType(getRelation());
    Iterator<PackageRelationship> it = prc.iterator();
    if(it.hasNext()) {
        // Only the first relationship of this type is consulted
        PackageRelationship rel = it.next();
        PackagePartName relName = PackagingURIHelper.createPartName(rel.getTargetURI());
        PackagePart part = corePart.getPackage().getPart(relName);
        // A relationship may point at a part that is missing from the package;
        // treat that like "no part found" instead of failing with a NullPointerException
        if (part != null) {
            return part.getInputStream();
        }
    }
    log.log(POILogger.WARN, "No part " + getDefaultFileName() + " found");
    return null;
}
/** /**
* Get POIXMLRelation by relation type * Get POIXMLRelation by relation type
* *

View File

@ -175,16 +175,12 @@ public class TestExtractorFactory {
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(xlsb); extractor = ExtractorFactory.createExtractor(xlsb);
assertTrue( assertContains(extractor.getText(), "test");
extractor.getText().contains("test")
);
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(xltx); extractor = ExtractorFactory.createExtractor(xltx);
assertTrue( assertContains(extractor.getText(), "test");
extractor.getText().contains("test")
);
extractor.close(); extractor.close();
// TODO Support OOXML-Strict, see bug #57699 // TODO Support OOXML-Strict, see bug #57699
@ -258,9 +254,7 @@ public class TestExtractorFactory {
extractor.close(); extractor.close();
extractor = ExtractorFactory.createExtractor(dotx); extractor = ExtractorFactory.createExtractor(dotx);
assertTrue( assertContains(extractor.getText(), "Test");
extractor.getText().contains("Test")
);
extractor.close(); extractor.close();
// PowerPoint (PPT) // PowerPoint (PPT)

View File

@ -17,9 +17,9 @@
package org.apache.poi.xssf.eventusermodel; package org.apache.poi.xssf.eventusermodel;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;
import java.io.InputStream; import java.io.InputStream;
import java.util.ArrayList; import java.util.ArrayList;
@ -44,54 +44,52 @@ public class TestXSSFBReader {
assertEquals(1, sheetTexts.size()); assertEquals(1, sheetTexts.size());
String xsxml = sheetTexts.get(0); String xsxml = sheetTexts.get(0);
assertContains("This is a string", xsxml); assertContains(xsxml, "This is a string");
assertContains("<td ref=\"B2\">13</td>", xsxml); assertContains(xsxml, "<td ref=\"B2\">13</td>");
assertContains("<td ref=\"B3\">13.12112313</td>", xsxml); assertContains(xsxml, "<td ref=\"B3\">13.12112313</td>");
assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml); assertContains(xsxml, "<td ref=\"B4\">$ 3.03</td>");
assertContains("<td ref=\"B5\">20%</td>", xsxml); assertContains(xsxml, "<td ref=\"B5\">20%</td>");
assertContains("<td ref=\"B6\">13.12</td>", xsxml); assertContains(xsxml, "<td ref=\"B6\">13.12</td>");
assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml); assertContains(xsxml, "<td ref=\"B7\">1.23457E+14</td>");
assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml); assertContains(xsxml, "<td ref=\"B8\">1.23457E+15</td>");
assertContains("46/1963", xsxml);//custom format 1 assertContains(xsxml, "46/1963");//custom format 1
assertContains("3/128", xsxml);//custom format 2 assertContains(xsxml, "3/128");//custom format 2
assertContains("<tr num=\"7>\n" + assertContains(xsxml, "<tr num=\"7>\n" +
"\t<td ref=\"A8\">longer int</td>\n" + "\t<td ref=\"A8\">longer int</td>\n" +
"\t<td ref=\"B8\">1.23457E+15</td>\n" + "\t<td ref=\"B8\">1.23457E+15</td>\n" +
"\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
"test comment2</span></td>\n" + "test comment2</span></td>\n" +
"</tr num=\"7>", xsxml); "</tr num=\"7>");
assertContains("<tr num=\"34>\n" + assertContains(xsxml, "<tr num=\"34>\n" +
"\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
"comment6 actually in cell</span></td>\n" + "comment6 actually in cell</span></td>\n" +
"</tr num=\"34>", xsxml); "</tr num=\"34>");
assertContains("<tr num=\"64>\n" + assertContains(xsxml, "<tr num=\"64>\n" +
"\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
"comment7 end of file</span></td>\n" + "comment7 end of file</span></td>\n" +
"</tr num=\"64>", xsxml); "</tr num=\"64>");
assertContains("<tr num=\"65>\n" + assertContains(xsxml, "<tr num=\"65>\n" +
"\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
"comment8 end of file</span></td>\n" + "comment8 end of file</span></td>\n" +
"</tr num=\"65>", xsxml); "</tr num=\"65>");
assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml); assertContains(xsxml,
assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml); "<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>");
assertContains( assertContains(xsxml,
"<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>", "<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>");
xsxml); assertContains(xsxml,
assertContains( "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>");
"<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>", assertContains(xsxml,
xsxml); "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>");
assertContains( assertContains(xsxml,
"<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>", "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>");
xsxml); assertContains(xsxml,
assertContains( "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>");
"<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
xsxml);
} }
@ -99,25 +97,24 @@ public class TestXSSFBReader {
public void testComments() throws Exception { public void testComments() throws Exception {
List<String> sheetTexts = getSheets("comments.xlsb"); List<String> sheetTexts = getSheets("comments.xlsb");
String xsxml = sheetTexts.get(0); String xsxml = sheetTexts.get(0);
assertContains( assertContains(xsxml,
"<tr num=\"0>\n" + "<tr num=\"0>\n" +
"\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" + "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
"\t<td ref=\"B1\">row1</td>\n" + "\t<td ref=\"B1\">row1</td>\n" +
"</tr num=\"0>", xsxml); "</tr num=\"0>");
assertContains( assertContains(xsxml,
"<tr num=\"1>\n" + "<tr num=\"1>\n" +
"\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" + "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
"comment row2 (index1)</span></td>\n" + "comment row2 (index1)</span></td>\n" +
"</tr num=\"1>", "</tr num=\"1>");
xsxml); assertContains(xsxml, "<tr num=\"2>\n" +
assertContains("<tr num=\"2>\n" +
"\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" + "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
"\t<td ref=\"B3\">row3</td>\n", xsxml); "\t<td ref=\"B3\">row3</td>\n");
assertContains("<tr num=\"3>\n" + assertContains(xsxml, "<tr num=\"3>\n" +
"\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" + "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
"\t<td ref=\"B4\">row4</td>\n" + "\t<td ref=\"B4\">row4</td>\n" +
"</tr num=\"3></sheet>", xsxml); "</tr num=\"3></sheet>");
} }
@ -131,7 +128,7 @@ public class TestXSSFBReader {
assertNotNull(r.getXSSFBStylesTable()); assertNotNull(r.getXSSFBStylesTable());
XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg); XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable(); XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData(); XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator) r.getSheetsData();
while (it.hasNext()) { while (it.hasNext()) {
InputStream is = it.next(); InputStream is = it.next();
@ -152,22 +149,11 @@ public class TestXSSFBReader {
} }
// Collapses every run of [\r\n\t] into a single space in both strings,
// then fails the test unless the normalized haystack contains the normalized needle
private void assertContains(String needle, String haystack) {
    final String normalizedNeedle = needle.replaceAll("[\r\n\t]+", " ");
    final String normalizedHaystack = haystack.replaceAll("[\r\n\t]+", " ");
    if (!normalizedHaystack.contains(normalizedNeedle)) {
        fail("couldn't find >" + normalizedNeedle + "< in: " + normalizedHaystack);
    }
}
@Test @Test
public void testDate() throws Exception { public void testDate() throws Exception {
List<String> sheets = getSheets("date.xlsb"); List<String> sheets = getSheets("date.xlsb");
assertEquals(1, sheets.size()); assertEquals(1, sheets.size());
assertContains("1/12/13", sheets.get(0)); assertContains(sheets.get(0), "1/12/13");
} }
@ -178,9 +164,10 @@ public class TestXSSFBReader {
sb.append("<sheet name=\"").append(sheetName).append(">"); sb.append("<sheet name=\"").append(sheetName).append(">");
} }
public void endSheet(){ public void endSheet() {
sb.append("</sheet>"); sb.append("</sheet>");
} }
@Override @Override
public void startRow(int rowNum) { public void startRow(int rowNum) {
sb.append("\n<tr num=\"").append(rowNum).append(">"); sb.append("\n<tr num=\"").append(rowNum).append(">");
@ -209,9 +196,9 @@ public class TestXSSFBReader {
@Override @Override
public void headerFooter(String text, boolean isHeader, String tagName) { public void headerFooter(String text, boolean isHeader, String tagName) {
if (isHeader) { if (isHeader) {
sb.append("<header tagName=\""+tagName+"\">"+text+"</header>"); sb.append("<header tagName=\"" + tagName + "\">" + text + "</header>");
} else { } else {
sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>"); sb.append("<footer tagName=\"" + tagName + "\">" + text + "</footer>");
} }
} }

View File

@ -17,6 +17,7 @@
package org.apache.poi.xssf.extractor; package org.apache.poi.xssf.extractor;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
@ -90,10 +91,9 @@ public class TestXSSFBEventBasedExcelExtractor {
try { try {
String text = ooxmlExtractor.getText(); String text = ooxmlExtractor.getText();
assertContains(text, "Line 1");
assertTrue(text.indexOf("Line 1") > -1); assertContains(text, "Line 2");
assertTrue(text.indexOf("Line 2") > -1); assertContains(text, "Line 3");
assertTrue(text.indexOf("Line 3") > -1);
} finally { } finally {
ooxmlExtractor.close(); ooxmlExtractor.close();
} }

View File

@ -17,6 +17,7 @@
package org.apache.poi.xssf.extractor; package org.apache.poi.xssf.extractor;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNotNull;
@ -130,20 +131,20 @@ public class TestXSSFEventBasedExcelExtractor {
String text = extractor.getText(); String text = extractor.getText();
// Numbers // Numbers
assertTrue("Unable to find expected word in text\n" + text, text.contains("43")); assertContains(text, "43");
assertTrue("Unable to find expected word in text\n" + text, text.contains("22")); assertContains(text, "22");
// Strings // Strings
assertTrue("Unable to find expected word in text\n" + text, text.contains("ABCDE")); assertContains(text, "ABCDE");
assertTrue("Unable to find expected word in text\n" + text, text.contains("Long Text")); assertContains(text, "Long Text");
// Inline Strings // Inline Strings
assertTrue("Unable to find expected word in text\n" + text, text.contains("1st Inline String")); assertContains(text, "1st Inline String");
assertTrue("Unable to find expected word in text\n" + text, text.contains("And More")); assertContains(text, "And More");
// Formulas // Formulas
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); assertContains(text, "A2");
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); assertContains(text, "A5-A$2");
extractor.close(); extractor.close();
} }
@ -185,10 +186,9 @@ public class TestXSSFEventBasedExcelExtractor {
try { try {
String text = ooxmlExtractor.getText(); String text = ooxmlExtractor.getText();
assertContains(text, "Line 1");
assertTrue(text.indexOf("Line 1") > -1); assertContains(text, "Line 2");
assertTrue(text.indexOf("Line 2") > -1); assertContains(text, "Line 3");
assertTrue(text.indexOf("Line 3") > -1);
} finally { } finally {
ooxmlExtractor.close(); ooxmlExtractor.close();
} }
@ -356,8 +356,8 @@ public class TestXSSFEventBasedExcelExtractor {
new XSSFEventBasedExcelExtractor( new XSSFEventBasedExcelExtractor(
XSSFTestDataSamples.openSamplePackage("59021.xlsx")); XSSFTestDataSamples.openSamplePackage("59021.xlsx"));
String text = ex.getText(); String text = ex.getText();
assertTrue("can't find Abhkazia", text.contains("Abkhazia - Fixed")); assertContains(text, "Abkhazia - Fixed");
assertTrue("can't find 10/02/2016", text.contains("10/02/2016")); assertContains(text, "10/02/2016");
ex.close(); ex.close();
} }
@ -368,7 +368,7 @@ public class TestXSSFEventBasedExcelExtractor {
new XSSFEventBasedExcelExtractor( new XSSFEventBasedExcelExtractor(
XSSFTestDataSamples.openSamplePackage("51519.xlsx")); XSSFTestDataSamples.openSamplePackage("51519.xlsx"));
String text = ex.getText(); String text = ex.getText();
assertTrue("can't find appended phonetic run", text.contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3")); assertContains(text, "\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3");
ex.close(); ex.close();
//now try turning them off //now try turning them off