add SharedStrings interface to allow our XSSF code to be more easily extended by external projects

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1836674 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
PJ Fanning 2018-07-25 21:24:29 +00:00
parent c4290ae2be
commit 826c15ef59
9 changed files with 149 additions and 19 deletions

View File

@ -33,6 +33,7 @@ import org.apache.poi.ss.util.CellReference;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.xml.sax.ContentHandler;
@ -180,7 +181,7 @@ public class XLSX2CSV {
*/
public void processSheet(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SharedStrings strings,
SheetContentsHandler sheetHandler,
InputStream sheetInputStream) throws IOException, SAXException {
DataFormatter formatter = new DataFormatter();

View File

@ -24,15 +24,19 @@ import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.Removal;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.SAXException;
/**
* @since 3.16-beta3
*/
@Internal
public class XSSFBSharedStringsTable {
public class XSSFBSharedStringsTable implements SharedStrings {
/**
* An integer representing the total count of strings in the workbook. This count does not
@ -83,17 +87,37 @@ public class XSSFBSharedStringsTable {
}
/**
* Return all the strings.
* Formatting is ignored.
*
* @return a defensive copy of strings
* @return a list with all the shared strings.
* @deprecated use <code>getItemAt</code> instead
*/
@Removal(version = "4.2")
@Deprecated
public List<String> getItems() {
    // The copy constructor produces an independent list holding the same entries,
    // so callers never receive the table's own backing storage.
    return new ArrayList<>(strings);
}
public String getEntryAt(int i) {
return strings.get(i);
/**
* Return the string at a given index.
* Formatting is ignored.
*
* @param idx index of item to return.
* @return the item at the specified position in this Shared String table.
* @deprecated use <code>getItemAt</code> instead
*/
@Removal(version = "4.2")
@Deprecated
public String getEntryAt(int idx) {
// Direct positional lookup on the strings field; idx is forwarded as-is with no range check in this method.
return strings.get(idx);
}
@Override
public RichTextString getItemAt(int idx) {
    // Fetch the plain text for this index first, then wrap it so callers
    // receive the RichTextString type required by the SharedStrings contract.
    final String plainText = getEntryAt(idx);
    return new XSSFRichTextString(plainText);
}
/**
@ -102,6 +126,7 @@ public class XSSFBSharedStringsTable {
*
* @return the total count of strings in the workbook
*/
@Override
public int getCount() {
// Plain accessor: hands back the stored count field; nothing is recomputed in this method.
return this.count;
}
@ -113,6 +138,7 @@ public class XSSFBSharedStringsTable {
*
* @return the total count of unique strings in the workbook
*/
@Override
public int getUniqueCount() {
// Plain accessor: hands back the stored uniqueCount field; nothing is recomputed in this method.
return this.uniqueCount;
}

View File

@ -23,12 +23,13 @@ import java.util.Queue;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
/**
* @since 3.16-beta3
@ -38,7 +39,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
private static final int CHECK_ALL_ROWS = -1;
private final XSSFBSharedStringsTable stringsTable;
private final SharedStrings stringsTable;
private final XSSFSheetXMLHandler.SheetContentsHandler handler;
private final XSSFBStylesTable styles;
private final XSSFBCommentsTable comments;
@ -56,7 +57,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
public XSSFBSheetHandler(InputStream is,
XSSFBStylesTable styles,
XSSFBCommentsTable comments,
XSSFBSharedStringsTable strings,
SharedStrings strings,
XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
DataFormatter dataFormatter,
boolean formulasNotResults) {
@ -208,7 +209,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
private void handleBrtCellIsst(byte[] data) {
beforeCellValue(data);
int idx = XSSFBUtils.castToInt(LittleEndian.getUInt(data, XSSFBCellHeader.length));
XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt(idx));
RichTextString rtss = stringsTable.getItemAt(idx);
handleCellValue(rtss.getString());
}

View File

@ -28,7 +28,11 @@ import java.util.List;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.ooxml.util.SAXHelper;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.util.Removal;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@ -75,7 +79,7 @@ import org.xml.sax.helpers.DefaultHandler;
* </pre>
*
*/
public class ReadOnlySharedStringsTable extends DefaultHandler {
public class ReadOnlySharedStringsTable extends DefaultHandler implements SharedStrings {
protected final boolean includePhoneticRuns;
@ -205,15 +209,32 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
*
* @param idx index of item to return.
* @return the item at the specified position in this Shared String table.
* @deprecated use <code>getItemAt</code> instead
*/
@Removal(version = "4.2")
@Deprecated
public String getEntryAt(int idx) {
// Direct positional lookup on the strings field; idx is forwarded as-is with no range check in this method.
return strings.get(idx);
}
/**
* Returns all the strings.
* Formatting is ignored.
*
* @return a list with all the strings
* @deprecated use <code>getItemAt</code> instead
*/
@Removal(version = "4.2")
@Deprecated
public List<String> getItems() {
// NOTE(review): this hands out the strings field itself rather than a copy, so a caller that
// mutates the returned list would presumably alter this table as well — confirm whether that is intended.
return strings;
}
@Override
public RichTextString getItemAt(int idx) {
    // Resolve the plain text for this index, then wrap it so callers
    // receive the RichTextString type required by the SharedStrings contract.
    final String plainText = getEntryAt(idx);
    return new XSSFRichTextString(plainText);
}
//// ContentHandler methods ////
private StringBuilder characters;

View File

@ -23,10 +23,12 @@ import java.util.Queue;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.RichTextString;
import org.apache.poi.ss.util.CellAddress;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFComment;
@ -72,7 +74,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
* Read only access to the shared strings table, for looking
* up (most) string cell's contents
*/
private ReadOnlySharedStringsTable sharedStringsTable;
private SharedStrings sharedStringsTable;
/**
* Where our text is going
@ -117,7 +119,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
public XSSFSheetXMLHandler(
StylesTable styles,
CommentsTable comments,
ReadOnlySharedStringsTable strings,
SharedStrings strings,
SheetContentsHandler sheetContentsHandler,
DataFormatter dataFormatter,
boolean formulasNotResults) {
@ -139,7 +141,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
*/
public XSSFSheetXMLHandler(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SharedStrings strings,
SheetContentsHandler sheetContentsHandler,
DataFormatter dataFormatter,
boolean formulasNotResults) {
@ -154,7 +156,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
*/
public XSSFSheetXMLHandler(
StylesTable styles,
ReadOnlySharedStringsTable strings,
SharedStrings strings,
SheetContentsHandler sheetContentsHandler,
boolean formulasNotResults) {
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
@ -351,7 +353,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
String sstIndex = value.toString();
try {
int idx = Integer.parseInt(sstIndex);
XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
RichTextString rtss = sharedStringsTable.getItemAt(idx);
thisStr = rtss.toString();
}
catch (NumberFormatException ex) {

View File

@ -32,6 +32,7 @@ import org.apache.poi.xssf.binary.XSSFBSheetHandler;
import org.apache.poi.xssf.binary.XSSFBStylesTable;
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.xmlbeans.XmlException;
import org.xml.sax.SAXException;
@ -94,7 +95,7 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
SheetContentsHandler sheetContentsExtractor,
XSSFBStylesTable styles,
XSSFBCommentsTable comments,
XSSFBSharedStringsTable strings,
SharedStrings strings,
InputStream sheetInputStream)
throws IOException, SAXException {

View File

@ -40,6 +40,7 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStrings;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFComment;
import org.apache.poi.xssf.usermodel.XSSFShape;
@ -232,7 +233,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
SheetContentsHandler sheetContentsExtractor,
StylesTable styles,
CommentsTable comments,
ReadOnlySharedStringsTable strings,
SharedStrings strings,
InputStream sheetInputStream)
throws IOException, SAXException {
@ -255,12 +256,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
}
}
/**
 * Builds the shared strings lookup that {@link #getText()} uses while extracting text.
 * <p>
 * This implementation creates a {@link ReadOnlySharedStringsTable}. Because the method is
 * protected and returns the {@link SharedStrings} interface, a subclass can override it
 * to supply a different implementation.
 * </p>
 *
 * @param container the package handed to the {@link ReadOnlySharedStringsTable} constructor
 * @param concatenatePhoneticRuns flag forwarded unchanged to the
 *        {@link ReadOnlySharedStringsTable} constructor
 * @return the newly created shared strings table
 * @throws IOException propagated from the {@link ReadOnlySharedStringsTable} constructor
 * @throws SAXException propagated from the {@link ReadOnlySharedStringsTable} constructor
 */
protected SharedStrings createSharedStringsTable(OPCPackage container, boolean concatenatePhoneticRuns)
        throws IOException, SAXException {
    final SharedStrings table = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
    return table;
}
/**
* Processes the file and returns the text
*/
public String getText() {
try {
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
SharedStrings strings = createSharedStringsTable(container, concatenatePhoneticRuns);
XSSFReader xssfReader = new XSSFReader(container);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

View File

@ -0,0 +1,69 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.xssf.model;
import org.apache.poi.ss.usermodel.RichTextString;
/**
* Table of strings shared across all sheets in a workbook.
* <p>
* A workbook may contain thousands of cells containing string (non-numeric) data. Furthermore this data is very
* likely to be repeated across many rows or columns. The goal of implementing a single string table that is shared
* across the workbook is to improve performance in opening and saving the file by only reading and writing the
* repetitive information once.
* </p>
* <p>
* Consider for example a workbook summarizing information for cities within various countries. There may be a
* column for the name of the country, a column for the name of each city in that country, and a column
* containing the data for each city. In this case the country name is repetitive, being duplicated in many cells.
* In many cases the repetition is extensive, and a tremendous savings is realized by making use of a shared string
* table when saving the workbook. When displaying text in the spreadsheet, the cell table will just contain an
* index into the string table as the value of a cell, instead of the full string.
* </p>
* <p>
* The shared string table contains all the necessary information for displaying the string: the text, formatting
* properties, and phonetic properties (for East Asian languages).
* </p>
*/
public interface SharedStrings {

    /**
     * Looks up a single shared string by its position in the table.
     *
     * @param idx index of the item to return.
     * @return the item stored at that position in this Shared String table.
     */
    RichTextString getItemAt(int idx);

    /**
     * Reports how many text strings the workbook contains in total. Numbers are
     * not part of this figure; only text strings are counted.
     *
     * @return the total count of strings in the workbook
     */
    int getCount();

    /**
     * Reports how many unique strings the Shared String Table holds. A string
     * that repeats the text of another one still counts as unique when its
     * character-level formatting differs.
     *
     * @return the total count of unique strings in the workbook
     */
    int getUniqueCount();
}

View File

@ -62,7 +62,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
* properties, and phonetic properties (for East Asian languages).
* </p>
*/
public class SharedStringsTable extends POIXMLDocumentPart implements Closeable {
public class SharedStringsTable extends POIXMLDocumentPart implements SharedStrings, Closeable {
/**
* Array of individual string items in the Shared String table.
@ -157,6 +157,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
* @param idx index of item to return.
* @return the item at the specified position in this Shared String table.
*/
@Override
public RichTextString getItemAt(int idx) {
// Wraps the entry stored at idx in a new XSSFRichTextString on every call; nothing is cached in this method.
return new XSSFRichTextString(strings.get(idx));
}
@ -167,6 +168,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
*
* @return the total count of strings in the workbook
*/
@Override
public int getCount(){
// Plain accessor for the count field; no recalculation happens in this method.
return count;
}
@ -178,6 +180,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
*
* @return the total count of unique strings in the workbook
*/
@Override
public int getUniqueCount(){
// Plain accessor for the uniqueCount field; no recalculation happens in this method.
return uniqueCount;
}