add SharedStrings interface to allow our XSSF code to be more easily extended by external projects
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1836674 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c4290ae2be
commit
826c15ef59
@ -33,6 +33,7 @@ import org.apache.poi.ss.util.CellReference;
|
||||
import org.apache.poi.ooxml.util.SAXHelper;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
import org.xml.sax.ContentHandler;
|
||||
@ -180,7 +181,7 @@ public class XLSX2CSV {
|
||||
*/
|
||||
public void processSheet(
|
||||
StylesTable styles,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
SheetContentsHandler sheetHandler,
|
||||
InputStream sheetInputStream) throws IOException, SAXException {
|
||||
DataFormatter formatter = new DataFormatter();
|
||||
|
@ -24,15 +24,19 @@ import java.util.List;
|
||||
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.Removal;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
/**
|
||||
* @since 3.16-beta3
|
||||
*/
|
||||
@Internal
|
||||
public class XSSFBSharedStringsTable {
|
||||
public class XSSFBSharedStringsTable implements SharedStrings {
|
||||
|
||||
/**
|
||||
* An integer representing the total count of strings in the workbook. This count does not
|
||||
@ -83,17 +87,37 @@ public class XSSFBSharedStringsTable {
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all the strings.
|
||||
* Formatting is ignored.
|
||||
*
|
||||
* @return a defensive copy of strings
|
||||
* @return a list with all the shared strings.
|
||||
* @deprecated use <code>getItemAt</code> instead
|
||||
*/
|
||||
@Removal(version = "4.2")
|
||||
@Deprecated
|
||||
public List<String> getItems() {
|
||||
List<String> ret = new ArrayList<>(strings.size());
|
||||
ret.addAll(strings);
|
||||
return ret;
|
||||
}
|
||||
|
||||
public String getEntryAt(int i) {
|
||||
return strings.get(i);
|
||||
/**
|
||||
* Return the string at a given index.
|
||||
* Formatting is ignored.
|
||||
*
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
* @deprecated use <code>getItemAt</code> instead
|
||||
*/
|
||||
@Removal(version = "4.2")
|
||||
@Deprecated
|
||||
public String getEntryAt(int idx) {
|
||||
return strings.get(idx);
|
||||
}
|
||||
|
||||
@Override
|
||||
public RichTextString getItemAt(int idx) {
|
||||
return new XSSFRichTextString(getEntryAt(idx));
|
||||
}
|
||||
|
||||
/**
|
||||
@ -102,6 +126,7 @@ public class XSSFBSharedStringsTable {
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
@Override
|
||||
public int getCount() {
|
||||
return this.count;
|
||||
}
|
||||
@ -113,6 +138,7 @@ public class XSSFBSharedStringsTable {
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
@Override
|
||||
public int getUniqueCount() {
|
||||
return this.uniqueCount;
|
||||
}
|
||||
|
@ -23,12 +23,13 @@ import java.util.Queue;
|
||||
|
||||
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
|
||||
/**
|
||||
* @since 3.16-beta3
|
||||
@ -38,7 +39,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
|
||||
|
||||
private static final int CHECK_ALL_ROWS = -1;
|
||||
|
||||
private final XSSFBSharedStringsTable stringsTable;
|
||||
private final SharedStrings stringsTable;
|
||||
private final XSSFSheetXMLHandler.SheetContentsHandler handler;
|
||||
private final XSSFBStylesTable styles;
|
||||
private final XSSFBCommentsTable comments;
|
||||
@ -56,7 +57,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
|
||||
public XSSFBSheetHandler(InputStream is,
|
||||
XSSFBStylesTable styles,
|
||||
XSSFBCommentsTable comments,
|
||||
XSSFBSharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
|
||||
DataFormatter dataFormatter,
|
||||
boolean formulasNotResults) {
|
||||
@ -208,7 +209,7 @@ public class XSSFBSheetHandler extends XSSFBParser {
|
||||
private void handleBrtCellIsst(byte[] data) {
|
||||
beforeCellValue(data);
|
||||
int idx = XSSFBUtils.castToInt(LittleEndian.getUInt(data, XSSFBCellHeader.length));
|
||||
XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt(idx));
|
||||
RichTextString rtss = stringsTable.getItemAt(idx);
|
||||
handleCellValue(rtss.getString());
|
||||
}
|
||||
|
||||
|
@ -28,7 +28,11 @@ import java.util.List;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.ooxml.util.SAXHelper;
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
import org.apache.poi.util.Removal;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
|
||||
import org.xml.sax.Attributes;
|
||||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
@ -75,7 +79,7 @@ import org.xml.sax.helpers.DefaultHandler;
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
public class ReadOnlySharedStringsTable extends DefaultHandler {
|
||||
public class ReadOnlySharedStringsTable extends DefaultHandler implements SharedStrings {
|
||||
|
||||
protected final boolean includePhoneticRuns;
|
||||
|
||||
@ -205,15 +209,32 @@ public class ReadOnlySharedStringsTable extends DefaultHandler {
|
||||
*
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
* @deprecated use <code>getItemAt</code> instead
|
||||
*/
|
||||
@Removal(version = "4.2")
|
||||
@Deprecated
|
||||
public String getEntryAt(int idx) {
|
||||
return strings.get(idx);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all the strings.
|
||||
* Formatting is ignored.
|
||||
*
|
||||
* @return a list with all the strings
|
||||
* @deprecated use <code>getItemAt</code> instead
|
||||
*/
|
||||
@Removal(version = "4.2")
|
||||
@Deprecated
|
||||
public List<String> getItems() {
|
||||
return strings;
|
||||
}
|
||||
|
||||
@Override
|
||||
public RichTextString getItemAt(int idx) {
|
||||
return new XSSFRichTextString(getEntryAt(idx));
|
||||
}
|
||||
|
||||
//// ContentHandler methods ////
|
||||
|
||||
private StringBuilder characters;
|
||||
|
@ -23,10 +23,12 @@ import java.util.Queue;
|
||||
|
||||
import org.apache.poi.ss.usermodel.BuiltinFormats;
|
||||
import org.apache.poi.ss.usermodel.DataFormatter;
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
import org.apache.poi.ss.util.CellAddress;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.apache.poi.xssf.model.CommentsTable;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
@ -72,7 +74,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||
* Read only access to the shared strings table, for looking
|
||||
* up (most) string cell's contents
|
||||
*/
|
||||
private ReadOnlySharedStringsTable sharedStringsTable;
|
||||
private SharedStrings sharedStringsTable;
|
||||
|
||||
/**
|
||||
* Where our text is going
|
||||
@ -117,7 +119,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||
public XSSFSheetXMLHandler(
|
||||
StylesTable styles,
|
||||
CommentsTable comments,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
SheetContentsHandler sheetContentsHandler,
|
||||
DataFormatter dataFormatter,
|
||||
boolean formulasNotResults) {
|
||||
@ -139,7 +141,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||
*/
|
||||
public XSSFSheetXMLHandler(
|
||||
StylesTable styles,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
SheetContentsHandler sheetContentsHandler,
|
||||
DataFormatter dataFormatter,
|
||||
boolean formulasNotResults) {
|
||||
@ -154,7 +156,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||
*/
|
||||
public XSSFSheetXMLHandler(
|
||||
StylesTable styles,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
SheetContentsHandler sheetContentsHandler,
|
||||
boolean formulasNotResults) {
|
||||
this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults);
|
||||
@ -351,7 +353,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
|
||||
String sstIndex = value.toString();
|
||||
try {
|
||||
int idx = Integer.parseInt(sstIndex);
|
||||
XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
|
||||
RichTextString rtss = sharedStringsTable.getItemAt(idx);
|
||||
thisStr = rtss.toString();
|
||||
}
|
||||
catch (NumberFormatException ex) {
|
||||
|
@ -32,6 +32,7 @@ import org.apache.poi.xssf.binary.XSSFBSheetHandler;
|
||||
import org.apache.poi.xssf.binary.XSSFBStylesTable;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFBReader;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.xml.sax.SAXException;
|
||||
@ -94,7 +95,7 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
|
||||
SheetContentsHandler sheetContentsExtractor,
|
||||
XSSFBStylesTable styles,
|
||||
XSSFBCommentsTable comments,
|
||||
XSSFBSharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
InputStream sheetInputStream)
|
||||
throws IOException, SAXException {
|
||||
|
||||
|
@ -40,6 +40,7 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler;
|
||||
import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
|
||||
import org.apache.poi.xssf.model.CommentsTable;
|
||||
import org.apache.poi.xssf.model.SharedStrings;
|
||||
import org.apache.poi.xssf.model.StylesTable;
|
||||
import org.apache.poi.xssf.usermodel.XSSFComment;
|
||||
import org.apache.poi.xssf.usermodel.XSSFShape;
|
||||
@ -232,7 +233,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
SheetContentsHandler sheetContentsExtractor,
|
||||
StylesTable styles,
|
||||
CommentsTable comments,
|
||||
ReadOnlySharedStringsTable strings,
|
||||
SharedStrings strings,
|
||||
InputStream sheetInputStream)
|
||||
throws IOException, SAXException {
|
||||
|
||||
@ -255,12 +256,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
||||
}
|
||||
}
|
||||
|
||||
protected SharedStrings createSharedStringsTable(OPCPackage container, boolean concatenatePhoneticRuns)
|
||||
throws IOException, SAXException {
|
||||
return new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes the file and returns the text
|
||||
*/
|
||||
public String getText() {
|
||||
try {
|
||||
ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
|
||||
SharedStrings strings = createSharedStringsTable(container, concatenatePhoneticRuns);
|
||||
XSSFReader xssfReader = new XSSFReader(container);
|
||||
StylesTable styles = xssfReader.getStylesTable();
|
||||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||
|
69
src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java
Normal file
69
src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java
Normal file
@ -0,0 +1,69 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.model;
|
||||
|
||||
import org.apache.poi.ss.usermodel.RichTextString;
|
||||
|
||||
/**
|
||||
* Table of strings shared across all sheets in a workbook.
|
||||
* <p>
|
||||
* A workbook may contain thousands of cells containing string (non-numeric) data. Furthermore this data is very
|
||||
* likely to be repeated across many rows or columns. The goal of implementing a single string table that is shared
|
||||
* across the workbook is to improve performance in opening and saving the file by only reading and writing the
|
||||
* repetitive information once.
|
||||
* </p>
|
||||
* <p>
|
||||
* Consider for example a workbook summarizing information for cities within various countries. There may be a
|
||||
* column for the name of the country, a column for the name of each city in that country, and a column
|
||||
* containing the data for each city. In this case the country name is repetitive, being duplicated in many cells.
|
||||
* In many cases the repetition is extensive, and a tremendous savings is realized by making use of a shared string
|
||||
* table when saving the workbook. When displaying text in the spreadsheet, the cell table will just contain an
|
||||
* index into the string table as the value of a cell, instead of the full string.
|
||||
* </p>
|
||||
* <p>
|
||||
* The shared string table contains all the necessary information for displaying the string: the text, formatting
|
||||
* properties, and phonetic properties (for East Asian languages).
|
||||
* </p>
|
||||
*/
|
||||
public interface SharedStrings {
|
||||
|
||||
/**
|
||||
* Return a string item by index
|
||||
*
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
*/
|
||||
public RichTextString getItemAt(int idx);
|
||||
|
||||
/**
|
||||
* Return an integer representing the total count of strings in the workbook. This count does not
|
||||
* include any numbers, it counts only the total of text strings in the workbook.
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
public int getCount();
|
||||
|
||||
/**
|
||||
* Returns an integer representing the total count of unique strings in the Shared String Table.
|
||||
* A string is unique even if it is a copy of another string, but has different formatting applied
|
||||
* at the character level.
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
public int getUniqueCount();
|
||||
}
|
@ -62,7 +62,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
|
||||
* properties, and phonetic properties (for East Asian languages).
|
||||
* </p>
|
||||
*/
|
||||
public class SharedStringsTable extends POIXMLDocumentPart implements Closeable {
|
||||
public class SharedStringsTable extends POIXMLDocumentPart implements SharedStrings, Closeable {
|
||||
|
||||
/**
|
||||
* Array of individual string items in the Shared String table.
|
||||
@ -157,6 +157,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
|
||||
* @param idx index of item to return.
|
||||
* @return the item at the specified position in this Shared String table.
|
||||
*/
|
||||
@Override
|
||||
public RichTextString getItemAt(int idx) {
|
||||
return new XSSFRichTextString(strings.get(idx));
|
||||
}
|
||||
@ -167,6 +168,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
|
||||
*
|
||||
* @return the total count of strings in the workbook
|
||||
*/
|
||||
@Override
|
||||
public int getCount(){
|
||||
return count;
|
||||
}
|
||||
@ -178,6 +180,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable
|
||||
*
|
||||
* @return the total count of unique strings in the workbook
|
||||
*/
|
||||
@Override
|
||||
public int getUniqueCount(){
|
||||
return uniqueCount;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user