From 826c15ef592bdef6e9e6a054b93cb794cba7b5f6 Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Wed, 25 Jul 2018 21:24:29 +0000 Subject: [PATCH] add SharedStrings interface to allow our XSSF code to be more easily extended by external projects git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1836674 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/xssf/eventusermodel/XLSX2CSV.java | 3 +- .../xssf/binary/XSSFBSharedStringsTable.java | 34 +++++++-- .../poi/xssf/binary/XSSFBSheetHandler.java | 9 +-- .../ReadOnlySharedStringsTable.java | 23 ++++++- .../eventusermodel/XSSFSheetXMLHandler.java | 12 ++-- .../XSSFBEventBasedExcelExtractor.java | 3 +- .../XSSFEventBasedExcelExtractor.java | 10 ++- .../apache/poi/xssf/model/SharedStrings.java | 69 +++++++++++++++++++ .../poi/xssf/model/SharedStringsTable.java | 5 +- 9 files changed, 149 insertions(+), 19 deletions(-) create mode 100644 src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java diff --git a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java index 6bbfc6b85..899ea1581 100644 --- a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java +++ b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java @@ -33,6 +33,7 @@ import org.apache.poi.ss.util.CellReference; import org.apache.poi.ooxml.util.SAXHelper; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFComment; import org.xml.sax.ContentHandler; @@ -180,7 +181,7 @@ public class XLSX2CSV { */ public void processSheet( StylesTable styles, - ReadOnlySharedStringsTable strings, + SharedStrings strings, SheetContentsHandler sheetHandler, InputStream sheetInputStream) throws IOException, SAXException { DataFormatter formatter = new 
DataFormatter(); diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java index c14cdc4ed..03a1238fc 100644 --- a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSharedStringsTable.java @@ -24,15 +24,19 @@ import java.util.List; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.ss.usermodel.RichTextString; import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.Removal; +import org.apache.poi.xssf.model.SharedStrings; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.SAXException; /** * @since 3.16-beta3 */ @Internal -public class XSSFBSharedStringsTable { +public class XSSFBSharedStringsTable implements SharedStrings { /** * An integer representing the total count of strings in the workbook. This count does not @@ -83,17 +87,37 @@ public class XSSFBSharedStringsTable { } /** + * Return all the strings. + * Formatting is ignored. * - * @return a defensive copy of strings + * @return a list with all the shared strings. + * @deprecated use getItemAt instead */ + @Removal(version = "4.2") + @Deprecated public List getItems() { List ret = new ArrayList<>(strings.size()); ret.addAll(strings); return ret; } - public String getEntryAt(int i) { - return strings.get(i); + /** + * Return the string at a given index. + * Formatting is ignored. + * + * @param idx index of item to return. + * @return the item at the specified position in this Shared String table. 
+ * @deprecated use getItemAt instead + */ + @Removal(version = "4.2") + @Deprecated + public String getEntryAt(int idx) { + return strings.get(idx); + } + + @Override + public RichTextString getItemAt(int idx) { + return new XSSFRichTextString(getEntryAt(idx)); } /** @@ -102,6 +126,7 @@ public class XSSFBSharedStringsTable { * * @return the total count of strings in the workbook */ + @Override public int getCount() { return this.count; } @@ -113,6 +138,7 @@ public class XSSFBSharedStringsTable { * * @return the total count of unique strings in the workbook */ + @Override public int getUniqueCount() { return this.uniqueCount; } diff --git a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java index 639300df1..fb3f433bd 100644 --- a/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/binary/XSSFBSheetHandler.java @@ -23,12 +23,13 @@ import java.util.Queue; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.RichTextString; import org.apache.poi.ss.util.CellAddress; import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.usermodel.XSSFComment; -import org.apache.poi.xssf.usermodel.XSSFRichTextString; /** * @since 3.16-beta3 @@ -38,7 +39,7 @@ public class XSSFBSheetHandler extends XSSFBParser { private static final int CHECK_ALL_ROWS = -1; - private final XSSFBSharedStringsTable stringsTable; + private final SharedStrings stringsTable; private final XSSFSheetXMLHandler.SheetContentsHandler handler; private final XSSFBStylesTable styles; private final XSSFBCommentsTable comments; @@ -56,7 +57,7 @@ public class XSSFBSheetHandler extends XSSFBParser { public XSSFBSheetHandler(InputStream is, 
XSSFBStylesTable styles, XSSFBCommentsTable comments, - XSSFBSharedStringsTable strings, + SharedStrings strings, XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) { @@ -208,7 +209,7 @@ public class XSSFBSheetHandler extends XSSFBParser { private void handleBrtCellIsst(byte[] data) { beforeCellValue(data); int idx = XSSFBUtils.castToInt(LittleEndian.getUInt(data, XSSFBCellHeader.length)); - XSSFRichTextString rtss = new XSSFRichTextString(stringsTable.getEntryAt(idx)); + RichTextString rtss = stringsTable.getItemAt(idx); handleCellValue(rtss.getString()); } diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java index 4879e347f..09191f9ab 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java @@ -28,7 +28,11 @@ import java.util.List; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.ooxml.util.SAXHelper; +import org.apache.poi.ss.usermodel.RichTextString; +import org.apache.poi.util.Removal; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.usermodel.XSSFRelation; +import org.apache.poi.xssf.usermodel.XSSFRichTextString; import org.xml.sax.Attributes; import org.xml.sax.InputSource; import org.xml.sax.SAXException; @@ -75,7 +79,7 @@ import org.xml.sax.helpers.DefaultHandler; * * */ -public class ReadOnlySharedStringsTable extends DefaultHandler { +public class ReadOnlySharedStringsTable extends DefaultHandler implements SharedStrings { protected final boolean includePhoneticRuns; @@ -205,15 +209,32 @@ public class ReadOnlySharedStringsTable extends DefaultHandler { * * @param idx index of item to return. 
* @return the item at the specified position in this Shared String table. + * @deprecated use getItemAt instead */ + @Removal(version = "4.2") + @Deprecated public String getEntryAt(int idx) { return strings.get(idx); } + /** + * Returns all the strings. + * Formatting is ignored. + * + * @return a list with all the strings + * @deprecated use getItemAt instead + */ + @Removal(version = "4.2") + @Deprecated public List getItems() { return strings; } + @Override + public RichTextString getItemAt(int idx) { + return new XSSFRichTextString(getEntryAt(idx)); + } + //// ContentHandler methods //// private StringBuilder characters; diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 64c5afacd..a61ac95ca 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -23,10 +23,12 @@ import java.util.Queue; import org.apache.poi.ss.usermodel.BuiltinFormats; import org.apache.poi.ss.usermodel.DataFormatter; +import org.apache.poi.ss.usermodel.RichTextString; import org.apache.poi.ss.util.CellAddress; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.apache.poi.xssf.model.CommentsTable; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFCellStyle; import org.apache.poi.xssf.usermodel.XSSFComment; @@ -72,7 +74,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { * Read only access to the shared strings table, for looking * up (most) string cell's contents */ - private ReadOnlySharedStringsTable sharedStringsTable; + private SharedStrings sharedStringsTable; /** * Where our text is going @@ -117,7 +119,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { public XSSFSheetXMLHandler( StylesTable styles, 
CommentsTable comments, - ReadOnlySharedStringsTable strings, + SharedStrings strings, SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) { @@ -139,7 +141,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { */ public XSSFSheetXMLHandler( StylesTable styles, - ReadOnlySharedStringsTable strings, + SharedStrings strings, SheetContentsHandler sheetContentsHandler, DataFormatter dataFormatter, boolean formulasNotResults) { @@ -154,7 +156,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { */ public XSSFSheetXMLHandler( StylesTable styles, - ReadOnlySharedStringsTable strings, + SharedStrings strings, SheetContentsHandler sheetContentsHandler, boolean formulasNotResults) { this(styles, strings, sheetContentsHandler, new DataFormatter(), formulasNotResults); @@ -351,7 +353,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { String sstIndex = value.toString(); try { int idx = Integer.parseInt(sstIndex); - XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)); + RichTextString rtss = sharedStringsTable.getItemAt(idx); thisStr = rtss.toString(); } catch (NumberFormatException ex) { diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java index 7a58af6f5..5dac6ab7d 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java @@ -32,6 +32,7 @@ import org.apache.poi.xssf.binary.XSSFBSheetHandler; import org.apache.poi.xssf.binary.XSSFBStylesTable; import org.apache.poi.xssf.eventusermodel.XSSFBReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.usermodel.XSSFRelation; import org.apache.xmlbeans.XmlException; import 
org.xml.sax.SAXException; @@ -94,7 +95,7 @@ public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor SheetContentsHandler sheetContentsExtractor, XSSFBStylesTable styles, XSSFBCommentsTable comments, - XSSFBSharedStringsTable strings, + SharedStrings strings, InputStream sheetInputStream) throws IOException, SAXException { diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index ea585da30..b335d06e9 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -40,6 +40,7 @@ import org.apache.poi.xssf.eventusermodel.XSSFReader; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler; import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler; import org.apache.poi.xssf.model.CommentsTable; +import org.apache.poi.xssf.model.SharedStrings; import org.apache.poi.xssf.model.StylesTable; import org.apache.poi.xssf.usermodel.XSSFComment; import org.apache.poi.xssf.usermodel.XSSFShape; @@ -232,7 +233,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor SheetContentsHandler sheetContentsExtractor, StylesTable styles, CommentsTable comments, - ReadOnlySharedStringsTable strings, + SharedStrings strings, InputStream sheetInputStream) throws IOException, SAXException { @@ -255,12 +256,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor } } + protected SharedStrings createSharedStringsTable(OPCPackage container, boolean concatenatePhoneticRuns) + throws IOException, SAXException { + return new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns); + } + /** * Processes the file and returns the text */ public String getText() { try { - ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, 
concatenatePhoneticRuns); + SharedStrings strings = createSharedStringsTable(container, concatenatePhoneticRuns); XSSFReader xssfReader = new XSSFReader(container); StylesTable styles = xssfReader.getStylesTable(); XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData(); diff --git a/src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java b/src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java new file mode 100644 index 000000000..eeb8669c2 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/model/SharedStrings.java @@ -0,0 +1,69 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.model; + +import org.apache.poi.ss.usermodel.RichTextString; + +/** + * Table of strings shared across all sheets in a workbook. + *

+ * A workbook may contain thousands of cells containing string (non-numeric) data. Furthermore this data is very + * likely to be repeated across many rows or columns. The goal of implementing a single string table that is shared + * across the workbook is to improve performance in opening and saving the file by only reading and writing the + * repetitive information once. + *

+ *

+ * Consider for example a workbook summarizing information for cities within various countries. There may be a + * column for the name of the country, a column for the name of each city in that country, and a column + * containing the data for each city. In this case the country name is repetitive, being duplicated in many cells. + * In many cases the repetition is extensive, and a tremendous savings is realized by making use of a shared string + * table when saving the workbook. When displaying text in the spreadsheet, the cell table will just contain an + * index into the string table as the value of a cell, instead of the full string. + *

+ *

+ * The shared string table contains all the necessary information for displaying the string: the text, formatting + * properties, and phonetic properties (for East Asian languages). + *

+ */ +public interface SharedStrings { + + /** + * Return a string item by index + * + * @param idx index of item to return. + * @return the item at the specified position in this Shared String table. + */ + public RichTextString getItemAt(int idx); + + /** + * Return an integer representing the total count of strings in the workbook. This count does not + * include any numbers, it counts only the total of text strings in the workbook. + * + * @return the total count of strings in the workbook + */ + public int getCount(); + + /** + * Returns an integer representing the total count of unique strings in the Shared String Table. + * A string is unique even if it is a copy of another string, but has different formatting applied + * at the character level. + * + * @return the total count of unique strings in the workbook + */ + public int getUniqueCount(); +} diff --git a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java index 4ccdd7492..5d12f8f17 100644 --- a/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java +++ b/src/ooxml/java/org/apache/poi/xssf/model/SharedStringsTable.java @@ -62,7 +62,7 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument; * properties, and phonetic properties (for East Asian languages). *

*/ -public class SharedStringsTable extends POIXMLDocumentPart implements Closeable { +public class SharedStringsTable extends POIXMLDocumentPart implements SharedStrings, Closeable { /** * Array of individual string items in the Shared String table. @@ -157,6 +157,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable * @param idx index of item to return. * @return the item at the specified position in this Shared String table. */ + @Override public RichTextString getItemAt(int idx) { return new XSSFRichTextString(strings.get(idx)); } @@ -167,6 +168,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable * * @return the total count of strings in the workbook */ + @Override public int getCount(){ return count; } @@ -178,6 +180,7 @@ public class SharedStringsTable extends POIXMLDocumentPart implements Closeable * * @return the total count of unique strings in the workbook */ + @Override public int getUniqueCount(){ return uniqueCount; }