2010-01-26 06:10:48 -05:00
|
|
|
/* ====================================================================
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.xssf.eventusermodel;
|
|
|
|
|
2016-02-23 17:24:09 -05:00
|
|
|
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
|
|
|
|
|
2017-03-08 08:41:07 -05:00
|
|
|
import javax.xml.parsers.ParserConfigurationException;
|
2010-01-26 06:10:48 -05:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
2016-07-16 20:14:06 -04:00
|
|
|
import java.io.PushbackInputStream;
|
2010-01-26 06:10:48 -05:00
|
|
|
import java.util.ArrayList;
|
2017-03-08 08:41:07 -05:00
|
|
|
import java.util.HashMap;
|
2012-03-11 03:19:07 -04:00
|
|
|
import java.util.List;
|
2017-03-08 08:41:07 -05:00
|
|
|
import java.util.Map;
|
2010-01-26 06:10:48 -05:00
|
|
|
|
|
|
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
|
|
|
import org.apache.poi.openxml4j.opc.PackagePart;
|
2014-08-13 18:34:53 -04:00
|
|
|
import org.apache.poi.util.SAXHelper;
|
2010-01-26 06:10:48 -05:00
|
|
|
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
|
|
|
import org.xml.sax.Attributes;
|
|
|
|
import org.xml.sax.InputSource;
|
|
|
|
import org.xml.sax.SAXException;
|
|
|
|
import org.xml.sax.XMLReader;
|
|
|
|
import org.xml.sax.helpers.DefaultHandler;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* <p>This is a lightweight way to process the Shared Strings
|
|
|
|
* table. Most of the text cells will reference something
|
|
|
|
* from in here.
|
|
|
|
* <p>Note that each SI entry can have multiple T elements, if the
|
|
|
|
* string is made up of bits with different formatting.
|
|
|
|
* <p>Example input:
|
|
|
|
* <pre>
|
|
|
|
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
|
|
|
|
<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
|
|
|
|
<si>
|
|
|
|
<r>
|
|
|
|
<rPr>
|
|
|
|
<b />
|
|
|
|
<sz val="11" />
|
|
|
|
<color theme="1" />
|
|
|
|
<rFont val="Calibri" />
|
|
|
|
<family val="2" />
|
|
|
|
<scheme val="minor" />
|
|
|
|
</rPr>
|
|
|
|
<t>This:</t>
|
|
|
|
</r>
|
|
|
|
<r>
|
|
|
|
<rPr>
|
|
|
|
<sz val="11" />
|
|
|
|
<color theme="1" />
|
|
|
|
<rFont val="Calibri" />
|
|
|
|
<family val="2" />
|
|
|
|
<scheme val="minor" />
|
|
|
|
</rPr>
|
|
|
|
<t xml:space="preserve">Causes Problems</t>
|
|
|
|
</r>
|
|
|
|
</si>
|
|
|
|
<si>
|
|
|
|
<t>This does not</t>
|
|
|
|
</si>
|
|
|
|
</sst>
|
|
|
|
* </pre>
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
public class ReadOnlySharedStringsTable extends DefaultHandler {
|
|
|
|
/**
|
|
|
|
* An integer representing the total count of strings in the workbook. This count does not
|
|
|
|
* include any numbers, it counts only the total of text strings in the workbook.
|
|
|
|
*/
|
|
|
|
private int count;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* An integer representing the total count of unique strings in the Shared String Table.
|
|
|
|
* A string is unique even if it is a copy of another string, but has different formatting applied
|
|
|
|
* at the character level.
|
|
|
|
*/
|
|
|
|
private int uniqueCount;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* The shared strings table.
|
|
|
|
*/
|
2012-03-11 03:19:07 -04:00
|
|
|
private List<String> strings;
|
2010-01-26 06:10:48 -05:00
|
|
|
|
2017-03-08 08:41:07 -05:00
|
|
|
/**
|
|
|
|
* Map of phonetic strings (if they exist) indexed
|
|
|
|
* with the integer matching the index in strings
|
|
|
|
*/
|
|
|
|
private Map<Integer, String> phoneticStrings;
|
|
|
|
|
2010-01-26 06:10:48 -05:00
|
|
|
/**
|
2016-12-11 16:13:46 -05:00
|
|
|
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
|
|
|
|
* @throws IOException If reading the data from the package fails.
|
|
|
|
* @throws SAXException if parsing the XML data fails.
|
2010-01-26 06:10:48 -05:00
|
|
|
*/
|
|
|
|
public ReadOnlySharedStringsTable(OPCPackage pkg)
|
|
|
|
throws IOException, SAXException {
|
|
|
|
ArrayList<PackagePart> parts =
|
|
|
|
pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
|
|
|
|
|
|
|
|
// Some workbooks have no shared strings table.
|
|
|
|
if (parts.size() > 0) {
|
|
|
|
PackagePart sstPart = parts.get(0);
|
|
|
|
readFrom(sstPart.getInputStream());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Like POIXMLDocumentPart constructor
|
2016-01-10 15:44:17 -05:00
|
|
|
*
|
|
|
|
* @since POI 3.14-Beta1
|
2010-01-26 06:10:48 -05:00
|
|
|
*/
|
2016-01-10 15:44:17 -05:00
|
|
|
public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException {
|
2010-01-26 06:10:48 -05:00
|
|
|
readFrom(part.getInputStream());
|
|
|
|
}
|
2016-01-10 15:44:17 -05:00
|
|
|
|
2010-01-26 06:10:48 -05:00
|
|
|
/**
|
|
|
|
* Read this shared strings table from an XML file.
|
|
|
|
*
|
|
|
|
* @param is The input stream containing the XML document.
|
2016-12-11 16:13:46 -05:00
|
|
|
* @throws IOException if an error occurs while reading.
|
|
|
|
* @throws SAXException if parsing the XML data fails.
|
2010-01-26 06:10:48 -05:00
|
|
|
*/
|
|
|
|
public void readFrom(InputStream is) throws IOException, SAXException {
|
2016-07-16 20:14:06 -04:00
|
|
|
// test if the file is empty, otherwise parse it
|
|
|
|
PushbackInputStream pis = new PushbackInputStream(is, 1);
|
|
|
|
int emptyTest = pis.read();
|
|
|
|
if (emptyTest > -1) {
|
|
|
|
pis.unread(emptyTest);
|
|
|
|
InputSource sheetSource = new InputSource(pis);
|
2015-08-19 07:10:13 -04:00
|
|
|
try {
|
|
|
|
XMLReader sheetParser = SAXHelper.newXMLReader();
|
|
|
|
sheetParser.setContentHandler(this);
|
|
|
|
sheetParser.parse(sheetSource);
|
|
|
|
} catch(ParserConfigurationException e) {
|
|
|
|
throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage());
|
|
|
|
}
|
2010-01-26 06:10:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return an integer representing the total count of strings in the workbook. This count does not
|
|
|
|
* include any numbers, it counts only the total of text strings in the workbook.
|
|
|
|
*
|
|
|
|
* @return the total count of strings in the workbook
|
|
|
|
*/
|
|
|
|
public int getCount() {
|
|
|
|
return this.count;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Returns an integer representing the total count of unique strings in the Shared String Table.
|
|
|
|
* A string is unique even if it is a copy of another string, but has different formatting applied
|
|
|
|
* at the character level.
|
|
|
|
*
|
|
|
|
* @return the total count of unique strings in the workbook
|
|
|
|
*/
|
|
|
|
public int getUniqueCount() {
|
|
|
|
return this.uniqueCount;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Return the string at a given index.
|
|
|
|
* Formatting is ignored.
|
|
|
|
*
|
|
|
|
* @param idx index of item to return.
|
|
|
|
* @return the item at the specified position in this Shared String table.
|
|
|
|
*/
|
|
|
|
public String getEntryAt(int idx) {
|
2012-03-11 03:19:07 -04:00
|
|
|
return strings.get(idx);
|
|
|
|
}
|
|
|
|
|
2017-03-08 08:41:07 -05:00
|
|
|
/**
|
|
|
|
* Return the phonetic string at a given index.
|
|
|
|
* Returns <code>null</code> if no phonetic string
|
|
|
|
* exists at that index.
|
|
|
|
* @param idx
|
|
|
|
* @return
|
|
|
|
*/
|
|
|
|
public String getPhoneticStringAt(int idx) {
|
|
|
|
//avoid an NPE. If the parser hasn't
|
|
|
|
//yet hit <sst/> phoneticStrings could be null
|
|
|
|
if (phoneticStrings == null) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return phoneticStrings.get(idx);
|
|
|
|
}
|
|
|
|
|
2012-03-11 03:19:07 -04:00
|
|
|
public List<String> getItems() {
|
|
|
|
return strings;
|
2010-01-26 06:10:48 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
//// ContentHandler methods ////
|
|
|
|
|
|
|
|
private StringBuffer characters;
|
2017-03-08 08:41:07 -05:00
|
|
|
private StringBuffer rphCharacters;
|
2010-01-26 06:10:48 -05:00
|
|
|
private boolean tIsOpen;
|
2017-03-08 08:41:07 -05:00
|
|
|
private boolean inRPh;
|
2010-01-26 06:10:48 -05:00
|
|
|
|
|
|
|
public void startElement(String uri, String localName, String name,
|
|
|
|
Attributes attributes) throws SAXException {
|
2016-02-23 17:24:09 -05:00
|
|
|
if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
|
|
|
|
return;
|
|
|
|
}
|
2017-03-08 08:41:07 -05:00
|
|
|
|
2016-02-23 17:24:09 -05:00
|
|
|
if ("sst".equals(localName)) {
|
2010-01-26 06:10:48 -05:00
|
|
|
String count = attributes.getValue("count");
|
2012-03-11 03:19:07 -04:00
|
|
|
if(count != null) this.count = Integer.parseInt(count);
|
2010-01-26 06:10:48 -05:00
|
|
|
String uniqueCount = attributes.getValue("uniqueCount");
|
2012-03-11 03:19:07 -04:00
|
|
|
if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount);
|
|
|
|
|
|
|
|
this.strings = new ArrayList<String>(this.uniqueCount);
|
2017-03-08 08:41:07 -05:00
|
|
|
this.phoneticStrings = new HashMap<Integer, String>();
|
2010-01-26 06:10:48 -05:00
|
|
|
characters = new StringBuffer();
|
2017-03-08 08:41:07 -05:00
|
|
|
rphCharacters = new StringBuffer();
|
2016-02-23 17:24:09 -05:00
|
|
|
} else if ("si".equals(localName)) {
|
2010-01-26 06:10:48 -05:00
|
|
|
characters.setLength(0);
|
2016-02-23 17:24:09 -05:00
|
|
|
} else if ("t".equals(localName)) {
|
2010-01-26 06:10:48 -05:00
|
|
|
tIsOpen = true;
|
2017-03-08 08:41:07 -05:00
|
|
|
} else if ("rPh".equals(localName)) {
|
|
|
|
inRPh = true;
|
2010-01-26 06:10:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
public void endElement(String uri, String localName, String name)
|
|
|
|
throws SAXException {
|
2016-02-23 17:24:09 -05:00
|
|
|
if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
|
|
|
|
return;
|
|
|
|
}
|
2017-03-08 08:41:07 -05:00
|
|
|
|
2016-02-23 17:24:09 -05:00
|
|
|
if ("si".equals(localName)) {
|
2012-03-11 03:19:07 -04:00
|
|
|
strings.add(characters.toString());
|
2017-03-08 08:41:07 -05:00
|
|
|
if (rphCharacters.length() > 0) {
|
|
|
|
phoneticStrings.put(strings.size()-1, rphCharacters.toString());
|
|
|
|
rphCharacters.setLength(0);
|
|
|
|
}
|
2016-02-23 17:24:09 -05:00
|
|
|
} else if ("t".equals(localName)) {
|
2017-03-08 08:41:07 -05:00
|
|
|
tIsOpen = false;
|
|
|
|
} else if ("rPh".equals(localName)) {
|
|
|
|
inRPh = false;
|
2010-01-26 06:10:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Captures characters only if a t(ext) element is open.
|
|
|
|
*/
|
|
|
|
public void characters(char[] ch, int start, int length)
|
|
|
|
throws SAXException {
|
2017-03-08 08:41:07 -05:00
|
|
|
if (tIsOpen) {
|
|
|
|
if (inRPh) {
|
|
|
|
rphCharacters.append(ch, start, length);
|
|
|
|
} else {
|
|
|
|
characters.append(ch, start, length);
|
|
|
|
}
|
|
|
|
}
|
2010-01-26 06:10:48 -05:00
|
|
|
}
|
|
|
|
}
|