diff --git a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
index 93dae5dce..892c3bb2b 100644
--- a/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
+++ b/src/examples/src/org/apache/poi/xssf/eventusermodel/XLSX2CSV.java
@@ -21,7 +21,6 @@ import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
-import java.util.ArrayList;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
@@ -30,15 +29,11 @@ import javax.xml.parsers.SAXParserFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess;
-import org.apache.poi.openxml4j.opc.PackagePart;
-import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
-import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
-import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
@@ -84,192 +79,6 @@ public class XLSX2CSV {
NUMBER,
}
- /**
- * Each cell is enclosed in "si". Each cell can have multiple "t" elements.
- * Example input
- *
- *
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
- <sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
- <si>
- <r>
- <rPr>
- <b />
- <sz val="11" />
- <color theme="1" />
- <rFont val="Calibri" />
- <family val="2" />
- <scheme val="minor" />
- </rPr>
- <t>This:</t>
- </r>
- <r>
- <rPr>
- <sz val="11" />
- <color theme="1" />
- <rFont val="Calibri" />
- <family val="2" />
- <scheme val="minor" />
- </rPr>
- <t xml:space="preserve">Causes Problems</t>
- </r>
- </si>
- <si>
- <t>This does not</t>
- </si>
- </sst>
- *
- *
- */
- static class ReadonlySharedStringsTable extends DefaultHandler {
-
- /**
- * An integer representing the total count of strings in the workbook. This count does not
- * include any numbers, it counts only the total of text strings in the workbook.
- */
- private int count;
-
- /**
- * An integer representing the total count of unique strings in the Shared String Table.
- * A string is unique even if it is a copy of another string, but has different formatting applied
- * at the character level.
- */
- private int uniqueCount;
-
- /**
- * The shared strings table.
- */
- private String[] strings;
-
- /**
- * @param pkg
- * @throws IOException
- * @throws SAXException
- * @throws ParserConfigurationException
- */
- public ReadonlySharedStringsTable(OPCPackage pkg)
- throws IOException, SAXException, ParserConfigurationException {
- ArrayList parts =
- pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
-
- // Some workbooks have no shared strings table.
- if (parts.size() > 0) {
- PackagePart sstPart = parts.get(0);
- readFrom(sstPart.getInputStream());
- }
- }
-
- /**
- * Like POIXMLDocumentPart constructor
- *
- * @param part
- * @param rel_ignored
- * @throws IOException
- */
- public ReadonlySharedStringsTable(PackagePart part, PackageRelationship rel_ignored)
- throws IOException, SAXException, ParserConfigurationException {
- readFrom(part.getInputStream());
- }
-
- /**
- * Read this shared strings table from an XML file.
- *
- * @param is The input stream containing the XML document.
- * @throws IOException if an error occurs while reading.
- * @throws SAXException
- * @throws ParserConfigurationException
- */
- public void readFrom(InputStream is) throws IOException, SAXException, ParserConfigurationException {
- InputSource sheetSource = new InputSource(is);
- SAXParserFactory saxFactory = SAXParserFactory.newInstance();
- SAXParser saxParser = saxFactory.newSAXParser();
- XMLReader sheetParser = saxParser.getXMLReader();
- sheetParser.setContentHandler(this);
- sheetParser.parse(sheetSource);
- }
-
- /**
- * Return an integer representing the total count of strings in the workbook. This count does not
- * include any numbers, it counts only the total of text strings in the workbook.
- *
- * @return the total count of strings in the workbook
- */
- public int getCount() {
- return this.count;
- }
-
- /**
- * Returns an integer representing the total count of unique strings in the Shared String Table.
- * A string is unique even if it is a copy of another string, but has different formatting applied
- * at the character level.
- *
- * @return the total count of unique strings in the workbook
- */
- public int getUniqueCount() {
- return this.uniqueCount;
- }
-
- /**
- * Return a string item by index
- *
- * @param idx index of item to return.
- * @return the item at the specified position in this Shared String table.
- */
- public String getEntryAt(int idx) {
- return strings[idx];
- }
-
- //// ContentHandler methods ////
-
- private StringBuffer characters;
- private boolean tIsOpen;
- private int index;
-
- /*
- * (non-Javadoc)
- * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
- */
- public void startElement(String uri, String localName, String name,
- Attributes attributes) throws SAXException {
- if ("sst".equals(name)) {
- String count = attributes.getValue("count");
- String uniqueCount = attributes.getValue("uniqueCount");
- this.count = Integer.parseInt(count);
- this.uniqueCount = Integer.parseInt(uniqueCount);
- this.strings = new String[this.uniqueCount];
- index = 0;
- characters = new StringBuffer();
- } else if ("si".equals(name)) {
- characters.setLength(0);
- } else if ("t".equals(name)) {
- tIsOpen = true;
- }
- }
-
- /*
- * (non-Javadoc)
- * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
- */
- public void endElement(String uri, String localName, String name)
- throws SAXException {
- if ("si".equals(name)) {
- strings[index] = characters.toString();
- ++index;
- } else if ("t".equals(name)) {
- tIsOpen = false;
- }
- }
-
- /**
- * Captures characters only if a t(ext?) element is open.
- */
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- if (tIsOpen)
- characters.append(ch, start, length);
- }
-
- }
/**
* Derived from http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
@@ -289,7 +98,7 @@ public class XLSX2CSV {
/**
* Table with unique strings
*/
- private ReadonlySharedStringsTable sharedStringsTable;
+ private ReadOnlySharedStringsTable sharedStringsTable;
/**
* Destination for data
@@ -330,7 +139,7 @@ public class XLSX2CSV {
*/
public MyXSSFSheetHandler(
StylesTable styles,
- ReadonlySharedStringsTable strings,
+ ReadOnlySharedStringsTable strings,
int cols,
PrintStream target) {
this.stylesTable = styles;
@@ -384,12 +193,8 @@ public class XLSX2CSV {
else if ("str".equals(cellType))
nextDataType = xssfDataType.FORMULA;
else if (cellStyleStr != null) {
- /*
- * It's a number, but possibly has a style and/or special format.
- * Nick Burch said to use org.apache.poi.ss.usermodel.BuiltinFormats,
- * and I see javadoc for that at apache.org, but it's not in the
- * POI 3.5 Beta 5 jars. Scheduled to appear in 3.5 beta 6.
- */
+ // It's a number, but almost certainly one
+ // with a special style or format
int styleIndex = Integer.parseInt(cellStyleStr);
XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
this.formatIndex = style.getDataFormat();
@@ -553,7 +358,7 @@ public class XLSX2CSV {
*/
public void processSheet(
StylesTable styles,
- ReadonlySharedStringsTable strings,
+ ReadOnlySharedStringsTable strings,
InputStream sheetInputStream)
throws IOException, ParserConfigurationException, SAXException {
@@ -577,7 +382,7 @@ public class XLSX2CSV {
public void process()
throws IOException, OpenXML4JException, ParserConfigurationException, SAXException {
- ReadonlySharedStringsTable strings = new ReadonlySharedStringsTable(this.xlsxPackage);
+ ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(this.xlsxPackage);
XSSFReader xssfReader = new XSSFReader(this.xlsxPackage);
StylesTable styles = xssfReader.getStylesTable();
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
new file mode 100644
index 000000000..52e2c258a
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
@@ -0,0 +1,221 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * This is a lightweight way to process the Shared Strings
+ * table. Most of the text cells will reference something
+ * from in here.
+ *
+ * <p>Note that each SI entry can have multiple T elements, if the
+ * string is made up of bits with different formatting.
+ *
+ * <p>Example input:
+ *
+<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+<sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
+ <si>
+ <r>
+ <rPr>
+ <b />
+ <sz val="11" />
+ <color theme="1" />
+ <rFont val="Calibri" />
+ <family val="2" />
+ <scheme val="minor" />
+ </rPr>
+ <t>This:</t>
+ </r>
+ <r>
+ <rPr>
+ <sz val="11" />
+ <color theme="1" />
+ <rFont val="Calibri" />
+ <family val="2" />
+ <scheme val="minor" />
+ </rPr>
+ <t xml:space="preserve">Causes Problems</t>
+ </r>
+ </si>
+ <si>
+ <t>This does not</t>
+ </si>
+</sst>
+*
+ *
+ */
+public class ReadOnlySharedStringsTable extends DefaultHandler {
+    /**
+     * Value of the <code>count</code> attribute of the <code>sst</code> element:
+     * the total number of text strings in the workbook (numbers are not included).
+     * Stays 0 when the attribute is absent.
+     */
+    private int count;
+
+    /**
+     * Number of unique strings in the Shared String Table. A string is unique even
+     * if it is a copy of another string, but has different formatting applied at
+     * the character level. Taken from the <code>uniqueCount</code> attribute; raised
+     * to the number of entries actually read if the attribute is absent or too small.
+     */
+    private int uniqueCount;
+
+    /**
+     * The shared strings table. Never null, so that lookups on a workbook without
+     * a shared strings part fail with an index error rather than a
+     * NullPointerException.
+     */
+    private String[] strings = new String[0];
+
+    /**
+     * Builds the table from the first shared strings part found in the package.
+     * The table is left empty if the package has no such part.
+     *
+     * @param pkg the package to look the shared strings part up in
+     * @throws IOException if the part cannot be read
+     * @throws SAXException if the part is not well-formed XML
+     */
+    public ReadOnlySharedStringsTable(OPCPackage pkg)
+            throws IOException, SAXException {
+        ArrayList<PackagePart> parts =
+                pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
+
+        // Some workbooks have no shared strings table.
+        if (parts.size() > 0) {
+            PackagePart sstPart = parts.get(0);
+            readFrom(sstPart.getInputStream());
+        }
+    }
+
+    /**
+     * Like POIXMLDocumentPart constructor
+     *
+     * @param part the shared strings part to read
+     * @param rel_ignored not used
+     * @throws IOException if the part cannot be read
+     * @throws SAXException if the part is not well-formed XML
+     */
+    public ReadOnlySharedStringsTable(PackagePart part, PackageRelationship rel_ignored)
+            throws IOException, SAXException {
+        readFrom(part.getInputStream());
+    }
+
+    /**
+     * Read this shared strings table from an XML file.
+     *
+     * @param is The input stream containing the XML document.
+     * @throws IOException if an error occurs while reading.
+     * @throws SAXException if the document cannot be parsed.
+     * @throws RuntimeException if no SAX parser can be configured; the
+     *         underlying ParserConfigurationException is attached as the cause.
+     */
+    public void readFrom(InputStream is) throws IOException, SAXException {
+        InputSource sheetSource = new InputSource(is);
+        // NOTE(review): the factory is used with its default settings; consider
+        // disabling DTDs / external entities if packages may be untrusted.
+        SAXParserFactory saxFactory = SAXParserFactory.newInstance();
+        try {
+            SAXParser saxParser = saxFactory.newSAXParser();
+            XMLReader sheetParser = saxParser.getXMLReader();
+            sheetParser.setContentHandler(this);
+            sheetParser.parse(sheetSource);
+        } catch (ParserConfigurationException e) {
+            // Keep the original exception as the cause so the stack trace is not lost.
+            throw new RuntimeException("SAX parser appears to be broken - " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Return an integer representing the total count of strings in the workbook. This count does not
+     * include any numbers, it counts only the total of text strings in the workbook.
+     *
+     * @return the total count of strings in the workbook
+     */
+    public int getCount() {
+        return this.count;
+    }
+
+    /**
+     * Returns an integer representing the total count of unique strings in the Shared String Table.
+     * A string is unique even if it is a copy of another string, but has different formatting applied
+     * at the character level.
+     *
+     * @return the total count of unique strings in the workbook
+     */
+    public int getUniqueCount() {
+        return this.uniqueCount;
+    }
+
+    /**
+     * Return the string at a given index.
+     * Formatting is ignored.
+     *
+     * @param idx index of item to return.
+     * @return the item at the specified position in this Shared String table;
+     *         null for a slot that was declared by <code>uniqueCount</code> but
+     *         never filled by an <code>si</code> element.
+     * @throws ArrayIndexOutOfBoundsException if idx is outside the table
+     */
+    public String getEntryAt(int idx) {
+        return strings[idx];
+    }
+
+    //// ContentHandler methods ////
+
+    /** Text collected so far for the <code>si</code> entry being read. */
+    private final StringBuilder characters = new StringBuilder();
+    /** True while the parser is inside a <code>t</code> element. */
+    private boolean tIsOpen;
+    /** Slot in {@link #strings} that the next completed entry goes into. */
+    private int index;
+
+    /**
+     * Resets the table on <code>sst</code>, starts a fresh entry on
+     * <code>si</code> and begins text capture on <code>t</code>. Elements are
+     * matched on their qualified name.
+     */
+    @Override
+    public void startElement(String uri, String localName, String name,
+            Attributes attributes) throws SAXException {
+        if ("sst".equals(name)) {
+            // Both attributes may be missing; treat an absent attribute as 0
+            // instead of failing in Integer.parseInt(null).
+            String countAttr = attributes.getValue("count");
+            String uniqueCountAttr = attributes.getValue("uniqueCount");
+            this.count = (countAttr == null) ? 0 : Integer.parseInt(countAttr);
+            this.uniqueCount = (uniqueCountAttr == null) ? 0 : Integer.parseInt(uniqueCountAttr);
+            this.strings = new String[this.uniqueCount];
+            this.index = 0;
+            this.tIsOpen = false;
+            this.characters.setLength(0);
+        } else if ("si".equals(name)) {
+            characters.setLength(0);
+        } else if ("t".equals(name)) {
+            tIsOpen = true;
+        }
+    }
+
+    /**
+     * Stores the collected text when an <code>si</code> entry closes, stops
+     * text capture when a <code>t</code> closes, and trims the table when
+     * <code>sst</code> closes.
+     */
+    @Override
+    public void endElement(String uri, String localName, String name)
+            throws SAXException {
+        if ("si".equals(name)) {
+            if (index == strings.length) {
+                // More entries than uniqueCount announced (or none announced): grow.
+                String[] grown = new String[Math.max(index + 1, strings.length * 2)];
+                System.arraycopy(strings, 0, grown, 0, strings.length);
+                strings = grown;
+            }
+            strings[index] = characters.toString();
+            ++index;
+        } else if ("t".equals(name)) {
+            tIsOpen = false;
+        } else if ("sst".equals(name)) {
+            if (index > uniqueCount) {
+                // The table was grown past the declared size: drop the spare
+                // capacity and report the number of entries actually read.
+                String[] exact = new String[index];
+                System.arraycopy(strings, 0, exact, 0, index);
+                strings = exact;
+                uniqueCount = index;
+            }
+        }
+    }
+
+    /**
+     * Captures characters only if a t(ext) element is open.
+     */
+    @Override
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        if (tIsOpen) {
+            characters.append(ch, start, length);
+        }
+    }
+
+}