fixed XLSX2CSV to avoid exception when processing cells with multiple "t" elements, see Bugzilla 47757

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@880864 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2009-11-16 17:31:16 +00:00
parent a584c030ff
commit 909c13b8b1
2 changed files with 51 additions and 4 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.6-beta1" date="2009-??-??">
<action dev="POI-DEVELOPERS" type="fix">47757 - fixed XLSX2CSV to avoid exception when processing cells with multiple "t" elements</action>
<action dev="POI-DEVELOPERS" type="add">48195 - short-circuit evaluation of IF() and CHOOSE()</action>
<action dev="POI-DEVELOPERS" type="add">48161 - support for text extraction from PPT master slides</action>
<action dev="POI-DEVELOPERS" type="add">47970 - added a method to set arabic mode in HSSFSheet</action>

View File

@ -34,6 +34,7 @@ import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationship;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
@ -60,6 +61,11 @@ import org.xml.sax.helpers.DefaultHandler;
* (read-only) class is used for the shared string table
* because the standard POI SharedStringsTable grows very
* quickly with the number of unique strings.
* <p/>
* Thanks to Eric Smith for a patch that fixes a problem
* triggered by cells with multiple "t" elements, which is
* how Excel represents different formats (e.g., one word
* plain and one word bold).
*
* @author Chris Lott
*/
@ -78,6 +84,43 @@ public class XLSX2CSV {
NUMBER,
}
/**
* Each shared string is enclosed in an "si" element. An "si" element can
* contain multiple "t" elements, one for each formatted run ("r") of text.
* Example input:
*
* <pre>
&lt;?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
&lt;sst xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" count="2" uniqueCount="2">
&lt;si>
&lt;r>
&lt;rPr>
&lt;b />
&lt;sz val="11" />
&lt;color theme="1" />
&lt;rFont val="Calibri" />
&lt;family val="2" />
&lt;scheme val="minor" />
&lt;/rPr>
&lt;t>This:&lt;/t>
&lt;/r>
&lt;r>
&lt;rPr>
&lt;sz val="11" />
&lt;color theme="1" />
&lt;rFont val="Calibri" />
&lt;family val="2" />
&lt;scheme val="minor" />
&lt;/rPr>
&lt;t xml:space="preserve">Causes Problems&lt;/t>
&lt;/r>
&lt;/si>
&lt;si>
&lt;t>This does not&lt;/t>
&lt;/si>
&lt;/sst>
* </pre>
*
*/
static class ReadonlySharedStringsTable extends DefaultHandler {
/**
@ -192,8 +235,9 @@ public class XLSX2CSV {
this.strings = new String[this.uniqueCount];
index = 0;
characters = new StringBuffer();
} else if ("t".equals(name)) {
} else if ("si".equals(name)) {
characters.setLength(0);
} else if ("t".equals(name)) {
tIsOpen = true;
}
}
@ -204,9 +248,11 @@ public class XLSX2CSV {
*/
public void endElement(String uri, String localName, String name)
throws SAXException {
if ("t".equals(name)) {
if ("si".equals(name)) {
strings[index] = characters.toString();
++index;
} else if ("t".equals(name)) {
tIsOpen = false;
}
}