From ab0a81f75aeb6dd1fc8fd045e10978eebce0dd01 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 9 Jan 2008 18:46:30 +0000 Subject: [PATCH] Fix for SharedStringsTable in ooxml excel support, and related test updates now we have more to test git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610506 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hssf/HSSFXML.java | 9 +- .../poi/hssf/model/SharedStringsTable.java | 93 +++++++++---------- .../poi/hssf/usermodel/HSSFXMLCell.java | 2 + .../org/apache/poi/hssf/TestHSSFXML.java | 33 +++++++ .../hssf/extractor/TestHXFExcelExtractor.java | 67 ++++++------- 5 files changed, 121 insertions(+), 83 deletions(-) diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java index bb476c1e6..3766a046a 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java @@ -49,7 +49,6 @@ public class HSSFXML extends HXFDocument { public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"; private WorkbookDocument workbookDoc; - private SharedStringsTable sharedStrings; public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException { @@ -92,8 +91,14 @@ public class HSSFXML extends HXFDocument { WorksheetDocument.Factory.parse(sheetPart.getInputStream()); return sheetDoc.getWorksheet(); } - + + /** + * Returns the shared string at the given index + */ public String getSharedString(int index) { return this.sharedStrings.get(index); } + protected SharedStringsTable _getSharedStringsTable() { + return sharedStrings; + } } diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java index d97454b01..b3e219256 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java @@ -18,64 +18,61 @@ package org.apache.poi.hssf.model; import java.io.IOException; -import java.io.InputStream; +import java.io.OutputStream; import java.util.LinkedList; -import javax.xml.parsers.DocumentBuilder; -import javax.xml.parsers.DocumentBuilderFactory; -import javax.xml.parsers.ParserConfigurationException; - +import org.apache.xmlbeans.XmlException; import org.openxml4j.opc.PackagePart; -import org.w3c.dom.Document; -import org.w3c.dom.Element; -import org.w3c.dom.NodeList; -import org.xml.sax.SAXException; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument; public class SharedStringsTable extends LinkedList { - - private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; - - /** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily. - CTSst sst; - */ - + public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; + + private SstDocument doc; private PackagePart part; - private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance(); - - private DocumentBuilder parser; - - public SharedStringsTable(PackagePart part) throws IOException { + public SharedStringsTable(PackagePart part) throws IOException, XmlException { this.part = part; - InputStream is = part.getInputStream(); - try { - builderFactory.setNamespaceAware(true); - this.parser = builderFactory.newDocumentBuilder(); - readFrom(is); - } catch (ParserConfigurationException e) { - throw new RuntimeException(e); - } catch (SAXException e) { - throw new RuntimeException(e); - } finally { - if (is != null) is.close(); - } - - + doc = SstDocument.Factory.parse( + part.getInputStream() + ); + read(); } - public void readFrom(InputStream is) throws IOException, SAXException { - Document doc = parser.parse(is); - Element root = doc.getDocumentElement(); - NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si"); - for (int i = 0 ; i < sis.getLength() ; ++i) { - Element si = (Element) sis.item(i); - NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t"); - String t = ""; - if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) { - t = ts.item(0).getFirstChild().getNodeValue(); - add(t); - } - } + private void read() { + CTRst[] sts = doc.getSst().getSiArray(); + for (int i = 0; i < sts.length; i++) { + add(sts[i].getT()); + } + } + + /** + * Writes the current shared strings table into + * the associated OOXML PackagePart + */ + public void write() throws IOException { + CTSst sst = doc.getSst(); + + // Remove the old list + for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) { + sst.removeSi(i); + } + + // Add the new one + for(String s : this) { + sst.addNewSi().setT(s); + } + + // Update the counts + sst.setCount(this.size()); + sst.setUniqueCount(this.size()); + + // Write out + OutputStream out = part.getOutputStream(); + doc.save(out); + out.close(); } } diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java index 549f32eaa..b24556cd8 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java @@ -42,6 +42,8 @@ public class HSSFXMLCell { switch (cell.getT().intValue()) { case STCellType.INT_S: return this.workbook.getSharedString(Integer.valueOf(cell.getV())); + case STCellType.INT_INLINE_STR: + return cell.getV(); case STCellType.INT_N: return cell.getV(); // TODO: support other types diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java index 9c3ef65c4..97453265c 100644 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java +++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java @@ -18,6 +18,7 @@ package org.apache.poi.hssf; import java.io.File; +import org.apache.poi.hssf.model.SharedStringsTable; import org.apache.poi.hxf.HXFDocument; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; @@ -124,4 +125,36 @@ public class TestHSSFXML extends TestCase { assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); } + + public void testSharedStringBasics() throws Exception { + HSSFXML xml = new HSSFXML( + HXFDocument.openPackage(sampleFile) + ); + assertNotNull(xml._getSharedStringsTable()); + + SharedStringsTable sst = xml._getSharedStringsTable(); + assertEquals(10, sst.size()); + + assertEquals("Lorem", sst.get(0)); + for(int i=0; i 0); // Might not have all formatting it should do! + // TODO decide if we should really have the "null" in there assertTrue(text.startsWith( "Avgtxfull\n" + - "3\t13\t3\t2\t2\t3\t2\t" + "null\t(iii) AVERAGE TAX RATES ON ANNUAL" )); } @@ -184,8 +185,8 @@ public class TestHXFExcelExtractor extends TestCase { POITextExtractor extractor = extractors[i]; String text = extractor.getText().replaceAll("[\r\t]", ""); - System.out.println(text.length()); - System.out.println(text); + //System.out.println(text.length()); + //System.out.println(text); assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); Matcher m = pattern.matcher(text);