Fix SharedStringsTable in the OOXML Excel support, and update the related tests now that there is more to test
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@610506 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e9107b8aca
commit
ab0a81f75a
@ -49,7 +49,6 @@ public class HSSFXML extends HXFDocument {
|
|||||||
public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
|
public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
|
||||||
|
|
||||||
private WorkbookDocument workbookDoc;
|
private WorkbookDocument workbookDoc;
|
||||||
|
|
||||||
private SharedStringsTable sharedStrings;
|
private SharedStringsTable sharedStrings;
|
||||||
|
|
||||||
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
|
||||||
@ -92,8 +91,14 @@ public class HSSFXML extends HXFDocument {
|
|||||||
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
|
WorksheetDocument.Factory.parse(sheetPart.getInputStream());
|
||||||
return sheetDoc.getWorksheet();
|
return sheetDoc.getWorksheet();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the shared string at the given index
|
||||||
|
*/
|
||||||
public String getSharedString(int index) {
|
public String getSharedString(int index) {
|
||||||
return this.sharedStrings.get(index);
|
return this.sharedStrings.get(index);
|
||||||
}
|
}
|
||||||
|
protected SharedStringsTable _getSharedStringsTable() {
|
||||||
|
return sharedStrings;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,64 +18,61 @@
|
|||||||
package org.apache.poi.hssf.model;
|
package org.apache.poi.hssf.model;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
||||||
import javax.xml.parsers.DocumentBuilder;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import javax.xml.parsers.DocumentBuilderFactory;
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
|
||||||
|
|
||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
import org.w3c.dom.Document;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
|
||||||
import org.w3c.dom.Element;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
|
||||||
import org.w3c.dom.NodeList;
|
import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
|
|
||||||
public class SharedStringsTable extends LinkedList<String> {
|
public class SharedStringsTable extends LinkedList<String> {
|
||||||
|
public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
|
||||||
private static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
|
|
||||||
|
private SstDocument doc;
|
||||||
/** XXX: should have been using an XMLBeans object, but it cannot parse the sharedStrings schema, so we'll use DOM temporarily.
|
|
||||||
CTSst sst;
|
|
||||||
*/
|
|
||||||
|
|
||||||
private PackagePart part;
|
private PackagePart part;
|
||||||
|
|
||||||
private DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
|
public SharedStringsTable(PackagePart part) throws IOException, XmlException {
|
||||||
|
|
||||||
private DocumentBuilder parser;
|
|
||||||
|
|
||||||
public SharedStringsTable(PackagePart part) throws IOException {
|
|
||||||
this.part = part;
|
this.part = part;
|
||||||
InputStream is = part.getInputStream();
|
doc = SstDocument.Factory.parse(
|
||||||
try {
|
part.getInputStream()
|
||||||
builderFactory.setNamespaceAware(true);
|
);
|
||||||
this.parser = builderFactory.newDocumentBuilder();
|
read();
|
||||||
readFrom(is);
|
|
||||||
} catch (ParserConfigurationException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (SAXException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} finally {
|
|
||||||
if (is != null) is.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void readFrom(InputStream is) throws IOException, SAXException {
|
private void read() {
|
||||||
Document doc = parser.parse(is);
|
CTRst[] sts = doc.getSst().getSiArray();
|
||||||
Element root = doc.getDocumentElement();
|
for (int i = 0; i < sts.length; i++) {
|
||||||
NodeList sis = root.getElementsByTagNameNS(MAIN_SML_NS_URI, "si");
|
add(sts[i].getT());
|
||||||
for (int i = 0 ; i < sis.getLength() ; ++i) {
|
}
|
||||||
Element si = (Element) sis.item(i);
|
}
|
||||||
NodeList ts = si.getElementsByTagNameNS(MAIN_SML_NS_URI, "t");
|
|
||||||
String t = "";
|
/**
|
||||||
if (ts.getLength() > 0 && ts.item(0).getFirstChild() != null) {
|
* Writes the current shared strings table into
|
||||||
t = ts.item(0).getFirstChild().getNodeValue();
|
* the associated OOXML PackagePart
|
||||||
add(t);
|
*/
|
||||||
}
|
public void write() throws IOException {
|
||||||
}
|
CTSst sst = doc.getSst();
|
||||||
|
|
||||||
|
// Remove the old list
|
||||||
|
for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
|
||||||
|
sst.removeSi(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add the new one
|
||||||
|
for(String s : this) {
|
||||||
|
sst.addNewSi().setT(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update the counts
|
||||||
|
sst.setCount(this.size());
|
||||||
|
sst.setUniqueCount(this.size());
|
||||||
|
|
||||||
|
// Write out
|
||||||
|
OutputStream out = part.getOutputStream();
|
||||||
|
doc.save(out);
|
||||||
|
out.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,6 +42,8 @@ public class HSSFXMLCell {
|
|||||||
switch (cell.getT().intValue()) {
|
switch (cell.getT().intValue()) {
|
||||||
case STCellType.INT_S:
|
case STCellType.INT_S:
|
||||||
return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
|
return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
|
||||||
|
case STCellType.INT_INLINE_STR:
|
||||||
|
return cell.getV();
|
||||||
case STCellType.INT_N:
|
case STCellType.INT_N:
|
||||||
return cell.getV();
|
return cell.getV();
|
||||||
// TODO: support other types
|
// TODO: support other types
|
||||||
|
@ -18,6 +18,7 @@ package org.apache.poi.hssf;
|
|||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
|
||||||
|
import org.apache.poi.hssf.model.SharedStringsTable;
|
||||||
import org.apache.poi.hxf.HXFDocument;
|
import org.apache.poi.hxf.HXFDocument;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxml4j.opc.PackagePart;
|
import org.openxml4j.opc.PackagePart;
|
||||||
@ -124,4 +125,36 @@ public class TestHSSFXML extends TestCase {
|
|||||||
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
|
assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
|
||||||
assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
|
assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSharedStringBasics() throws Exception {
|
||||||
|
HSSFXML xml = new HSSFXML(
|
||||||
|
HXFDocument.openPackage(sampleFile)
|
||||||
|
);
|
||||||
|
assertNotNull(xml._getSharedStringsTable());
|
||||||
|
|
||||||
|
SharedStringsTable sst = xml._getSharedStringsTable();
|
||||||
|
assertEquals(10, sst.size());
|
||||||
|
|
||||||
|
assertEquals("Lorem", sst.get(0));
|
||||||
|
for(int i=0; i<sst.size(); i++) {
|
||||||
|
assertEquals(sst.get(i), xml.getSharedString(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add a few more, then save and reload, checking
|
||||||
|
// changes have been kept
|
||||||
|
sst.add("Foo");
|
||||||
|
sst.add("Bar");
|
||||||
|
sst.set(0, "LoremLorem");
|
||||||
|
|
||||||
|
sst.write();
|
||||||
|
|
||||||
|
xml = new HSSFXML(xml.getPackage());
|
||||||
|
sst = xml._getSharedStringsTable();
|
||||||
|
assertEquals(12, sst.size());
|
||||||
|
|
||||||
|
assertEquals("LoremLorem", sst.get(0));
|
||||||
|
for(int i=0; i<sst.size(); i++) {
|
||||||
|
assertEquals(sst.get(i), xml.getSharedString(i));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
@ -101,32 +101,32 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||||||
extractor.setIncludeSheetNames(false);
|
extractor.setIncludeSheetNames(false);
|
||||||
text = extractor.getText();
|
text = extractor.getText();
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"0\t111\n" +
|
"Lorem\t111\n" +
|
||||||
"1\t222\n" +
|
"ipsum\t222\n" +
|
||||||
"2\t333\n" +
|
"dolor\t333\n" +
|
||||||
"3\t444\n" +
|
"sit\t444\n" +
|
||||||
"4\t555\n" +
|
"amet\t555\n" +
|
||||||
"5\t666\n" +
|
"consectetuer\t666\n" +
|
||||||
"6\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"7\t888\n" +
|
"elit\t888\n" +
|
||||||
"8\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"9\t4995\n" +
|
"at\t4995\n" +
|
||||||
"\n\n", text);
|
"\n\n", text);
|
||||||
|
|
||||||
// Now get formulas not their values
|
// Now get formulas not their values
|
||||||
extractor.setFormulasNotResults(true);
|
extractor.setFormulasNotResults(true);
|
||||||
text = extractor.getText();
|
text = extractor.getText();
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"0\t111\n" +
|
"Lorem\t111\n" +
|
||||||
"1\t222\n" +
|
"ipsum\t222\n" +
|
||||||
"2\t333\n" +
|
"dolor\t333\n" +
|
||||||
"3\t444\n" +
|
"sit\t444\n" +
|
||||||
"4\t555\n" +
|
"amet\t555\n" +
|
||||||
"5\t666\n" +
|
"consectetuer\t666\n" +
|
||||||
"6\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"7\t888\n" +
|
"elit\t888\n" +
|
||||||
"8\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"9\tSUM(B1:B9)\n" +
|
"at\tSUM(B1:B9)\n" +
|
||||||
"\n\n", text);
|
"\n\n", text);
|
||||||
|
|
||||||
// With sheet names too
|
// With sheet names too
|
||||||
@ -134,16 +134,16 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||||||
text = extractor.getText();
|
text = extractor.getText();
|
||||||
assertEquals(
|
assertEquals(
|
||||||
"Sheet1\n" +
|
"Sheet1\n" +
|
||||||
"0\t111\n" +
|
"Lorem\t111\n" +
|
||||||
"1\t222\n" +
|
"ipsum\t222\n" +
|
||||||
"2\t333\n" +
|
"dolor\t333\n" +
|
||||||
"3\t444\n" +
|
"sit\t444\n" +
|
||||||
"4\t555\n" +
|
"amet\t555\n" +
|
||||||
"5\t666\n" +
|
"consectetuer\t666\n" +
|
||||||
"6\t777\n" +
|
"adipiscing\t777\n" +
|
||||||
"7\t888\n" +
|
"elit\t888\n" +
|
||||||
"8\t999\n" +
|
"Nunc\t999\n" +
|
||||||
"9\tSUM(B1:B9)\n\n" +
|
"at\tSUM(B1:B9)\n\n" +
|
||||||
"Sheet2\n\n" +
|
"Sheet2\n\n" +
|
||||||
"Sheet3\n"
|
"Sheet3\n"
|
||||||
, text);
|
, text);
|
||||||
@ -161,9 +161,10 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||||||
assertTrue(text.length() > 0);
|
assertTrue(text.length() > 0);
|
||||||
|
|
||||||
// Might not have all formatting it should do!
|
// Might not have all formatting it should do!
|
||||||
|
// TODO decide if we should really have the "null" in there
|
||||||
assertTrue(text.startsWith(
|
assertTrue(text.startsWith(
|
||||||
"Avgtxfull\n" +
|
"Avgtxfull\n" +
|
||||||
"3\t13\t3\t2\t2\t3\t2\t"
|
"null\t(iii) AVERAGE TAX RATES ON ANNUAL"
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -184,8 +185,8 @@ public class TestHXFExcelExtractor extends TestCase {
|
|||||||
POITextExtractor extractor = extractors[i];
|
POITextExtractor extractor = extractors[i];
|
||||||
|
|
||||||
String text = extractor.getText().replaceAll("[\r\t]", "");
|
String text = extractor.getText().replaceAll("[\r\t]", "");
|
||||||
System.out.println(text.length());
|
//System.out.println(text.length());
|
||||||
System.out.println(text);
|
//System.out.println(text);
|
||||||
assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
|
assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
|
||||||
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
|
Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
|
||||||
Matcher m = pattern.matcher(text);
|
Matcher m = pattern.matcher(text);
|
||||||
|
Loading…
Reference in New Issue
Block a user