diff --git a/src/java/org/apache/poi/hssf/record/common/UnicodeString.java b/src/java/org/apache/poi/hssf/record/common/UnicodeString.java
index bd1758e81..b10d28204 100644
--- a/src/java/org/apache/poi/hssf/record/common/UnicodeString.java
+++ b/src/java/org/apache/poi/hssf/record/common/UnicodeString.java
@@ -26,9 +26,9 @@ import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
import org.apache.poi.util.BitField;
import org.apache.poi.util.BitFieldFactory;
-import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndianInput;
import org.apache.poi.util.LittleEndianOutput;
+import org.apache.poi.util.StringUtil;
/**
* Title: Unicode String
@@ -42,8 +42,8 @@ public final class UnicodeString implements Comparable {
private short field_1_charCount;
private byte field_2_optionflags;
private String field_3_string;
- private List field_4_format_runs;
- private byte[] field_5_ext_rst;
+ private List field_4_format_runs;
+ private ExtRst field_5_ext_rst;
private static final BitField highByte = BitFieldFactory.getInstance(0x1);
// 0x2 is reserved
private static final BitField extBit = BitFieldFactory.getInstance(0x4);
@@ -98,6 +98,225 @@ public final class UnicodeString implements Comparable {
out.writeShort(_fontIndex);
}
}
+
+ // See page 681
+ public static class ExtRst implements Comparable {
+ private short reserved;
+
+ // This is a Phs (see page 881)
+ private short formattingFontIndex;
+ private short formattingOptions;
+
+ // This is a RPHSSub (see page 894)
+ private int numberOfRuns;
+ private String phoneticText;
+
+ // This is an array of PhRuns (see page 881)
+ private PhRun[] phRuns;
+ // Sometimes there's some cruft at the end
+ private byte[] extraData;
+
+ private void populateEmpty() { // Puts this ExtRst into its empty state; the numeric formatting fields are not assigned here
+ reserved = 1; // 1 is the marker value the parsing constructor checks for
+ phoneticText = "";
+ phRuns = new PhRun[0];
+ extraData = new byte[0];
+ }
+
+ protected ExtRst() { // Builds an empty ExtRst: empty phonetic text, no runs, no extra data
+ populateEmpty();
+ }
+ protected ExtRst(LittleEndianInput in, int expectedLength) {
+ reserved = in.readShort();
+
+ // Old style detection (Reserved = 0xFF)
+ if(reserved == -1) {
+ populateEmpty();
+ return;
+ }
+
+ // Spot corrupt records
+ if(reserved != 1) {
+ System.err.println("Warning - ExtRst was has wrong magic marker, expecting 1 but found " + reserved + " - ignoring");
+ // Grab all the remaining data, and ignore it
+ for(int i=0; i 0) {
+ length2 = 0;
+ }
+ if(length1 != length2) {
+ throw new IllegalStateException(
+ "The two length fields of the Phonetic Text don't agree! " +
+ length1 + " vs " + length2
+ );
+ }
+ phoneticText = StringUtil.readUnicodeLE(in, length1);
+
+ int runData = stringDataSize - 4 - 6 - (2*phoneticText.length());
+ int numRuns = (runData / 6);
+ phRuns = new PhRun[numRuns];
+ for(int i=0; i {
return false;
}
- //Well the format runs are equal as well!, better check the ExtRst data
- //Which by the way we dont know how to decode!
- if ((field_5_ext_rst == null) && (other.field_5_ext_rst == null))
- return true;
- if (((field_5_ext_rst == null) && (other.field_5_ext_rst != null)) ||
- ((field_5_ext_rst != null) && (other.field_5_ext_rst == null)))
- return false;
- size = field_5_ext_rst.length;
- if (size != field_5_ext_rst.length)
- return false;
-
- //Check individual bytes!
- for (int i=0;i {
}
if (isExtendedText() && (extensionLength > 0)) {
- field_5_ext_rst = new byte[extensionLength];
- for (int i=0;i {
}
- void setExtendedRst(byte[] ext_rst) {
- if (ext_rst != null)
- field_2_optionflags = extBit.setByte(field_2_optionflags);
- else field_2_optionflags = extBit.clearByte(field_2_optionflags);
+ public ExtRst getExtendedRst() { // Returns the stored ExtRst itself (no copy); may be null
+ return this.field_5_ext_rst;
+ }
+ void setExtendedRst(ExtRst ext_rst) { // Stores ext_rst and keeps the extBit option flag in step with it
+ if (ext_rst != null) {
+ field_2_optionflags = extBit.setByte(field_2_optionflags); // non-null data: set extBit
+ } else {
+ field_2_optionflags = extBit.clearByte(field_2_optionflags); // null: clear extBit
+ }
this.field_5_ext_rst = ext_rst;
}
@@ -452,12 +674,18 @@ public final class UnicodeString implements Comparable {
}
}
if (field_5_ext_rst != null) {
- buffer.append(" .field_5_ext_rst = ").append("\n").append(HexDump.toHex(field_5_ext_rst)).append("\n");
+ buffer.append(" .field_5_ext_rst = ").append("\n");
+ buffer.append( field_5_ext_rst.toString() ).append("\n");
}
buffer.append("[/UNICODESTRING]\n");
return buffer.toString();
}
+ /**
+ * Serialises out the String. There are special rules
+ * about where we can and can't split onto
+ * Continue records.
+ */
public void serialize(ContinuableRecordOutput out) {
int numberOfRichTextRuns = 0;
int extendedDataSize = 0;
@@ -465,9 +693,11 @@ public final class UnicodeString implements Comparable {
numberOfRichTextRuns = field_4_format_runs.size();
}
if (isExtendedText() && field_5_ext_rst != null) {
- extendedDataSize = field_5_ext_rst.length;
+ extendedDataSize = 4 + field_5_ext_rst.getDataSize();
}
-
+
+ // Serialise the bulk of the String
+ // The writeString handles tricky continue stuff for us
out.writeString(field_3_string, numberOfRichTextRuns, extendedDataSize);
if (numberOfRichTextRuns > 0) {
@@ -477,25 +707,13 @@ public final class UnicodeString implements Comparable {
if (out.getAvailableSpace() < 4) {
out.writeContinue();
}
- FormatRun r = field_4_format_runs.get(i);
- r.serialize(out);
+ FormatRun r = field_4_format_runs.get(i);
+ r.serialize(out);
}
}
if (extendedDataSize > 0) {
- // OK ExtRst is actually not documented, so i am going to hope
- // that we can actually continue on byte boundaries
-
- int extPos = 0;
- while (true) {
- int nBytesToWrite = Math.min(extendedDataSize - extPos, out.getAvailableSpace());
- out.write(field_5_ext_rst, extPos, nBytesToWrite);
- extPos += nBytesToWrite;
- if (extPos >= extendedDataSize) {
- break;
- }
- out.writeContinue();
- }
+ field_5_ext_rst.serialize(out);
}
}
@@ -534,7 +752,6 @@ public final class UnicodeString implements Comparable {
}
//Well the format runs are equal as well!, better check the ExtRst data
- //Which by the way we don't know how to decode!
if ((field_5_ext_rst == null) && (str.field_5_ext_rst == null))
return 0;
if ((field_5_ext_rst == null) && (str.field_5_ext_rst != null))
@@ -542,15 +759,10 @@ public final class UnicodeString implements Comparable {
if ((field_5_ext_rst != null) && (str.field_5_ext_rst == null))
return -1;
- size = field_5_ext_rst.length;
- if (size != field_5_ext_rst.length)
- return size - field_5_ext_rst.length;
+ result = field_5_ext_rst.compareTo(str.field_5_ext_rst);
+ if (result != 0)
+ return result;
- //Check individual bytes!
- for (int i=0;i {
str.field_4_format_runs = new ArrayList();
for (FormatRun r : field_4_format_runs) {
str.field_4_format_runs.add(new FormatRun(r._character, r._fontIndex));
- }
+ }
}
if (field_5_ext_rst != null) {
- str.field_5_ext_rst = new byte[field_5_ext_rst.length];
- System.arraycopy(field_5_ext_rst, 0, str.field_5_ext_rst, 0,
- field_5_ext_rst.length);
+ str.field_5_ext_rst = field_5_ext_rst.clone();
}
return str;
diff --git a/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java b/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java
index b171a77a1..80380ac5e 100644
--- a/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java
+++ b/src/testcases/org/apache/poi/hssf/record/TestSSTRecordSizeCalculator.java
@@ -33,9 +33,8 @@ public final class TestSSTRecordSizeCalculator extends TestCase {
private static final int COMPRESSED_PLAIN_STRING_OVERHEAD = 3;
private static final int OPTION_FIELD_SIZE = 1;
- private final IntMapper strings = new IntMapper();
+ private final IntMapper strings = new IntMapper();
-
private void confirmSize(int expectedSize) {
ContinuableRecordOutput cro = ContinuableRecordOutput.createForCountingOnly();
SSTSerializer ss = new SSTSerializer(strings, 0, 0);
diff --git a/src/testcases/org/apache/poi/hssf/record/common/TestUnicodeString.java b/src/testcases/org/apache/poi/hssf/record/common/TestUnicodeString.java
index 6ecab71a5..591042d7e 100644
--- a/src/testcases/org/apache/poi/hssf/record/common/TestUnicodeString.java
+++ b/src/testcases/org/apache/poi/hssf/record/common/TestUnicodeString.java
@@ -17,12 +17,19 @@
package org.apache.poi.hssf.record.common;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+
import junit.framework.TestCase;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.RecordInputStream;
import org.apache.poi.hssf.record.SSTRecord;
+import org.apache.poi.hssf.record.common.UnicodeString.ExtRst;
+import org.apache.poi.hssf.record.common.UnicodeString.FormatRun;
import org.apache.poi.hssf.record.cont.ContinuableRecordOutput;
+import org.apache.poi.util.LittleEndianInputStream;
+import org.apache.poi.util.LittleEndianOutputStream;
/**
* Tests that {@link UnicodeString} record size calculates correctly. The record size
@@ -85,13 +92,23 @@ public final class TestUnicodeString extends TestCase {
//Test a compressed small string that has rich text and extended text
s.setString("Test");
s.setOptionFlags((byte)0xC);
- s.setExtendedRst(new byte[]{(byte)0x1,(byte)0x2,(byte)0x3,(byte)0x4,(byte)0x5});
- confirmSize(26, s);
+ confirmSize(17, s);
+
+ // Extended phonetics data
+ // Minimum size is 14
+ // Also adds 4 bytes to hold the length
+ s.setExtendedRst(
+ new ExtRst()
+ );
+ confirmSize(35, s);
//Test a uncompressed small string that has rich text and extended text
s.setString(STR_16_BIT);
s.setOptionFlags((byte)0xD);
- confirmSize(30, s);
+ confirmSize(39, s);
+
+ s.setExtendedRst(null);
+ confirmSize(21, s);
}
public void testPerfectStringSize() {
@@ -144,6 +161,146 @@ public final class TestUnicodeString extends TestCase {
UnicodeString s = makeUnicodeString(strSize);
confirmSize(MAX_DATA_SIZE*2, s);
}
+
+ public void testFormatRun() throws Exception { // Round-trips a FormatRun: construct, serialize, re-read from the bytes
+ FormatRun fr = new FormatRun((short)4, (short)0x15c);
+ assertEquals(4, fr.getCharacterPos());
+ assertEquals(0x15c, fr.getFontIndex());
+ // Serialize into an in-memory little-endian stream
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
+
+ fr.serialize(out);
+ // Expect 4 bytes: character position then font index, each a little-endian short
+ byte[] b = baos.toByteArray();
+ assertEquals(4, b.length);
+ assertEquals(4, b[0]);
+ assertEquals(0, b[1]);
+ assertEquals(0x5c, b[2]);
+ assertEquals(0x01, b[3]);
+ // Parse the same bytes back and confirm both values survive
+ LittleEndianInputStream inp = new LittleEndianInputStream(
+ new ByteArrayInputStream(b)
+ );
+ fr = new FormatRun(inp);
+ assertEquals(4, fr.getCharacterPos());
+ assertEquals(0x15c, fr.getFontIndex());
+ }
+
+ public void testExtRstFromEmpty() throws Exception { // An empty ExtRst serializes to 14 bytes and reads back with the same values
+ ExtRst ext = new ExtRst();
+
+ assertEquals(0, ext.getNumberOfRuns());
+ assertEquals(0, ext.getFormattingFontIndex());
+ assertEquals(0, ext.getFormattingOptions());
+ assertEquals("", ext.getPhoneticText());
+ assertEquals(0, ext.getPhRuns().length);
+ assertEquals(10, ext.getDataSize()); // Excludes the 4 byte header (reserved + data size)
+ // Serialize via a ContinuableRecordOutput (sid 0xffff); writeContinue() is called before the bytes are inspected
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ LittleEndianOutputStream out = new LittleEndianOutputStream(baos);
+ ContinuableRecordOutput cout = new ContinuableRecordOutput(out, 0xffff);
+
+ ext.serialize(cout);
+ cout.writeContinue();
+
+ byte[] b = baos.toByteArray();
+ assertEquals(20, b.length);
+
+ // First 4 bytes are written by the output stream, not by ExtRst: the 0xffff sid, then 14 (presumably the record length)
+ assertEquals(-1, b[0]);
+ assertEquals(-1, b[1]);
+ assertEquals(14, b[2]);
+ assertEquals(00, b[3]);
+
+ // Reserved marker (little-endian short, value 1)
+ assertEquals(1, b[4]);
+ assertEquals(0, b[5]);
+ // Data size (10, the same value getDataSize() returned above)
+ assertEquals(10, b[6]);
+ assertEquals(00, b[7]);
+ // Formatting font index and formatting options (one short each, both 0)
+ assertEquals(0, b[8]);
+ assertEquals(0, b[9]);
+ assertEquals(0, b[10]);
+ assertEquals(0, b[11]);
+ // Number of runs = 0
+ assertEquals(0, b[12]);
+ assertEquals(0, b[13]);
+ // Phonetic text length = 0, stored twice
+ assertEquals(0, b[14]);
+ assertEquals(0, b[15]);
+ assertEquals(0, b[16]);
+ assertEquals(0, b[17]);
+
+ // Last 2 bytes come from writeContinue(): the ContinueRecord sid
+ assertEquals(ContinueRecord.sid, b[18]);
+ assertEquals(0, b[19]);
+
+
+ // Re-parse only the 14 ExtRst bytes (skipping the 4 stream-written bytes) and re-check
+ byte[] data = new byte[14];
+ System.arraycopy(b, 4, data, 0, data.length);
+ LittleEndianInputStream inp = new LittleEndianInputStream(
+ new ByteArrayInputStream(data)
+ );
+ ext = new ExtRst(inp, data.length);
+
+ assertEquals(0, ext.getNumberOfRuns());
+ assertEquals(0, ext.getFormattingFontIndex());
+ assertEquals(0, ext.getFormattingOptions());
+ assertEquals("", ext.getPhoneticText());
+ assertEquals(0, ext.getPhRuns().length);
+ }
+
+ public void testExtRstFromData() throws Exception { // Parses a hand-built ExtRst that carries 2 trailing bytes after the phonetic data
+ byte[] data = new byte[] {
+ 01, 00, 0x0C, 00, // reserved = 1, data size = 0x0C
+ 00, 00, 0x37, 00, // formatting font index = 0, formatting options = 0x37
+ 00, 00, // number of runs = 0
+ 00, 00, 00, 00, // phonetic text length = 0, stored twice
+ 00, 00 // Cruft at the end, as found from real files
+ };
+ assertEquals(16, data.length);
+
+ LittleEndianInputStream inp = new LittleEndianInputStream(
+ new ByteArrayInputStream(data)
+ );
+ ExtRst ext = new ExtRst(inp, data.length);
+ assertEquals(0x0c, ext.getDataSize()); // Excludes 4 byte header; 2 more than the empty case's 10
+
+ assertEquals(0, ext.getNumberOfRuns());
+ assertEquals(0x37, ext.getFormattingOptions());
+ assertEquals(0, ext.getFormattingFontIndex());
+ assertEquals("", ext.getPhoneticText());
+ assertEquals(0, ext.getPhRuns().length);
+ }
+
+ public void testCorruptExtRstDetection() throws Exception { // A wrong reserved marker must yield an empty ExtRst
+ byte[] data = new byte[] {
+ 0x79, 0x79, 0x11, 0x11, // reserved reads as 0x7979: neither 1 nor the old-style -1
+ 0x22, 0x22, 0x33, 0x33,
+ };
+ assertEquals(8, data.length);
+
+ LittleEndianInputStream inp = new LittleEndianInputStream(
+ new ByteArrayInputStream(data)
+ );
+ ExtRst ext = new ExtRst(inp, data.length);
+
+ // Will be empty. NOTE(review): args are (actual, expected), the reverse of JUnit's order; also relies on ExtRst.equals, not visible in this hunk
+ assertEquals(ext, new ExtRst());
+
+ // If written, will be the usual size
+ assertEquals(10, ext.getDataSize()); // Excludes 4 byte header
+
+ // Is empty
+ assertEquals(0, ext.getNumberOfRuns());
+ assertEquals(0, ext.getFormattingOptions());
+ assertEquals(0, ext.getFormattingFontIndex());
+ assertEquals("", ext.getPhoneticText());
+ assertEquals(0, ext.getPhRuns().length);
+ }
private static UnicodeString makeUnicodeString(String s) {
diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java
index d96ad7466..337499415 100644
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java
@@ -36,6 +36,7 @@ import org.apache.poi.hssf.record.CellValueRecordInterface;
import org.apache.poi.hssf.record.EmbeddedObjectRefSubRecord;
import org.apache.poi.hssf.record.NameRecord;
import org.apache.poi.hssf.record.aggregates.FormulaRecordAggregate;
+import org.apache.poi.hssf.record.common.UnicodeString;
import org.apache.poi.hssf.record.formula.DeletedArea3DPtg;
import org.apache.poi.hssf.record.formula.Ptg;
import org.apache.poi.ss.usermodel.*;
@@ -1538,12 +1539,37 @@ public final class TestBugs extends BaseTestBugzillaIssues {
}
/**
- * Round trip a file with an unusual ExtRst record
+ * Round trip a file with an unusual UnicodeString/ExtRst record parts
*/
- public void test47847() {
- HSSFWorkbook wb = openSample("47251.xls");
- assertEquals(1, wb.getNumberOfSheets());
+ public void test47847() throws Exception { // Round-trips 47847.xls and checks the 0x04 option flag on two SST strings
+ HSSFWorkbook wb = openSample("47847.xls");
+ assertEquals(3, wb.getNumberOfSheets());
+
+ // Fetch two SST strings: index 0 is expected to have the 0x04 option flag set, index 31 is not
+ UnicodeString withExt = wb.getWorkbook().getSSTString(0);
+ UnicodeString withoutExt = wb.getWorkbook().getSSTString(31);
+
+ assertEquals("O:Alloc:Qty", withExt.getString());
+ assertTrue((withExt.getOptionFlags() & 0x0004) == 0x0004);
+
+ assertEquals("RT", withoutExt.getString());
+ assertTrue((withoutExt.getOptionFlags() & 0x0004) == 0x0000);
+
+ // TODO: nothing about Continue records is asserted here yet
+
+
+ // Write out and re-read
wb = writeOutAndReadBack(wb);
- assertEquals(1, wb.getNumberOfSheets());
+ assertEquals(3, wb.getNumberOfSheets());
+
+ // Check the strings and flags are unchanged after the round trip
+ withExt = wb.getWorkbook().getSSTString(0);
+ withoutExt = wb.getWorkbook().getSSTString(31);
+
+ assertEquals("O:Alloc:Qty", withExt.getString());
+ assertTrue((withExt.getOptionFlags() & 0x0004) == 0x0004);
+
+ assertEquals("RT", withoutExt.getString());
+ assertTrue((withoutExt.getOptionFlags() & 0x0004) == 0x0000);
}
}
diff --git a/test-data/spreadsheet/47847.xls b/test-data/spreadsheet/47847.xls
new file mode 100644
index 000000000..4a7a631d4
Binary files /dev/null and b/test-data/spreadsheet/47847.xls differ