From 19e41b0614ac0c81f287dc41040744a355ecd66b Mon Sep 17 00:00:00 2001 From: Josh Micich Date: Wed, 19 Nov 2008 21:49:17 +0000 Subject: [PATCH] Fixed NoteRecord to allow for unicode author names git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@719084 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/hssf/record/NoteRecord.java | 113 ++++++++++-------- .../poi/hssf/record/TestNoteRecord.java | 50 +++++--- 2 files changed, 93 insertions(+), 70 deletions(-) diff --git a/src/java/org/apache/poi/hssf/record/NoteRecord.java b/src/java/org/apache/poi/hssf/record/NoteRecord.java index 2888614f4..eac7596e9 100644 --- a/src/java/org/apache/poi/hssf/record/NoteRecord.java +++ b/src/java/org/apache/poi/hssf/record/NoteRecord.java @@ -17,14 +17,15 @@ package org.apache.poi.hssf.record; -import org.apache.poi.util.LittleEndian; +import org.apache.poi.util.LittleEndianOutput; +import org.apache.poi.util.StringUtil; /** - * NOTE: Comment Associated with a Cell (1Ch) + * NOTE: Comment Associated with a Cell (0x001C)

* * @author Yegor Kozlov */ -public final class NoteRecord extends Record { +public final class NoteRecord extends StandardRecord { public final static short sid = 0x001C; /** @@ -37,88 +38,95 @@ public final class NoteRecord extends Record { */ public final static short NOTE_VISIBLE = 0x2; - private short field_1_row; - private short field_2_col; - private short field_3_flags; - private short field_4_shapeid; - private String field_5_author; + private static final Byte DEFAULT_PADDING = new Byte((byte)0); + + private short field_1_row; + private short field_2_col; + private short field_3_flags; + private short field_4_shapeid; + private boolean field_5_hasMultibyte; + private String field_6_author; + /** + * Saves padding byte value to reduce delta during round-trip serialization.
+ * + * The documentation is not clear about how padding should work. In any case + * Excel(2007) does something different. + */ + private Byte field_7_padding; /** * Construct a new NoteRecord and * fill its data with the default values */ - public NoteRecord() - { - field_5_author = ""; + public NoteRecord() { + field_6_author = ""; field_3_flags = 0; + field_7_padding = DEFAULT_PADDING; // seems to be always present regardless of author text } /** * @return id of this record. */ - public short getSid() - { + public short getSid() { return sid; } /** * Read the record data from the supplied RecordInputStream */ - public NoteRecord(RecordInputStream in) - { + public NoteRecord(RecordInputStream in) { field_1_row = in.readShort(); field_2_col = in.readShort(); field_3_flags = in.readShort(); field_4_shapeid = in.readShort(); int length = in.readShort(); - byte[] bytes = in.readRemainder(); - field_5_author = new String(bytes, 1, length); + field_5_hasMultibyte = in.readByte() != 0x00; + if (field_5_hasMultibyte) { + field_6_author = StringUtil.readUnicodeLE(in, length); + } else { + field_6_author = StringUtil.readCompressedUnicode(in, length); + } + if (in.available() == 1) { + field_7_padding = new Byte(in.readByte()); + } } - /** - * Serialize the record data into the supplied array of bytes - * - * @param offset offset in the data - * @param data the data to serialize into - * - * @return size of the record - */ - public int serialize(int offset, byte [] data) - { - LittleEndian.putShort(data, 0 + offset, sid); - LittleEndian.putShort(data, 2 + offset, (short)(getRecordSize() - 4)); - - LittleEndian.putShort(data, 4 + offset , field_1_row); - LittleEndian.putShort(data, 6 + offset , field_2_col); - LittleEndian.putShort(data, 8 + offset , field_3_flags); - LittleEndian.putShort(data, 10 + offset , field_4_shapeid); - LittleEndian.putShort(data, 12 + offset , (short)field_5_author.length()); - - byte[] str = field_5_author.getBytes(); - System.arraycopy(str, 0, data, 15 + offset, str.length); - - return getRecordSize(); + public void serialize(LittleEndianOutput out) { + out.writeShort(field_1_row); + out.writeShort(field_2_col); + out.writeShort(field_3_flags); + out.writeShort(field_4_shapeid); + out.writeShort(field_6_author.length()); + out.writeByte(field_5_hasMultibyte ? 0x01 : 0x00); + if (field_5_hasMultibyte) { + StringUtil.putUnicodeLE(field_6_author, out); + } else { + StringUtil.putCompressedUnicode(field_6_author, out); + } + if (field_7_padding != null) { + out.writeByte(field_7_padding.intValue()); + } } protected int getDataSize() { - return 2 + 2 + 2 + 2 + 2 + 1 + field_5_author.length() + 1; + return 11 // 5 shorts + 1 byte + + field_6_author.length() * (field_5_hasMultibyte ? 2 : 1) + + (field_7_padding == null ? 0 : 1); } /** * Convert this record to string. * Used by BiffViewer and other utilities. */ - public String toString() - { + public String toString() { StringBuffer buffer = new StringBuffer(); buffer.append("[NOTE]\n"); - buffer.append(" .recordid = 0x" + Integer.toHexString( getSid() ) + ", size = " + getRecordSize() + "\n"); - buffer.append(" .row = " + field_1_row + "\n"); - buffer.append(" .col = " + field_2_col + "\n"); - buffer.append(" .flags = " + field_3_flags + "\n"); - buffer.append(" .shapeid = " + field_4_shapeid + "\n"); - buffer.append(" .author = " + field_5_author + "\n"); + buffer.append(" .row = ").append(field_1_row).append("\n"); + buffer.append(" .col = ").append(field_2_col).append("\n"); + buffer.append(" .flags = ").append(field_3_flags).append("\n"); + buffer.append(" .shapeid= ").append(field_4_shapeid).append("\n"); + buffer.append(" .author = ").append(field_6_author).append("\n"); buffer.append("[/NOTE]\n"); return buffer.toString(); } @@ -201,7 +209,7 @@ public final class NoteRecord extends Record { * @return the name of the original author of the comment */ public String getAuthor(){ - return field_5_author; + return field_6_author; } /** @@ -210,7 +218,7 @@ public final class NoteRecord extends Record { * @param author the name of the original author of the comment */ public void setAuthor(String author){ - field_5_author = author; + field_6_author = author; } public Object clone() { @@ -219,8 +227,7 @@ public final class NoteRecord extends Record { rec.field_2_col = field_2_col; rec.field_3_flags = field_3_flags; rec.field_4_shapeid = field_4_shapeid; - rec.field_5_author = field_5_author; + rec.field_6_author = field_6_author; return rec; } - } diff --git a/src/testcases/org/apache/poi/hssf/record/TestNoteRecord.java b/src/testcases/org/apache/poi/hssf/record/TestNoteRecord.java index e31601be5..85776eacd 100644 --- a/src/testcases/org/apache/poi/hssf/record/TestNoteRecord.java +++ b/src/testcases/org/apache/poi/hssf/record/TestNoteRecord.java @@ -17,11 +17,13 @@ package org.apache.poi.hssf.record; - +import junit.framework.AssertionFailedError; import junit.framework.TestCase; import java.util.Arrays; +import org.apache.poi.util.HexRead; + /** * Tests the serialization and deserialization of the NoteRecord * class works correctly. Test data taken directly from a real @@ -30,16 +32,16 @@ import java.util.Arrays; * @author Yegor Kozlov */ public final class TestNoteRecord extends TestCase { - private byte[] data = new byte[] { - 0x06, 0x00, 0x01, 0x00, 0x02, 0x00, 0x02, 0x04, 0x1A, 0x00, - 0x00, 0x41, 0x70, 0x61, 0x63, 0x68, 0x65, 0x20, 0x53, 0x6F, - 0x66, 0x74, 0x77, 0x61, 0x72, 0x65, 0x20, 0x46, 0x6F, 0x75, - 0x6E, 0x64, 0x61, 0x74, 0x69, 0x6F, 0x6E, 0x00 - }; + private byte[] testData = HexRead.readFromString( + "06 00 01 00 02 00 02 04 " + + "1A 00 00 " + + "41 70 61 63 68 65 20 53 6F 66 74 77 61 72 65 20 46 6F 75 6E 64 61 74 69 6F 6E " + + "00" // padding byte + ); public void testRead() { - NoteRecord record = new NoteRecord(TestcaseRecordInputStream.create(NoteRecord.sid, data)); + NoteRecord record = new NoteRecord(TestcaseRecordInputStream.create(NoteRecord.sid, testData)); assertEquals(NoteRecord.sid, record.getSid()); assertEquals(6, record.getRow()); @@ -47,7 +49,6 @@ public final class TestNoteRecord extends TestCase { assertEquals(NoteRecord.NOTE_VISIBLE, record.getFlags()); assertEquals(1026, record.getShapeId()); assertEquals("Apache Software Foundation", record.getAuthor()); - } public void testWrite() { @@ -60,16 +61,11 @@ public final class TestNoteRecord extends TestCase { record.setShapeId((short)1026); record.setAuthor("Apache Software Foundation"); - byte [] ser = record.serialize(); - assertEquals(ser.length - 4, data.length); - - byte[] recdata = new byte[ser.length - 4]; - System.arraycopy(ser, 4, recdata, 0, recdata.length); - assertTrue(Arrays.equals(data, recdata)); + byte[] ser = record.serialize(); + TestcaseRecordInputStream.confirmRecordEncoding(NoteRecord.sid, testData, ser); } - public void testClone() - { + public void testClone() { NoteRecord record = new NoteRecord(); record.setRow((short)1); @@ -90,4 +86,24 @@ public final class TestNoteRecord extends TestCase { byte[] cln = cloned.serialize(); assertTrue(Arrays.equals(src, cln)); } + + public void testUnicodeAuthor() { + // This sample data was created by setting the 'user name' field in the 'Personalize' + // section of Excel's options to \u30A2\u30D1\u30C3\u30C1\u65CF, and then + // creating a cell comment. + byte[] data = HexRead.readFromString("01 00 01 00 00 00 03 00 " + + "05 00 01 " + // len=5, 16bit + "A2 30 D1 30 C3 30 C1 30 CF 65 " + // character data + "00 " // padding byte + ); + RecordInputStream in = TestcaseRecordInputStream.create(NoteRecord.sid, data); + NoteRecord nr = new NoteRecord(in); + if ("\u00A2\u0030\u00D1\u0030\u00C3".equals(nr.getAuthor())) { + throw new AssertionFailedError("Identified bug in reading note with unicode author"); + } + assertEquals("\u30A2\u30D1\u30C3\u30C1\u65CF", nr.getAuthor()); + + byte[] ser = nr.serialize(); + TestcaseRecordInputStream.confirmRecordEncoding(NoteRecord.sid, data, ser); + } }