diff --git a/src/java/org/apache/poi/hpsf/MutableSection.java b/src/java/org/apache/poi/hpsf/MutableSection.java index 7fe040e78..6d6c1b11e 100644 --- a/src/java/org/apache/poi/hpsf/MutableSection.java +++ b/src/java/org/apache/poi/hpsf/MutableSection.java @@ -356,8 +356,7 @@ public class MutableSection extends Section getPropertyCount() * 2 * LittleEndian.INT_SIZE; /* Writing the section's dictionary it tricky. If there is a dictionary - * (property 0) the codepage property (property 1) has to be set, too. - * Since HPSF supports Unicode only, the codepage must be 1200. */ + * (property 0) the codepage property (property 1) must be set, too. */ int codepage = -1; if (getProperty(PropertyIDMap.PID_DICTIONARY) != null) { @@ -370,9 +369,11 @@ public class MutableSection extends Section "Integer object."); } else - throw new IllegalPropertySetDataException - ("The codepage property (ID = 1) must be set if the " + - "section contains a dictionary."); + /* Warning: The codepage property is not set although a + * dictionary is present. In order to cope with this problem we + * add the codepage property and set it to Unicode. */ + setProperty(PropertyIDMap.PID_CODEPAGE, (long) Variant.VT_I2, + new Integer(Constants.CP_UNICODE)); codepage = getCodepage(); } @@ -594,11 +595,14 @@ public class MutableSection extends Section * don't have a type. */ setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary); - /* Set the codepage property (ID 1) for the strings used in the - * dictionary. HPSF always writes Unicode strings to the - * dictionary. */ - setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2, - new Integer(Constants.CP_UNICODE)); + /* If the codepage property (ID 1) for the strings (keys and + * values) used in the dictionary is not yet defined, set it to + * Unicode. 
*/ + final Integer codepage = + (Integer) getProperty(PropertyIDMap.PID_CODEPAGE); + if (codepage == null) + setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2, + new Integer(Constants.CP_UNICODE)); } else /* Setting the dictionary to null means to remove property 0. diff --git a/src/java/org/apache/poi/hpsf/Property.java b/src/java/org/apache/poi/hpsf/Property.java index a82aec30a..81c346b81 100644 --- a/src/java/org/apache/poi/hpsf/Property.java +++ b/src/java/org/apache/poi/hpsf/Property.java @@ -1,4 +1,3 @@ - /* ==================================================================== Copyright 2002-2004 Apache Software Foundation @@ -44,6 +43,10 @@ import org.apache.poi.util.LittleEndian; * over time but largely depends on your feedback so that the POI team knows * which variant types are really needed. So please feel free to submit error * reports or patches for the types you need.
+ * + *Microsoft documentation: + * Property Set Display Name Dictionary. * * @author Rainer Klute <klute@rainer-klute.de> @@ -162,17 +165,19 @@ public class Property /** *
Reads a dictionary.
- * - * @param src The byte array containing the bytes making out the - * dictionary. - * @param offset At this offset within src the - * dictionary starts. + * + * @param src The byte array containing the bytes making out the dictionary. + * @param offset At this offset within src the dictionary + * starts. * @param length The dictionary contains at most this many bytes. * @param codepage The codepage of the string values. * @return The dictonary + * @throws UnsupportedEncodingException if the dictionary's codepage is not + * (yet) supported. */ protected Map readDictionary(final byte[] src, final long offset, final int length, final int codepage) + throws UnsupportedEncodingException { /* Check whether "offset" points into the "src" array". */ if (offset < 0 || offset > src.length) @@ -195,25 +200,45 @@ public class Property o += LittleEndian.INT_SIZE; /* The value (a string). The length is the either the - * number of characters if the character set is Unicode or - * else the number of bytes. The length includes - * terminating 0x00 bytes which we have to strip off to - * create a Java string. */ + * number of (two-byte) characters if the character set is Unicode + * or the number of bytes if the character set is not Unicode. + * The length includes terminating 0x00 bytes which we have to strip + * off to create a Java string. */ long sLength = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - /* Read the bytes or characters depending on whether the - * character set is Unicode or not. */ - StringBuffer b = new StringBuffer((int) sLength); - for (int j = 0; j < sLength; j++) - if (codepage == Constants.CP_UNICODE) + /* Read the string. */ + final StringBuffer b = new StringBuffer(); + switch (codepage) + { + case -1: { - final int i1 = o + (j * 2); - final int i2 = i1 + 1; - b.append((char) ((src[i2] << 8) + src[i1])); + /* Without a codepage the length is equal to the number of + * bytes. 
*/ + b.append(new String(src, o, (int) sLength)); + break; } - else - b.append((char) src[o + j]); + case Constants.CP_UNICODE: + { + /* The length is the number of two-byte, little-endian characters. + * Mask each byte with 0xFF: Java bytes are signed and would sign-extend. */ + for (int j = 0; j < sLength; j++) + { + final int low = src[o + (j * 2)] & 0xFF; + final int high = src[o + (j * 2) + 1] & 0xFF; + b.append((char) ((high << 8) | low)); + } + break; + } + default: + { + /* For encodings other than Unicode the length is the number + * of bytes. */ + b.append(new String(src, o, (int) sLength, + VariantSupport.codepageToEncoding(codepage))); + break; + } + } /* Strip 0x00 characters from the end of the string: */ while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)