Bug 34247 fixed. Dictionaries are read with the specified codepage now.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353636 13f79535-47bb-0310-9956-ffa450edef68
2005-04-01 16:58:00 +00:00 · 2005-04-01 16:58:00 +00:00 · d25395a486
commit d25395a486
parent 80db4fa339
2 changed files with 59 additions and 30 deletions
--- a/src/java/org/apache/poi/hpsf/MutableSection.java
+++ b/src/java/org/apache/poi/hpsf/MutableSection.java
@ -356,8 +356,7 @@ public class MutableSection extends Section
                    getPropertyCount() * 2 * LittleEndian.INT_SIZE;

        /* Writing the section's dictionary is tricky. If there is a dictionary
-         * (property 0) the codepage property (property 1) has to be set, too.
-         * Since HPSF supports Unicode only, the codepage must be 1200. */
+         * (property 0) the codepage property (property 1) must be set, too. */
        int codepage = -1;
        if (getProperty(PropertyIDMap.PID_DICTIONARY) != null)
        {
@ -370,9 +369,11 @@ public class MutableSection extends Section
                         "Integer object.");
            }
            else
-                throw new IllegalPropertySetDataException
-                    ("The codepage property (ID = 1) must be set if the " +
-                     "section contains a dictionary.");
+                /* Warning: The codepage property is not set although a
+                 * dictionary is present. In order to cope with this problem we
+                 * add the codepage property and set it to Unicode. */
+                setProperty(PropertyIDMap.PID_CODEPAGE, (long) Variant.VT_I2,
+                            new Integer(Constants.CP_UNICODE));
            codepage = getCodepage();
        }

@ -594,11 +595,14 @@ public class MutableSection extends Section
             * don't have a type. */
            setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);

-            /* Set the codepage property (ID 1) for the strings used in the 
-             * dictionary. HPSF always writes Unicode strings to the
-             * dictionary. */
-            setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
-                        new Integer(Constants.CP_UNICODE));
+            /* If the codepage property (ID 1) for the strings (keys and
+             * values) used in the dictionary is not yet defined, set it to
+             * Unicode. */
+            final Integer codepage =
+                (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
+            if (codepage == null)
+                setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
+                            new Integer(Constants.CP_UNICODE));
        }
        else
            /* Setting the dictionary to null means to remove property 0.
--- a/src/java/org/apache/poi/hpsf/Property.java
+++ b/src/java/org/apache/poi/hpsf/Property.java
@ -1,4 +1,3 @@
-
 /* ====================================================================
   Copyright 2002-2004   Apache Software Foundation

@ -45,6 +44,10 @@ import org.apache.poi.util.LittleEndian;
 * which variant types are really needed. So please feel free to submit error
 * reports or patches for the types you need.</p>
 * 
+ * <p>Microsoft documentation: <a
+ * href="http://msdn.microsoft.com/library/en-us/stg/stg/property_set_display_name_dictionary.asp?frame=true">
+ * Property Set Display Name Dictionary</a>.</p>
+ *
 * @author Rainer Klute <a
 * href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
 * @author Drew Varner (Drew.Varner InAndAround sc.edu)
@ -163,16 +166,18 @@ public class Property
    /**
     * <p>Reads a dictionary.</p>
     * 
-     * @param src The byte array containing the bytes making out the
-     * dictionary.
-     * @param offset At this offset within <var>src</var> the
-     * dictionary starts.
+     * @param src The byte array containing the bytes making up the dictionary.
+     * @param offset At this offset within <var>src</var> the dictionary
+     *        starts.
     * @param length The dictionary contains at most this many bytes.
     * @param codepage The codepage of the string values.
     * @return The dictionary
+     * @throws UnsupportedEncodingException if the dictionary's codepage is not
+     *         (yet) supported.
     */
    protected Map readDictionary(final byte[] src, final long offset,
                                 final int length, final int codepage)
+    throws UnsupportedEncodingException
    {
        /* Check whether "offset" points into the "src" array. */
        if (offset < 0 || offset > src.length)
@ -195,25 +200,45 @@ public class Property
            o += LittleEndian.INT_SIZE;

            /* The value (a string). The length is either the
-             * number of characters if the character set is Unicode or
-             * else the number of bytes. The length includes
-             * terminating 0x00 bytes which we have to strip off to
-             * create a Java string. */
+             * number of (two-byte) characters if the character set is Unicode
+             * or the number of bytes if the character set is not Unicode.
+             * The length includes terminating 0x00 bytes which we have to strip
+             * off to create a Java string. */
            long sLength = LittleEndian.getUInt(src, o);
            o += LittleEndian.INT_SIZE;

-            /* Read the bytes or characters depending on whether the
-             * character set is Unicode or not. */
-            StringBuffer b = new StringBuffer((int) sLength);
-            for (int j = 0; j < sLength; j++)
-                if (codepage == Constants.CP_UNICODE)
+            /* Read the string. */
+            final StringBuffer b = new StringBuffer();
+            switch (codepage)
+            {
+                case -1:
                {
-                    final int i1 = o + (j * 2);
-                    final int i2 = i1 + 1;
-                    b.append((char) ((src[i2] << 8) + src[i1]));
+                    /* Without a codepage the length is equal to the number of
+                     * bytes. */
+                    b.append(new String(src, o, (int) sLength));
+                    break;
                }
-                else
-                    b.append((char) src[o + j]);
+                case Constants.CP_UNICODE:
+                {
+                    /* The length is the number of characters, i.e. the number
+                     * of bytes is twice the number of the characters. */
+                    for (int j = 0; j < sLength; j++)
+                    {
+                        final int i1 = o + (j * 2);
+                        final int i2 = i1 + 1;
+                        b.append((char) ((src[i2] << 8) + src[i1]));
+                    }
+                    break;
+                }
+                default:
+                {
+                    /* For encodings other than Unicode the length is the number
+                     * of bytes. */
+                    b.append(new String(src, o, (int) sLength,
+                             VariantSupport.codepageToEncoding(codepage)));
+                    break;
+                }
+            }

            /* Strip 0x00 characters from the end of the string: */
            while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)