Bug 34247 fixed. Dictionaries are read with the specified codepage now.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Rainer Klute 2005-04-01 16:58:00 +00:00
parent 80db4fa339
commit d25395a486
2 changed files with 59 additions and 30 deletions

View File

@ -356,8 +356,7 @@ public class MutableSection extends Section
getPropertyCount() * 2 * LittleEndian.INT_SIZE;
/* Writing the section's dictionary is tricky. If there is a dictionary
* (property 0) the codepage property (property 1) has to be set, too.
* Since HPSF supports Unicode only, the codepage must be 1200. */
* (property 0) the codepage property (property 1) must be set, too. */
int codepage = -1;
if (getProperty(PropertyIDMap.PID_DICTIONARY) != null)
{
@ -370,9 +369,11 @@ public class MutableSection extends Section
"Integer object.");
}
else
throw new IllegalPropertySetDataException
("The codepage property (ID = 1) must be set if the " +
"section contains a dictionary.");
/* Warning: The codepage property is not set although a
* dictionary is present. In order to cope with this problem we
* add the codepage property and set it to Unicode. */
setProperty(PropertyIDMap.PID_CODEPAGE, (long) Variant.VT_I2,
new Integer(Constants.CP_UNICODE));
codepage = getCodepage();
}
@ -594,11 +595,14 @@ public class MutableSection extends Section
* don't have a type. */
setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
/* Set the codepage property (ID 1) for the strings used in the
* dictionary. HPSF always writes Unicode strings to the
* dictionary. */
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
new Integer(Constants.CP_UNICODE));
/* If the codepage property (ID 1) for the strings (keys and
* values) used in the dictionary is not yet defined, set it to
* Unicode. */
final Integer codepage =
(Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
if (codepage == null)
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
new Integer(Constants.CP_UNICODE));
}
else
/* Setting the dictionary to null means to remove property 0.

View File

@ -1,4 +1,3 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
@ -45,6 +44,10 @@ import org.apache.poi.util.LittleEndian;
* which variant types are really needed. So please feel free to submit error
* reports or patches for the types you need.</p>
*
* <p>Microsoft documentation: <a
* href="http://msdn.microsoft.com/library/en-us/stg/stg/property_set_display_name_dictionary.asp?frame=true">
* Property Set Display Name Dictionary</a>.
*
* @author Rainer Klute <a
* href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
* @author Drew Varner (Drew.Varner InAndAround sc.edu)
@ -163,16 +166,18 @@ public class Property
/**
* <p>Reads a dictionary.</p>
*
* @param src The byte array containing the bytes making out the
* dictionary.
* @param offset At this offset within <var>src</var> the
* dictionary starts.
* @param src The byte array containing the bytes making up the dictionary.
* @param offset At this offset within <var>src</var> the dictionary
* starts.
* @param length The dictionary contains at most this many bytes.
* @param codepage The codepage of the string values.
* @return The dictionary
* @throws UnsupportedEncodingException if the dictionary's codepage is not
* (yet) supported.
*/
protected Map readDictionary(final byte[] src, final long offset,
final int length, final int codepage)
throws UnsupportedEncodingException
{
/* Check whether "offset" points into the "src" array. */
if (offset < 0 || offset > src.length)
@ -195,25 +200,45 @@ public class Property
o += LittleEndian.INT_SIZE;
/* The value (a string). The length is either the
* number of characters if the character set is Unicode or
* else the number of bytes. The length includes
* terminating 0x00 bytes which we have to strip off to
* create a Java string. */
* number of (two-byte) characters if the character set is Unicode
* or the number of bytes if the character set is not Unicode.
* The length includes terminating 0x00 bytes which we have to strip
* off to create a Java string. */
long sLength = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
/* Read the bytes or characters depending on whether the
* character set is Unicode or not. */
StringBuffer b = new StringBuffer((int) sLength);
for (int j = 0; j < sLength; j++)
if (codepage == Constants.CP_UNICODE)
/* Read the string. */
final StringBuffer b = new StringBuffer();
switch (codepage)
{
case -1:
{
final int i1 = o + (j * 2);
final int i2 = i1 + 1;
b.append((char) ((src[i2] << 8) + src[i1]));
/* Without a codepage the length is equal to the number of
* bytes. */
b.append(new String(src, o, (int) sLength));
break;
}
else
b.append((char) src[o + j]);
case Constants.CP_UNICODE:
{
/* The length is the number of characters, i.e. the number
* of bytes is twice the number of the characters. */
for (int j = 0; j < sLength; j++)
{
final int i1 = o + (j * 2);
final int i2 = i1 + 1;
b.append((char) ((src[i2] << 8) + src[i1]));
}
break;
}
default:
{
/* For encodings other than Unicode the length is the number
* of bytes. */
b.append(new String(src, o, (int) sLength,
VariantSupport.codepageToEncoding(codepage)));
break;
}
}
/* Strip 0x00 characters from the end of the string: */
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)