Bug 34247 fixed. Dictionaries are read with the specified codepage now.
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353636 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
80db4fa339
commit
d25395a486
@ -356,8 +356,7 @@ public class MutableSection extends Section
|
||||
getPropertyCount() * 2 * LittleEndian.INT_SIZE;
|
||||
|
||||
/* Writing the section's dictionary is tricky. If there is a dictionary
|
||||
* (property 0) the codepage property (property 1) has to be set, too.
|
||||
* Since HPSF supports Unicode only, the codepage must be 1200. */
|
||||
* (property 0) the codepage property (property 1) must be set, too. */
|
||||
int codepage = -1;
|
||||
if (getProperty(PropertyIDMap.PID_DICTIONARY) != null)
|
||||
{
|
||||
@ -370,9 +369,11 @@ public class MutableSection extends Section
|
||||
"Integer object.");
|
||||
}
|
||||
else
|
||||
throw new IllegalPropertySetDataException
|
||||
("The codepage property (ID = 1) must be set if the " +
|
||||
"section contains a dictionary.");
|
||||
/* Warning: The codepage property is not set although a
|
||||
* dictionary is present. In order to cope with this problem we
|
||||
* add the codepage property and set it to Unicode. */
|
||||
setProperty(PropertyIDMap.PID_CODEPAGE, (long) Variant.VT_I2,
|
||||
new Integer(Constants.CP_UNICODE));
|
||||
codepage = getCodepage();
|
||||
}
|
||||
|
||||
@ -594,9 +595,12 @@ public class MutableSection extends Section
|
||||
* don't have a type. */
|
||||
setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
|
||||
|
||||
/* Set the codepage property (ID 1) for the strings used in the
|
||||
* dictionary. HPSF always writes Unicode strings to the
|
||||
* dictionary. */
|
||||
/* If the codepage property (ID 1) for the strings (keys and
|
||||
* values) used in the dictionary is not yet defined, set it to
|
||||
* Unicode. */
|
||||
final Integer codepage =
|
||||
(Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
|
||||
if (codepage == null)
|
||||
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
|
||||
new Integer(Constants.CP_UNICODE));
|
||||
}
|
||||
|
@ -1,4 +1,3 @@
|
||||
|
||||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
@ -45,6 +44,10 @@ import org.apache.poi.util.LittleEndian;
|
||||
* which variant types are really needed. So please feel free to submit error
|
||||
* reports or patches for the types you need.</p>
|
||||
*
|
||||
* <p>Microsoft documentation: <a
|
||||
* href="http://msdn.microsoft.com/library/en-us/stg/stg/property_set_display_name_dictionary.asp?frame=true">
|
||||
* Property Set Display Name Dictionary</a>.
|
||||
*
|
||||
* @author Rainer Klute <a
|
||||
* href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
|
||||
* @author Drew Varner (Drew.Varner InAndAround sc.edu)
|
||||
@ -163,16 +166,18 @@ public class Property
|
||||
/**
|
||||
* <p>Reads a dictionary.</p>
|
||||
*
|
||||
* @param src The byte array containing the bytes making up the
|
||||
* dictionary.
|
||||
* @param offset At this offset within <var>src</var> the
|
||||
* dictionary starts.
|
||||
* @param src The byte array containing the bytes making up the dictionary.
|
||||
* @param offset At this offset within <var>src</var> the dictionary
|
||||
* starts.
|
||||
* @param length The dictionary contains at most this many bytes.
|
||||
* @param codepage The codepage of the string values.
|
||||
* @return The dictionary
|
||||
* @throws UnsupportedEncodingException if the dictionary's codepage is not
|
||||
* (yet) supported.
|
||||
*/
|
||||
protected Map readDictionary(final byte[] src, final long offset,
|
||||
final int length, final int codepage)
|
||||
throws UnsupportedEncodingException
|
||||
{
|
||||
/* Check whether "offset" points into the "src" array. */
|
||||
if (offset < 0 || offset > src.length)
|
||||
@ -195,25 +200,45 @@ public class Property
|
||||
o += LittleEndian.INT_SIZE;
|
||||
|
||||
/* The value (a string). The length is either the
|
||||
* number of characters if the character set is Unicode or
|
||||
* else the number of bytes. The length includes
|
||||
* terminating 0x00 bytes which we have to strip off to
|
||||
* create a Java string. */
|
||||
* number of (two-byte) characters if the character set is Unicode
|
||||
* or the number of bytes if the character set is not Unicode.
|
||||
* The length includes terminating 0x00 bytes which we have to strip
|
||||
* off to create a Java string. */
|
||||
long sLength = LittleEndian.getUInt(src, o);
|
||||
o += LittleEndian.INT_SIZE;
|
||||
|
||||
/* Read the bytes or characters depending on whether the
|
||||
* character set is Unicode or not. */
|
||||
StringBuffer b = new StringBuffer((int) sLength);
|
||||
/* Read the string. */
|
||||
final StringBuffer b = new StringBuffer();
|
||||
switch (codepage)
|
||||
{
|
||||
case -1:
|
||||
{
|
||||
/* Without a codepage the length is equal to the number of
|
||||
* bytes. */
|
||||
b.append(new String(src, o, (int) sLength));
|
||||
break;
|
||||
}
|
||||
case Constants.CP_UNICODE:
|
||||
{
|
||||
/* The length is the number of characters, i.e. the number
|
||||
* of bytes is twice the number of the characters. */
|
||||
for (int j = 0; j < sLength; j++)
|
||||
if (codepage == Constants.CP_UNICODE)
|
||||
{
|
||||
final int i1 = o + (j * 2);
|
||||
final int i2 = i1 + 1;
|
||||
b.append((char) ((src[i2] << 8) + src[i1]));
|
||||
}
|
||||
else
|
||||
b.append((char) src[o + j]);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
/* For encodings other than Unicode the length is the number
|
||||
* of bytes. */
|
||||
b.append(new String(src, o, (int) sLength,
|
||||
VariantSupport.codepageToEncoding(codepage)));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Strip 0x00 characters from the end of the string: */
|
||||
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
|
||||
|
Loading…
Reference in New Issue
Block a user