Rewrite VariantSupport to use TypedPropertyValue as far as possible without breaking compatibility

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1187640 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-10-22 02:02:34 +00:00
parent f6db23f921
commit 0148cd3e20
3 changed files with 306 additions and 183 deletions

View File

@ -1,7 +1,12 @@
package org.apache.poi.hpsf; package org.apache.poi.hpsf;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
@Internal
class ClipboardData class ClipboardData
{ {
private int _format; private int _format;
@ -24,4 +29,29 @@ class ClipboardData
{ {
return LittleEndian.INT_SIZE * 2 + _value.length; return LittleEndian.INT_SIZE * 2 + _value.length;
} }
/**
 * Gives access to the byte array stored in this object.
 *
 * @return the internal {@code _value} array itself (no copy is made)
 */
byte[] getValue()
{
    return this._value;
}
/**
 * Serializes this object into a freshly allocated array of
 * {@link #getSize()} bytes, laid out as:
 * <ol>
 * <li>an int holding {@code INT_SIZE + _value.length},</li>
 * <li>an int holding {@code _format},</li>
 * <li>the bytes of {@code _value}.</li>
 * </ol>
 * This is the same sequence that {@link #write(OutputStream)} emits.
 *
 * @return the serialized bytes
 */
byte[] toByteArray()
{
    final byte[] result = new byte[getSize()];

    LittleEndian.putInt( result, 0 * LittleEndian.INT_SIZE,
            LittleEndian.INT_SIZE + _value.length );
    LittleEndian.putInt( result, 1 * LittleEndian.INT_SIZE, _format );

    /*
     * The payload starts right after the two header ints. No third int is
     * written at this offset: only _value.length bytes remain in the array
     * there, so an int-sized write would run past the end for payloads
     * shorter than INT_SIZE, and for longer payloads it would be
     * overwritten by the copy below anyway.
     */
    System.arraycopy( _value, 0, result, 2 * LittleEndian.INT_SIZE,
            _value.length );
    return result;
}
/**
 * Writes this object to the given stream: first an int holding
 * {@code INT_SIZE + _value.length}, then the {@code _format} int, then the
 * bytes of {@code _value}.
 *
 * @param out the stream to write to
 * @return the number of bytes written, i.e. two ints plus the payload
 * @throws IOException if writing to the stream fails
 */
int write( OutputStream out ) throws IOException
{
    final int payloadLength = _value.length;

    LittleEndian.putInt( LittleEndian.INT_SIZE + payloadLength, out );
    LittleEndian.putInt( _format, out );
    out.write( _value );

    return LittleEndian.INT_SIZE + LittleEndian.INT_SIZE + payloadLength;
}
} }

View File

@ -1,14 +1,133 @@
package org.apache.poi.hpsf; package org.apache.poi.hpsf;
import org.apache.poi.util.LittleEndian; import java.io.IOException;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndian;
@Internal @Internal
class CodePageString class CodePageString
{ {
/**
 * Translates a codepage number into the name of the Java character
 * encoding to use for it.
 *
 * @param codepage the codepage number; must be positive
 * @return the Java encoding name. Codepages without an explicit mapping
 *         yield {@code "cp"} followed by the number.
 * @throws UnsupportedEncodingException if {@code codepage} is zero or
 *         negative
 */
private static String codepageToEncoding( final int codepage )
        throws UnsupportedEncodingException
{
    if ( codepage <= 0 )
        throw new UnsupportedEncodingException(
                "Codepage number may not be " + codepage );

    switch ( codepage )
    {
    case Constants.CP_UTF16:
        /*
         * Java's "UTF-16" charset decodes BOM-less input as big-endian and
         * prepends a byte-order mark when encoding. Property set strings
         * carry no BOM and are little-endian (assuming CP_UTF16 is Windows
         * code page 1200 -- TODO confirm against Constants), so the
         * explicit little-endian charset is required here.
         */
        return "UTF-16LE";
    case Constants.CP_UTF16_BE:
        return "UTF-16BE";
    case Constants.CP_UTF8:
        return "UTF-8";
    case Constants.CP_037:
        return "cp037";
    case Constants.CP_GBK:
        return "GBK";
    case Constants.CP_MS949:
        return "ms949";
    case Constants.CP_WINDOWS_1250:
        return "windows-1250";
    case Constants.CP_WINDOWS_1251:
        return "windows-1251";
    case Constants.CP_WINDOWS_1252:
        return "windows-1252";
    case Constants.CP_WINDOWS_1253:
        return "windows-1253";
    case Constants.CP_WINDOWS_1254:
        return "windows-1254";
    case Constants.CP_WINDOWS_1255:
        return "windows-1255";
    case Constants.CP_WINDOWS_1256:
        return "windows-1256";
    case Constants.CP_WINDOWS_1257:
        return "windows-1257";
    case Constants.CP_WINDOWS_1258:
        return "windows-1258";
    case Constants.CP_JOHAB:
        return "johab";
    case Constants.CP_MAC_ROMAN:
        return "MacRoman";
    case Constants.CP_MAC_JAPAN:
        return "SJIS";
    case Constants.CP_MAC_CHINESE_TRADITIONAL:
        return "Big5";
    case Constants.CP_MAC_KOREAN:
        return "EUC-KR";
    case Constants.CP_MAC_ARABIC:
        return "MacArabic";
    case Constants.CP_MAC_HEBREW:
        return "MacHebrew";
    case Constants.CP_MAC_GREEK:
        return "MacGreek";
    case Constants.CP_MAC_CYRILLIC:
        return "MacCyrillic";
    case Constants.CP_MAC_CHINESE_SIMPLE:
        return "EUC_CN";
    case Constants.CP_MAC_ROMANIA:
        return "MacRomania";
    case Constants.CP_MAC_UKRAINE:
        return "MacUkraine";
    case Constants.CP_MAC_THAI:
        return "MacThai";
    case Constants.CP_MAC_CENTRAL_EUROPE:
        return "MacCentralEurope";
    case Constants.CP_MAC_ICELAND:
        return "MacIceland";
    case Constants.CP_MAC_TURKISH:
        return "MacTurkish";
    case Constants.CP_MAC_CROATIAN:
        return "MacCroatian";
    case Constants.CP_US_ACSII:
    case Constants.CP_US_ASCII2:
        return "US-ASCII";
    case Constants.CP_KOI8_R:
        return "KOI8-R";
    case Constants.CP_ISO_8859_1:
        return "ISO-8859-1";
    case Constants.CP_ISO_8859_2:
        return "ISO-8859-2";
    case Constants.CP_ISO_8859_3:
        return "ISO-8859-3";
    case Constants.CP_ISO_8859_4:
        return "ISO-8859-4";
    case Constants.CP_ISO_8859_5:
        return "ISO-8859-5";
    case Constants.CP_ISO_8859_6:
        return "ISO-8859-6";
    case Constants.CP_ISO_8859_7:
        return "ISO-8859-7";
    case Constants.CP_ISO_8859_8:
        return "ISO-8859-8";
    case Constants.CP_ISO_8859_9:
        return "ISO-8859-9";
    case Constants.CP_ISO_2022_JP1:
    case Constants.CP_ISO_2022_JP2:
    case Constants.CP_ISO_2022_JP3:
        return "ISO-2022-JP";
    case Constants.CP_ISO_2022_KR:
        return "ISO-2022-KR";
    case Constants.CP_EUC_JP:
        return "EUC-JP";
    case Constants.CP_EUC_KR:
        return "EUC-KR";
    case Constants.CP_GB2312:
        return "GB2312";
    case Constants.CP_GB18030:
        return "GB18030";
    case Constants.CP_SJIS:
        return "SJIS";
    default:
        /* No explicit mapping: fall back to the generic "cp<number>" name. */
        return "cp" + codepage;
    }
}
private byte[] _value;
CodePageString( final byte[] data, final int startOffset ) CodePageString( final byte[] data, final int startOffset )
{ {
@ -24,8 +143,41 @@ class CodePageString
+ " is not NULL-terminated" ); + " is not NULL-terminated" );
} }
/**
 * Creates an instance from a Java string by delegating to
 * {@link #setJavaValue(String, int)}.
 *
 * @param string the text to store
 * @param codepage the codepage used to encode the text; {@code -1}
 *        selects the platform default encoding
 * @throws UnsupportedEncodingException if the codepage cannot be mapped to
 *         a supported encoding
 */
CodePageString( String string, int codepage )
        throws UnsupportedEncodingException
{
    this.setJavaValue( string, codepage );
}
/**
 * Decodes the stored bytes into a Java string and removes the NUL
 * terminator.
 *
 * @param codepage the codepage the bytes are encoded in; {@code -1}
 *        selects the platform default encoding
 * @return the decoded text without any trailing {@code \0} characters
 * @throws UnsupportedEncodingException if the codepage cannot be mapped to
 *         a supported encoding
 */
String getJavaValue( int codepage ) throws UnsupportedEncodingException
{
    final String decoded;
    if ( codepage == -1 )
        decoded = new String( _value );
    else
        decoded = new String( _value, codepageToEncoding( codepage ) );

    /*
     * Strip every trailing NUL instead of blindly dropping one character:
     * this keeps the last real character when no terminator is present,
     * copes with values padded by several NULs (as the previous
     * VariantSupport reader did), and cannot fail on an empty string.
     */
    int end = decoded.length();
    while ( end > 0 && decoded.charAt( end - 1 ) == '\0' )
        end--;
    return decoded.substring( 0, end );
}
/*
 * Returns the size in bytes of the serialized form: one int (INT_SIZE)
 * plus the number of bytes held in _value. This matches the byte count
 * reported by write(OutputStream), which emits a length int followed by
 * the _value bytes.
 */
int getSize() int getSize()
{ {
return LittleEndian.INT_SIZE + _value.length; return LittleEndian.INT_SIZE + _value.length;
} }
/**
 * Replaces the stored bytes with the encoded form of the given text
 * followed by a NUL terminator.
 *
 * @param string the text to store
 * @param codepage the codepage used to encode the text; {@code -1}
 *        selects the platform default encoding
 * @throws UnsupportedEncodingException if the codepage cannot be mapped to
 *         a supported encoding
 */
void setJavaValue( String string, int codepage )
        throws UnsupportedEncodingException
{
    final String terminated = string + "\0";
    _value = codepage == -1
            ? terminated.getBytes()
            : terminated.getBytes( codepageToEncoding( codepage ) );
}
/**
 * Writes this string to the given stream: an int holding the number of
 * stored bytes, followed by the bytes themselves.
 *
 * @param out the stream to write to
 * @return the number of bytes written, i.e. one int plus the stored bytes
 * @throws IOException if writing to the stream fails
 */
int write( OutputStream out ) throws IOException
{
    final int byteCount = _value.length;

    LittleEndian.putInt( byteCount, out );
    out.write( _value );

    return LittleEndian.INT_SIZE + byteCount;
}
} }

View File

@ -24,9 +24,6 @@ import java.util.Date;
import java.util.LinkedList; import java.util.LinkedList;
import java.util.List; import java.util.List;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.LittleEndianConsts;
/** /**
* <p>Supports reading and writing of variant data.</p> * <p>Supports reading and writing of variant data.</p>
* *
@ -153,169 +150,124 @@ public class VariantSupport extends Variant
* @exception UnsupportedEncodingException if the specified codepage is not * @exception UnsupportedEncodingException if the specified codepage is not
* supported. * supported.
* @see Variant * @see Variant
* @deprecated Use {@link #read(byte[],int,long,int)} instead
*/ */
public static Object read(final byte[] src, final int offset, @Deprecated
final int length, final long type, public static Object read( final byte[] src, final int offset,
final int codepage) final int length, final long type, final int codepage )
throws ReadingNotSupportedException, UnsupportedEncodingException throws ReadingNotSupportedException, UnsupportedEncodingException
{ {
Object value; return read( src, offset, type, codepage );
int o1 = offset;
int l1 = length - LittleEndian.INT_SIZE;
long lType = type;
/* Instead of trying to read 8-bit characters from a Unicode string,
* read 16-bit characters. */
if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR)
lType = Variant.VT_LPWSTR;
switch ((int) lType)
{
case Variant.VT_EMPTY:
{
value = null;
break;
}
case Variant.VT_I2:
{
/*
* Read a short. In Java it is represented as an
* Integer object.
*/
value = Integer.valueOf(LittleEndian.getShort(src, o1));
break;
}
case Variant.VT_I4:
{
/*
* Read a word. In Java it is represented as an
* Integer object.
*/
value = Integer.valueOf(LittleEndian.getInt(src, o1));
break;
}
case Variant.VT_I8:
{
/*
* Read a double word. In Java it is represented as a
* Long object.
*/
value = Long.valueOf(LittleEndian.getLong(src, o1));
break;
}
case Variant.VT_R8:
{
/*
* Read an eight-byte double value. In Java it is represented as
* a Double object.
*/
value = new Double(LittleEndian.getDouble(src, o1));
break;
}
case Variant.VT_FILETIME:
{
/*
* Read a FILETIME object. In Java it is represented
* as a Date object.
*/
final long low = LittleEndian.getUInt(src, o1);
o1 += LittleEndian.INT_SIZE;
final long high = LittleEndian.getUInt(src, o1);
value = Util.filetimeToDate((int) high, (int) low);
break;
}
case Variant.VT_LPSTR:
{
/*
* Read a byte string. In Java it is represented as a
* String object. The 0x00 bytes at the end must be
* stripped.
*/
final int first = o1 + LittleEndian.INT_SIZE;
long last = first + LittleEndian.getUInt(src, o1) - 1;
o1 += LittleEndian.INT_SIZE;
while (src[(int) last] == 0 && first <= last)
last--;
final int l = (int) (last - first + 1);
value = codepage != -1 ?
new String(src, first, l,
codepageToEncoding(codepage)) :
new String(src, first, l);
break;
}
case Variant.VT_LPWSTR:
{
/*
* Read a Unicode string. In Java it is represented as
* a String object. The 0x00 bytes at the end must be
* stripped.
*/
final int first = o1 + LittleEndian.INT_SIZE;
long last = first + LittleEndian.getUInt(src, o1) - 1;
long l = last - first;
o1 += LittleEndian.INT_SIZE;
StringBuffer b = new StringBuffer((int) (last - first));
for (int i = 0; i <= l; i++)
{
final int i1 = o1 + (i * 2);
final int i2 = i1 + 1;
final int high = src[i2] << 8;
final int low = src[i1] & 0x00ff;
final char c = (char) (high | low);
b.append(c);
}
/* Strip 0x00 characters from the end of the string: */
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
b.setLength(b.length() - 1);
value = b.toString();
break;
}
case Variant.VT_CF:
{
if(l1 < 0) {
/**
* YK: reading the ClipboardData packet (VT_CF) is not quite correct.
* The size of the data is determined by the first four bytes of the packet
* while the current implementation calculates it in the Section constructor.
* Test files in Bugzilla 42726 and 45583 clearly show that this approach does not always work.
* The workaround below attempts to gracefully handle such cases instead of throwing exceptions.
*
* August 20, 2009
*/
l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
}
final byte[] v = new byte[l1];
System.arraycopy(src, o1, v, 0, v.length);
value = v;
break;
}
case Variant.VT_BOOL:
{
/*
* The first four bytes in src, from src[offset] to
* src[offset + 3] contain the DWord for VT_BOOL, so
* skip it, we don't need it.
*/
// final int first = offset + LittleEndian.INT_SIZE;
long bool = LittleEndian.getUInt(src, o1);
if (bool != 0)
value = Boolean.TRUE;
else
value = Boolean.FALSE;
break;
}
default:
{
final byte[] v = new byte[l1];
for (int i = 0; i < l1; i++)
v[i] = src[(o1 + i)];
throw new ReadingNotSupportedException(type, v);
}
}
return value;
} }
/**
 * <p>Reads a variant value from a byte array and converts it to the Java
 * type this API has traditionally returned.</p>
 *
 * <p>Parsing is delegated to {@link TypedPropertyValue}; this method only
 * maps the parsed value to a Java object. Types that have no mapping here
 * are reported through {@link ReadingNotSupportedException}, which carries
 * the raw bytes of the value.</p>
 *
 * @param src The byte array
 * @param offset The offset in the byte array where the variant starts
 * @param type The variant type to read
 * @param codepage The codepage to use for non-wide strings
 * @return A Java object that corresponds best to the variant field. For
 *         example, a VT_I2 is returned as an {@link Integer}, a VT_LPSTR
 *         or VT_LPWSTR as a {@link String}, a VT_FILETIME as the result of
 *         {@link Util#filetimeToDate(int, int)}, a VT_BOOL as a
 *         {@link Boolean} and a VT_CF as a byte array.
 * @exception ReadingNotSupportedException if a property is to be read
 *            whose variant type is not converted by this method
 * @exception UnsupportedEncodingException if the specified codepage is not
 *            supported.
 * @see Variant
 */
public static Object read(final byte[] src, final int offset,
        final long type, final int codepage)
        throws ReadingNotSupportedException, UnsupportedEncodingException
{
    final int variantType = (int) type;
    final TypedPropertyValue parsed = new TypedPropertyValue( variantType,
            null );
    /* Presumably the number of value bytes consumed, excluding padding. */
    final int rawLength = parsed.readValue( src, offset );

    switch ( variantType )
    {
    case Variant.VT_EMPTY:
    case Variant.VT_I4:
    case Variant.VT_I8:
    case Variant.VT_R8:
        /*
         * More property types could be converted into Java objects, but
         * the current API has to be preserved and it reports the other
         * types as byte arrays. To be revisited in a future major
         * version -- sergey
         */
        return parsed.getValue();

    case Variant.VT_I2:
        /*
         * Widened to Integer for backward compatibility with previous
         * versions of POI -- sergey
         */
        final Short shortValue = (Short) parsed.getValue();
        return Integer.valueOf( shortValue.intValue() );

    case Variant.VT_FILETIME:
        final Filetime filetime = (Filetime) parsed.getValue();
        final int high = (int) filetime.getHigh();
        final int low = (int) filetime.getLow();
        return Util.filetimeToDate( high, low );

    case Variant.VT_LPSTR:
        return ( (CodePageString) parsed.getValue() )
                .getJavaValue( codepage );

    case Variant.VT_LPWSTR:
        return ( (UnicodeString) parsed.getValue() ).toJavaString();

    case Variant.VT_CF:
        /*
         * YK (August 20, 2009): the previous implementation took the
         * ClipboardData length from a value calculated in the Section
         * constructor, which did not always work (see Bugzilla 42726 and
         * 45583). The bytes returned here come from the parsed
         * ClipboardData instead.
         */
        return ( (ClipboardData) parsed.getValue() ).toByteArray();

    case Variant.VT_BOOL:
        return Boolean.valueOf( ( (VariantBool) parsed.getValue() )
                .getValue() );

    default:
        /*
         * Not very good, but nothing better can be done without breaking
         * the current API: hand the raw bytes to the caller inside the
         * exception -- sergey
         */
        final byte[] raw = new byte[rawLength];
        System.arraycopy( src, offset, raw, 0, rawLength );
        throw new ReadingNotSupportedException( type, raw );
    }
}
/** /**
* <p>Turns a codepage number into the equivalent character encoding's * <p>Turns a codepage number into the equivalent character encoding's
@ -491,16 +443,9 @@ public class VariantSupport extends Variant
} }
case Variant.VT_LPSTR: case Variant.VT_LPSTR:
{ {
final byte[] bytes = CodePageString codePageString = new CodePageString( (String) value,
(codepage == -1 ? codepage );
((String) value).getBytes() : length += codePageString.write( out );
((String) value).getBytes(codepageToEncoding(codepage)));
length = TypeWriter.writeUIntToStream(out, bytes.length + 1);
final byte[] b = new byte[bytes.length + 1];
System.arraycopy(bytes, 0, b, 0, bytes.length);
b[b.length - 1] = 0x00;
out.write(b);
length += b.length;
break; break;
} }
case Variant.VT_LPWSTR: case Variant.VT_LPWSTR:
@ -533,14 +478,13 @@ public class VariantSupport extends Variant
} }
case Variant.VT_EMPTY: case Variant.VT_EMPTY:
{ {
TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY); length += TypeWriter.writeUIntToStream( out, Variant.VT_EMPTY );
length = LittleEndianConsts.INT_SIZE;
break; break;
} }
case Variant.VT_I2: case Variant.VT_I2:
{ {
TypeWriter.writeToStream(out, ((Integer) value).shortValue()); length += TypeWriter.writeToStream( out,
length = LittleEndianConsts.SHORT_SIZE; ( (Integer) value ).shortValue() );
break; break;
} }
case Variant.VT_I4: case Variant.VT_I4:
@ -558,8 +502,7 @@ public class VariantSupport extends Variant
} }
case Variant.VT_I8: case Variant.VT_I8:
{ {
TypeWriter.writeToStream(out, ((Long) value).longValue()); length += TypeWriter.writeToStream(out, ((Long) value).longValue());
length = LittleEndianConsts.LONG_SIZE;
break; break;
} }
case Variant.VT_R8: case Variant.VT_R8:
@ -573,10 +516,8 @@ public class VariantSupport extends Variant
long filetime = Util.dateToFileTime((Date) value); long filetime = Util.dateToFileTime((Date) value);
int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL); int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
int low = (int) (filetime & 0x00000000FFFFFFFFL); int low = (int) (filetime & 0x00000000FFFFFFFFL);
length += TypeWriter.writeUIntToStream Filetime filetimeValue = new Filetime( low, high);
(out, 0x0000000FFFFFFFFL & low); length += filetimeValue.write( out );
length += TypeWriter.writeUIntToStream
(out, 0x0000000FFFFFFFFL & high);
break; break;
} }
default: default: