From 0148cd3e204cb9a65e686ad3c1348c4485b12a39 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Sat, 22 Oct 2011 02:02:34 +0000 Subject: [PATCH] rewrite VariantSupport to use TypedPropertyValue at max without breaking compatibility git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1187640 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hpsf/ClipboardData.java | 30 ++ .../org/apache/poi/hpsf/CodePageString.java | 156 ++++++++- .../org/apache/poi/hpsf/VariantSupport.java | 303 +++++++----------- 3 files changed, 306 insertions(+), 183 deletions(-) diff --git a/src/java/org/apache/poi/hpsf/ClipboardData.java b/src/java/org/apache/poi/hpsf/ClipboardData.java index 477a40b95..72e680489 100644 --- a/src/java/org/apache/poi/hpsf/ClipboardData.java +++ b/src/java/org/apache/poi/hpsf/ClipboardData.java @@ -1,7 +1,12 @@ package org.apache.poi.hpsf; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.util.Internal; import org.apache.poi.util.LittleEndian; +@Internal class ClipboardData { private int _format; @@ -24,4 +29,29 @@ class ClipboardData { return LittleEndian.INT_SIZE * 2 + _value.length; } + + byte[] getValue() + { + return _value; + } + + byte[] toByteArray() + { + byte[] result = new byte[getSize()]; + LittleEndian.putInt( result, 0 * LittleEndian.INT_SIZE, + LittleEndian.INT_SIZE + _value.length ); + LittleEndian.putInt( result, 1 * LittleEndian.INT_SIZE, _format ); + LittleEndian.putInt( result, 2 * LittleEndian.INT_SIZE, _value.length ); + System.arraycopy( _value, 0, result, LittleEndian.INT_SIZE + + LittleEndian.INT_SIZE, _value.length ); + return result; + } + + int write( OutputStream out ) throws IOException + { + LittleEndian.putInt( LittleEndian.INT_SIZE + _value.length, out ); + LittleEndian.putInt( _format, out ); + out.write( _value ); + return 2 * LittleEndian.INT_SIZE + _value.length; + } } diff --git a/src/java/org/apache/poi/hpsf/CodePageString.java 
b/src/java/org/apache/poi/hpsf/CodePageString.java index c0e9de92e..89f368483 100644 --- a/src/java/org/apache/poi/hpsf/CodePageString.java +++ b/src/java/org/apache/poi/hpsf/CodePageString.java @@ -1,14 +1,133 @@ package org.apache.poi.hpsf; -import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; +import java.io.UnsupportedEncodingException; import org.apache.poi.util.Internal; +import org.apache.poi.util.LittleEndian; @Internal class CodePageString { - byte[] _value; + private static String codepageToEncoding( final int codepage ) + throws UnsupportedEncodingException + { + if ( codepage <= 0 ) + throw new UnsupportedEncodingException( + "Codepage number may not be " + codepage ); + switch ( codepage ) + { + case Constants.CP_UTF16: + return "UTF-16"; + case Constants.CP_UTF16_BE: + return "UTF-16BE"; + case Constants.CP_UTF8: + return "UTF-8"; + case Constants.CP_037: + return "cp037"; + case Constants.CP_GBK: + return "GBK"; + case Constants.CP_MS949: + return "ms949"; + case Constants.CP_WINDOWS_1250: + return "windows-1250"; + case Constants.CP_WINDOWS_1251: + return "windows-1251"; + case Constants.CP_WINDOWS_1252: + return "windows-1252"; + case Constants.CP_WINDOWS_1253: + return "windows-1253"; + case Constants.CP_WINDOWS_1254: + return "windows-1254"; + case Constants.CP_WINDOWS_1255: + return "windows-1255"; + case Constants.CP_WINDOWS_1256: + return "windows-1256"; + case Constants.CP_WINDOWS_1257: + return "windows-1257"; + case Constants.CP_WINDOWS_1258: + return "windows-1258"; + case Constants.CP_JOHAB: + return "johab"; + case Constants.CP_MAC_ROMAN: + return "MacRoman"; + case Constants.CP_MAC_JAPAN: + return "SJIS"; + case Constants.CP_MAC_CHINESE_TRADITIONAL: + return "Big5"; + case Constants.CP_MAC_KOREAN: + return "EUC-KR"; + case Constants.CP_MAC_ARABIC: + return "MacArabic"; + case Constants.CP_MAC_HEBREW: + return "MacHebrew"; + case Constants.CP_MAC_GREEK: + return "MacGreek"; + case 
Constants.CP_MAC_CYRILLIC: + return "MacCyrillic"; + case Constants.CP_MAC_CHINESE_SIMPLE: + return "EUC_CN"; + case Constants.CP_MAC_ROMANIA: + return "MacRomania"; + case Constants.CP_MAC_UKRAINE: + return "MacUkraine"; + case Constants.CP_MAC_THAI: + return "MacThai"; + case Constants.CP_MAC_CENTRAL_EUROPE: + return "MacCentralEurope"; + case Constants.CP_MAC_ICELAND: + return "MacIceland"; + case Constants.CP_MAC_TURKISH: + return "MacTurkish"; + case Constants.CP_MAC_CROATIAN: + return "MacCroatian"; + case Constants.CP_US_ACSII: + case Constants.CP_US_ASCII2: + return "US-ASCII"; + case Constants.CP_KOI8_R: + return "KOI8-R"; + case Constants.CP_ISO_8859_1: + return "ISO-8859-1"; + case Constants.CP_ISO_8859_2: + return "ISO-8859-2"; + case Constants.CP_ISO_8859_3: + return "ISO-8859-3"; + case Constants.CP_ISO_8859_4: + return "ISO-8859-4"; + case Constants.CP_ISO_8859_5: + return "ISO-8859-5"; + case Constants.CP_ISO_8859_6: + return "ISO-8859-6"; + case Constants.CP_ISO_8859_7: + return "ISO-8859-7"; + case Constants.CP_ISO_8859_8: + return "ISO-8859-8"; + case Constants.CP_ISO_8859_9: + return "ISO-8859-9"; + case Constants.CP_ISO_2022_JP1: + case Constants.CP_ISO_2022_JP2: + case Constants.CP_ISO_2022_JP3: + return "ISO-2022-JP"; + case Constants.CP_ISO_2022_KR: + return "ISO-2022-KR"; + case Constants.CP_EUC_JP: + return "EUC-JP"; + case Constants.CP_EUC_KR: + return "EUC-KR"; + case Constants.CP_GB2312: + return "GB2312"; + case Constants.CP_GB18030: + return "GB18030"; + case Constants.CP_SJIS: + return "SJIS"; + default: + return "cp" + codepage; + } + } + + private byte[] _value; CodePageString( final byte[] data, final int startOffset ) { @@ -24,8 +143,41 @@ class CodePageString + " is not NULL-terminated" ); } + CodePageString( String string, int codepage ) + throws UnsupportedEncodingException + { + setJavaValue( string, codepage ); + } + + String getJavaValue( int codepage ) throws UnsupportedEncodingException + { + String result; + if ( 
codepage == -1 ) + result = new String( _value ); + else + result = new String( _value, codepageToEncoding( codepage ) ); + return result.substring( 0, result.length() - 1 ); + } + int getSize() { return LittleEndian.INT_SIZE + _value.length; } + + void setJavaValue( String string, int codepage ) + throws UnsupportedEncodingException + { + if ( codepage == -1 ) + _value = ( string + "\0" ).getBytes(); + else + _value = ( string + "\0" ) + .getBytes( codepageToEncoding( codepage ) ); + } + + int write( OutputStream out ) throws IOException + { + LittleEndian.putInt( _value.length, out ); + out.write( _value ); + return LittleEndian.INT_SIZE + _value.length; + } } diff --git a/src/java/org/apache/poi/hpsf/VariantSupport.java b/src/java/org/apache/poi/hpsf/VariantSupport.java index f1752de8a..914376713 100644 --- a/src/java/org/apache/poi/hpsf/VariantSupport.java +++ b/src/java/org/apache/poi/hpsf/VariantSupport.java @@ -24,9 +24,6 @@ import java.util.Date; import java.util.LinkedList; import java.util.List; -import org.apache.poi.util.LittleEndian; -import org.apache.poi.util.LittleEndianConsts; - /** *

Supports reading and writing of variant data.

* @@ -153,169 +150,124 @@ public class VariantSupport extends Variant * @exception UnsupportedEncodingException if the specified codepage is not * supported. * @see Variant + * @deprecated Use {@link #read(byte[],int,long,int)} instead */ - public static Object read(final byte[] src, final int offset, - final int length, final long type, - final int codepage) - throws ReadingNotSupportedException, UnsupportedEncodingException + @Deprecated + public static Object read( final byte[] src, final int offset, + final int length, final long type, final int codepage ) + throws ReadingNotSupportedException, UnsupportedEncodingException { - Object value; - int o1 = offset; - int l1 = length - LittleEndian.INT_SIZE; - long lType = type; - - /* Instead of trying to read 8-bit characters from a Unicode string, - * read 16-bit characters. */ - if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR) - lType = Variant.VT_LPWSTR; - - switch ((int) lType) - { - case Variant.VT_EMPTY: - { - value = null; - break; - } - case Variant.VT_I2: - { - /* - * Read a short. In Java it is represented as an - * Integer object. - */ - value = Integer.valueOf(LittleEndian.getShort(src, o1)); - break; - } - case Variant.VT_I4: - { - /* - * Read a word. In Java it is represented as an - * Integer object. - */ - value = Integer.valueOf(LittleEndian.getInt(src, o1)); - break; - } - case Variant.VT_I8: - { - /* - * Read a double word. In Java it is represented as a - * Long object. - */ - value = Long.valueOf(LittleEndian.getLong(src, o1)); - break; - } - case Variant.VT_R8: - { - /* - * Read an eight-byte double value. In Java it is represented as - * a Double object. - */ - value = new Double(LittleEndian.getDouble(src, o1)); - break; - } - case Variant.VT_FILETIME: - { - /* - * Read a FILETIME object. In Java it is represented - * as a Date object. 
- */ - final long low = LittleEndian.getUInt(src, o1); - o1 += LittleEndian.INT_SIZE; - final long high = LittleEndian.getUInt(src, o1); - value = Util.filetimeToDate((int) high, (int) low); - break; - } - case Variant.VT_LPSTR: - { - /* - * Read a byte string. In Java it is represented as a - * String object. The 0x00 bytes at the end must be - * stripped. - */ - final int first = o1 + LittleEndian.INT_SIZE; - long last = first + LittleEndian.getUInt(src, o1) - 1; - o1 += LittleEndian.INT_SIZE; - while (src[(int) last] == 0 && first <= last) - last--; - final int l = (int) (last - first + 1); - value = codepage != -1 ? - new String(src, first, l, - codepageToEncoding(codepage)) : - new String(src, first, l); - break; - } - case Variant.VT_LPWSTR: - { - /* - * Read a Unicode string. In Java it is represented as - * a String object. The 0x00 bytes at the end must be - * stripped. - */ - final int first = o1 + LittleEndian.INT_SIZE; - long last = first + LittleEndian.getUInt(src, o1) - 1; - long l = last - first; - o1 += LittleEndian.INT_SIZE; - StringBuffer b = new StringBuffer((int) (last - first)); - for (int i = 0; i <= l; i++) - { - final int i1 = o1 + (i * 2); - final int i2 = i1 + 1; - final int high = src[i2] << 8; - final int low = src[i1] & 0x00ff; - final char c = (char) (high | low); - b.append(c); - } - /* Strip 0x00 characters from the end of the string: */ - while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00) - b.setLength(b.length() - 1); - value = b.toString(); - break; - } - case Variant.VT_CF: - { - if(l1 < 0) { - /** - * YK: reading the ClipboardData packet (VT_CF) is not quite correct. - * The size of the data is determined by the first four bytes of the packet - * while the current implementation calculates it in the Section constructor. - * Test files in Bugzilla 42726 and 45583 clearly show that this approach does not always work. - * The workaround below attempts to gracefully handle such cases instead of throwing exceptions. 
- * - * August 20, 2009 - */ - l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE; - } - final byte[] v = new byte[l1]; - System.arraycopy(src, o1, v, 0, v.length); - value = v; - break; - } - case Variant.VT_BOOL: - { - /* - * The first four bytes in src, from src[offset] to - * src[offset + 3] contain the DWord for VT_BOOL, so - * skip it, we don't need it. - */ - // final int first = offset + LittleEndian.INT_SIZE; - long bool = LittleEndian.getUInt(src, o1); - if (bool != 0) - value = Boolean.TRUE; - else - value = Boolean.FALSE; - break; - } - default: - { - final byte[] v = new byte[l1]; - for (int i = 0; i < l1; i++) - v[i] = src[(o1 + i)]; - throw new ReadingNotSupportedException(type, v); - } - } - return value; + return read( src, offset, type, codepage ); } + /** + *

Reads a variant type from a byte array.

+ * + * @param src The byte array + * @param offset The offset in the byte array where the variant starts + * @param type The variant type to read + * @param codepage The codepage to use for non-wide strings + * @return A Java object that corresponds best to the variant field. For + * example, a VT_I4 is returned as a {@link Long}, a VT_LPSTR as a + * {@link String}. + * @exception ReadingNotSupportedException if a property is to be read + * whose variant type HPSF does not yet support + * @exception UnsupportedEncodingException if the specified codepage is not + * supported. + * @see Variant + */ + public static Object read(final byte[] src, final int offset, + final long type, final int codepage) + throws ReadingNotSupportedException, UnsupportedEncodingException + { + TypedPropertyValue typedPropertyValue = new TypedPropertyValue( + (int) type, null ); + int unpadded = typedPropertyValue.readValue( src, offset ); + switch ( (int) type ) + { + case Variant.VT_EMPTY: + case Variant.VT_I4: + case Variant.VT_I8: + case Variant.VT_R8: + /* + * we have more property types that can be converted into Java + * objects, but the current API needs to be preserved, and it returns + * other types as byte arrays. In future major versions it shall be + * changed -- sergey + */ + return typedPropertyValue.getValue(); + + case Variant.VT_I2: + { + /* + * also for backward-compatibility with prev.
versions of POI + * --sergey + */ + return Integer.valueOf( ( (Short) typedPropertyValue.getValue() ) + .intValue() ); + } + case Variant.VT_FILETIME: + { + Filetime filetime = (Filetime) typedPropertyValue.getValue(); + return Util.filetimeToDate( (int) filetime.getHigh(), + (int) filetime.getLow() ); + } + case Variant.VT_LPSTR: + { + CodePageString string = (CodePageString) typedPropertyValue + .getValue(); + return string.getJavaValue( codepage ); + } + case Variant.VT_LPWSTR: + { + UnicodeString string = (UnicodeString) typedPropertyValue + .getValue(); + return string.toJavaString(); + } + case Variant.VT_CF: + { + // if(l1 < 0) { + /** + * YK: reading the ClipboardData packet (VT_CF) is not quite + * correct. The size of the data is determined by the first four + * bytes of the packet while the current implementation calculates + * it in the Section constructor. Test files in Bugzilla 42726 and + * 45583 clearly show that this approach does not always work. The + * workaround below attempts to gracefully handle such cases instead + * of throwing exceptions. + * + * August 20, 2009 + */ + // l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE; + // } + // final byte[] v = new byte[l1]; + // System.arraycopy(src, o1, v, 0, v.length); + // value = v; + // break; + ClipboardData clipboardData = (ClipboardData) typedPropertyValue + .getValue(); + return clipboardData.toByteArray(); + } + + case Variant.VT_BOOL: + { + VariantBool bool = (VariantBool) typedPropertyValue.getValue(); + return Boolean.valueOf( bool.getValue() ); + } + + default: + { + /* + * it is not very good, but what can do without breaking current + * API? --sergey + */ + final byte[] v = new byte[unpadded]; + System.arraycopy( src, offset, v, 0, unpadded ); + throw new ReadingNotSupportedException( type, v ); + } + } + } /** *

Turns a codepage number into the equivalent character encoding's @@ -491,16 +443,9 @@ public class VariantSupport extends Variant } case Variant.VT_LPSTR: { - final byte[] bytes = - (codepage == -1 ? - ((String) value).getBytes() : - ((String) value).getBytes(codepageToEncoding(codepage))); - length = TypeWriter.writeUIntToStream(out, bytes.length + 1); - final byte[] b = new byte[bytes.length + 1]; - System.arraycopy(bytes, 0, b, 0, bytes.length); - b[b.length - 1] = 0x00; - out.write(b); - length += b.length; + CodePageString codePageString = new CodePageString( (String) value, + codepage ); + length += codePageString.write( out ); break; } case Variant.VT_LPWSTR: @@ -533,14 +478,13 @@ public class VariantSupport extends Variant } case Variant.VT_EMPTY: { - TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY); - length = LittleEndianConsts.INT_SIZE; + length += TypeWriter.writeUIntToStream( out, Variant.VT_EMPTY ); break; } case Variant.VT_I2: { - TypeWriter.writeToStream(out, ((Integer) value).shortValue()); - length = LittleEndianConsts.SHORT_SIZE; + length += TypeWriter.writeToStream( out, + ( (Integer) value ).shortValue() ); break; } case Variant.VT_I4: @@ -558,8 +502,7 @@ public class VariantSupport extends Variant } case Variant.VT_I8: { - TypeWriter.writeToStream(out, ((Long) value).longValue()); - length = LittleEndianConsts.LONG_SIZE; + length += TypeWriter.writeToStream(out, ((Long) value).longValue()); break; } case Variant.VT_R8: @@ -573,10 +516,8 @@ public class VariantSupport extends Variant long filetime = Util.dateToFileTime((Date) value); int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL); int low = (int) (filetime & 0x00000000FFFFFFFFL); - length += TypeWriter.writeUIntToStream - (out, 0x0000000FFFFFFFFL & low); - length += TypeWriter.writeUIntToStream - (out, 0x0000000FFFFFFFFL & high); + Filetime filetimeValue = new Filetime( low, high); + length += filetimeValue.write( out ); break; } default: