598 lines
22 KiB
Java
598 lines
22 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.hpsf;
|
|
|
|
import java.io.IOException;
|
|
import java.io.OutputStream;
|
|
import java.io.UnsupportedEncodingException;
|
|
import java.util.Date;
|
|
import java.util.LinkedList;
|
|
import java.util.List;
|
|
|
|
import org.apache.poi.util.LittleEndian;
|
|
import org.apache.poi.util.LittleEndianConsts;
|
|
|
|
/**
|
|
* <p>Supports reading and writing of variant data.</p>
|
|
*
|
|
* <p><strong>FIXME (3):</strong> Reading and writing should be made more
|
|
* uniform than it is now. The following items should be resolved:
|
|
*
|
|
* <ul>
|
|
*
|
|
* <li><p>Reading requires a length parameter that is 4 bytes greater than the
|
|
* actual data, because the variant type field is included. </p></li>
|
|
*
|
|
* <li><p>Reading reads from a byte array while writing writes to a byte array
|
|
* output stream.</p></li>
|
|
*
|
|
* </ul>
|
|
*
|
|
* @author Rainer Klute <a
|
|
* href="mailto:klute@rainer-klute.de">&lt;klute@rainer-klute.de&gt;</a>
|
|
*/
|
|
public class VariantSupport extends Variant
|
|
{
|
|
|
|
private static boolean logUnsupportedTypes = false;
|
|
|
|
/**
|
|
* <p>Specifies whether warnings about unsupported variant types are to be
|
|
* written to <code>System.err</code> or not.</p>
|
|
*
|
|
* @param logUnsupportedTypes If <code>true</code> warnings will be written,
|
|
* if <code>false</code> they won't.
|
|
*/
|
|
public static void setLogUnsupportedTypes(final boolean logUnsupportedTypes)
|
|
{
|
|
VariantSupport.logUnsupportedTypes = logUnsupportedTypes;
|
|
}
|
|
|
|
/**
|
|
* <p>Checks whether logging of unsupported variant types warning is turned
|
|
* on or off.</p>
|
|
*
|
|
* @return <code>true</code> if logging is turned on, else
|
|
* <code>false</code>.
|
|
*/
|
|
public static boolean isLogUnsupportedTypes()
|
|
{
|
|
return logUnsupportedTypes;
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* <p>Keeps a list of the variant types an "unsupported" message has already
|
|
* been issued for.</p>
|
|
*/
|
|
protected static List unsupportedMessage;
|
|
|
|
/**
|
|
* <p>Writes a warning to <code>System.err</code> that a variant type is
|
|
* unsupported by HPSF. Such a warning is written only once for each variant
|
|
* type. Log messages can be turned on or off by </p>
|
|
*
|
|
* @param ex The exception to log
|
|
*/
|
|
protected static void writeUnsupportedTypeMessage
|
|
(final UnsupportedVariantTypeException ex)
|
|
{
|
|
if (isLogUnsupportedTypes())
|
|
{
|
|
if (unsupportedMessage == null)
|
|
unsupportedMessage = new LinkedList();
|
|
Long vt = new Long(ex.getVariantType());
|
|
if (!unsupportedMessage.contains(vt))
|
|
{
|
|
System.err.println(ex.getMessage());
|
|
unsupportedMessage.add(vt);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* <p>HPSF is able to read these {@link Variant} types.</p>
|
|
*/
|
|
final static public int[] SUPPORTED_TYPES = { Variant.VT_EMPTY,
|
|
Variant.VT_I2, Variant.VT_I4, Variant.VT_I8, Variant.VT_R8,
|
|
Variant.VT_FILETIME, Variant.VT_LPSTR, Variant.VT_LPWSTR,
|
|
Variant.VT_CF, Variant.VT_BOOL };
|
|
|
|
|
|
|
|
/**
|
|
* <p>Checks whether HPSF supports the specified variant type. Unsupported
|
|
* types should be implemented included in the {@link #SUPPORTED_TYPES}
|
|
* array.</p>
|
|
*
|
|
* @see Variant
|
|
* @param variantType the variant type to check
|
|
* @return <code>true</code> if HPFS supports this type, else
|
|
* <code>false</code>
|
|
*/
|
|
public boolean isSupportedType(final int variantType)
|
|
{
|
|
for (int i = 0; i < SUPPORTED_TYPES.length; i++)
|
|
if (variantType == SUPPORTED_TYPES[i])
|
|
return true;
|
|
return false;
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* <p>Reads a variant type from a byte array.</p>
|
|
*
|
|
* @param src The byte array
|
|
* @param offset The offset in the byte array where the variant starts
|
|
* @param length The length of the variant including the variant type field
|
|
* @param type The variant type to read
|
|
* @param codepage The codepage to use for non-wide strings
|
|
* @return A Java object that corresponds best to the variant field. For
|
|
* example, a VT_I4 is returned as a {@link Long}, a VT_LPSTR as a
|
|
* {@link String}.
|
|
* @exception ReadingNotSupportedException if a property is to be written
|
|
* who's variant type HPSF does not yet support
|
|
* @exception UnsupportedEncodingException if the specified codepage is not
|
|
* supported.
|
|
* @see Variant
|
|
*/
|
|
public static Object read(final byte[] src, final int offset,
|
|
final int length, final long type,
|
|
final int codepage)
|
|
throws ReadingNotSupportedException, UnsupportedEncodingException
|
|
{
|
|
Object value;
|
|
int o1 = offset;
|
|
int l1 = length - LittleEndian.INT_SIZE;
|
|
long lType = type;
|
|
|
|
/* Instead of trying to read 8-bit characters from a Unicode string,
|
|
* read 16-bit characters. */
|
|
if (codepage == Constants.CP_UNICODE && type == Variant.VT_LPSTR)
|
|
lType = Variant.VT_LPWSTR;
|
|
|
|
switch ((int) lType)
|
|
{
|
|
case Variant.VT_EMPTY:
|
|
{
|
|
value = null;
|
|
break;
|
|
}
|
|
case Variant.VT_I2:
|
|
{
|
|
/*
|
|
* Read a short. In Java it is represented as an
|
|
* Integer object.
|
|
*/
|
|
value = new Integer(LittleEndian.getShort(src, o1));
|
|
break;
|
|
}
|
|
case Variant.VT_I4:
|
|
{
|
|
/*
|
|
* Read a word. In Java it is represented as an
|
|
* Integer object.
|
|
*/
|
|
value = new Integer(LittleEndian.getInt(src, o1));
|
|
break;
|
|
}
|
|
case Variant.VT_I8:
|
|
{
|
|
/*
|
|
* Read a double word. In Java it is represented as a
|
|
* Long object.
|
|
*/
|
|
value = new Long(LittleEndian.getLong(src, o1));
|
|
break;
|
|
}
|
|
case Variant.VT_R8:
|
|
{
|
|
/*
|
|
* Read an eight-byte double value. In Java it is represented as
|
|
* a Double object.
|
|
*/
|
|
value = new Double(LittleEndian.getDouble(src, o1));
|
|
break;
|
|
}
|
|
case Variant.VT_FILETIME:
|
|
{
|
|
/*
|
|
* Read a FILETIME object. In Java it is represented
|
|
* as a Date object.
|
|
*/
|
|
final long low = LittleEndian.getUInt(src, o1);
|
|
o1 += LittleEndian.INT_SIZE;
|
|
final long high = LittleEndian.getUInt(src, o1);
|
|
value = Util.filetimeToDate((int) high, (int) low);
|
|
break;
|
|
}
|
|
case Variant.VT_LPSTR:
|
|
{
|
|
/*
|
|
* Read a byte string. In Java it is represented as a
|
|
* String object. The 0x00 bytes at the end must be
|
|
* stripped.
|
|
*/
|
|
final int first = o1 + LittleEndian.INT_SIZE;
|
|
long last = first + LittleEndian.getUInt(src, o1) - 1;
|
|
o1 += LittleEndian.INT_SIZE;
|
|
while (src[(int) last] == 0 && first <= last)
|
|
last--;
|
|
final int l = (int) (last - first + 1);
|
|
value = codepage != -1 ?
|
|
new String(src, first, l,
|
|
codepageToEncoding(codepage)) :
|
|
new String(src, first, l);
|
|
break;
|
|
}
|
|
case Variant.VT_LPWSTR:
|
|
{
|
|
/*
|
|
* Read a Unicode string. In Java it is represented as
|
|
* a String object. The 0x00 bytes at the end must be
|
|
* stripped.
|
|
*/
|
|
final int first = o1 + LittleEndian.INT_SIZE;
|
|
long last = first + LittleEndian.getUInt(src, o1) - 1;
|
|
long l = last - first;
|
|
o1 += LittleEndian.INT_SIZE;
|
|
StringBuffer b = new StringBuffer((int) (last - first));
|
|
for (int i = 0; i <= l; i++)
|
|
{
|
|
final int i1 = o1 + (i * 2);
|
|
final int i2 = i1 + 1;
|
|
final int high = src[i2] << 8;
|
|
final int low = src[i1] & 0x00ff;
|
|
final char c = (char) (high | low);
|
|
b.append(c);
|
|
}
|
|
/* Strip 0x00 characters from the end of the string: */
|
|
while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
|
|
b.setLength(b.length() - 1);
|
|
value = b.toString();
|
|
break;
|
|
}
|
|
case Variant.VT_CF:
|
|
{
|
|
if(l1 < 0) {
|
|
/**
|
|
* YK: reading the ClipboardData packet (VT_CF) is not quite correct.
|
|
* The size of the data is determined by the first four bytes of the packet
|
|
* while the current implementation calculates it in the Section constructor.
|
|
* Test files in Bugzilla 42726 and 45583 clearly show that this approach does not always work.
|
|
* The workaround below attempts to gracefully handle such cases instead of throwing exceptions.
|
|
*
|
|
* August 20, 2009
|
|
*/
|
|
l1 = LittleEndian.getInt(src, o1); o1 += LittleEndian.INT_SIZE;
|
|
}
|
|
final byte[] v = new byte[l1];
|
|
System.arraycopy(src, o1, v, 0, v.length);
|
|
value = v;
|
|
break;
|
|
}
|
|
case Variant.VT_BOOL:
|
|
{
|
|
/*
|
|
* The first four bytes in src, from src[offset] to
|
|
* src[offset + 3] contain the DWord for VT_BOOL, so
|
|
* skip it, we don't need it.
|
|
*/
|
|
// final int first = offset + LittleEndian.INT_SIZE;
|
|
long bool = LittleEndian.getUInt(src, o1);
|
|
if (bool != 0)
|
|
value = Boolean.TRUE;
|
|
else
|
|
value = Boolean.FALSE;
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
final byte[] v = new byte[l1];
|
|
for (int i = 0; i < l1; i++)
|
|
v[i] = src[(o1 + i)];
|
|
throw new ReadingNotSupportedException(type, v);
|
|
}
|
|
}
|
|
return value;
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
* <p>Turns a codepage number into the equivalent character encoding's
|
|
* name.</p>
|
|
*
|
|
* @param codepage The codepage number
|
|
*
|
|
* @return The character encoding's name. If the codepage number is 65001,
|
|
* the encoding name is "UTF-8". All other positive numbers are mapped to
|
|
* "cp" followed by the number, e.g. if the codepage number is 1252 the
|
|
* returned character encoding name will be "cp1252".
|
|
*
|
|
* @exception UnsupportedEncodingException if the specified codepage is
|
|
* less than zero.
|
|
*/
|
|
public static String codepageToEncoding(final int codepage)
|
|
throws UnsupportedEncodingException
|
|
{
|
|
if (codepage <= 0)
|
|
throw new UnsupportedEncodingException
|
|
("Codepage number may not be " + codepage);
|
|
switch (codepage)
|
|
{
|
|
case Constants.CP_UTF16:
|
|
return "UTF-16";
|
|
case Constants.CP_UTF16_BE:
|
|
return "UTF-16BE";
|
|
case Constants.CP_UTF8:
|
|
return "UTF-8";
|
|
case Constants.CP_037:
|
|
return "cp037";
|
|
case Constants.CP_GBK:
|
|
return "GBK";
|
|
case Constants.CP_MS949:
|
|
return "ms949";
|
|
case Constants.CP_WINDOWS_1250:
|
|
return "windows-1250";
|
|
case Constants.CP_WINDOWS_1251:
|
|
return "windows-1251";
|
|
case Constants.CP_WINDOWS_1252:
|
|
return "windows-1252";
|
|
case Constants.CP_WINDOWS_1253:
|
|
return "windows-1253";
|
|
case Constants.CP_WINDOWS_1254:
|
|
return "windows-1254";
|
|
case Constants.CP_WINDOWS_1255:
|
|
return "windows-1255";
|
|
case Constants.CP_WINDOWS_1256:
|
|
return "windows-1256";
|
|
case Constants.CP_WINDOWS_1257:
|
|
return "windows-1257";
|
|
case Constants.CP_WINDOWS_1258:
|
|
return "windows-1258";
|
|
case Constants.CP_JOHAB:
|
|
return "johab";
|
|
case Constants.CP_MAC_ROMAN:
|
|
return "MacRoman";
|
|
case Constants.CP_MAC_JAPAN:
|
|
return "SJIS";
|
|
case Constants.CP_MAC_CHINESE_TRADITIONAL:
|
|
return "Big5";
|
|
case Constants.CP_MAC_KOREAN:
|
|
return "EUC-KR";
|
|
case Constants.CP_MAC_ARABIC:
|
|
return "MacArabic";
|
|
case Constants.CP_MAC_HEBREW:
|
|
return "MacHebrew";
|
|
case Constants.CP_MAC_GREEK:
|
|
return "MacGreek";
|
|
case Constants.CP_MAC_CYRILLIC:
|
|
return "MacCyrillic";
|
|
case Constants.CP_MAC_CHINESE_SIMPLE:
|
|
return "EUC_CN";
|
|
case Constants.CP_MAC_ROMANIA:
|
|
return "MacRomania";
|
|
case Constants.CP_MAC_UKRAINE:
|
|
return "MacUkraine";
|
|
case Constants.CP_MAC_THAI:
|
|
return "MacThai";
|
|
case Constants.CP_MAC_CENTRAL_EUROPE:
|
|
return "MacCentralEurope";
|
|
case Constants.CP_MAC_ICELAND:
|
|
return "MacIceland";
|
|
case Constants.CP_MAC_TURKISH:
|
|
return "MacTurkish";
|
|
case Constants.CP_MAC_CROATIAN:
|
|
return "MacCroatian";
|
|
case Constants.CP_US_ACSII:
|
|
case Constants.CP_US_ASCII2:
|
|
return "US-ASCII";
|
|
case Constants.CP_KOI8_R:
|
|
return "KOI8-R";
|
|
case Constants.CP_ISO_8859_1:
|
|
return "ISO-8859-1";
|
|
case Constants.CP_ISO_8859_2:
|
|
return "ISO-8859-2";
|
|
case Constants.CP_ISO_8859_3:
|
|
return "ISO-8859-3";
|
|
case Constants.CP_ISO_8859_4:
|
|
return "ISO-8859-4";
|
|
case Constants.CP_ISO_8859_5:
|
|
return "ISO-8859-5";
|
|
case Constants.CP_ISO_8859_6:
|
|
return "ISO-8859-6";
|
|
case Constants.CP_ISO_8859_7:
|
|
return "ISO-8859-7";
|
|
case Constants.CP_ISO_8859_8:
|
|
return "ISO-8859-8";
|
|
case Constants.CP_ISO_8859_9:
|
|
return "ISO-8859-9";
|
|
case Constants.CP_ISO_2022_JP1:
|
|
case Constants.CP_ISO_2022_JP2:
|
|
case Constants.CP_ISO_2022_JP3:
|
|
return "ISO-2022-JP";
|
|
case Constants.CP_ISO_2022_KR:
|
|
return "ISO-2022-KR";
|
|
case Constants.CP_EUC_JP:
|
|
return "EUC-JP";
|
|
case Constants.CP_EUC_KR:
|
|
return "EUC-KR";
|
|
case Constants.CP_GB2312:
|
|
return "GB2312";
|
|
case Constants.CP_GB18030:
|
|
return "GB18030";
|
|
case Constants.CP_SJIS:
|
|
return "SJIS";
|
|
default:
|
|
return "cp" + codepage;
|
|
}
|
|
}
|
|
|
|
|
|
/**
|
|
* <p>Writes a variant value to an output stream. This method ensures that
|
|
* always a multiple of 4 bytes is written.</p>
|
|
*
|
|
* <p>If the codepage is UTF-16, which is encouraged, strings
|
|
* <strong>must</strong> always be written as {@link Variant#VT_LPWSTR}
|
|
* strings, not as {@link Variant#VT_LPSTR} strings. This method ensure this
|
|
* by converting strings appropriately, if needed.</p>
|
|
*
|
|
* @param out The stream to write the value to.
|
|
* @param type The variant's type.
|
|
* @param value The variant's value.
|
|
* @param codepage The codepage to use to write non-wide strings
|
|
* @return The number of entities that have been written. In many cases an
|
|
* "entity" is a byte but this is not always the case.
|
|
* @exception IOException if an I/O exceptions occurs
|
|
* @exception WritingNotSupportedException if a property is to be written
|
|
* who's variant type HPSF does not yet support
|
|
*/
|
|
public static int write(final OutputStream out, final long type,
|
|
final Object value, final int codepage)
|
|
throws IOException, WritingNotSupportedException
|
|
{
|
|
int length = 0;
|
|
switch ((int) type)
|
|
{
|
|
case Variant.VT_BOOL:
|
|
{
|
|
int trueOrFalse;
|
|
if (((Boolean) value).booleanValue())
|
|
trueOrFalse = 1;
|
|
else
|
|
trueOrFalse = 0;
|
|
length = TypeWriter.writeUIntToStream(out, trueOrFalse);
|
|
break;
|
|
}
|
|
case Variant.VT_LPSTR:
|
|
{
|
|
final byte[] bytes =
|
|
(codepage == -1 ?
|
|
((String) value).getBytes() :
|
|
((String) value).getBytes(codepageToEncoding(codepage)));
|
|
length = TypeWriter.writeUIntToStream(out, bytes.length + 1);
|
|
final byte[] b = new byte[bytes.length + 1];
|
|
System.arraycopy(bytes, 0, b, 0, bytes.length);
|
|
b[b.length - 1] = 0x00;
|
|
out.write(b);
|
|
length += b.length;
|
|
break;
|
|
}
|
|
case Variant.VT_LPWSTR:
|
|
{
|
|
final int nrOfChars = ((String) value).length() + 1;
|
|
length += TypeWriter.writeUIntToStream(out, nrOfChars);
|
|
char[] s = Util.pad4((String) value);
|
|
for (int i = 0; i < s.length; i++)
|
|
{
|
|
final int high = ((s[i] & 0x0000ff00) >> 8);
|
|
final int low = (s[i] & 0x000000ff);
|
|
final byte highb = (byte) high;
|
|
final byte lowb = (byte) low;
|
|
out.write(lowb);
|
|
out.write(highb);
|
|
length += 2;
|
|
}
|
|
out.write(0x00);
|
|
out.write(0x00);
|
|
length += 2;
|
|
break;
|
|
}
|
|
case Variant.VT_CF:
|
|
{
|
|
final byte[] b = (byte[]) value;
|
|
out.write(b);
|
|
length = b.length;
|
|
break;
|
|
}
|
|
case Variant.VT_EMPTY:
|
|
{
|
|
TypeWriter.writeUIntToStream(out, Variant.VT_EMPTY);
|
|
length = LittleEndianConsts.INT_SIZE;
|
|
break;
|
|
}
|
|
case Variant.VT_I2:
|
|
{
|
|
TypeWriter.writeToStream(out, ((Integer) value).shortValue());
|
|
length = LittleEndianConsts.SHORT_SIZE;
|
|
break;
|
|
}
|
|
case Variant.VT_I4:
|
|
{
|
|
if (!(value instanceof Integer))
|
|
{
|
|
throw new ClassCastException("Could not cast an object to "
|
|
+ Integer.class.toString() + ": "
|
|
+ value.getClass().toString() + ", "
|
|
+ value.toString());
|
|
}
|
|
length += TypeWriter.writeToStream(out,
|
|
((Integer) value).intValue());
|
|
break;
|
|
}
|
|
case Variant.VT_I8:
|
|
{
|
|
TypeWriter.writeToStream(out, ((Long) value).longValue());
|
|
length = LittleEndianConsts.LONG_SIZE;
|
|
break;
|
|
}
|
|
case Variant.VT_R8:
|
|
{
|
|
length += TypeWriter.writeToStream(out,
|
|
((Double) value).doubleValue());
|
|
break;
|
|
}
|
|
case Variant.VT_FILETIME:
|
|
{
|
|
long filetime = Util.dateToFileTime((Date) value);
|
|
int high = (int) ((filetime >> 32) & 0x00000000FFFFFFFFL);
|
|
int low = (int) (filetime & 0x00000000FFFFFFFFL);
|
|
length += TypeWriter.writeUIntToStream
|
|
(out, 0x0000000FFFFFFFFL & low);
|
|
length += TypeWriter.writeUIntToStream
|
|
(out, 0x0000000FFFFFFFFL & high);
|
|
break;
|
|
}
|
|
default:
|
|
{
|
|
/* The variant type is not supported yet. However, if the value
|
|
* is a byte array we can write it nevertheless. */
|
|
if (value instanceof byte[])
|
|
{
|
|
final byte[] b = (byte[]) value;
|
|
out.write(b);
|
|
length = b.length;
|
|
writeUnsupportedTypeMessage
|
|
(new WritingNotSupportedException(type, value));
|
|
}
|
|
else
|
|
throw new WritingNotSupportedException(type, value);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return length;
|
|
}
|
|
|
|
}
|