diff --git a/src/documentation/xdocs/hpsf/internals.xml b/src/documentation/xdocs/hpsf/internals.xml index 25d374511..3e0400394 100644 --- a/src/documentation/xdocs/hpsf/internals.xml +++ b/src/documentation/xdocs/hpsf/internals.xml @@ -690,6 +690,19 @@
Property ID string
VT type
0
Dictionary
PID_DICTIONARY
[Special format]
1
Code page
PID_CODEPAGE
VT_I2
2
Category
What is a FILETIME
? The answer can be found for example under
- http://www.vbapi.com/ref/f/filetime.html
+
What is a FILETIME
? The answer can be found for example
+ under http://www.vbapi.com/ref/f/filetime.html
or
- http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html. In
- short:
- The FILETIME structure holds a date and time associated with a file.
- The structure identifies a 64-bit integer specifying the number of
- 100-nanosecond intervals which have passed since January 1, 1601. This
- 64-bit value is split into the two dwords stored in the
- structure.
This documentation originates from the HPSF description available at http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html.
+Information about the code page property in the + DocumentSummaryInformation stream is available at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp.
+This documentation originates from the HPSF description available at http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html.
+ *
Convenience class representing a DocumentSummary Information stream in a + * Microsoft Office document.
* - * Convenience class representing a DocumentSummary Information stream in a - * Microsoft Office document. - * - *@author Rainer Klute (klute@rainer-klute.de) - *@author Drew Varner (Drew.Varner closeTo sc.edu) - *@created May 10, 2002 - *@see SummaryInformation - *@version $Id: DocumentSummaryInformation.java,v 1.6 2002/05/03 07:29:09 - * klute Exp $ - *@since 2002-02-09 + * @author Rainer Klute (klute@rainer-klute.de) + * @author Drew Varner (Drew.Varner closeTo sc.edu) + * @see SummaryInformation + * @version $Id$ + * @since 2002-02-09 */ -public class DocumentSummaryInformation extends SpecialPropertySet { +public class DocumentSummaryInformation extends SpecialPropertySet +{ /** - *+ *
Creates a {@link DocumentSummaryInformation} from a given + * {@link PropertySet}.
* - * Creates a {@link DocumentSummaryInformation} from a given {@link - * PropertySet}. - * - *@param ps A property set which - * should be created from a document summary information stream. - *@exception UnexpectedPropertySetTypeException Description of the - * Exception - *@throws UnexpectedPropertySetTypeException if ps does not - * contain a document summary information stream. + * @param ps A property set which should be created from a + * document summary information stream. + * @throws UnexpectedPropertySetTypeException if ps + * does not contain a document summary information stream. */ public DocumentSummaryInformation(final PropertySet ps) - throws UnexpectedPropertySetTypeException { + throws UnexpectedPropertySetTypeException + { super(ps); - if (!isDocumentSummaryInformation()) { + if (!isDocumentSummaryInformation()) throw new UnexpectedPropertySetTypeException - ("Not a " + getClass().getName()); - } + ("Not a " + getClass().getName()); } /** - *+ *
Returns the stream's category (or null
).
null
).
- *
- *@return The category value
+ * @return The category value
*/
- public String getCategory() {
+ public String getCategory()
+ {
return (String) getProperty(PropertyIDMap.PID_CATEGORY);
}
/**
- * + *
Returns the stream's presentation format (or
+ * null
).
null
).
- *
- *@return The presentationFormat value
+ * @return The presentationFormat value
*/
- public String getPresentationFormat() {
+ public String getPresentationFormat()
+ {
return (String) getProperty(PropertyIDMap.PID_PRESFORMAT);
}
/**
- * + *
Returns the stream's byte count or 0 if the {@link + * DocumentSummaryInformation} does not contain a byte count.
* - * Returns the stream's byte count or 0 if the {@link - * DocumentSummaryInformation} does not contain a byte count. - * - *@return The byteCount value + * @return The byteCount value */ - public int getByteCount() { + public int getByteCount() + { return getPropertyIntValue(PropertyIDMap.PID_BYTECOUNT); } /** - *+ *
Returns the stream's line count or 0 if the {@link + * DocumentSummaryInformation} does not contain a line count.
* - * Returns the stream's line count or 0 if the {@link - * DocumentSummaryInformation} does not contain a line count. - * - *@return The lineCount value + * @return The lineCount value */ - public int getLineCount() { + public int getLineCount() + { return getPropertyIntValue(PropertyIDMap.PID_LINECOUNT); } /** - *+ *
Returns the stream's par count or 0 if the {@link + * DocumentSummaryInformation} does not contain a par count.
* - * Returns the stream's par count or 0 if the {@link - * DocumentSummaryInformation} does not contain a par count. - * - *@return The parCount value + * @return The parCount value */ - public int getParCount() { + public int getParCount() + { return getPropertyIntValue(PropertyIDMap.PID_PARCOUNT); } /** - *+ *
Returns the stream's slide count or 0 if the {@link + * DocumentSummaryInformation} does not contain a slide count.
* - * Returns the stream's slide count or 0 if the {@link - * DocumentSummaryInformation} does not contain a slide count. - * - *@return The slideCount value + * @return The slideCount value */ - public int getSlideCount() { + public int getSlideCount() + { return getPropertyIntValue(PropertyIDMap.PID_SLIDECOUNT); } /** - *+ *
Returns the stream's note count or 0 if the {@link + * DocumentSummaryInformation} does not contain a note count.
* - * Returns the stream's note count or 0 if the {@link - * DocumentSummaryInformation} does not contain a note count. - * - *@return The noteCount value + * @return The noteCount value */ - public int getNoteCount() { + public int getNoteCount() + { return getPropertyIntValue(PropertyIDMap.PID_NOTECOUNT); } /** - *+ *
Returns the stream's hidden count or 0 if the {@link + * DocumentSummaryInformation} does not contain a hidden + * count.
* - * Returns the stream's hidden count or 0 if the {@link - * DocumentSummaryInformation} does not contain a hidden count. - * - *@return The hiddenCount value + * @return The hiddenCount value */ - public int getHiddenCount() { + public int getHiddenCount() + { return getPropertyIntValue(PropertyIDMap.PID_HIDDENCOUNT); } /** - *+ *
Returns the stream's mmclip count or 0 if the {@link + * DocumentSummaryInformation} does not contain a mmclip + * count.
* - * Returns the stream's mmclip count or 0 if the {@link - * DocumentSummaryInformation} does not contain a mmclip count. - * - *@return The mMClipCount value + * @return The mMClipCount value */ - public int getMMClipCount() { + public int getMMClipCount() + { return getPropertyIntValue(PropertyIDMap.PID_MMCLIPCOUNT); } /** - *+ *
Returns true
when scaling of the thumbnail is
+ * desired, false
if cropping is desired.
true
when scaling of the thumbnail is desired,
- * false
if cropping is desired.
- *
- *@return The scale value
+ * @return The scale value
*/
- public boolean getScale() {
+ public boolean getScale()
+ {
return getPropertyBooleanValue(PropertyIDMap.PID_SCALE);
}
/**
- * + *
Returns the stream's heading pair (or null
)
+ * when this method is implemented. Please note that the
+ * return type is likely to change!
*
- * Returns the stream's heading pair (or null
) when
- * this method is implemented. Please note that the return type is likely
- * to change!
- *
- *@return The headingPair value
+ * @return The headingPair value
*/
- public byte[] getHeadingPair() {
- if (true) {
+ public byte[] getHeadingPair()
+ {
+ if (true)
throw new UnsupportedOperationException("FIXME");
- }
return (byte[]) getProperty(PropertyIDMap.PID_HEADINGPAIR);
}
/**
- *
+ *
Returns the stream's doc parts (or null
)
+ * when this method is implemented. Please note that the
+ * return type is likely to change!
*
- * Returns the stream's doc parts (or null
) when this
- * method is implemented. Please note that the return type is likely to
- * change!
- *
- *@return The docparts value
+ * @return The docparts value
*/
- public byte[] getDocparts() {
- if (true) {
+ public byte[] getDocparts()
+ {
+ if (true)
throw new UnsupportedOperationException("FIXME");
- }
return (byte[]) getProperty(PropertyIDMap.PID_DOCPARTS);
}
/**
- *
+ *
Returns the stream's manager (or null
).
null
).
- *
- *@return The manager value
+ * @return The manager value
*/
- public String getManager() {
+ public String getManager()
+ {
return (String) getProperty(PropertyIDMap.PID_MANAGER);
}
/**
- * + *
Returns the stream's company (or null
).
null
).
- *
- *@return The company value
+ * @return The company value
*/
- public String getCompany() {
+ public String getCompany()
+ {
return (String) getProperty(PropertyIDMap.PID_COMPANY);
}
/**
- * + *
Returns true
if the custom links are hampered
+ * by excessive noise, for all applications.
*
- * Returns true
if the custom links are hampered by excessive
- * noise, for all applications.
+ * FIXME: Explain this some more! I (Rainer) + * don't understand it.
* - * FIXME: Explain this some more! I (Rainer) don't - * understand it. - * - *@return The linksDirty value + * @return The linksDirty value */ - public boolean getLinksDirty() { + public boolean getLinksDirty() + { return getPropertyBooleanValue(PropertyIDMap.PID_LINKSDIRTY); } diff --git a/src/java/org/apache/poi/hpsf/Property.java b/src/java/org/apache/poi/hpsf/Property.java index 13c6917c6..9ea91ae7b 100644 --- a/src/java/org/apache/poi/hpsf/Property.java +++ b/src/java/org/apache/poi/hpsf/Property.java @@ -81,10 +81,9 @@ import org.apache.poi.util.LittleEndian; * value, {@link Variant#VT_FILETIME} some date and time (of a * file). * - *FIXME: Reading of other types than {@link - * Variant#VT_I4}, {@link Variant#VT_FILETIME}, {@link - * Variant#VT_LPSTR}, {@link Variant#VT_CF}, {@link Variant#VT_BOOL}, - * and reading the dictionary property is not yet implemented.
+ *FIXME: Reading is not implemented for all + * {@link Variant} types yet. Feel free to submit error reports or + * patches for the types you need.
* * @author Rainer Klute (klute@rainer-klute.de) * @author Drew Varner (Drew.Varner InAndAround sc.edu) @@ -96,6 +95,9 @@ import org.apache.poi.util.LittleEndian; public class Property { + /* Codepage 1200 denotes Unicode. */ + private static int CP_UNICODE = 1200; + private int id; @@ -150,121 +152,37 @@ public class Property * @param offset The property's type/value pair's offset in the * section. * @param length The property's type/value pair's length in bytes. + * @param codepage The section's and thus the property's + * codepage. It is needed only when reading string values. */ public Property(final int id, final byte[] src, final long offset, - int length) + int length, int codepage) { this.id = id; /* - * ID 0 is a special case since it specifies a dictionary of - * property IDs and property names. + * ID 0 is a special case since it specifies a dictionary of + * property IDs and property names. */ if (id == 0) { - value = readDictionary(src, offset, length); + value = readDictionary(src, offset, length, codepage); return; } - /* - * FIXME: Support this! - */ -// /* ID 1 is another special case: It denotes the code page of -// * byte strings in this section. */ -// if (id == 1) -// { -// value = readCodepage(src, offset); -// return; -// } - int o = (int) offset; type = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - /* - * FIXME: Support reading more types! - */ - switch ((int)type) { - case Variant.VT_I4: - { - /* - * Read a word. In Java it is represented as an - * Integer object. - */ - value = new Long(LittleEndian.getUInt(src, o)); - break; - } - case Variant.VT_FILETIME: - { - /* - * Read a FILETIME object. In Java it is represented - * as a Date. - */ - final long low = LittleEndian.getUInt(src, o); - o += LittleEndian.INT_SIZE; - final long high = LittleEndian.getUInt(src, o); - value = Util.filetimeToDate((int)high, (int)low); - break; - } - case Variant.VT_LPSTR: - { - /* - * Read a byte string. 
In Java it is represented as a - * String. The null bytes at the end of the byte - * strings must be stripped. - */ - final int first = o + LittleEndian.INT_SIZE; - long last = first + LittleEndian.getUInt(src, o) - 1; - o += LittleEndian.INT_SIZE; - while (src[(int)last] == 0 && first <= last) { - last--; - } - value = new String(src, (int)first, (int)(last - first + 1)); - break; - } - case Variant.VT_CF: - { - /* - * The first four bytes in src, from rc[offset] to - * src[offset + 3] contain the DWord for VT_CF, so - * skip it, we don't need it. - */ - /* - * Truncate the length of the return array by a DWord - * length (4 bytes). - */ - length = length - LittleEndian.INT_SIZE; - - final byte[] v = new byte[length]; - for (int i = 0; i < length; i++) - v[i] = src[(int)(o + i)]; - value = v; - break; - } - case Variant.VT_BOOL: - { - /* - * The first four bytes in src, from src[offset] to - * src[offset + 3] contain the DWord for VT_BOOL, so - * skip it, we don't need it. - */ - final int first = o + LittleEndian.INT_SIZE; - long bool = LittleEndian.getUInt(src, o); - if (bool != 0) - value = new Boolean(true); - else - value = new Boolean(false); - break; - } - default: - { - final byte[] v = new byte[length]; - for (int i = 0; i < length; i++) - v[i] = src[(int)(offset + i)]; - value = v; - break; - } - } + try + { + value = TypeReader.read(src, o, length, (int) type); + } + catch (Throwable t) + { + t.printStackTrace(); + value = "*** null ***"; + } } @@ -277,64 +195,67 @@ public class Property * @param offset At this offset within src the * dictionary starts. * @param length The dictionary contains at most this many bytes. + * @param codepage The codepage of the string values. * @return The dictonary */ protected Map readDictionary(final byte[] src, final long offset, - final int length) + final int length, final int codepage) { - /* - * FIXME: Check the length! - */ - int o = (int)offset; + /* Check whether "offset" points into the "src" array". 
*/ + if (offset < 0 || offset > src.length) + throw new HPSFRuntimeException + ("Illegal offset " + offset + " while HPSF stream contains " + + length + " bytes."); + int o = (int) offset; /* - * Read the number of dictionary entries. + * Read the number of dictionary entries. */ final long nrEntries = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - final Map m = new HashMap((int)nrEntries, (float) 1.0); + final Map m = new HashMap((int) nrEntries, (float) 1.0); for (int i = 0; i < nrEntries; i++) { - /* - * The key - */ + /* The key. */ final Long id = new Long(LittleEndian.getUInt(src, o)); o += LittleEndian.INT_SIZE; - /* - * The value (a string) - */ - final long sLength = LittleEndian.getUInt(src, o); + /* The value (a string). The length is the either the + * number of characters if the character set is Unicode or + * else the number of bytes. The length includes + * terminating 0x00 bytes which we have to strip off to + * create a Java string. */ + long sLength = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - /* - * Strip trailing 0x00 bytes. - */ - long l = sLength; - while (src[(int)(o + l - 1)] == 0x00) - l--; - final String s = new String(src, o, (int)l); - o += sLength; - m.put(id, s); + /* Read the bytes or characters depending on whether the + * character set is Unicode or not. */ + StringBuffer b = new StringBuffer((int) sLength); + for (int j = 0; j < sLength; j++) + if (codepage == CP_UNICODE) + { + final int i1 = o + (j * 2); + final int i2 = i1 + 1; + b.append((char) ((src[i2] << 8) + src[i1])); + } + else + b.append((char) src[o + j]); + + /* Strip 0x00 characters from the end of the string: */ + while (b.charAt(b.length() - 1) == 0x00) + b.setLength(b.length() - 1); + if (codepage == CP_UNICODE) + { + if (sLength % 2 == 1) + sLength++; + o += (sLength + sLength); + } + else + o += sLength; + m.put(id, b.toString()); } return m; } - - - /** - *Reads a code page.
- * - * @param src The byte array containing the bytes making out the - * code page. - * @param offset At this offset within src the code - * page starts. - * @return The code page. - */ - protected int readCodePage(final byte[] src, final long offset) - { - throw new UnsupportedOperationException("FIXME"); - } - } diff --git a/src/java/org/apache/poi/hpsf/Section.java b/src/java/org/apache/poi/hpsf/Section.java index a640fd9bd..c859d5cf9 100644 --- a/src/java/org/apache/poi/hpsf/Section.java +++ b/src/java/org/apache/poi/hpsf/Section.java @@ -161,66 +161,117 @@ public class Section public Section(final byte[] src, int offset) { /* - * Read the format ID. + * Read the format ID. */ formatID = new ClassID(src, offset); offset += ClassID.LENGTH; /* - * Read the offset from the stream's start and positions to - * the section header. + * Read the offset from the stream's start and positions to + * the section header. */ this.offset = LittleEndian.getUInt(src, offset); offset = (int)this.offset; /* - * Read the section length. + * Read the section length. */ size = (int)LittleEndian.getUInt(src, offset); offset += LittleEndian.INT_SIZE; /* - * Read the number of properties. + * Read the number of properties. */ propertyCount = (int)LittleEndian.getUInt(src, offset); offset += LittleEndian.INT_SIZE; /* - * Read the properties. The offset is positioned at the first - * entry of the property list. + * Read the properties. The offset is positioned at the first + * entry of the property list. The problem is that we have to + * read the property with ID 1 before we read other + * properties, at least before other properties containing + * strings. The reason is that property 1 specifies the + * codepage. If it is 1200, all strings are in Unicode. In + * other words: Before we can read any strings we have to know + * whether they are in Unicode or not. Unfortunately property + * 1 is not guaranteed to be the first in a section. 
+ * + * The algorithm below reads the properties in two passes: The + * first one looks for property ID 1 and extracts the codepage + * number. The seconds pass reads the other properties. */ properties = new Property[propertyCount]; - for (int i = 0; i < properties.length; i++) { - final int id = (int)LittleEndian.getUInt(src, offset); - offset += LittleEndian.INT_SIZE; + Property propertyOne; - /* - * Offset from the section. - */ - final int sOffset = (int)LittleEndian.getUInt(src, offset); - offset += LittleEndian.INT_SIZE; + /* Pass 1: Look for the codepage. */ + int codepage = -1; + int pass1Offset = offset; + for (int i = 0; i < properties.length; i++) + { + /* Read the property ID. */ + final int id = (int) LittleEndian.getUInt(src, pass1Offset); + pass1Offset += LittleEndian.INT_SIZE; - /* - * Calculate the length of the property. - */ + /* Offset from the section's start. */ + final int sOffset = (int) LittleEndian.getUInt(src, pass1Offset); + pass1Offset += LittleEndian.INT_SIZE; + + /* Calculate the length of the property. */ int length; - if (i == properties.length - 1) { - length = (int)(src.length - this.offset - sOffset); - } else { + if (i == properties.length - 1) + length = (int) (src.length - this.offset - sOffset); + else + length = (int) + LittleEndian.getUInt(src, pass1Offset + + LittleEndian.INT_SIZE) - sOffset; + + if (id == PropertyIDMap.PID_CODEPAGE) + { + /* Read the codepage if the property ID is 1. */ + + /* Read the property's value type. It must be + * VT_I2. */ + int o = (int) (this.offset + sOffset); + final long type = LittleEndian.getUInt(src, o); + o += LittleEndian.INT_SIZE; + + if (type != Variant.VT_I2) + throw new HPSFRuntimeException + ("Value type of property ID 1 is not VT_I2 but " + + type + "."); + + /* Read the codepage number. */ + codepage = LittleEndian.getUShort(src, o); + } + } + + /* Pass 2: Read all properties, including 1. */ + for (int i = 0; i < properties.length; i++) + { + /* Read the property ID. 
*/ + final int id = (int) LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + + /* Offset from the section. */ + final int sOffset = (int) LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + + /* Calculate the length of the property. */ + int length; + if (i == properties.length - 1) + length = (int) (src.length - this.offset - sOffset); + else length = (int) LittleEndian.getUInt(src, offset + LittleEndian.INT_SIZE) - sOffset; - } - /* - * Create it. - */ - properties[i] = - new Property(id, src, this.offset + sOffset, length); + /* Create it. */ + properties[i] = new Property(id, src, this.offset + sOffset, + length, codepage); } /* - * Extract the dictionary (if available). + * Extract the dictionary (if available). */ dictionary = (Map) getProperty(0); } @@ -237,7 +288,7 @@ public class Section * * @return The property's value */ - protected Object getProperty(final int id) + public Object getProperty(final int id) { wasNull = false; for (int i = 0; i < properties.length; i++) diff --git a/src/java/org/apache/poi/hpsf/TypeReader.java b/src/java/org/apache/poi/hpsf/TypeReader.java new file mode 100644 index 000000000..c9f8aaefd --- /dev/null +++ b/src/java/org/apache/poi/hpsf/TypeReader.java @@ -0,0 +1,208 @@ +/* + * ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + *Reader for specific data types.
+ * + * @author Rainer Klute (klute@rainer-klute.de) + * @see Property + * @see Variant + * @version $Id$ + * @since 2002-12-09 + */ +public class TypeReader +{ + + /** + *Reads a variant data type from a byte array.
+ * + * @param src The byte array + * @param offset The offset in the byte array where the variant + * starts + * @param length The length of the variant including the variant + * type field + * @return A Java object that corresponds best to the variant + * field. For example, a VT_I4 is returned as a {@link Long}, a + * VT_LPSTR as a {@link String}. + * + * @see Variant + */ + public static Object read(final byte[] src, int offset, int length, + final int type) + { + /* + * FIXME: Support reading more types and clean up this code! + */ + Object value; + length = length - LittleEndian.INT_SIZE; + switch (type) + { + case Variant.VT_I2: + { + /* + * Read a short. In Java it is represented as an + * Integer object. + */ + value = new Integer(LittleEndian.getUShort(src, offset)); + break; + } + case Variant.VT_I4: + { + /* + * Read a word. In Java it is represented as a + * Long object. + */ + value = new Long(LittleEndian.getUInt(src, offset)); + break; + } + case Variant.VT_FILETIME: + { + /* + * Read a FILETIME object. In Java it is represented + * as a Date object. + */ + final long low = LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + final long high = LittleEndian.getUInt(src, offset); + value = Util.filetimeToDate((int) high, (int) low); + break; + } + case Variant.VT_LPSTR: + { + /* + * Read a byte string. In Java it is represented as a + * String object. The 0x00 bytes at the end must be + * stripped. + */ + final int first = offset + LittleEndian.INT_SIZE; + long last = first + LittleEndian.getUInt(src, offset) - 1; + offset += LittleEndian.INT_SIZE; + while (src[(int) last] == 0 && first <= last) + last--; + value = new String(src, (int) first, (int) (last - first + 1)); + break; + } + case Variant.VT_LPWSTR: + { + /* + * Read a Unicode string. In Java it is represented as + * a String object. The 0x00 bytes at the end must be + * stripped. 
+ */ + final int first = offset + LittleEndian.INT_SIZE; + long last = first + LittleEndian.getUInt(src, offset) - 1; + long l = last - first; + offset += LittleEndian.INT_SIZE; + StringBuffer b = new StringBuffer((int) (last - first)); + for (int i = 0; i <= l; i++) + { + final int i1 = offset + (i * 2); + final int i2 = i1 + 1; + b.append((char) ((src[i2] << 8) + src[i1])); + } + /* Strip 0x00 characters from the end of the string: */ + while (b.charAt(b.length() - 1) == 0x00) + b.setLength(b.length() - 1); + value = b.toString(); + break; + } + case Variant.VT_CF: + { + final byte[] v = new byte[length]; + for (int i = 0; i < length; i++) + v[i] = src[(int) (offset + i)]; + value = v; + break; + } + case Variant.VT_BOOL: + { + /* + * The first four bytes in src, from src[offset] to + * src[offset + 3] contain the DWord for VT_BOOL, so + * skip it, we don't need it. + */ + final int first = offset + LittleEndian.INT_SIZE; + long bool = LittleEndian.getUInt(src, offset); + if (bool != 0) + value = new Boolean(true); + else + value = new Boolean(false); + break; + } + default: + { + final byte[] v = new byte[length]; + for (int i = 0; i < length; i++) + v[i] = src[(int) (offset + i)]; + value = v; + break; + } + } + return value; + } + +} diff --git a/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java b/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java index dc7f32e6f..94138caac 100644 --- a/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java +++ b/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java @@ -100,25 +100,110 @@ public class PropertyIDMap extends HashMap public final static int PID_APPNAME = 18; public final static int PID_SECURITY = 19; + + /* * The following definitions are for the Document Summary Information. */ + + /** + *The entry is a dictionary.
+ */ + public final static int PID_DICTIONARY = 0; + + /** + *The entry denotes a code page.
+ */ + public final static int PID_CODEPAGE = 1; + + /** + *The entry is a string denoting the category the file belongs + * to, e.g. review, memo, etc. This is useful to find documents of + * same type.
+ */ public final static int PID_CATEGORY = 2; + + /** + *Target format for power point presentation, e.g. 35mm, + * printer, video etc.
+ */ public final static int PID_PRESFORMAT = 3; + + /** + *Number of bytes.
+ */ public final static int PID_BYTECOUNT = 4; + + /** + *Number of lines.
+ */ public final static int PID_LINECOUNT = 5; + + /** + *Number of paragraphs.
+ */ public final static int PID_PARCOUNT = 6; + + /** + *Number of slides in a power point presentation.
+ */ public final static int PID_SLIDECOUNT = 7; + + /** + *Number of slides with notes.
+ */ public final static int PID_NOTECOUNT = 8; + + /** + *Number of hidden slides.
+ */ public final static int PID_HIDDENCOUNT = 9; + + /** + *Number of multimedia clips, e.g. sound or video.
+ */ public final static int PID_MMCLIPCOUNT = 10; + + /** + *This entry is set to -1 when scaling of the thumbnail is + * desired. Otherwise the thumbnail should be cropped.
+ */ public final static int PID_SCALE = 11; + + /** + *This entry denotes an internally used property. It is a + * vector of variants consisting of pairs of a string (VT_LPSTR) + * and a number (VT_I4). The string is a heading name, and the + * number tells how many document parts are under that + * heading.
+ */ public final static int PID_HEADINGPAIR = 12; + + /** + *This entry contains the names of document parts (word: names + * of the documents in the master document, excel: sheet names, + * power point: slide titles, binder: document names).
+ */ public final static int PID_DOCPARTS = 13; + + /** + *This entry contains the name of the project manager.
+ */ public final static int PID_MANAGER = 14; + + /** + *This entry contains the company name.
+ */ public final static int PID_COMPANY = 15; + + /** + *If this entry is -1 the links are dirty and should be + * re-evaluated.
+ */ public final static int PID_LINKSDIRTY = 16; + + /** *Contains the summary information property ID values and * associated strings. See the overall HPSF documentation for @@ -184,7 +269,7 @@ public class PropertyIDMap extends HashMap { if (summaryInformationProperties == null) { - PropertyIDMap m = new PropertyIDMap(17, (float) 1.0); + PropertyIDMap m = new PropertyIDMap(18, (float) 1.0); m.put(PID_TITLE, "PID_TITLE"); m.put(PID_SUBJECT, "PID_SUBJECT"); m.put(PID_AUTHOR, "PID_AUTHOR"); @@ -221,6 +306,8 @@ public class PropertyIDMap extends HashMap if (documentSummaryInformationProperties == null) { PropertyIDMap m = new PropertyIDMap(17, (float) 1.0); + m.put(PID_DICTIONARY, "PID_DICTIONARY"); + m.put(PID_CODEPAGE, "PID_CODEPAGE"); m.put(PID_CATEGORY, "PID_CATEGORY"); m.put(PID_PRESFORMAT, "PID_PRESFORMAT"); m.put(PID_BYTECOUNT, "PID_BYTECOUNT"); diff --git a/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java b/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java index 13579c8ca..4a7bfbc6b 100644 --- a/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java +++ b/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java @@ -57,54 +57,51 @@ package org.apache.poi.hpsf.wellknown; import java.util.*; /** - *
+ *
Maps section format IDs to {@link PropertyIDMap}s. It is + * initialized with two well-known section format IDs: those of the + * \005SummaryInformation stream and the + * \005DocumentSummaryInformation stream.
* - * Maps section format IDs to {@link PropertyIDMap}s. It is initialized with - * two well-known section format IDs: those of the \005SummaryInformation - * stream and the \005DocumentSummaryInformation stream.+ *
If you have a section format ID you can use it as a key to query
+ * this map. If you get a {@link PropertyIDMap} returned your section
+ * is well-known and you can query the {@link PropertyIDMap} for PID
+ * strings. If you get back null
you are on your own.
null
you are on your own. + *
This {@link Map} expects the byte arrays of section format IDs + * as keys. A key maps to a {@link PropertyIDMap} describing the + * property IDs in sections with the specified section format ID.
* - * This {@link Map} expects the byte arrays of section format IDs as keys. A - * key maps to a {@link PropertyIDMap} describing the property IDs in sections - * with the specified section format ID. - * - *@author Rainer Klute (klute@rainer-klute.de) - *@created May 10, 2002 - *@version $Id$ - *@since 2002-02-09 + * @author Rainer Klute (klute@rainer-klute.de) + * @version $Id$ + * @since 2002-02-09 */ -public class SectionIDMap extends HashMap { +public class SectionIDMap extends HashMap +{ /** - *- * - * The SummaryInformation's section's format ID.
+ *The SummaryInformation's section's format ID.
*/ - public final static byte[] SUMMARY_INFORMATION_ID = - new byte[]{(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0, - (byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68, - (byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00, - (byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9}; + public final static byte[] SUMMARY_INFORMATION_ID = new byte[] + { + (byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0, + (byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68, + (byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00, + (byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9 + }; /** - *- * - * The DocumentSummaryInformation's first section's format ID. The second - * section has a different format ID which is not well-known.
+ *The DocumentSummaryInformation's first section's format + * ID. The second section has a different format ID which is not + * well-known.
*/ - public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = - new byte[]{(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02, - (byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B, - (byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00, - (byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE}; + public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = new byte[] + { + (byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02, + (byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B, + (byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00, + (byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE + }; - /** - * Description of the Field - */ public final static String UNDEFINED = "[undefined]"; private static SectionIDMap defaultMap; @@ -112,19 +109,20 @@ public class SectionIDMap extends HashMap { /** - *+ *
Returns the singleton instance of the default {@link + * SectionIDMap}.
* - * Returns the singleton instance of the default {@link SectionIDMap}. - * - *@return The instance value + * @return The instance value */ - public static SectionIDMap getInstance() { - if (defaultMap == null) { + public static SectionIDMap getInstance() + { + if (defaultMap == null) + { final SectionIDMap m = new SectionIDMap(); m.put(SUMMARY_INFORMATION_ID, - PropertyIDMap.getSummaryInformationProperties()); + PropertyIDMap.getSummaryInformationProperties()); m.put(DOCUMENT_SUMMARY_INFORMATION_ID, - PropertyIDMap.getDocumentSummaryInformationProperties()); + PropertyIDMap.getDocumentSummaryInformationProperties()); defaultMap = m; } return defaultMap; @@ -133,31 +131,30 @@ public class SectionIDMap extends HashMap { /** - *+ *
Returns the property ID string that is associated with a + * given property ID in a section format ID's namespace.
* - * Returns the property ID string that is associated with a given property - * ID in a section format ID's namespace. - * - *@param sectionFormatID Each section format ID has its own name space of - * property ID strings and thus must be specified. - *@param pid The property ID - *@return The well-known property ID string associated with - * the property ID pid in the name space spanned by - * sectionFormatID . If the pid /sectionFormatID - * combination is not well-known, the string "[undefined]" is - * returned. + * @param sectionFormatID Each section format ID has its own name + * space of property ID strings and thus must be specified. + * @param pid The property ID + * @return The well-known property ID string associated with the + * property ID pid in the name space spanned by + * sectionFormatID . If the pid + * /sectionFormatID combination is not well-known, the + * string "[undefined]" is returned. */ public static String getPIDString(final byte[] sectionFormatID, - final int pid) { + final int pid) + { final PropertyIDMap m = - (PropertyIDMap) getInstance().get(sectionFormatID); - if (m == null) { + (PropertyIDMap) getInstance().get(sectionFormatID); + if (m == null) return UNDEFINED; - } else { + else + { final String s = (String) m.get(pid); - if (s == null) { + if (s == null) return UNDEFINED; - } return s; } } @@ -165,57 +162,47 @@ public class SectionIDMap extends HashMap { /** - *- * - * Returns the {@link PropertyIDMap} for a given section format ID.
- * - *@param sectionFormatID Description of the Parameter - *@return Description of the Return Value + *Returns the {@link PropertyIDMap} for a given section format + * ID.
*/ - public PropertyIDMap get(final byte[] sectionFormatID) { + public PropertyIDMap get(final byte[] sectionFormatID) + { return (PropertyIDMap) super.get(new String(sectionFormatID)); } /** - *+ *
Returns the {@link PropertyIDMap} for a given section format + * ID.
* - * Returns the {@link PropertyIDMap} for a given section format ID. - * - *@param sectionFormatID A section format ID as a byte[] . - *@return Description of the Return Value - *@deprecated Use {@link #get(byte[])} instead! + * @param sectionFormatID A section format ID as a byte[] . + * @deprecated Use {@link #get(byte[])} instead! */ - public Object get(final Object sectionFormatID) { + public Object get(final Object sectionFormatID) + { return get((byte[]) sectionFormatID); } /** - *- * - * Associates a section format ID with a {@link PropertyIDMap}.
- * - *@param sectionFormatID Description of the Parameter - *@param propertyIDMap Description of the Parameter - *@return Description of the Return Value + *Associates a section format ID with a {@link + * PropertyIDMap}.
*/ public Object put(final byte[] sectionFormatID, - final PropertyIDMap propertyIDMap) { + final PropertyIDMap propertyIDMap) + { return super.put(new String(sectionFormatID), propertyIDMap); } /** - *@param key Description of the Parameter - *@param value Description of the Parameter - *@return Description of the Return Value - *@deprecated Use {@link #put(byte[], PropertyIDMap)} instead! + * @deprecated Use {@link #put(byte[], PropertyIDMap)} instead! */ - public Object put(final Object key, final Object value) { + public Object put(final Object key, final Object value) + { return put((byte[]) key, (PropertyIDMap) value); } diff --git a/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java b/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java index 071540986..c66613c74 100644 --- a/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java +++ b/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java @@ -183,7 +183,7 @@ public class TestBasic extends TestCase /** *Tests the {@link PropertySet} methods. The test file has two - * property set: the first one is a {@link SummaryInformation}, + * property sets: the first one is a {@link SummaryInformation}, * the second one is a {@link DocumentSummaryInformation}.
*/ public void testPropertySetMethods() throws IOException, HPSFException @@ -214,11 +214,11 @@ public class TestBasic extends TestCase /** *Runs the test cases stand-alone.
*/ - public static void main(String[] args) + public static void main(String[] args) throws Throwable { System.setProperty("HPSF.testdata.path", "./src/testcases/org/apache/poi/hpsf/data"); - junit.textui.TestRunner.run(TestBasic.class); + junit.textui.TestRunner.run(TestBasic.class); } } diff --git a/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java b/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java new file mode 100644 index 000000000..db524272d --- /dev/null +++ b/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java @@ -0,0 +1,142 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache POI" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache POI", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + *Tests whether Unicode strings can be read from a + * DocumentSummaryInformation.
+ * + * @author Rainer Klute (klute@rainer-klute.de) + * @since 2002-12-09 + * @version $Id$ + */ +public class TestUnicode extends TestCase +{ + + final static String POI_FS = "TestUnicode.xls"; + final static String[] POI_FILES = new String[] + { + "\005DocumentSummaryInformation", + }; + File data; + POIFile[] poiFiles; + + + + public TestUnicode(String name) + { + super(name); + } + + + + /** + *Reads the test file from the "data" directory.
+ */ + public void setUp() throws FileNotFoundException, IOException + { + final File dataDir = + new File(System.getProperty("HPSF.testdata.path")); + data = new File(dataDir, POI_FS); + } + + + + /** + *Tests the {@link PropertySet} methods. The test file has two + * property sets: the first one is a {@link SummaryInformation}, + * the second one is a {@link DocumentSummaryInformation}.
+ */ + public void testPropertySetMethods() throws IOException, HPSFException + { + POIFile poiFile = Util.readPOIFiles(data, POI_FILES)[0]; + byte[] b = poiFile.getBytes(); + PropertySet ps = + PropertySetFactory.create(new ByteArrayInputStream(b)); + Assert.assertTrue(ps.isDocumentSummaryInformation()); + Assert.assertEquals(ps.getSectionCount(), 2); + Section s = (Section) ps.getSections().get(1); + Assert.assertEquals(s.getProperty(1), + new Integer(1200)); + Assert.assertEquals(s.getProperty(2), + new Long(4198897018l)); + Assert.assertEquals(s.getProperty(3), + "MCon_Info zu Office bei Schreiner"); + Assert.assertEquals(s.getProperty(4), + "petrovitsch@schreiner-online.de"); + Assert.assertEquals(s.getProperty(5), + "Petrovitsch, Wilhelm"); + } + + + + /** + *Runs the test cases stand-alone.
+ */ + public static void main(String[] args) + { + System.setProperty("HPSF.testdata.path", + "./src/testcases/org/apache/poi/hpsf/data"); + junit.textui.TestRunner.run(TestUnicode.class); + } + +} diff --git a/src/testcases/org/apache/poi/hpsf/basic/Util.java b/src/testcases/org/apache/poi/hpsf/basic/Util.java index b8ae824ae..6b541d816 100644 --- a/src/testcases/org/apache/poi/hpsf/basic/Util.java +++ b/src/testcases/org/apache/poi/hpsf/basic/Util.java @@ -114,10 +114,33 @@ public class Util */ public static POIFile[] readPOIFiles(final File poiFs) throws FileNotFoundException, IOException + { + return readPOIFiles(poiFs, null); + } + + + + /** + *Reads a set of files from a POI filesystem and returns them + * as an array of {@link POIFile} instances. This method loads all + * files into memory and thus does not cope well with large POI + * filesystems.
+ * + * @param poiFs The name of the POI filesystem as seen by the + * operating system. (This is the "filename".) + * + * @param poiFiles The names of the POI files to be read. + * + * @return The POI files. The elements are ordered in the same way + * as the files in the POI filesystem. + */ + public static POIFile[] readPOIFiles(final File poiFs, + final String[] poiFiles) + throws FileNotFoundException, IOException { final List files = new ArrayList(); POIFSReader r = new POIFSReader(); - r.registerListener(new POIFSReaderListener() + POIFSReaderListener pfl = new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { @@ -140,7 +163,17 @@ public class Util throw new RuntimeException(ex.getMessage()); } } - }); + }; + if (poiFiles == null) + /* Register the listener for all POI files. */ + r.registerListener(pfl); + else + /* Register the listener for the specified POI files + * only. */ + for (int i = 0; i < poiFiles.length; i++) + r.registerListener(pfl, poiFiles[i]); + + /* Read the POI filesystem. */ r.read(new FileInputStream(poiFs)); POIFile[] result = new POIFile[files.size()]; for (int i = 0; i < result.length; i++) diff --git a/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls b/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls new file mode 100644 index 000000000..30c8f3c1c Binary files /dev/null and b/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls differ