From d9ef0d47cff7e0751a44c8bc4dc5d482001ae637 Mon Sep 17 00:00:00 2001 From: Rainer Klute Date: Tue, 10 Dec 2002 06:15:20 +0000 Subject: [PATCH] HPSF: Reading Unicode properties are supported now. git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352933 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/xdocs/hpsf/internals.xml | 42 +++- .../poi/hpsf/DocumentSummaryInformation.java | 219 ++++++++---------- src/java/org/apache/poi/hpsf/Property.java | 205 +++++----------- src/java/org/apache/poi/hpsf/Section.java | 109 ++++++--- src/java/org/apache/poi/hpsf/TypeReader.java | 208 +++++++++++++++++ .../poi/hpsf/wellknown/PropertyIDMap.java | 89 ++++++- .../poi/hpsf/wellknown/SectionIDMap.java | 167 ++++++------- .../org/apache/poi/hpsf/basic/TestBasic.java | 6 +- .../apache/poi/hpsf/basic/TestUnicode.java | 142 ++++++++++++ .../org/apache/poi/hpsf/basic/Util.java | 37 ++- .../org/apache/poi/hpsf/data/TestUnicode.xls | Bin 0 -> 7680 bytes 11 files changed, 825 insertions(+), 399 deletions(-) create mode 100644 src/java/org/apache/poi/hpsf/TypeReader.java create mode 100644 src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java create mode 100644 src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls diff --git a/src/documentation/xdocs/hpsf/internals.xml b/src/documentation/xdocs/hpsf/internals.xml index 25d374511..3e0400394 100644 --- a/src/documentation/xdocs/hpsf/internals.xml +++ b/src/documentation/xdocs/hpsf/internals.xml @@ -690,6 +690,19 @@

Property ID string

VT type

+ + +

0

+

Dictionary

+

PID_DICTIONARY

+

[Special format]

+ + +

1

+

Code page

+

PID_CODEPAGE

+

VT_I2

+

2

Category

@@ -1128,20 +1141,25 @@
  • -

    What is a FILETIME? The answer can be found for example under - http://www.vbapi.com/ref/f/filetime.html +

    What is a FILETIME? The answer can be found for example + under http://www.vbapi.com/ref/f/filetime.html or - http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html. In - short: - The FILETIME structure holds a date and time associated with a file. - The structure identifies a 64-bit integer specifying the number of - 100-nanosecond intervals which have passed since January 1, 1601. This - 64-bit value is split into the two dwords stored in the - structure.

    -
  • + http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html. + In short: The FILETIME structure holds a date and time associated + with a file. The structure identifies a 64-bit integer specifying the + number of 100-nanosecond intervals which have passed since January 1, + 1601. This 64-bit value is split into the two dwords stored in the + structure.

    + -
  • -

    This documentation origins from the HPSF description available at http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html.

    +
  • +

    Information about the code page property in the + DocumentSummaryInformation stream is available at http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp.

    +
  • + +
  • +

    This documentation originates from the HPSF description available at http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html.

  • diff --git a/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java b/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java index c14aeadaf..aa5a84478 100644 --- a/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java +++ b/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java @@ -59,257 +59,236 @@ import java.util.*; import org.apache.poi.hpsf.wellknown.*; /** - *

    + *

    Convenience class representing a DocumentSummary Information stream in a + * Microsoft Office document.

    * - * Convenience class representing a DocumentSummary Information stream in a - * Microsoft Office document.

    - * - *@author Rainer Klute (klute@rainer-klute.de) - *@author Drew Varner (Drew.Varner closeTo sc.edu) - *@created May 10, 2002 - *@see SummaryInformation - *@version $Id: DocumentSummaryInformation.java,v 1.6 2002/05/03 07:29:09 - * klute Exp $ - *@since 2002-02-09 + * @author Rainer Klute (klute@rainer-klute.de) + * @author Drew Varner (Drew.Varner closeTo sc.edu) + * @see SummaryInformation + * @version $Id$ + * @since 2002-02-09 */ -public class DocumentSummaryInformation extends SpecialPropertySet { +public class DocumentSummaryInformation extends SpecialPropertySet +{ /** - *

    + *

    Creates a {@link DocumentSummaryInformation} from a given + * {@link PropertySet}.

    * - * Creates a {@link DocumentSummaryInformation} from a given {@link - * PropertySet}.

    - * - *@param ps A property set which - * should be created from a document summary information stream. - *@exception UnexpectedPropertySetTypeException Description of the - * Exception - *@throws UnexpectedPropertySetTypeException if ps does not - * contain a document summary information stream. + * @param ps A property set which should be created from a + * document summary information stream. + * @throws UnexpectedPropertySetTypeException if ps + * does not contain a document summary information stream. */ public DocumentSummaryInformation(final PropertySet ps) - throws UnexpectedPropertySetTypeException { + throws UnexpectedPropertySetTypeException + { super(ps); - if (!isDocumentSummaryInformation()) { + if (!isDocumentSummaryInformation()) throw new UnexpectedPropertySetTypeException - ("Not a " + getClass().getName()); - } + ("Not a " + getClass().getName()); } /** - *

    + *

    Returns the stream's category (or null).

    * - * Returns the stream's category (or null).

    - * - *@return The category value + * @return The category value */ - public String getCategory() { + public String getCategory() + { return (String) getProperty(PropertyIDMap.PID_CATEGORY); } /** - *

    + *

    Returns the stream's presentation format (or + * null).

    * - * Returns the stream's presentation format (or null).

    - * - *@return The presentationFormat value + * @return The presentationFormat value */ - public String getPresentationFormat() { + public String getPresentationFormat() + { return (String) getProperty(PropertyIDMap.PID_PRESFORMAT); } /** - *

    + *

    Returns the stream's byte count or 0 if the {@link + * DocumentSummaryInformation} does not contain a byte count.

    * - * Returns the stream's byte count or 0 if the {@link - * DocumentSummaryInformation} does not contain a byte count.

    - * - *@return The byteCount value + * @return The byteCount value */ - public int getByteCount() { + public int getByteCount() + { return getPropertyIntValue(PropertyIDMap.PID_BYTECOUNT); } /** - *

    + *

    Returns the stream's line count or 0 if the {@link + * DocumentSummaryInformation} does not contain a line count.

    * - * Returns the stream's line count or 0 if the {@link - * DocumentSummaryInformation} does not contain a line count.

    - * - *@return The lineCount value + * @return The lineCount value */ - public int getLineCount() { + public int getLineCount() + { return getPropertyIntValue(PropertyIDMap.PID_LINECOUNT); } /** - *

    + *

    Returns the stream's par count or 0 if the {@link + * DocumentSummaryInformation} does not contain a par count.

    * - * Returns the stream's par count or 0 if the {@link - * DocumentSummaryInformation} does not contain a par count.

    - * - *@return The parCount value + * @return The parCount value */ - public int getParCount() { + public int getParCount() + { return getPropertyIntValue(PropertyIDMap.PID_PARCOUNT); } /** - *

    + *

    Returns the stream's slide count or 0 if the {@link + * DocumentSummaryInformation} does not contain a slide count.

    * - * Returns the stream's slide count or 0 if the {@link - * DocumentSummaryInformation} does not contain a slide count.

    - * - *@return The slideCount value + * @return The slideCount value */ - public int getSlideCount() { + public int getSlideCount() + { return getPropertyIntValue(PropertyIDMap.PID_SLIDECOUNT); } /** - *

    + *

    Returns the stream's note count or 0 if the {@link + * DocumentSummaryInformation} does not contain a note count.

    * - * Returns the stream's note count or 0 if the {@link - * DocumentSummaryInformation} does not contain a note count.

    - * - *@return The noteCount value + * @return The noteCount value */ - public int getNoteCount() { + public int getNoteCount() + { return getPropertyIntValue(PropertyIDMap.PID_NOTECOUNT); } /** - *

    + *

    Returns the stream's hidden count or 0 if the {@link + * DocumentSummaryInformation} does not contain a hidden + * count.

    * - * Returns the stream's hidden count or 0 if the {@link - * DocumentSummaryInformation} does not contain a hidden count.

    - * - *@return The hiddenCount value + * @return The hiddenCount value */ - public int getHiddenCount() { + public int getHiddenCount() + { return getPropertyIntValue(PropertyIDMap.PID_HIDDENCOUNT); } /** - *

    + *

    Returns the stream's mmclip count or 0 if the {@link + * DocumentSummaryInformation} does not contain a mmclip + * count.

    * - * Returns the stream's mmclip count or 0 if the {@link - * DocumentSummaryInformation} does not contain a mmclip count.

    - * - *@return The mMClipCount value + * @return The mMClipCount value */ - public int getMMClipCount() { + public int getMMClipCount() + { return getPropertyIntValue(PropertyIDMap.PID_MMCLIPCOUNT); } /** - *

    + *

    Returns true when scaling of the thumbnail is + * desired, false if cropping is desired.

    * - * Returns true when scaling of the thumbnail is desired, - * false if cropping is desired.

    - * - *@return The scale value + * @return The scale value */ - public boolean getScale() { + public boolean getScale() + { return getPropertyBooleanValue(PropertyIDMap.PID_SCALE); } /** - *

    + *

    Returns the stream's heading pair (or null) + * when this method is implemented. Please note that the + * return type is likely to change! * - * Returns the stream's heading pair (or null) when - * this method is implemented. Please note that the return type is likely - * to change! - * - *@return The headingPair value + * @return The headingPair value */ - public byte[] getHeadingPair() { - if (true) { + public byte[] getHeadingPair() + { + if (true) throw new UnsupportedOperationException("FIXME"); - } return (byte[]) getProperty(PropertyIDMap.PID_HEADINGPAIR); } /** - *

    + *

    Returns the stream's doc parts (or null) + * when this method is implemented. Please note that the + * return type is likely to change! * - * Returns the stream's doc parts (or null) when this - * method is implemented. Please note that the return type is likely to - * change! - * - *@return The docparts value + * @return The docparts value */ - public byte[] getDocparts() { - if (true) { + public byte[] getDocparts() + { + if (true) throw new UnsupportedOperationException("FIXME"); - } return (byte[]) getProperty(PropertyIDMap.PID_DOCPARTS); } /** - *

    + *

    Returns the stream's manager (or null).

    * - * Returns the stream's manager (or null).

    - * - *@return The manager value + * @return The manager value */ - public String getManager() { + public String getManager() + { return (String) getProperty(PropertyIDMap.PID_MANAGER); } /** - *

    + *

    Returns the stream's company (or null).

    * - * Returns the stream's company (or null).

    - * - *@return The company value + * @return The company value */ - public String getCompany() { + public String getCompany() + { return (String) getProperty(PropertyIDMap.PID_COMPANY); } /** - *

    + *

    Returns true if the custom links are hampered + * by excessive noise, for all applications.

    * - * Returns true if the custom links are hampered by excessive - * noise, for all applications.

    + * FIXME: Explain this some more! I (Rainer) + * don't understand it.

    * - * FIXME: Explain this some more! I (Rainer) don't - * understand it.

    - * - *@return The linksDirty value + * @return The linksDirty value */ - public boolean getLinksDirty() { + public boolean getLinksDirty() + { return getPropertyBooleanValue(PropertyIDMap.PID_LINKSDIRTY); } diff --git a/src/java/org/apache/poi/hpsf/Property.java b/src/java/org/apache/poi/hpsf/Property.java index 13c6917c6..9ea91ae7b 100644 --- a/src/java/org/apache/poi/hpsf/Property.java +++ b/src/java/org/apache/poi/hpsf/Property.java @@ -81,10 +81,9 @@ import org.apache.poi.util.LittleEndian; * value, {@link Variant#VT_FILETIME} some date and time (of a * file).

    * - *

    FIXME: Reading of other types than {@link - * Variant#VT_I4}, {@link Variant#VT_FILETIME}, {@link - * Variant#VT_LPSTR}, {@link Variant#VT_CF}, {@link Variant#VT_BOOL}, - * and reading the dictionary property is not yet implemented.

    + *

    FIXME: Reading is not implemented for all + * {@link Variant} types yet. Feel free to submit error reports or + * patches for the types you need.

    * * @author Rainer Klute (klute@rainer-klute.de) * @author Drew Varner (Drew.Varner InAndAround sc.edu) @@ -96,6 +95,9 @@ import org.apache.poi.util.LittleEndian; public class Property { + /* Codepage 1200 denotes Unicode. */ + private static int CP_UNICODE = 1200; + private int id; @@ -150,121 +152,37 @@ public class Property * @param offset The property's type/value pair's offset in the * section. * @param length The property's type/value pair's length in bytes. + * @param codepage The section's and thus the property's + * codepage. It is needed only when reading string values. */ public Property(final int id, final byte[] src, final long offset, - int length) + int length, int codepage) { this.id = id; /* - * ID 0 is a special case since it specifies a dictionary of - * property IDs and property names. + * ID 0 is a special case since it specifies a dictionary of + * property IDs and property names. */ if (id == 0) { - value = readDictionary(src, offset, length); + value = readDictionary(src, offset, length, codepage); return; } - /* - * FIXME: Support this! - */ -// /* ID 1 is another special case: It denotes the code page of -// * byte strings in this section. */ -// if (id == 1) -// { -// value = readCodepage(src, offset); -// return; -// } - int o = (int) offset; type = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - /* - * FIXME: Support reading more types! - */ - switch ((int)type) { - case Variant.VT_I4: - { - /* - * Read a word. In Java it is represented as an - * Integer object. - */ - value = new Long(LittleEndian.getUInt(src, o)); - break; - } - case Variant.VT_FILETIME: - { - /* - * Read a FILETIME object. In Java it is represented - * as a Date. - */ - final long low = LittleEndian.getUInt(src, o); - o += LittleEndian.INT_SIZE; - final long high = LittleEndian.getUInt(src, o); - value = Util.filetimeToDate((int)high, (int)low); - break; - } - case Variant.VT_LPSTR: - { - /* - * Read a byte string. 
In Java it is represented as a - * String. The null bytes at the end of the byte - * strings must be stripped. - */ - final int first = o + LittleEndian.INT_SIZE; - long last = first + LittleEndian.getUInt(src, o) - 1; - o += LittleEndian.INT_SIZE; - while (src[(int)last] == 0 && first <= last) { - last--; - } - value = new String(src, (int)first, (int)(last - first + 1)); - break; - } - case Variant.VT_CF: - { - /* - * The first four bytes in src, from rc[offset] to - * src[offset + 3] contain the DWord for VT_CF, so - * skip it, we don't need it. - */ - /* - * Truncate the length of the return array by a DWord - * length (4 bytes). - */ - length = length - LittleEndian.INT_SIZE; - - final byte[] v = new byte[length]; - for (int i = 0; i < length; i++) - v[i] = src[(int)(o + i)]; - value = v; - break; - } - case Variant.VT_BOOL: - { - /* - * The first four bytes in src, from src[offset] to - * src[offset + 3] contain the DWord for VT_BOOL, so - * skip it, we don't need it. - */ - final int first = o + LittleEndian.INT_SIZE; - long bool = LittleEndian.getUInt(src, o); - if (bool != 0) - value = new Boolean(true); - else - value = new Boolean(false); - break; - } - default: - { - final byte[] v = new byte[length]; - for (int i = 0; i < length; i++) - v[i] = src[(int)(offset + i)]; - value = v; - break; - } - } + try + { + value = TypeReader.read(src, o, length, (int) type); + } + catch (Throwable t) + { + t.printStackTrace(); + value = "*** null ***"; + } } @@ -277,64 +195,67 @@ public class Property * @param offset At this offset within src the * dictionary starts. * @param length The dictionary contains at most this many bytes. + * @param codepage The codepage of the string values. * @return The dictonary */ protected Map readDictionary(final byte[] src, final long offset, - final int length) + final int length, final int codepage) { - /* - * FIXME: Check the length! - */ - int o = (int)offset; + /* Check whether "offset" points into the "src" array". 
*/ + if (offset < 0 || offset > src.length) + throw new HPSFRuntimeException + ("Illegal offset " + offset + " while HPSF stream contains " + + length + " bytes."); + int o = (int) offset; /* - * Read the number of dictionary entries. + * Read the number of dictionary entries. */ final long nrEntries = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - final Map m = new HashMap((int)nrEntries, (float) 1.0); + final Map m = new HashMap((int) nrEntries, (float) 1.0); for (int i = 0; i < nrEntries; i++) { - /* - * The key - */ + /* The key. */ final Long id = new Long(LittleEndian.getUInt(src, o)); o += LittleEndian.INT_SIZE; - /* - * The value (a string) - */ - final long sLength = LittleEndian.getUInt(src, o); + /* The value (a string). The length is the either the + * number of characters if the character set is Unicode or + * else the number of bytes. The length includes + * terminating 0x00 bytes which we have to strip off to + * create a Java string. */ + long sLength = LittleEndian.getUInt(src, o); o += LittleEndian.INT_SIZE; - /* - * Strip trailing 0x00 bytes. - */ - long l = sLength; - while (src[(int)(o + l - 1)] == 0x00) - l--; - final String s = new String(src, o, (int)l); - o += sLength; - m.put(id, s); + /* Read the bytes or characters depending on whether the + * character set is Unicode or not. */ + StringBuffer b = new StringBuffer((int) sLength); + for (int j = 0; j < sLength; j++) + if (codepage == CP_UNICODE) + { + final int i1 = o + (j * 2); + final int i2 = i1 + 1; + b.append((char) ((src[i2] << 8) + src[i1])); + } + else + b.append((char) src[o + j]); + + /* Strip 0x00 characters from the end of the string: */ + while (b.charAt(b.length() - 1) == 0x00) + b.setLength(b.length() - 1); + if (codepage == CP_UNICODE) + { + if (sLength % 2 == 1) + sLength++; + o += (sLength + sLength); + } + else + o += sLength; + m.put(id, b.toString()); } return m; } - - - /** - *

    Reads a code page.

    - * - * @param src The byte array containing the bytes making out the - * code page. - * @param offset At this offset within src the code - * page starts. - * @return The code page. - */ - protected int readCodePage(final byte[] src, final long offset) - { - throw new UnsupportedOperationException("FIXME"); - } - } diff --git a/src/java/org/apache/poi/hpsf/Section.java b/src/java/org/apache/poi/hpsf/Section.java index a640fd9bd..c859d5cf9 100644 --- a/src/java/org/apache/poi/hpsf/Section.java +++ b/src/java/org/apache/poi/hpsf/Section.java @@ -161,66 +161,117 @@ public class Section public Section(final byte[] src, int offset) { /* - * Read the format ID. + * Read the format ID. */ formatID = new ClassID(src, offset); offset += ClassID.LENGTH; /* - * Read the offset from the stream's start and positions to - * the section header. + * Read the offset from the stream's start and positions to + * the section header. */ this.offset = LittleEndian.getUInt(src, offset); offset = (int)this.offset; /* - * Read the section length. + * Read the section length. */ size = (int)LittleEndian.getUInt(src, offset); offset += LittleEndian.INT_SIZE; /* - * Read the number of properties. + * Read the number of properties. */ propertyCount = (int)LittleEndian.getUInt(src, offset); offset += LittleEndian.INT_SIZE; /* - * Read the properties. The offset is positioned at the first - * entry of the property list. + * Read the properties. The offset is positioned at the first + * entry of the property list. The problem is that we have to + * read the property with ID 1 before we read other + * properties, at least before other properties containing + * strings. The reason is that property 1 specifies the + * codepage. If it is 1200, all strings are in Unicode. In + * other words: Before we can read any strings we have to know + * whether they are in Unicode or not. Unfortunately property + * 1 is not guaranteed to be the first in a section. 
+ * + * The algorithm below reads the properties in two passes: The + * first one looks for property ID 1 and extracts the codepage + * number. The seconds pass reads the other properties. */ properties = new Property[propertyCount]; - for (int i = 0; i < properties.length; i++) { - final int id = (int)LittleEndian.getUInt(src, offset); - offset += LittleEndian.INT_SIZE; + Property propertyOne; - /* - * Offset from the section. - */ - final int sOffset = (int)LittleEndian.getUInt(src, offset); - offset += LittleEndian.INT_SIZE; + /* Pass 1: Look for the codepage. */ + int codepage = -1; + int pass1Offset = offset; + for (int i = 0; i < properties.length; i++) + { + /* Read the property ID. */ + final int id = (int) LittleEndian.getUInt(src, pass1Offset); + pass1Offset += LittleEndian.INT_SIZE; - /* - * Calculate the length of the property. - */ + /* Offset from the section's start. */ + final int sOffset = (int) LittleEndian.getUInt(src, pass1Offset); + pass1Offset += LittleEndian.INT_SIZE; + + /* Calculate the length of the property. */ int length; - if (i == properties.length - 1) { - length = (int)(src.length - this.offset - sOffset); - } else { + if (i == properties.length - 1) + length = (int) (src.length - this.offset - sOffset); + else + length = (int) + LittleEndian.getUInt(src, pass1Offset + + LittleEndian.INT_SIZE) - sOffset; + + if (id == PropertyIDMap.PID_CODEPAGE) + { + /* Read the codepage if the property ID is 1. */ + + /* Read the property's value type. It must be + * VT_I2. */ + int o = (int) (this.offset + sOffset); + final long type = LittleEndian.getUInt(src, o); + o += LittleEndian.INT_SIZE; + + if (type != Variant.VT_I2) + throw new HPSFRuntimeException + ("Value type of property ID 1 is not VT_I2 but " + + type + "."); + + /* Read the codepage number. */ + codepage = LittleEndian.getUShort(src, o); + } + } + + /* Pass 2: Read all properties, including 1. */ + for (int i = 0; i < properties.length; i++) + { + /* Read the property ID. 
*/ + final int id = (int) LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + + /* Offset from the section. */ + final int sOffset = (int) LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + + /* Calculate the length of the property. */ + int length; + if (i == properties.length - 1) + length = (int) (src.length - this.offset - sOffset); + else length = (int) LittleEndian.getUInt(src, offset + LittleEndian.INT_SIZE) - sOffset; - } - /* - * Create it. - */ - properties[i] = - new Property(id, src, this.offset + sOffset, length); + /* Create it. */ + properties[i] = new Property(id, src, this.offset + sOffset, + length, codepage); } /* - * Extract the dictionary (if available). + * Extract the dictionary (if available). */ dictionary = (Map) getProperty(0); } @@ -237,7 +288,7 @@ public class Section * * @return The property's value */ - protected Object getProperty(final int id) + public Object getProperty(final int id) { wasNull = false; for (int i = 0; i < properties.length; i++) diff --git a/src/java/org/apache/poi/hpsf/TypeReader.java b/src/java/org/apache/poi/hpsf/TypeReader.java new file mode 100644 index 000000000..c9f8aaefd --- /dev/null +++ b/src/java/org/apache/poi/hpsf/TypeReader.java @@ -0,0 +1,208 @@ +/* + * ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2000 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact apache@apache.org. + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + * Portions of this software are based upon public domain software + * originally written at the National Center for Supercomputing Applications, + * University of Illinois, Urbana-Champaign. + * + * Portions of this software are based upon public domain software + * originally written at the National Center for Supercomputing Applications, + * University of Illinois, Urbana-Champaign. + */ +package org.apache.poi.hpsf; + +import java.util.*; +import org.apache.poi.util.LittleEndian; + +/** + *

    Reader for specific data types.

    + * + * @author Rainer Klute (klute@rainer-klute.de) + * @see Property + * @see Variant + * @version $Id$ + * @since 2002-12-09 + */ +public class TypeReader +{ + + /** + *

    Reads a variant data type from a byte array.

    + * + * @param src The byte array + * @param offset The offset in the byte array where the variant + * starts + * @param length The length of the variant including the variant + * type field + * @return A Java object that corresponds best to the variant + * field. For example, a VT_I4 is returned as a {@link Long}, a + * VT_LPSTR as a {@link String}. + * + * @see Variant + */ + public static Object read(final byte[] src, int offset, int length, + final int type) + { + /* + * FIXME: Support reading more types and clean up this code! + */ + Object value; + length = length - LittleEndian.INT_SIZE; + switch (type) + { + case Variant.VT_I2: + { + /* + * Read a short. In Java it is represented as an + * Integer object. + */ + value = new Integer(LittleEndian.getUShort(src, offset)); + break; + } + case Variant.VT_I4: + { + /* + * Read a word. In Java it is represented as a + * Long object. + */ + value = new Long(LittleEndian.getUInt(src, offset)); + break; + } + case Variant.VT_FILETIME: + { + /* + * Read a FILETIME object. In Java it is represented + * as a Date object. + */ + final long low = LittleEndian.getUInt(src, offset); + offset += LittleEndian.INT_SIZE; + final long high = LittleEndian.getUInt(src, offset); + value = Util.filetimeToDate((int) high, (int) low); + break; + } + case Variant.VT_LPSTR: + { + /* + * Read a byte string. In Java it is represented as a + * String object. The 0x00 bytes at the end must be + * stripped. + */ + final int first = offset + LittleEndian.INT_SIZE; + long last = first + LittleEndian.getUInt(src, offset) - 1; + offset += LittleEndian.INT_SIZE; + while (src[(int) last] == 0 && first <= last) + last--; + value = new String(src, (int) first, (int) (last - first + 1)); + break; + } + case Variant.VT_LPWSTR: + { + /* + * Read a Unicode string. In Java it is represented as + * a String object. The 0x00 bytes at the end must be + * stripped. 
+ */ + final int first = offset + LittleEndian.INT_SIZE; + long last = first + LittleEndian.getUInt(src, offset) - 1; + long l = last - first; + offset += LittleEndian.INT_SIZE; + StringBuffer b = new StringBuffer((int) (last - first)); + for (int i = 0; i <= l; i++) + { + final int i1 = offset + (i * 2); + final int i2 = i1 + 1; + b.append((char) ((src[i2] << 8) + src[i1])); + } + /* Strip 0x00 characters from the end of the string: */ + while (b.charAt(b.length() - 1) == 0x00) + b.setLength(b.length() - 1); + value = b.toString(); + break; + } + case Variant.VT_CF: + { + final byte[] v = new byte[length]; + for (int i = 0; i < length; i++) + v[i] = src[(int) (offset + i)]; + value = v; + break; + } + case Variant.VT_BOOL: + { + /* + * The first four bytes in src, from src[offset] to + * src[offset + 3] contain the DWord for VT_BOOL, so + * skip it, we don't need it. + */ + final int first = offset + LittleEndian.INT_SIZE; + long bool = LittleEndian.getUInt(src, offset); + if (bool != 0) + value = new Boolean(true); + else + value = new Boolean(false); + break; + } + default: + { + final byte[] v = new byte[length]; + for (int i = 0; i < length; i++) + v[i] = src[(int) (offset + i)]; + value = v; + break; + } + } + return value; + } + +} diff --git a/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java b/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java index dc7f32e6f..94138caac 100644 --- a/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java +++ b/src/java/org/apache/poi/hpsf/wellknown/PropertyIDMap.java @@ -100,25 +100,110 @@ public class PropertyIDMap extends HashMap public final static int PID_APPNAME = 18; public final static int PID_SECURITY = 19; + + /* * The following definitions are for the Document Summary Information. */ + + /** + *

    The entry is a dictionary.

    + */ + public final static int PID_DICTIONARY = 0; + + /** + *

    The entry denotes a code page.

    + */ + public final static int PID_CODEPAGE = 1; + + /** + *

    The entry is a string denoting the category the file belongs + * to, e.g. review, memo, etc. This is useful to find documents of + * the same type.

    + */ public final static int PID_CATEGORY = 2; + + /** + *

    Target format for a PowerPoint presentation, e.g. 35mm, + * printer, video etc.

    + */ public final static int PID_PRESFORMAT = 3; + + /** + *

    Number of bytes.

    + */ public final static int PID_BYTECOUNT = 4; + + /** + *

    Number of lines.

    + */ public final static int PID_LINECOUNT = 5; + + /** + *

    Number of paragraphs.

    + */ public final static int PID_PARCOUNT = 6; + + /** + *

    Number of slides in a PowerPoint presentation.

    + */ public final static int PID_SLIDECOUNT = 7; + + /** + *

    Number of slides with notes.

    + */ public final static int PID_NOTECOUNT = 8; + + /** + *

    Number of hidden slides.

    + */ public final static int PID_HIDDENCOUNT = 9; + + /** + *

    Number of multimedia clips, e.g. sound or video.

    + */ public final static int PID_MMCLIPCOUNT = 10; + + /** + *

    This entry is set to -1 when scaling of the thumbnail is + * desired. Otherwise the thumbnail should be cropped.

    + */ public final static int PID_SCALE = 11; + + /** + *

    This entry denotes an internally used property. It is a + * vector of variants consisting of pairs of a string (VT_LPSTR) + * and a number (VT_I4). The string is a heading name, and the + * number tells how many document parts are under that + * heading.

    + */ public final static int PID_HEADINGPAIR = 12; + + /** + *

    This entry contains the names of document parts (word: names + * of the documents in the master document, excel: sheet names, + * power point: slide titles, binder: document names).

    + */ public final static int PID_DOCPARTS = 13; + + /** + *

    This entry contains the name of the project manager.

    + */ public final static int PID_MANAGER = 14; + + /** + *

    This entry contains the company name.

    + */ public final static int PID_COMPANY = 15; + + /** + *

    If this entry is -1 the links are dirty and should be + * re-evaluated.

    + */ public final static int PID_LINKSDIRTY = 16; + + /** *

    Contains the summary information property ID values and * associated strings. See the overall HPSF documentation for @@ -184,7 +269,7 @@ public class PropertyIDMap extends HashMap { if (summaryInformationProperties == null) { - PropertyIDMap m = new PropertyIDMap(17, (float) 1.0); + PropertyIDMap m = new PropertyIDMap(18, (float) 1.0); m.put(PID_TITLE, "PID_TITLE"); m.put(PID_SUBJECT, "PID_SUBJECT"); m.put(PID_AUTHOR, "PID_AUTHOR"); @@ -221,6 +306,8 @@ public class PropertyIDMap extends HashMap if (documentSummaryInformationProperties == null) { PropertyIDMap m = new PropertyIDMap(17, (float) 1.0); + m.put(PID_DICTIONARY, "PID_DICTIONARY"); + m.put(PID_CODEPAGE, "PID_CODEPAGE"); m.put(PID_CATEGORY, "PID_CATEGORY"); m.put(PID_PRESFORMAT, "PID_PRESFORMAT"); m.put(PID_BYTECOUNT, "PID_BYTECOUNT"); diff --git a/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java b/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java index 13579c8ca..4a7bfbc6b 100644 --- a/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java +++ b/src/java/org/apache/poi/hpsf/wellknown/SectionIDMap.java @@ -57,54 +57,51 @@ package org.apache.poi.hpsf.wellknown; import java.util.*; /** - *

    + *

    Maps section format IDs to {@link PropertyIDMap}s. It is + * initialized with two well-known section format IDs: those of the + * \005SummaryInformation stream and the + * \005DocumentSummaryInformation stream.

    * - * Maps section format IDs to {@link PropertyIDMap}s. It is initialized with - * two well-known section format IDs: those of the \005SummaryInformation - * stream and the \005DocumentSummaryInformation stream.

    + *

    If you have a section format ID you can use it as a key to query + * this map. If you get a {@link PropertyIDMap} returned your section + * is well-known and you can query the {@link PropertyIDMap} for PID + * strings. If you get back null you are on your own.

    * - * If you have a section format ID you can use it as a key to query this map. - * If you get a {@link PropertyIDMap} returned your section is well-known and - * you can query the {@link PropertyIDMap} for PID strings. If you get back - * null you are on your own.

    + *

    This {@link Map} expects the byte arrays of section format IDs + * as keys. A key maps to a {@link PropertyIDMap} describing the + * property IDs in sections with the specified section format ID.

    * - * This {@link Map} expects the byte arrays of section format IDs as keys. A - * key maps to a {@link PropertyIDMap} describing the property IDs in sections - * with the specified section format ID.

    - * - *@author Rainer Klute (klute@rainer-klute.de) - *@created May 10, 2002 - *@version $Id$ - *@since 2002-02-09 + * @author Rainer Klute (klute@rainer-klute.de) + * @version $Id$ + * @since 2002-02-09 */ -public class SectionIDMap extends HashMap { +public class SectionIDMap extends HashMap +{ /** - *

    - * - * The SummaryInformation's section's format ID.

    + *

    The SummaryInformation's section's format ID.

    */ - public final static byte[] SUMMARY_INFORMATION_ID = - new byte[]{(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0, - (byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68, - (byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00, - (byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9}; + public final static byte[] SUMMARY_INFORMATION_ID = new byte[] + { + (byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0, + (byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68, + (byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00, + (byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9 + }; /** - *

    - * - * The DocumentSummaryInformation's first section's format ID. The second - * section has a different format ID which is not well-known.

    + *

    The DocumentSummaryInformation's first section's format + * ID. The second section has a different format ID which is not + * well-known.

    */ - public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = - new byte[]{(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02, - (byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B, - (byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00, - (byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE}; + public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = new byte[] + { + (byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02, + (byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B, + (byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00, + (byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE + }; - /** - * Description of the Field - */ public final static String UNDEFINED = "[undefined]"; private static SectionIDMap defaultMap; @@ -112,19 +109,20 @@ public class SectionIDMap extends HashMap { /** - *

    + *

    Returns the singleton instance of the default {@link + * SectionIDMap}.

    * - * Returns the singleton instance of the default {@link SectionIDMap}.

    - * - *@return The instance value + * @return The instance value */ - public static SectionIDMap getInstance() { - if (defaultMap == null) { + public static SectionIDMap getInstance() + { + if (defaultMap == null) + { final SectionIDMap m = new SectionIDMap(); m.put(SUMMARY_INFORMATION_ID, - PropertyIDMap.getSummaryInformationProperties()); + PropertyIDMap.getSummaryInformationProperties()); m.put(DOCUMENT_SUMMARY_INFORMATION_ID, - PropertyIDMap.getDocumentSummaryInformationProperties()); + PropertyIDMap.getDocumentSummaryInformationProperties()); defaultMap = m; } return defaultMap; @@ -133,31 +131,30 @@ public class SectionIDMap extends HashMap { /** - *

    + *

    Returns the property ID string that is associated with a + * given property ID in a section format ID's namespace.

    * - * Returns the property ID string that is associated with a given property - * ID in a section format ID's namespace.

    - * - *@param sectionFormatID Each section format ID has its own name space of - * property ID strings and thus must be specified. - *@param pid The property ID - *@return The well-known property ID string associated with - * the property ID pid in the name space spanned by - * sectionFormatID . If the pid /sectionFormatID - * combination is not well-known, the string "[undefined]" is - * returned. + * @param sectionFormatID Each section format ID has its own name + * space of property ID strings and thus must be specified. + * @param pid The property ID + * @return The well-known property ID string associated with the + * property ID pid in the name space spanned by + * sectionFormatID . If the pid + * /sectionFormatID combination is not well-known, the + * string "[undefined]" is returned. */ public static String getPIDString(final byte[] sectionFormatID, - final int pid) { + final int pid) + { final PropertyIDMap m = - (PropertyIDMap) getInstance().get(sectionFormatID); - if (m == null) { + (PropertyIDMap) getInstance().get(sectionFormatID); + if (m == null) return UNDEFINED; - } else { + else + { final String s = (String) m.get(pid); - if (s == null) { + if (s == null) return UNDEFINED; - } return s; } } @@ -165,57 +162,47 @@ public class SectionIDMap extends HashMap { /** - *

    - * - * Returns the {@link PropertyIDMap} for a given section format ID.

    - * - *@param sectionFormatID Description of the Parameter - *@return Description of the Return Value + *

    Returns the {@link PropertyIDMap} for a given section format + * ID.

    */ - public PropertyIDMap get(final byte[] sectionFormatID) { + public PropertyIDMap get(final byte[] sectionFormatID) + { return (PropertyIDMap) super.get(new String(sectionFormatID)); } /** - *

    + *

    Returns the {@link PropertyIDMap} for a given section format + * ID.

    * - * Returns the {@link PropertyIDMap} for a given section format ID.

    - * - *@param sectionFormatID A section format ID as a byte[] . - *@return Description of the Return Value - *@deprecated Use {@link #get(byte[])} instead! + * @param sectionFormatID A section format ID as a byte[] . + * @deprecated Use {@link #get(byte[])} instead! */ - public Object get(final Object sectionFormatID) { + public Object get(final Object sectionFormatID) + { return get((byte[]) sectionFormatID); } /** - *

    - * - * Associates a section format ID with a {@link PropertyIDMap}.

    - * - *@param sectionFormatID Description of the Parameter - *@param propertyIDMap Description of the Parameter - *@return Description of the Return Value + *

    Associates a section format ID with a {@link + * PropertyIDMap}.

    */ public Object put(final byte[] sectionFormatID, - final PropertyIDMap propertyIDMap) { + final PropertyIDMap propertyIDMap) + { return super.put(new String(sectionFormatID), propertyIDMap); } /** - *@param key Description of the Parameter - *@param value Description of the Parameter - *@return Description of the Return Value - *@deprecated Use {@link #put(byte[], PropertyIDMap)} instead! + * @deprecated Use {@link #put(byte[], PropertyIDMap)} instead! */ - public Object put(final Object key, final Object value) { + public Object put(final Object key, final Object value) + { return put((byte[]) key, (PropertyIDMap) value); } diff --git a/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java b/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java index 071540986..c66613c74 100644 --- a/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java +++ b/src/testcases/org/apache/poi/hpsf/basic/TestBasic.java @@ -183,7 +183,7 @@ public class TestBasic extends TestCase /** *

    Tests the {@link PropertySet} methods. The test file has two - * property set: the first one is a {@link SummaryInformation}, + * property sets: the first one is a {@link SummaryInformation}, * the second one is a {@link DocumentSummaryInformation}.

    */ public void testPropertySetMethods() throws IOException, HPSFException @@ -214,11 +214,11 @@ public class TestBasic extends TestCase /** *

    Runs the test cases stand-alone.

    */ - public static void main(String[] args) + public static void main(String[] args) throws Throwable { System.setProperty("HPSF.testdata.path", "./src/testcases/org/apache/poi/hpsf/data"); - junit.textui.TestRunner.run(TestBasic.class); + junit.textui.TestRunner.run(TestBasic.class); } } diff --git a/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java b/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java new file mode 100644 index 000000000..db524272d --- /dev/null +++ b/src/testcases/org/apache/poi/hpsf/basic/TestUnicode.java @@ -0,0 +1,142 @@ +/* ==================================================================== + * The Apache Software License, Version 1.1 + * + * Copyright (c) 2002 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Apache" and "Apache Software Foundation" and + * "Apache POI" must not be used to endorse or promote products + * derived from this software without prior written permission. For + * written permission, please contact apache@apache.org. + * + * 5. 
Products derived from this software may not be called "Apache", + * "Apache POI", nor may "Apache" appear in their name, without + * prior written permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + */ + +package org.apache.poi.hpsf.basic; + +import java.io.*; +import java.util.*; +import junit.framework.*; +import org.apache.poi.hpsf.*; + + + +/** + *

    Tests whether Unicode strings can be read from a + * DocumentSummaryInformation.

    + * + * @author Rainer Klute (klute@rainer-klute.de) + * @since 2002-12-09 + * @version $Id$ + */ +public class TestUnicode extends TestCase +{ + + final static String POI_FS = "TestUnicode.xls"; + final static String[] POI_FILES = new String[] + { + "\005DocumentSummaryInformation", + }; + File data; + POIFile[] poiFiles; + + + + public TestUnicode(String name) + { + super(name); + } + + + + /** + *

    Reads the test file from the "data" directory.

    + */ + public void setUp() throws FileNotFoundException, IOException + { + final File dataDir = + new File(System.getProperty("HPSF.testdata.path")); + data = new File(dataDir, POI_FS); + } + + + + /** + *

    Tests the {@link PropertySet} methods. The test file has two + * property sets: the first one is a {@link SummaryInformation}, + * the second one is a {@link DocumentSummaryInformation}.

    + */ + public void testPropertySetMethods() throws IOException, HPSFException + { + POIFile poiFile = Util.readPOIFiles(data, POI_FILES)[0]; + byte[] b = poiFile.getBytes(); + PropertySet ps = + PropertySetFactory.create(new ByteArrayInputStream(b)); + Assert.assertTrue(ps.isDocumentSummaryInformation()); + Assert.assertEquals(ps.getSectionCount(), 2); + Section s = (Section) ps.getSections().get(1); + Assert.assertEquals(s.getProperty(1), + new Integer(1200)); + Assert.assertEquals(s.getProperty(2), + new Long(4198897018l)); + Assert.assertEquals(s.getProperty(3), + "MCon_Info zu Office bei Schreiner"); + Assert.assertEquals(s.getProperty(4), + "petrovitsch@schreiner-online.de"); + Assert.assertEquals(s.getProperty(5), + "Petrovitsch, Wilhelm"); + } + + + + /** + *

    Runs the test cases stand-alone.

    + */ + public static void main(String[] args) + { + System.setProperty("HPSF.testdata.path", + "./src/testcases/org/apache/poi/hpsf/data"); + junit.textui.TestRunner.run(TestUnicode.class); + } + +} diff --git a/src/testcases/org/apache/poi/hpsf/basic/Util.java b/src/testcases/org/apache/poi/hpsf/basic/Util.java index b8ae824ae..6b541d816 100644 --- a/src/testcases/org/apache/poi/hpsf/basic/Util.java +++ b/src/testcases/org/apache/poi/hpsf/basic/Util.java @@ -114,10 +114,33 @@ public class Util */ public static POIFile[] readPOIFiles(final File poiFs) throws FileNotFoundException, IOException + { + return readPOIFiles(poiFs, null); + } + + + + /** + *

    Reads a set of files from a POI filesystem and returns them + * as an array of {@link POIFile} instances. This method loads all + * files into memory and thus does not cope well with large POI + * filesystems.

    + * + * @param file The name of the POI filesystem as seen by the + * operating system. (This is the "filename".) + * + * @param poiFiles The names of the POI files to be read. + * + * @return The POI files. The elements are ordered in the same way + * as the files in the POI filesystem. + */ + public static POIFile[] readPOIFiles(final File poiFs, + final String[] poiFiles) + throws FileNotFoundException, IOException { final List files = new ArrayList(); POIFSReader r = new POIFSReader(); - r.registerListener(new POIFSReaderListener() + POIFSReaderListener pfl = new POIFSReaderListener() { public void processPOIFSReaderEvent(POIFSReaderEvent event) { @@ -140,7 +163,17 @@ public class Util throw new RuntimeException(ex.getMessage()); } } - }); + }; + if (poiFiles == null) + /* Register the listener for all POI files. */ + r.registerListener(pfl); + else + /* Register the listener for the specified POI files + * only. */ + for (int i = 0; i < poiFiles.length; i++) + r.registerListener(pfl, poiFiles[i]); + + /* Read the POI filesystem. 
*/ r.read(new FileInputStream(poiFs)); POIFile[] result = new POIFile[files.size()]; for (int i = 0; i < result.length; i++) diff --git a/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls b/src/testcases/org/apache/poi/hpsf/data/TestUnicode.xls new file mode 100644 index 0000000000000000000000000000000000000000..30c8f3c1c1a1fe6a3012242ce6dc2a69da8d6107 GIT binary patch literal 7680 zcmeHMU2IfU5T3j2wp%G>TYglO^`^xLZI`t0APTgLmPQgQ4b(tl&9b|_wJY0}++E}+ zMi+$_gNX(cH6cL~UyO;O#-9ftXf&oTL}MTZLjo9HOf;g=1W4)neRKCNw+l!C5IO9Wp~E^EFcC;Txz2-F8UiHq^e1J7twTx^mB=70|#q-Rz61+{=m^71se9 z@~OV_y>i-Rfs8{w|NJWqZIpnqMr=t*T+;IIJq`QA62PdWskN!8t>0|y>S8I)pAckY-wAGt|BAsUxGiibXF#9AVmjbjhD1GsSl}=mj9TC|DSPy^WtyiFmYqos*{Q*-HDdNznQR`bbx_99do857ayywnG$r%T^f~cN zd#LpjEdS>LvoufE0uRV+t=DU|3_<6_JHLM0lj!Y?DZg3yEs9@IP7X3|xflLIZk0WF zGUjJr^8hcZ^j^h>rBNje^lxCyEH5owHF5d5)159Ac^tABgRf4X(^ALYpbx8=)^$Sy z=4`pFqZQcA@*Q$0dl2dHHR$+6qS|)QLdeZO%13_?R&Z&78BflTGN|maURdJGOeoZ>WwNfMWl& z&SFLa|UTVMfAX8t}-#$>#RPe?1mMltjAh32(3xzF=4NTx(VBpu+P@k2(S%3)MWy;q0Lo% zRK1afJ(P_WsP~R1X-(pKu>olNgF^|YdoO?V>>ofhygBr9b)dteefrhtBR zQ%7N-d1l|C-!85n3cdSkP+IQ$=wuss#{pay_`DaO-)DiB25kW309-5H0Pvf78^G8< z0^qm%8Ne-UpMw%64(1y7PbNllIUaJ2^rVvZXl8UUYp&gyw9<%h^t*chhaZ16X0R@k zZ;rF>WF1F8*Q&l$)=EEUemT@??ucj1PKy4WQqF%Im+!=}69<>x6*}-b{MUN%y)N(q zCA>B`@+a^)cGss|TQ>u42hgX?&CF38_p<={axY*$U_XF9ehomMe*y6Nlxz1!+yq;x R5#-^%IQ{W+T}?6H{|O4irPKfb literal 0 HcmV?d00001