HPSF: Reading Unicode properties is supported now.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352933 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Rainer Klute 2002-12-10 06:15:20 +00:00
parent 7f760c3244
commit d9ef0d47cf
11 changed files with 825 additions and 399 deletions

View File

@ -690,6 +690,19 @@
<th><p>Property ID string</p></th>
<th><p>VT type</p></th>
</tr>
<tr>
<td><p>0</p></td>
<td><p>Dictionary</p></td>
<td><p>PID_DICTIONARY</p></td>
<td><p>[Special format]</p></td>
</tr>
<tr>
<td><p>1</p></td>
<td><p>Code page</p></td>
<td><p>PID_CODEPAGE</p></td>
<td><p>VT_I2</p></td>
</tr>
<tr>
<td><p>2</p></td>
<td><p>Category</p></td>
@ -1128,20 +1141,25 @@
</li>
<li>
<p>What is a <code>FILETIME</code>? The answer can be found for example under
<link href="http://www.vbapi.com/ref/f/filetime.html">http://www.vbapi.com/ref/f/filetime.html</link>
<p>What is a <code>FILETIME</code>? The answer can be found for example
under <link href="http://www.vbapi.com/ref/f/filetime.html">http://www.vbapi.com/ref/f/filetime.html</link>
or
<link href="http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html">http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html</link>. In
short:
<em>The FILETIME structure holds a date and time associated with a file.
The structure identifies a 64-bit integer specifying the number of
100-nanosecond intervals which have passed since January 1, 1601. This
64-bit value is split into the two dwords stored in the
structure.</em></p>
</li>
<link href="http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html">http://www.cs.rpi.edu/courses/fall01/os/FILETIME.html</link>.
In short: <em>The FILETIME structure holds a date and time associated
with a file. The structure identifies a 64-bit integer specifying the
number of 100-nanosecond intervals which have passed since January 1,
1601. This 64-bit value is split into the two dwords stored in the
structure.</em></p>
</li>
<li>
<p>This documentation originates from the <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">HPSF description</link> available at <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html</link>.</p>
<li>
<p>Information about the code page property in the
DocumentSummaryInformation stream is available at <link
href="http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp">http://msdn.microsoft.com/library/default.asp?url=/library/en-us/stg/stg/property_id_1.asp</link>.</p>
</li>
<li>
<p>This documentation originates from the <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">HPSF description</link> available at <link href="http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html">http://www.rainer-klute.de/~klute/Software/poibrowser/doc/HPSF-Description.html</link>.</p>
</li>
</ol>
</section>

View File

@ -59,257 +59,236 @@ import java.util.*;
import org.apache.poi.hpsf.wellknown.*;
/**
* <p>
* <p>Convenience class representing a DocumentSummary Information stream in a
* Microsoft Office document.</p>
*
* Convenience class representing a DocumentSummary Information stream in a
* Microsoft Office document.</p>
*
*@author Rainer Klute (klute@rainer-klute.de)
*@author Drew Varner (Drew.Varner closeTo sc.edu)
*@created May 10, 2002
*@see SummaryInformation
*@version $Id: DocumentSummaryInformation.java,v 1.6 2002/05/03 07:29:09
* klute Exp $
*@since 2002-02-09
* @author Rainer Klute (klute@rainer-klute.de)
* @author Drew Varner (Drew.Varner closeTo sc.edu)
* @see SummaryInformation
* @version $Id$
* @since 2002-02-09
*/
public class DocumentSummaryInformation extends SpecialPropertySet {
public class DocumentSummaryInformation extends SpecialPropertySet
{
/**
* <p>
* <p>Creates a {@link DocumentSummaryInformation} from a given
* {@link PropertySet}.</p>
*
* Creates a {@link DocumentSummaryInformation} from a given {@link
* PropertySet}.</p>
*
*@param ps A property set which
* should be created from a document summary information stream.
*@exception UnexpectedPropertySetTypeException Description of the
* Exception
*@throws UnexpectedPropertySetTypeException if <var>ps</var> does not
* contain a document summary information stream.
* @param ps A property set which should be created from a
* document summary information stream.
* @throws UnexpectedPropertySetTypeException if <var>ps</var>
* does not contain a document summary information stream.
*/
public DocumentSummaryInformation(final PropertySet ps)
throws UnexpectedPropertySetTypeException {
throws UnexpectedPropertySetTypeException
{
super(ps);
if (!isDocumentSummaryInformation()) {
if (!isDocumentSummaryInformation())
throw new UnexpectedPropertySetTypeException
("Not a " + getClass().getName());
}
("Not a " + getClass().getName());
}
/**
* <p>
* <p>Returns the stream's category (or <code>null</code>).</p>
*
* Returns the stream's category (or <code>null</code>).</p>
*
*@return The category value
* @return The category value
*/
public String getCategory() {
public String getCategory()
{
return (String) getProperty(PropertyIDMap.PID_CATEGORY);
}
/**
* <p>
* <p>Returns the stream's presentation format (or
* <code>null</code>).</p>
*
* Returns the stream's presentation format (or <code>null</code>).</p>
*
*@return The presentationFormat value
* @return The presentationFormat value
*/
public String getPresentationFormat() {
public String getPresentationFormat()
{
return (String) getProperty(PropertyIDMap.PID_PRESFORMAT);
}
/**
* <p>
* <p>Returns the stream's byte count or 0 if the {@link
* DocumentSummaryInformation} does not contain a byte count.</p>
*
* Returns the stream's byte count or 0 if the {@link
* DocumentSummaryInformation} does not contain a byte count.</p>
*
*@return The byteCount value
* @return The byteCount value
*/
public int getByteCount() {
public int getByteCount()
{
return getPropertyIntValue(PropertyIDMap.PID_BYTECOUNT);
}
/**
* <p>
* <p>Returns the stream's line count or 0 if the {@link
* DocumentSummaryInformation} does not contain a line count.</p>
*
* Returns the stream's line count or 0 if the {@link
* DocumentSummaryInformation} does not contain a line count.</p>
*
*@return The lineCount value
* @return The lineCount value
*/
public int getLineCount() {
public int getLineCount()
{
return getPropertyIntValue(PropertyIDMap.PID_LINECOUNT);
}
/**
* <p>
* <p>Returns the stream's par count or 0 if the {@link
* DocumentSummaryInformation} does not contain a par count.</p>
*
* Returns the stream's par count or 0 if the {@link
* DocumentSummaryInformation} does not contain a par count.</p>
*
*@return The parCount value
* @return The parCount value
*/
public int getParCount() {
public int getParCount()
{
return getPropertyIntValue(PropertyIDMap.PID_PARCOUNT);
}
/**
* <p>
* <p>Returns the stream's slide count or 0 if the {@link
* DocumentSummaryInformation} does not contain a slide count.</p>
*
* Returns the stream's slide count or 0 if the {@link
* DocumentSummaryInformation} does not contain a slide count.</p>
*
*@return The slideCount value
* @return The slideCount value
*/
public int getSlideCount() {
public int getSlideCount()
{
return getPropertyIntValue(PropertyIDMap.PID_SLIDECOUNT);
}
/**
* <p>
* <p>Returns the stream's note count or 0 if the {@link
* DocumentSummaryInformation} does not contain a note count.</p>
*
* Returns the stream's note count or 0 if the {@link
* DocumentSummaryInformation} does not contain a note count.</p>
*
*@return The noteCount value
* @return The noteCount value
*/
public int getNoteCount() {
public int getNoteCount()
{
return getPropertyIntValue(PropertyIDMap.PID_NOTECOUNT);
}
/**
* <p>
* <p>Returns the stream's hidden count or 0 if the {@link
* DocumentSummaryInformation} does not contain a hidden
* count.</p>
*
* Returns the stream's hidden count or 0 if the {@link
* DocumentSummaryInformation} does not contain a hidden count.</p>
*
*@return The hiddenCount value
* @return The hiddenCount value
*/
public int getHiddenCount() {
public int getHiddenCount()
{
return getPropertyIntValue(PropertyIDMap.PID_HIDDENCOUNT);
}
/**
* <p>
* <p>Returns the stream's mmclip count or 0 if the {@link
* DocumentSummaryInformation} does not contain a mmclip
* count.</p>
*
* Returns the stream's mmclip count or 0 if the {@link
* DocumentSummaryInformation} does not contain a mmclip count.</p>
*
*@return The mMClipCount value
* @return The mMClipCount value
*/
public int getMMClipCount() {
public int getMMClipCount()
{
return getPropertyIntValue(PropertyIDMap.PID_MMCLIPCOUNT);
}
/**
* <p>
* <p>Returns <code>true</code> when scaling of the thumbnail is
* desired, <code>false</code> if cropping is desired.</p>
*
* Returns <code>true</code> when scaling of the thumbnail is desired,
* <code>false</code> if cropping is desired.</p>
*
*@return The scale value
* @return The scale value
*/
public boolean getScale() {
public boolean getScale()
{
return getPropertyBooleanValue(PropertyIDMap.PID_SCALE);
}
/**
* <p>
* <p>Returns the stream's heading pair (or <code>null</code>)
* <strong>when this method is implemented. Please note that the
* return type is likely to change!</strong>
*
* Returns the stream's heading pair (or <code>null</code>) <strong>when
* this method is implemented. Please note that the return type is likely
* to change!</strong>
*
*@return The headingPair value
* @return The headingPair value
*/
public byte[] getHeadingPair() {
if (true) {
public byte[] getHeadingPair()
{
if (true)
throw new UnsupportedOperationException("FIXME");
}
return (byte[]) getProperty(PropertyIDMap.PID_HEADINGPAIR);
}
/**
* <p>
* <p>Returns the stream's doc parts (or <code>null</code>)
* <strong>when this method is implemented. Please note that the
* return type is likely to change!</strong>
*
* Returns the stream's doc parts (or <code>null</code>) <strong>when this
* method is implemented. Please note that the return type is likely to
* change!</strong>
*
*@return The docparts value
* @return The docparts value
*/
public byte[] getDocparts() {
if (true) {
public byte[] getDocparts()
{
if (true)
throw new UnsupportedOperationException("FIXME");
}
return (byte[]) getProperty(PropertyIDMap.PID_DOCPARTS);
}
/**
* <p>
* <p>Returns the stream's manager (or <code>null</code>).</p>
*
* Returns the stream's manager (or <code>null</code>).</p>
*
*@return The manager value
* @return The manager value
*/
public String getManager() {
public String getManager()
{
return (String) getProperty(PropertyIDMap.PID_MANAGER);
}
/**
* <p>
* <p>Returns the stream's company (or <code>null</code>).</p>
*
* Returns the stream's company (or <code>null</code>).</p>
*
*@return The company value
* @return The company value
*/
public String getCompany() {
public String getCompany()
{
return (String) getProperty(PropertyIDMap.PID_COMPANY);
}
/**
* <p>
* <p>Returns <code>true</code> if the custom links are hampered
* by excessive noise, for all applications.</p> <p>
*
* Returns <code>true</code> if the custom links are hampered by excessive
* noise, for all applications.</p> <p>
* <strong>FIXME:</strong> Explain this some more! I (Rainer)
* don't understand it.</p>
*
* <strong>FIXME:</strong> Explain this some more! I (Rainer) don't
* understand it.</p>
*
*@return The linksDirty value
* @return The linksDirty value
*/
public boolean getLinksDirty() {
public boolean getLinksDirty()
{
return getPropertyBooleanValue(PropertyIDMap.PID_LINKSDIRTY);
}

View File

@ -81,10 +81,9 @@ import org.apache.poi.util.LittleEndian;
* value, {@link Variant#VT_FILETIME} some date and time (of a
* file).</p>
*
* <p><strong>FIXME:</strong> Reading of other types than {@link
* Variant#VT_I4}, {@link Variant#VT_FILETIME}, {@link
* Variant#VT_LPSTR}, {@link Variant#VT_CF}, {@link Variant#VT_BOOL},
* and reading the dictionary property is not yet implemented.</p>
* <p><strong>FIXME:</strong> Reading is not implemented for all
* {@link Variant} types yet. Feel free to submit error reports or
* patches for the types you need.</p>
*
* @author Rainer Klute (klute@rainer-klute.de)
* @author Drew Varner (Drew.Varner InAndAround sc.edu)
@ -96,6 +95,9 @@ import org.apache.poi.util.LittleEndian;
public class Property
{
/* Codepage 1200 denotes Unicode. */
private static int CP_UNICODE = 1200;
private int id;
@ -150,121 +152,37 @@ public class Property
* @param offset The property's type/value pair's offset in the
* section.
* @param length The property's type/value pair's length in bytes.
* @param codepage The section's and thus the property's
* codepage. It is needed only when reading string values.
*/
public Property(final int id, final byte[] src, final long offset,
int length)
int length, int codepage)
{
this.id = id;
/*
* ID 0 is a special case since it specifies a dictionary of
* property IDs and property names.
* ID 0 is a special case since it specifies a dictionary of
* property IDs and property names.
*/
if (id == 0)
{
value = readDictionary(src, offset, length);
value = readDictionary(src, offset, length, codepage);
return;
}
/*
* FIXME: Support this!
*/
// /* ID 1 is another special case: It denotes the code page of
// * byte strings in this section. */
// if (id == 1)
// {
// value = readCodepage(src, offset);
// return;
// }
int o = (int) offset;
type = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
/*
* FIXME: Support reading more types!
*/
switch ((int)type) {
case Variant.VT_I4:
{
/*
* Read a word. In Java it is represented as an
* Integer object.
*/
value = new Long(LittleEndian.getUInt(src, o));
break;
}
case Variant.VT_FILETIME:
{
/*
* Read a FILETIME object. In Java it is represented
* as a Date.
*/
final long low = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
final long high = LittleEndian.getUInt(src, o);
value = Util.filetimeToDate((int)high, (int)low);
break;
}
case Variant.VT_LPSTR:
{
/*
* Read a byte string. In Java it is represented as a
* String. The null bytes at the end of the byte
* strings must be stripped.
*/
final int first = o + LittleEndian.INT_SIZE;
long last = first + LittleEndian.getUInt(src, o) - 1;
o += LittleEndian.INT_SIZE;
while (src[(int)last] == 0 && first <= last) {
last--;
}
value = new String(src, (int)first, (int)(last - first + 1));
break;
}
case Variant.VT_CF:
{
/*
* The first four bytes in src, from rc[offset] to
* src[offset + 3] contain the DWord for VT_CF, so
* skip it, we don't need it.
*/
/*
* Truncate the length of the return array by a DWord
* length (4 bytes).
*/
length = length - LittleEndian.INT_SIZE;
final byte[] v = new byte[length];
for (int i = 0; i < length; i++)
v[i] = src[(int)(o + i)];
value = v;
break;
}
case Variant.VT_BOOL:
{
/*
* The first four bytes in src, from src[offset] to
* src[offset + 3] contain the DWord for VT_BOOL, so
* skip it, we don't need it.
*/
final int first = o + LittleEndian.INT_SIZE;
long bool = LittleEndian.getUInt(src, o);
if (bool != 0)
value = new Boolean(true);
else
value = new Boolean(false);
break;
}
default:
{
final byte[] v = new byte[length];
for (int i = 0; i < length; i++)
v[i] = src[(int)(offset + i)];
value = v;
break;
}
}
try
{
value = TypeReader.read(src, o, length, (int) type);
}
catch (Throwable t)
{
t.printStackTrace();
value = "*** null ***";
}
}
@ -277,64 +195,67 @@ public class Property
* @param offset At this offset within <var>src</var> the
* dictionary starts.
* @param length The dictionary contains at most this many bytes.
* @param codepage The codepage of the string values.
* @return The dictionary
*/
protected Map readDictionary(final byte[] src, final long offset,
final int length)
final int length, final int codepage)
{
/*
* FIXME: Check the length!
*/
int o = (int)offset;
/* Check whether "offset" points into the "src" array. */
if (offset < 0 || offset > src.length)
throw new HPSFRuntimeException
("Illegal offset " + offset + " while HPSF stream contains " +
length + " bytes.");
int o = (int) offset;
/*
* Read the number of dictionary entries.
* Read the number of dictionary entries.
*/
final long nrEntries = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
final Map m = new HashMap((int)nrEntries, (float) 1.0);
final Map m = new HashMap((int) nrEntries, (float) 1.0);
for (int i = 0; i < nrEntries; i++)
{
/*
* The key
*/
/* The key. */
final Long id = new Long(LittleEndian.getUInt(src, o));
o += LittleEndian.INT_SIZE;
/*
* The value (a string)
*/
final long sLength = LittleEndian.getUInt(src, o);
/* The value (a string). The length is either the
* number of characters if the character set is Unicode or
* else the number of bytes. The length includes
* terminating 0x00 bytes which we have to strip off to
* create a Java string. */
long sLength = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
/*
* Strip trailing 0x00 bytes.
*/
long l = sLength;
while (src[(int)(o + l - 1)] == 0x00)
l--;
final String s = new String(src, o, (int)l);
o += sLength;
m.put(id, s);
/* Read the bytes or characters depending on whether the
* character set is Unicode or not. */
StringBuffer b = new StringBuffer((int) sLength);
for (int j = 0; j < sLength; j++)
if (codepage == CP_UNICODE)
{
final int i1 = o + (j * 2);
final int i2 = i1 + 1;
b.append((char) ((src[i2] << 8) + src[i1]));
}
else
b.append((char) src[o + j]);
/* Strip 0x00 characters from the end of the string: */
while (b.charAt(b.length() - 1) == 0x00)
b.setLength(b.length() - 1);
if (codepage == CP_UNICODE)
{
if (sLength % 2 == 1)
sLength++;
o += (sLength + sLength);
}
else
o += sLength;
m.put(id, b.toString());
}
return m;
}
/**
* <p>Reads a code page.</p>
*
* @param src The byte array containing the bytes making out the
* code page.
* @param offset At this offset within <var>src</var> the code
* page starts.
* @return The code page.
*/
protected int readCodePage(final byte[] src, final long offset)
{
throw new UnsupportedOperationException("FIXME");
}
}

View File

@ -161,66 +161,117 @@ public class Section
public Section(final byte[] src, int offset)
{
/*
* Read the format ID.
* Read the format ID.
*/
formatID = new ClassID(src, offset);
offset += ClassID.LENGTH;
/*
* Read the offset from the stream's start and positions to
* the section header.
* Read the offset from the stream's start and positions to
* the section header.
*/
this.offset = LittleEndian.getUInt(src, offset);
offset = (int)this.offset;
/*
* Read the section length.
* Read the section length.
*/
size = (int)LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
/*
* Read the number of properties.
* Read the number of properties.
*/
propertyCount = (int)LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
/*
* Read the properties. The offset is positioned at the first
* entry of the property list.
* Read the properties. The offset is positioned at the first
* entry of the property list. The problem is that we have to
* read the property with ID 1 before we read other
* properties, at least before other properties containing
* strings. The reason is that property 1 specifies the
* codepage. If it is 1200, all strings are in Unicode. In
* other words: Before we can read any strings we have to know
* whether they are in Unicode or not. Unfortunately property
* 1 is not guaranteed to be the first in a section.
*
* The algorithm below reads the properties in two passes: The
* first one looks for property ID 1 and extracts the codepage
* number. The second pass reads the other properties.
*/
properties = new Property[propertyCount];
for (int i = 0; i < properties.length; i++) {
final int id = (int)LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
Property propertyOne;
/*
* Offset from the section.
*/
final int sOffset = (int)LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
/* Pass 1: Look for the codepage. */
int codepage = -1;
int pass1Offset = offset;
for (int i = 0; i < properties.length; i++)
{
/* Read the property ID. */
final int id = (int) LittleEndian.getUInt(src, pass1Offset);
pass1Offset += LittleEndian.INT_SIZE;
/*
* Calculate the length of the property.
*/
/* Offset from the section's start. */
final int sOffset = (int) LittleEndian.getUInt(src, pass1Offset);
pass1Offset += LittleEndian.INT_SIZE;
/* Calculate the length of the property. */
int length;
if (i == properties.length - 1) {
length = (int)(src.length - this.offset - sOffset);
} else {
if (i == properties.length - 1)
length = (int) (src.length - this.offset - sOffset);
else
length = (int)
LittleEndian.getUInt(src, pass1Offset +
LittleEndian.INT_SIZE) - sOffset;
if (id == PropertyIDMap.PID_CODEPAGE)
{
/* Read the codepage if the property ID is 1. */
/* Read the property's value type. It must be
* VT_I2. */
int o = (int) (this.offset + sOffset);
final long type = LittleEndian.getUInt(src, o);
o += LittleEndian.INT_SIZE;
if (type != Variant.VT_I2)
throw new HPSFRuntimeException
("Value type of property ID 1 is not VT_I2 but " +
type + ".");
/* Read the codepage number. */
codepage = LittleEndian.getUShort(src, o);
}
}
/* Pass 2: Read all properties, including 1. */
for (int i = 0; i < properties.length; i++)
{
/* Read the property ID. */
final int id = (int) LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
/* Offset from the section. */
final int sOffset = (int) LittleEndian.getUInt(src, offset);
offset += LittleEndian.INT_SIZE;
/* Calculate the length of the property. */
int length;
if (i == properties.length - 1)
length = (int) (src.length - this.offset - sOffset);
else
length = (int)
LittleEndian.getUInt(src, offset + LittleEndian.INT_SIZE) -
sOffset;
}
/*
* Create it.
*/
properties[i] =
new Property(id, src, this.offset + sOffset, length);
/* Create it. */
properties[i] = new Property(id, src, this.offset + sOffset,
length, codepage);
}
/*
* Extract the dictionary (if available).
* Extract the dictionary (if available).
*/
dictionary = (Map) getProperty(0);
}
@ -237,7 +288,7 @@ public class Section
*
* @return The property's value
*/
protected Object getProperty(final int id)
public Object getProperty(final int id)
{
wasNull = false;
for (int i = 0; i < properties.length; i++)

View File

@ -0,0 +1,208 @@
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2000 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" must
* not be used to endorse or promote products derived from this
* software without prior written permission. For written
* permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* nor may "Apache" appear in their name, without prior written
* permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
* Portions of this software are based upon public domain software
* originally written at the National Center for Supercomputing Applications,
* University of Illinois, Urbana-Champaign.
*
* Portions of this software are based upon public domain software
* originally written at the National Center for Supercomputing Applications,
* University of Illinois, Urbana-Champaign.
*/
package org.apache.poi.hpsf;
import java.util.*;
import org.apache.poi.util.LittleEndian;
/**
* <p>Reader for specific data types.</p>
*
* @author Rainer Klute (klute@rainer-klute.de)
* @see Property
* @see Variant
* @version $Id$
* @since 2002-12-09
*/
public class TypeReader
{

    /**
     * <p>Reads a variant data type from a byte array.</p>
     *
     * <p>Variant types that have no dedicated decoder here are
     * returned as a copy of the raw bytes following the type
     * field.</p>
     *
     * @param src The byte array
     * @param offset The offset in the byte array where the variant's
     * value starts, i.e. the position directly behind the variant
     * type field (the value is read from <code>src[offset]</code>
     * onwards)
     * @param length The length of the variant including the variant
     * type field
     * @param type The variant type, one of the <code>VT_*</code>
     * constants of {@link Variant}
     * @return A Java object that corresponds best to the variant
     * field. For example, a VT_I2 is returned as an {@link Integer},
     * a VT_I4 as a {@link Long}, a VT_FILETIME as a date, a VT_LPSTR
     * or VT_LPWSTR as a {@link String}, a VT_BOOL as a
     * {@link Boolean} and everything else as a <code>byte[]</code>.
     *
     * @see Variant
     */
    public static Object read(final byte[] src, int offset, int length,
                              final int type)
    {
        /*
         * FIXME: Support reading more types and clean up this code!
         */
        Object value;

        /* "length" includes the type field; what remains is the
         * number of bytes available for the value itself. */
        length = length - LittleEndian.INT_SIZE;

        switch (type)
        {
            case Variant.VT_I2:
            {
                /*
                 * Read a short. In Java it is represented as an
                 * Integer object.
                 */
                value = new Integer(LittleEndian.getUShort(src, offset));
                break;
            }
            case Variant.VT_I4:
            {
                /*
                 * Read a word. In Java it is represented as a
                 * Long object.
                 */
                value = new Long(LittleEndian.getUInt(src, offset));
                break;
            }
            case Variant.VT_FILETIME:
            {
                /*
                 * Read a FILETIME object. In Java it is represented
                 * as a Date object. The low dword is stored first.
                 */
                final long low = LittleEndian.getUInt(src, offset);
                offset += LittleEndian.INT_SIZE;
                final long high = LittleEndian.getUInt(src, offset);
                value = Util.filetimeToDate((int) high, (int) low);
                break;
            }
            case Variant.VT_LPSTR:
            {
                /*
                 * Read a byte string. In Java it is represented as a
                 * String object. The value starts with a dword
                 * holding the number of bytes, followed by the bytes
                 * themselves. The 0x00 bytes at the end must be
                 * stripped.
                 */
                final int first = offset + LittleEndian.INT_SIZE;
                long last = first + LittleEndian.getUInt(src, offset) - 1;

                /* Test the range before touching the array so that an
                 * empty string never causes a read in front of
                 * "first". */
                while (first <= last && src[(int) last] == 0)
                {
                    last--;
                }

                /* NOTE: The bytes are decoded with the platform's
                 * default charset because the section's codepage is
                 * not handed to this method. */
                value = new String(src, first, (int) (last - first + 1));
                break;
            }
            case Variant.VT_LPWSTR:
            {
                /*
                 * Read a Unicode string. In Java it is represented as
                 * a String object. The value starts with a dword
                 * holding the number of 16-bit characters, followed
                 * by the characters in little-endian byte order. The
                 * 0x00 characters at the end must be stripped.
                 */
                final int nrChars = (int) LittleEndian.getUInt(src, offset);
                offset += LittleEndian.INT_SIZE;

                final StringBuffer b = new StringBuffer(Math.max(nrChars, 0));
                for (int i = 0; i < nrChars; i++)
                {
                    final int lowIndex = offset + (i * 2);
                    final int highIndex = lowIndex + 1;

                    /* Java bytes are signed. Mask both bytes to avoid
                     * sign extension corrupting characters whose low
                     * or high byte is 0x80 or above. */
                    final int low = src[lowIndex] & 0xFF;
                    final int high = src[highIndex] & 0xFF;
                    b.append((char) ((high << 8) | low));
                }

                /* Strip 0x00 characters from the end of the string.
                 * The length check keeps an empty or all-zero string
                 * from running off the front of the buffer. */
                while (b.length() > 0 && b.charAt(b.length() - 1) == 0x00)
                {
                    b.setLength(b.length() - 1);
                }
                value = b.toString();
                break;
            }
            case Variant.VT_BOOL:
            {
                /*
                 * Read a boolean. It is stored as a dword; any value
                 * other than 0 means "true".
                 */
                final long bool = LittleEndian.getUInt(src, offset);
                value = (bool != 0) ? Boolean.TRUE : Boolean.FALSE;
                break;
            }
            case Variant.VT_CF:
            default:
            {
                /*
                 * Clipboard data and all types without a dedicated
                 * decoder: hand back a copy of the raw value bytes.
                 */
                final byte[] v = new byte[length];
                System.arraycopy(src, offset, v, 0, length);
                value = v;
                break;
            }
        }
        return value;
    }

}

View File

@ -100,25 +100,110 @@ public class PropertyIDMap extends HashMap
public final static int PID_APPNAME = 18;
public final static int PID_SECURITY = 19;
/*
* The following definitions are for the Document Summary Information.
*/
/**
* <p>The entry is a dictionary.</p>
*/
public final static int PID_DICTIONARY = 0;
/**
* <p>The entry denotes a code page.</p>
*/
public final static int PID_CODEPAGE = 1;
/**
* <p>The entry is a string denoting the category the file belongs
* to, e.g. review, memo, etc. This is useful to find documents of
* same type.</p>
*/
public final static int PID_CATEGORY = 2;
/**
* <p>Target format for power point presentation, e.g. 35mm,
* printer, video etc.</p>
*/
public final static int PID_PRESFORMAT = 3;
/**
* <p>Number of bytes.</p>
*/
public final static int PID_BYTECOUNT = 4;
/**
* <p>Number of lines.</p>
*/
public final static int PID_LINECOUNT = 5;
/**
* <p>Number of paragraphs.</p>
*/
public final static int PID_PARCOUNT = 6;
/**
* <p>Number of slides in a power point presentation.</p>
*/
public final static int PID_SLIDECOUNT = 7;
/**
* <p>Number of slides with notes.</p>
*/
public final static int PID_NOTECOUNT = 8;
/**
* <p>Number of hidden slides.</p>
*/
public final static int PID_HIDDENCOUNT = 9;
/**
* <p>Number of multimedia clips, e.g. sound or video.</p>
*/
public final static int PID_MMCLIPCOUNT = 10;
/**
* <p>This entry is set to -1 when scaling of the thumbnail is
* desired. Otherwise the thumbnail should be cropped.</p>
*/
public final static int PID_SCALE = 11;
/**
* <p>This entry denotes an internally used property. It is a
* vector of variants consisting of pairs of a string (VT_LPSTR)
* and a number (VT_I4). The string is a heading name, and the
* number tells how many document parts are under that
* heading.</p>
*/
public final static int PID_HEADINGPAIR = 12;
/**
* <p>This entry contains the names of document parts (word: names
* of the documents in the master document, excel: sheet names,
* power point: slide titles, binder: document names).</p>
*/
public final static int PID_DOCPARTS = 13;
/**
* <p>This entry contains the name of the project manager.</p>
*/
public final static int PID_MANAGER = 14;
/**
* <p>This entry contains the company name.</p>
*/
public final static int PID_COMPANY = 15;
/**
* <p>If this entry is -1 the links are dirty and should be
* re-evaluated.</p>
*/
public final static int PID_LINKSDIRTY = 16;
/**
* <p>Contains the summary information property ID values and
* associated strings. See the overall HPSF documentation for
@ -184,7 +269,7 @@ public class PropertyIDMap extends HashMap
{
if (summaryInformationProperties == null)
{
PropertyIDMap m = new PropertyIDMap(17, (float) 1.0);
PropertyIDMap m = new PropertyIDMap(18, (float) 1.0);
m.put(PID_TITLE, "PID_TITLE");
m.put(PID_SUBJECT, "PID_SUBJECT");
m.put(PID_AUTHOR, "PID_AUTHOR");
@ -221,6 +306,8 @@ public class PropertyIDMap extends HashMap
if (documentSummaryInformationProperties == null)
{
PropertyIDMap m = new PropertyIDMap(17, (float) 1.0);
m.put(PID_DICTIONARY, "PID_DICTIONARY");
m.put(PID_CODEPAGE, "PID_CODEPAGE");
m.put(PID_CATEGORY, "PID_CATEGORY");
m.put(PID_PRESFORMAT, "PID_PRESFORMAT");
m.put(PID_BYTECOUNT, "PID_BYTECOUNT");

View File

@ -57,54 +57,51 @@ package org.apache.poi.hpsf.wellknown;
import java.util.*;
/**
* <p>
* <p>Maps section format IDs to {@link PropertyIDMap}s. It is
* initialized with two well-known section format IDs: those of the
* <tt>\005SummaryInformation</tt> stream and the
* <tt>\005DocumentSummaryInformation</tt> stream.</p>
*
* Maps section format IDs to {@link PropertyIDMap}s. It is initialized with
* two well-known section format IDs: those of the <tt>\005SummaryInformation
* </tt> stream and the <tt>\005DocumentSummaryInformation stream.</p> <p>
* <p>If you have a section format ID you can use it as a key to query
* this map. If you get a {@link PropertyIDMap} returned your section
* is well-known and you can query the {@link PropertyIDMap} for PID
* strings. If you get back <code>null</code> you are on your own.</p>
*
* If you have a section format ID you can use it as a key to query this map.
* If you get a {@link PropertyIDMap} returned your section is well-known and
* you can query the {@link PropertyIDMap} for PID strings. If you get back
* <code>null</code> you are on your own.</p> <p>
* <p>This {@link Map} expects the byte arrays of section format IDs
* as keys. A key maps to a {@link PropertyIDMap} describing the
* property IDs in sections with the specified section format ID.</p>
*
* This {@link Map} expects the byte arrays of section format IDs as keys. A
* key maps to a {@link PropertyIDMap} describing the property IDs in sections
* with the specified section format ID.</p>
*
*@author Rainer Klute (klute@rainer-klute.de)
*@created May 10, 2002
*@version $Id$
*@since 2002-02-09
* @author Rainer Klute (klute@rainer-klute.de)
* @version $Id$
* @since 2002-02-09
*/
public class SectionIDMap extends HashMap {
public class SectionIDMap extends HashMap
{
/**
* <p>
*
* The SummaryInformation's section's format ID.</p>
* <p>The SummaryInformation's section's format ID.</p>
*/
public final static byte[] SUMMARY_INFORMATION_ID =
new byte[]{(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0,
(byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68,
(byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00,
(byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9};
public final static byte[] SUMMARY_INFORMATION_ID = new byte[]
{
(byte) 0xF2, (byte) 0x9F, (byte) 0x85, (byte) 0xE0,
(byte) 0x4F, (byte) 0xF9, (byte) 0x10, (byte) 0x68,
(byte) 0xAB, (byte) 0x91, (byte) 0x08, (byte) 0x00,
(byte) 0x2B, (byte) 0x27, (byte) 0xB3, (byte) 0xD9
};
/**
* <p>
*
* The DocumentSummaryInformation's first section's format ID. The second
* section has a different format ID which is not well-known.</p>
* <p>The DocumentSummaryInformation's first section's format
* ID. The second section has a different format ID which is not
* well-known.</p>
*/
public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID =
new byte[]{(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02,
(byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B,
(byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00,
(byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE};
public final static byte[] DOCUMENT_SUMMARY_INFORMATION_ID = new byte[]
{
(byte) 0xD5, (byte) 0xCD, (byte) 0xD5, (byte) 0x02,
(byte) 0x2E, (byte) 0x9C, (byte) 0x10, (byte) 0x1B,
(byte) 0x93, (byte) 0x97, (byte) 0x08, (byte) 0x00,
(byte) 0x2B, (byte) 0x2C, (byte) 0xF9, (byte) 0xAE
};
/**
* <p>The string returned by {@link #getPIDString} if a property ID
* is not well-known.</p>
*/
public final static String UNDEFINED = "[undefined]";
private static SectionIDMap defaultMap;
@ -112,19 +109,20 @@ public class SectionIDMap extends HashMap {
/**
* <p>
* <p>Returns the singleton instance of the default {@link
* SectionIDMap}.</p>
*
* Returns the singleton instance of the default {@link SectionIDMap}.</p>
*
*@return The instance value
* @return The instance value
*/
public static SectionIDMap getInstance() {
if (defaultMap == null) {
public static SectionIDMap getInstance()
{
if (defaultMap == null)
{
final SectionIDMap m = new SectionIDMap();
m.put(SUMMARY_INFORMATION_ID,
PropertyIDMap.getSummaryInformationProperties());
PropertyIDMap.getSummaryInformationProperties());
m.put(DOCUMENT_SUMMARY_INFORMATION_ID,
PropertyIDMap.getDocumentSummaryInformationProperties());
PropertyIDMap.getDocumentSummaryInformationProperties());
defaultMap = m;
}
return defaultMap;
@ -133,31 +131,30 @@ public class SectionIDMap extends HashMap {
/**
* <p>
* <p>Returns the property ID string that is associated with a
* given property ID in a section format ID's namespace.</p>
*
* Returns the property ID string that is associated with a given property
* ID in a section format ID's namespace.</p>
*
*@param sectionFormatID Each section format ID has its own name space of
* property ID strings and thus must be specified.
*@param pid The property ID
*@return The well-known property ID string associated with
* the property ID <var>pid</var> in the name space spanned by <var>
* sectionFormatID</var> . If the <var>pid</var> /<var>sectionFormatID
* </var> combination is not well-known, the string "[undefined]" is
* returned.
* @param sectionFormatID Each section format ID has its own name
* space of property ID strings and thus must be specified.
* @param pid The property ID
* @return The well-known property ID string associated with the
* property ID <var>pid</var> in the name space spanned by <var>
* sectionFormatID</var> . If the <var>pid</var>
* /<var>sectionFormatID </var> combination is not well-known, the
* string "[undefined]" is returned.
*/
public static String getPIDString(final byte[] sectionFormatID,
final int pid) {
final int pid)
{
final PropertyIDMap m =
(PropertyIDMap) getInstance().get(sectionFormatID);
if (m == null) {
(PropertyIDMap) getInstance().get(sectionFormatID);
if (m == null)
return UNDEFINED;
} else {
else
{
final String s = (String) m.get(pid);
if (s == null) {
if (s == null)
return UNDEFINED;
}
return s;
}
}
@ -165,57 +162,47 @@ public class SectionIDMap extends HashMap {
/**
* <p>
*
* Returns the {@link PropertyIDMap} for a given section format ID.</p>
*
*@param sectionFormatID Description of the Parameter
*@return Description of the Return Value
* <p>Returns the {@link PropertyIDMap} for a given section format
* ID.</p>
*/
public PropertyIDMap get(final byte[] sectionFormatID) {
public PropertyIDMap get(final byte[] sectionFormatID)
{
return (PropertyIDMap) super.get(new String(sectionFormatID));
}
/**
* <p>
* <p>Returns the {@link PropertyIDMap} for a given section format
* ID.</p>
*
* Returns the {@link PropertyIDMap} for a given section format ID.</p>
*
*@param sectionFormatID A section format ID as a <tt>byte[]</tt> .
*@return Description of the Return Value
*@deprecated Use {@link #get(byte[])} instead!
* @param sectionFormatID A section format ID as a <tt>byte[]</tt> .
* @deprecated Use {@link #get(byte[])} instead!
*/
public Object get(final Object sectionFormatID) {
public Object get(final Object sectionFormatID)
{
return get((byte[]) sectionFormatID);
}
/**
* <p>
*
* Associates a section format ID with a {@link PropertyIDMap}.</p>
*
*@param sectionFormatID Description of the Parameter
*@param propertyIDMap Description of the Parameter
*@return Description of the Return Value
* <p>Associates a section format ID with a {@link
* PropertyIDMap}.</p>
*/
public Object put(final byte[] sectionFormatID,
final PropertyIDMap propertyIDMap) {
final PropertyIDMap propertyIDMap)
{
return super.put(new String(sectionFormatID), propertyIDMap);
}
/**
*@param key Description of the Parameter
*@param value Description of the Parameter
*@return Description of the Return Value
*@deprecated Use {@link #put(byte[], PropertyIDMap)} instead!
* @deprecated Use {@link #put(byte[], PropertyIDMap)} instead!
*/
public Object put(final Object key, final Object value) {
public Object put(final Object key, final Object value)
{
return put((byte[]) key, (PropertyIDMap) value);
}

View File

@ -183,7 +183,7 @@ public class TestBasic extends TestCase
/**
* <p>Tests the {@link PropertySet} methods. The test file has two
* property set: the first one is a {@link SummaryInformation},
* property sets: the first one is a {@link SummaryInformation},
* the second one is a {@link DocumentSummaryInformation}.</p>
*/
public void testPropertySetMethods() throws IOException, HPSFException
@ -214,11 +214,11 @@ public class TestBasic extends TestCase
/**
* <p>Runs the test cases stand-alone.</p>
*/
public static void main(String[] args)
public static void main(String[] args) throws Throwable
{
System.setProperty("HPSF.testdata.path",
"./src/testcases/org/apache/poi/hpsf/data");
junit.textui.TestRunner.run(TestBasic.class);
junit.textui.TestRunner.run(TestBasic.class);
}
}

View File

@ -0,0 +1,142 @@
/* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2002 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache POI" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact apache@apache.org.
*
* 5. Products derived from this software may not be called "Apache",
* "Apache POI", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.poi.hpsf.basic;
import java.io.*;
import java.util.*;
import junit.framework.*;
import org.apache.poi.hpsf.*;
/**
 * <p>Tests whether Unicode strings can be read from a
 * DocumentSummaryInformation.</p>
 *
 * @author Rainer Klute (klute@rainer-klute.de)
 * @since 2002-12-09
 * @version $Id$
 */
public class TestUnicode extends TestCase
{

    /** <p>Name of the POI filesystem that serves as test data.</p> */
    final static String POI_FS = "TestUnicode.xls";

    /** <p>Names of the streams to be read from the test POI
     * filesystem.</p> */
    final static String[] POI_FILES = new String[]
        {
            "\005DocumentSummaryInformation",
        };

    /** <p>The test POI filesystem. It is set by {@link #setUp}.</p> */
    File data;

    /** <p>Not used by the tests in this class.</p> */
    POIFile[] poiFiles;



    public TestUnicode(String name)
    {
        super(name);
    }



    /**
     * <p>Locates the test file in the directory named by the system
     * property <code>HPSF.testdata.path</code>.</p>
     *
     * @throws FileNotFoundException if the system property
     * <code>HPSF.testdata.path</code> is not set, so the test data
     * directory is unknown.
     */
    public void setUp() throws FileNotFoundException, IOException
    {
        final String dataDirName = System.getProperty("HPSF.testdata.path");
        /* Fail with a telling message instead of a bare
         * NullPointerException from the File constructor. */
        if (dataDirName == null)
            throw new FileNotFoundException
                ("System property \"HPSF.testdata.path\" is not set.");
        final File dataDir = new File(dataDirName);
        data = new File(dataDir, POI_FS);
    }



    /**
     * <p>Reads the <tt>\005DocumentSummaryInformation</tt> stream of
     * the test file and checks that it is recognized as a {@link
     * DocumentSummaryInformation} with two sections. Then the
     * properties 1 to 5 of the second section are compared with
     * their expected values; the values of the properties 3 to 5
     * are strings.</p>
     */
    public void testPropertySetMethods() throws IOException, HPSFException
    {
        POIFile poiFile = Util.readPOIFiles(data, POI_FILES)[0];
        byte[] b = poiFile.getBytes();
        PropertySet ps =
            PropertySetFactory.create(new ByteArrayInputStream(b));
        Assert.assertTrue(ps.isDocumentSummaryInformation());

        /* JUnit's assertEquals expects the expected value first and
         * the actual value second. Otherwise failure messages are
         * misleading. */
        Assert.assertEquals(2, ps.getSectionCount());
        Section s = (Section) ps.getSections().get(1);
        Assert.assertEquals(new Integer(1200),
                            s.getProperty(1));
        Assert.assertEquals(new Long(4198897018L),
                            s.getProperty(2));
        Assert.assertEquals("MCon_Info zu Office bei Schreiner",
                            s.getProperty(3));
        Assert.assertEquals("petrovitsch@schreiner-online.de",
                            s.getProperty(4));
        Assert.assertEquals("Petrovitsch, Wilhelm",
                            s.getProperty(5));
    }



    /**
     * <p>Runs the test cases stand-alone.</p>
     */
    public static void main(String[] args)
    {
        System.setProperty("HPSF.testdata.path",
                           "./src/testcases/org/apache/poi/hpsf/data");
        junit.textui.TestRunner.run(TestUnicode.class);
    }

}

View File

@ -114,10 +114,33 @@ public class Util
*/
public static POIFile[] readPOIFiles(final File poiFs)
throws FileNotFoundException, IOException
{
return readPOIFiles(poiFs, null);
}
/**
* <p>Reads a set of files from a POI filesystem and returns them
* as an array of {@link POIFile} instances. This method loads all
* files into memory and thus does not cope well with large POI
* filesystems.</p>
*
* @param poiFs The name of the POI filesystem as seen by the
* operating system. (This is the "filename".)
*
* @param poiFiles The names of the POI files to be read.
*
* @return The POI files. The elements are ordered in the same way
* as the files in the POI filesystem.
*/
public static POIFile[] readPOIFiles(final File poiFs,
final String[] poiFiles)
throws FileNotFoundException, IOException
{
final List files = new ArrayList();
POIFSReader r = new POIFSReader();
r.registerListener(new POIFSReaderListener()
POIFSReaderListener pfl = new POIFSReaderListener()
{
public void processPOIFSReaderEvent(POIFSReaderEvent event)
{
@ -140,7 +163,17 @@ public class Util
throw new RuntimeException(ex.getMessage());
}
}
});
};
if (poiFiles == null)
/* Register the listener for all POI files. */
r.registerListener(pfl);
else
/* Register the listener for the specified POI files
* only. */
for (int i = 0; i < poiFiles.length; i++)
r.registerListener(pfl, poiFiles[i]);
/* Read the POI filesystem. */
r.read(new FileInputStream(poiFs));
POIFile[] result = new POIFile[files.size()];
for (int i = 0; i < result.length; i++)

Binary file not shown.