#52372 - OutOfMemoryError parsing a word file
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1793602 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a0b324224b
commit
cd6236c74b
@ -22,7 +22,11 @@ import java.io.IOException;
|
|||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.io.UnsupportedEncodingException;
|
import java.io.UnsupportedEncodingException;
|
||||||
import java.util.Date;
|
import java.util.Date;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.LinkedHashMap;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
import org.apache.commons.collections4.bidimap.TreeBidiMap;
|
import org.apache.commons.collections4.bidimap.TreeBidiMap;
|
||||||
@ -30,11 +34,16 @@ import org.apache.poi.hpsf.wellknown.PropertyIDMap;
|
|||||||
import org.apache.poi.hpsf.wellknown.SectionIDMap;
|
import org.apache.poi.hpsf.wellknown.SectionIDMap;
|
||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.util.CodePageUtil;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.util.LittleEndianByteArrayInputStream;
|
||||||
|
import org.apache.poi.util.LittleEndianConsts;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents a section in a {@link PropertySet}.
|
* Represents a section in a {@link PropertySet}.
|
||||||
*/
|
*/
|
||||||
public class Section {
|
public class Section {
|
||||||
|
private static final POILogger LOG = POILogFactory.getLogger(Section.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maps property IDs to section-private PID strings. These
|
* Maps property IDs to section-private PID strings. These
|
||||||
@ -62,7 +71,7 @@ public class Section {
|
|||||||
/**
|
/**
|
||||||
* The offset of the section in the stream.
|
* The offset of the section in the stream.
|
||||||
*/
|
*/
|
||||||
private long offset = -1;
|
private final long _offset;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The section's size in bytes.
|
* The section's size in bytes.
|
||||||
@ -72,7 +81,7 @@ public class Section {
|
|||||||
/**
|
/**
|
||||||
* This section's properties.
|
* This section's properties.
|
||||||
*/
|
*/
|
||||||
private final Map<Long,Property> properties = new TreeMap<Long,Property>();
|
private final Map<Long,Property> properties = new LinkedHashMap<Long,Property>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This member is {@code true} if the last call to {@link
|
* This member is {@code true} if the last call to {@link
|
||||||
@ -85,6 +94,7 @@ public class Section {
|
|||||||
* Creates an empty {@link Section}.
|
* Creates an empty {@link Section}.
|
||||||
*/
|
*/
|
||||||
public Section() {
|
public Section() {
|
||||||
|
this._offset = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -96,6 +106,7 @@ public class Section {
|
|||||||
* @param s The section set to copy
|
* @param s The section set to copy
|
||||||
*/
|
*/
|
||||||
public Section(final Section s) {
|
public Section(final Section s) {
|
||||||
|
this._offset = -1;
|
||||||
setFormatID(s.getFormatID());
|
setFormatID(s.getFormatID());
|
||||||
for (Property p : s.properties.values()) {
|
for (Property p : s.properties.values()) {
|
||||||
properties.put(p.getID(), new MutableProperty(p));
|
properties.put(p.getID(), new MutableProperty(p));
|
||||||
@ -117,32 +128,38 @@ public class Section {
|
|||||||
*/
|
*/
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public Section(final byte[] src, final int offset) throws UnsupportedEncodingException {
|
public Section(final byte[] src, final int offset) throws UnsupportedEncodingException {
|
||||||
int o1 = offset;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the format ID.
|
* Read the format ID.
|
||||||
*/
|
*/
|
||||||
formatID = new ClassID(src, o1);
|
formatID = new ClassID(src, offset);
|
||||||
o1 += ClassID.LENGTH;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the offset from the stream's start and positions to
|
* Read the offset from the stream's start and positions to
|
||||||
* the section header.
|
* the section header.
|
||||||
*/
|
*/
|
||||||
this.offset = LittleEndian.getUInt(src, o1);
|
int offFix = (int)LittleEndian.getUInt(src, offset + ClassID.LENGTH);
|
||||||
o1 = (int) this.offset;
|
|
||||||
|
// some input files have an invalid (padded?) offset, which needs to be fixed
|
||||||
|
// search for beginning of size field
|
||||||
|
if (src[offFix] == 0) {
|
||||||
|
for (int i=0; i<3 && src[offFix] == 0; i++,offFix++);
|
||||||
|
// cross check with propertyCount field and the property list field
|
||||||
|
for (int i=0; i<3 && (src[offFix+3] != 0 || src[offFix+7] != 0 || src[offFix+11] != 0); i++,offFix--);
|
||||||
|
}
|
||||||
|
|
||||||
|
this._offset = offFix;
|
||||||
|
|
||||||
|
LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(src, offFix);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the section length.
|
* Read the section length.
|
||||||
*/
|
*/
|
||||||
size = (int) LittleEndian.getUInt(src, o1);
|
size = (int)leis.readUInt();
|
||||||
o1 += LittleEndian.INT_SIZE;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the number of properties.
|
* Read the number of properties.
|
||||||
*/
|
*/
|
||||||
final int propertyCount = (int) LittleEndian.getUInt(src, o1);
|
final int propertyCount = (int)leis.readUInt();
|
||||||
o1 += LittleEndian.INT_SIZE;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Read the properties. The offset is positioned at the first
|
* Read the properties. The offset is positioned at the first
|
||||||
@ -169,64 +186,65 @@ public class Section {
|
|||||||
* second pass reads the other properties.
|
* second pass reads the other properties.
|
||||||
*/
|
*/
|
||||||
/* Pass 1: Read the property list. */
|
/* Pass 1: Read the property list. */
|
||||||
int pass1Offset = o1;
|
|
||||||
long cpOffset = -1;
|
|
||||||
final TreeBidiMap<Long,Long> offset2Id = new TreeBidiMap<Long,Long>();
|
final TreeBidiMap<Long,Long> offset2Id = new TreeBidiMap<Long,Long>();
|
||||||
for (int i = 0; i < propertyCount; i++) {
|
for (int i = 0; i < propertyCount; i++) {
|
||||||
/* Read the property ID. */
|
/* Read the property ID. */
|
||||||
long id = LittleEndian.getUInt(src, pass1Offset);
|
long id = (int)leis.readUInt();
|
||||||
pass1Offset += LittleEndian.INT_SIZE;
|
|
||||||
|
|
||||||
/* Offset from the section's start. */
|
/* Offset from the section's start. */
|
||||||
long off = LittleEndian.getUInt(src, pass1Offset);
|
long off = (int)leis.readUInt();
|
||||||
pass1Offset += LittleEndian.INT_SIZE;
|
|
||||||
|
|
||||||
offset2Id.put(off, id);
|
offset2Id.put(off, id);
|
||||||
|
|
||||||
if (id == PropertyIDMap.PID_CODEPAGE) {
|
|
||||||
cpOffset = off;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Long cpOffset = offset2Id.getKey((long)PropertyIDMap.PID_CODEPAGE);
|
||||||
|
|
||||||
/* Look for the codepage. */
|
/* Look for the codepage. */
|
||||||
int codepage = -1;
|
int codepage = -1;
|
||||||
if (cpOffset != -1) {
|
if (cpOffset != null) {
|
||||||
/* Read the property's value type. It must be VT_I2. */
|
/* Read the property's value type. It must be VT_I2. */
|
||||||
long o = this.offset + cpOffset;
|
leis.setReadIndex((int)(this._offset + cpOffset));
|
||||||
final long type = LittleEndian.getUInt(src, (int)o);
|
final long type = leis.readUInt();
|
||||||
o += LittleEndian.INT_SIZE;
|
|
||||||
|
|
||||||
if (type != Variant.VT_I2) {
|
if (type != Variant.VT_I2) {
|
||||||
throw new HPSFRuntimeException
|
throw new HPSFRuntimeException
|
||||||
("Value type of property ID 1 is not VT_I2 but " +
|
("Value type of property ID 1 is not VT_I2 but " + type + ".");
|
||||||
type + ".");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Read the codepage number. */
|
/* Read the codepage number. */
|
||||||
codepage = LittleEndian.getUShort(src, (int)o);
|
codepage = leis.readUShort();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Pass 2: Read all properties - including the codepage property,
|
/* Pass 2: Read all properties - including the codepage property,
|
||||||
* if available. */
|
* if available. */
|
||||||
for (Map.Entry<Long,Long> me : offset2Id.entrySet()) {
|
for (Map.Entry<Long,Long> me : offset2Id.entrySet()) {
|
||||||
long off = me.getKey();
|
long off = me.getKey();
|
||||||
long id = me.getValue();
|
long id = me.getValue();
|
||||||
Property p;
|
|
||||||
if (id == PropertyIDMap.PID_CODEPAGE) {
|
|
||||||
p = new Property(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2, codepage);
|
|
||||||
} else {
|
|
||||||
int pLen = propLen(offset2Id, off, size);
|
|
||||||
long o = this.offset + off;
|
|
||||||
p = new Property(id, src, o, pLen, codepage);
|
|
||||||
}
|
|
||||||
properties.put(id, p);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
int pLen = propLen(offset2Id, off, size);
|
||||||
* Extract the dictionary (if available).
|
leis.setReadIndex((int)(this._offset + off));
|
||||||
*/
|
|
||||||
dictionary = (Map<Long,String>) getProperty(0);
|
if (id == PropertyIDMap.PID_DICTIONARY) {
|
||||||
|
leis.mark(100000);
|
||||||
|
if (!readDictionary(leis, pLen, codepage)) {
|
||||||
|
// there was an error reading the dictionary, maybe because the pid (0) was used wrong
|
||||||
|
// try reading a property instead
|
||||||
|
leis.reset();
|
||||||
|
try {
|
||||||
|
// fix id
|
||||||
|
id = Math.max(PropertyIDMap.PID_MAX, offset2Id.inverseBidiMap().lastKey())+1;
|
||||||
|
setProperty(new MutableProperty(id, leis, pLen, codepage));
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
LOG.log(POILogger.INFO, "Dictionary fallback failed - ignoring property");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
} else if (id == PropertyIDMap.PID_CODEPAGE) {
|
||||||
|
setCodepage(codepage);
|
||||||
|
} else {
|
||||||
|
setProperty(new MutableProperty(id, leis, pLen, codepage));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -291,7 +309,7 @@ public class Section {
|
|||||||
* @return The offset of the section in the stream.
|
* @return The offset of the section in the stream.
|
||||||
*/
|
*/
|
||||||
public long getOffset() {
|
public long getOffset() {
|
||||||
return offset;
|
return _offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -344,11 +362,10 @@ public class Section {
|
|||||||
* Sets the string value of the property with the specified ID.
|
* Sets the string value of the property with the specified ID.
|
||||||
*
|
*
|
||||||
* @param id The property's ID
|
* @param id The property's ID
|
||||||
* @param value The property's value. It will be written as a Unicode
|
* @param value The property's value.
|
||||||
* string.
|
|
||||||
*/
|
*/
|
||||||
public void setProperty(final int id, final String value) {
|
public void setProperty(final int id, final String value) {
|
||||||
setProperty(id, Variant.VT_LPWSTR, value);
|
setProperty(id, Variant.VT_LPSTR, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -411,8 +428,9 @@ public class Section {
|
|||||||
* @see #getProperty
|
* @see #getProperty
|
||||||
* @see Variant
|
* @see Variant
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("deprecation")
|
||||||
public void setProperty(final int id, final long variantType, final Object value) {
|
public void setProperty(final int id, final long variantType, final Object value) {
|
||||||
setProperty(new Property(id, variantType, value));
|
setProperty(new MutableProperty(id, variantType, value));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -591,11 +609,12 @@ public class Section {
|
|||||||
*/
|
*/
|
||||||
public String getPIDString(final long pid) {
|
public String getPIDString(final long pid) {
|
||||||
String s = null;
|
String s = null;
|
||||||
if (dictionary != null) {
|
Map<Long,String> dic = getDictionary();
|
||||||
s = dictionary.get(Long.valueOf(pid));
|
if (dic != null) {
|
||||||
|
s = dic.get(pid);
|
||||||
}
|
}
|
||||||
if (s == null) {
|
if (s == null) {
|
||||||
s = SectionIDMap.getPIDString(getFormatID().getBytes(), pid);
|
s = SectionIDMap.getPIDString(getFormatID(), pid);
|
||||||
}
|
}
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
@ -614,19 +633,6 @@ public class Section {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the codepage.
|
|
||||||
*
|
|
||||||
* @param codepage the codepage
|
|
||||||
*/
|
|
||||||
public void setCodepage(final int codepage)
|
|
||||||
{
|
|
||||||
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
|
|
||||||
Integer.valueOf(codepage));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Checks whether this section is equal to another object. The result is
|
* Checks whether this section is equal to another object. The result is
|
||||||
* {@code false} if one of the following conditions holds:
|
* {@code false} if one of the following conditions holds:
|
||||||
@ -651,8 +657,9 @@ public class Section {
|
|||||||
* @return {@code true} if the objects are equal, {@code false} if
|
* @return {@code true} if the objects are equal, {@code false} if
|
||||||
* not
|
* not
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public boolean equals(final Object o) {
|
public boolean equals(final Object o) {
|
||||||
if (o == null || !(o instanceof Section)) {
|
if (!(o instanceof Section)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
final Section s = (Section) o;
|
final Section s = (Section) o;
|
||||||
@ -660,59 +667,26 @@ public class Section {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compare all properties except 0 and 1 as they must be handled
|
/* Compare all properties except the dictionary (id 0) and
|
||||||
* specially. */
|
* the codepage (id 1 / ignored) as they must be handled specially. */
|
||||||
Property[] pa1 = new Property[getProperties().length];
|
Set<Long> propIds = new HashSet<Long>(properties.keySet());
|
||||||
Property[] pa2 = new Property[s.getProperties().length];
|
propIds.addAll(s.properties.keySet());
|
||||||
System.arraycopy(getProperties(), 0, pa1, 0, pa1.length);
|
propIds.remove(0L);
|
||||||
System.arraycopy(s.getProperties(), 0, pa2, 0, pa2.length);
|
propIds.remove(1L);
|
||||||
|
|
||||||
/* Extract properties 0 and 1 and remove them from the copy of the
|
for (Long id : propIds) {
|
||||||
* arrays. */
|
Property p1 = properties.get(id);
|
||||||
Property p10 = null;
|
Property p2 = s.properties.get(id);
|
||||||
Property p20 = null;
|
if (p1 == null || p2 == null || !p1.equals(p2)) {
|
||||||
for (int i = 0; i < pa1.length; i++) {
|
return false;
|
||||||
final long id = pa1[i].getID();
|
|
||||||
if (id == 0) {
|
|
||||||
p10 = pa1[i];
|
|
||||||
pa1 = remove(pa1, i);
|
|
||||||
i--;
|
|
||||||
}
|
}
|
||||||
if (id == 1) {
|
|
||||||
pa1 = remove(pa1, i);
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int i = 0; i < pa2.length; i++) {
|
|
||||||
final long id = pa2[i].getID();
|
|
||||||
if (id == 0) {
|
|
||||||
p20 = pa2[i];
|
|
||||||
pa2 = remove(pa2, i);
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
if (id == 1) {
|
|
||||||
pa2 = remove(pa2, i);
|
|
||||||
i--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If the number of properties (not counting property 1) is unequal the
|
|
||||||
* sections are unequal. */
|
|
||||||
if (pa1.length != pa2.length) {
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the dictionaries are unequal the sections are unequal. */
|
/* If the dictionaries are unequal the sections are unequal. */
|
||||||
boolean dictionaryEqual = true;
|
Map<Long,String> d1 = getDictionary();
|
||||||
if (p10 != null && p20 != null) {
|
Map<Long,String> d2 = s.getDictionary();
|
||||||
dictionaryEqual = p10.getValue().equals(p20.getValue());
|
|
||||||
} else if (p10 != null || p20 != null) {
|
return (d1 == null && d2 == null) || (d1 != null && d2 != null && d1.equals(d2));
|
||||||
dictionaryEqual = false;
|
|
||||||
}
|
|
||||||
if (dictionaryEqual) {
|
|
||||||
return Util.equals(pa1, pa2);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -724,22 +698,6 @@ public class Section {
|
|||||||
dirty |= (properties.remove(id) != null);
|
dirty |= (properties.remove(id) != null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes a field from a property array. The resulting array is
|
|
||||||
* compactified and returned.
|
|
||||||
*
|
|
||||||
* @param pa The property array.
|
|
||||||
* @param i The index of the field to be removed.
|
|
||||||
* @return the compactified array.
|
|
||||||
*/
|
|
||||||
private Property[] remove(final Property[] pa, final int i) {
|
|
||||||
final Property[] h = new Property[pa.length - 1];
|
|
||||||
if (i > 0) {
|
|
||||||
System.arraycopy(pa, 0, h, 0, i);
|
|
||||||
}
|
|
||||||
System.arraycopy(pa, i + 1, h, i, h.length - i);
|
|
||||||
return h;
|
|
||||||
}
|
|
||||||
/**
|
/**
|
||||||
* Writes this section into an output stream.<p>
|
* Writes this section into an output stream.<p>
|
||||||
*
|
*
|
||||||
@ -763,6 +721,17 @@ public class Section {
|
|||||||
return sectionBytes.length;
|
return sectionBytes.length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Writing the section's dictionary is tricky. If there is a dictionary
|
||||||
|
* (property 0) the codepage property (property 1) must be set, too. */
|
||||||
|
int codepage = getCodepage();
|
||||||
|
if (codepage == -1) {
|
||||||
|
String msg =
|
||||||
|
"The codepage property is not set although a dictionary is present. "+
|
||||||
|
"Defaulting to ISO-8859-1.";
|
||||||
|
LOG.log(POILogger.WARN, msg);
|
||||||
|
codepage = Property.DEFAULT_CODEPAGE;
|
||||||
|
}
|
||||||
|
|
||||||
/* The properties are written to this stream. */
|
/* The properties are written to this stream. */
|
||||||
final ByteArrayOutputStream propertyStream = new ByteArrayOutputStream();
|
final ByteArrayOutputStream propertyStream = new ByteArrayOutputStream();
|
||||||
|
|
||||||
@ -777,79 +746,120 @@ public class Section {
|
|||||||
/* Increase the position variable by the size of the property list so
|
/* Increase the position variable by the size of the property list so
|
||||||
* that it points behind the property list and to the beginning of the
|
* that it points behind the property list and to the beginning of the
|
||||||
* properties themselves. */
|
* properties themselves. */
|
||||||
position += 2 * LittleEndian.INT_SIZE + getPropertyCount() * 2 * LittleEndian.INT_SIZE;
|
position += 2 * LittleEndianConsts.INT_SIZE + getPropertyCount() * 2 * LittleEndianConsts.INT_SIZE;
|
||||||
|
|
||||||
/* Writing the section's dictionary is tricky. If there is a dictionary
|
|
||||||
* (property 0) the codepage property (property 1) must be set, too. */
|
|
||||||
int codepage = -1;
|
|
||||||
if (getProperty(PropertyIDMap.PID_DICTIONARY) != null) {
|
|
||||||
final Object p1 = getProperty(PropertyIDMap.PID_CODEPAGE);
|
|
||||||
if (p1 != null) {
|
|
||||||
if (!(p1 instanceof Integer)) {
|
|
||||||
throw new IllegalPropertySetDataException
|
|
||||||
("The codepage property (ID = 1) must be an " +
|
|
||||||
"Integer object.");
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* Warning: The codepage property is not set although a
|
|
||||||
* dictionary is present. In order to cope with this problem we
|
|
||||||
* add the codepage property and set it to Unicode. */
|
|
||||||
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
|
|
||||||
Integer.valueOf(CodePageUtil.CP_UNICODE));
|
|
||||||
}
|
|
||||||
codepage = getCodepage();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Write the properties and the property list into their respective
|
/* Write the properties and the property list into their respective
|
||||||
* streams: */
|
* streams: */
|
||||||
for (Property p : properties.values()) {
|
for (Property p : properties.values()) {
|
||||||
final long id = p.getID();
|
final long id = p.getID();
|
||||||
|
|
||||||
/* Write the property list entry. */
|
/* Write the property list entry. */
|
||||||
TypeWriter.writeUIntToStream(propertyListStream, p.getID());
|
LittleEndian.putUInt(id, propertyListStream);
|
||||||
TypeWriter.writeUIntToStream(propertyListStream, position);
|
LittleEndian.putUInt(position, propertyListStream);
|
||||||
|
|
||||||
/* If the property ID is not equal 0 we write the property and all
|
/* If the property ID is not equal 0 we write the property and all
|
||||||
* is fine. However, if it equals 0 we have to write the section's
|
* is fine. However, if it equals 0 we have to write the section's
|
||||||
* dictionary which has an implicit type only and an explicit
|
* dictionary which has an implicit type only and an explicit
|
||||||
* value. */
|
* value. */
|
||||||
if (id != 0)
|
if (id != 0) {
|
||||||
/* Write the property and update the position to the next
|
/* Write the property and update the position to the next
|
||||||
* property. */
|
* property. */
|
||||||
position += p.write(propertyStream, getCodepage());
|
position += p.write(propertyStream, codepage);
|
||||||
else
|
} else {
|
||||||
{
|
if (codepage == -1) {
|
||||||
if (codepage == -1)
|
throw new IllegalPropertySetDataException("Codepage (property 1) is undefined.");
|
||||||
throw new IllegalPropertySetDataException
|
}
|
||||||
("Codepage (property 1) is undefined.");
|
position += writeDictionary(propertyStream, codepage);
|
||||||
position += writeDictionary(propertyStream, dictionary,
|
|
||||||
codepage);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
propertyStream.close();
|
|
||||||
propertyListStream.close();
|
|
||||||
|
|
||||||
/* Write the section: */
|
/* Write the section: */
|
||||||
byte[] pb1 = propertyListStream.toByteArray();
|
int streamLength = LittleEndianConsts.INT_SIZE * 2 + propertyListStream.size() + propertyStream.size();
|
||||||
byte[] pb2 = propertyStream.toByteArray();
|
|
||||||
|
|
||||||
/* Write the section's length: */
|
/* Write the section's length: */
|
||||||
TypeWriter.writeToStream(out, LittleEndian.INT_SIZE * 2 +
|
LittleEndian.putInt(streamLength, out);
|
||||||
pb1.length + pb2.length);
|
|
||||||
|
|
||||||
/* Write the section's number of properties: */
|
/* Write the section's number of properties: */
|
||||||
TypeWriter.writeToStream(out, getPropertyCount());
|
LittleEndian.putInt(getPropertyCount(), out);
|
||||||
|
|
||||||
/* Write the property list: */
|
/* Write the property list: */
|
||||||
out.write(pb1);
|
propertyListStream.writeTo(out);
|
||||||
|
|
||||||
/* Write the properties: */
|
/* Write the properties: */
|
||||||
out.write(pb2);
|
propertyStream.writeTo(out);
|
||||||
|
|
||||||
int streamLength = LittleEndian.INT_SIZE * 2 + pb1.length + pb2.length;
|
|
||||||
return streamLength;
|
return streamLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a dictionary.
|
||||||
|
*
|
||||||
|
* @param leis The byte stream containing the bytes making up the dictionary.
|
||||||
|
* @param length The dictionary contains at most this many bytes.
|
||||||
|
* @param codepage The codepage of the string values.
|
||||||
|
*
|
||||||
|
* @return {@code true} if the dictionary was read successfully, {@code false} otherwise
|
||||||
|
*
|
||||||
|
* @throws UnsupportedEncodingException if the dictionary's codepage is not
|
||||||
|
* (yet) supported.
|
||||||
|
*/
|
||||||
|
private boolean readDictionary(LittleEndianByteArrayInputStream leis, final int length, final int codepage)
|
||||||
|
throws UnsupportedEncodingException {
|
||||||
|
Map<Long,String> dic = new HashMap<Long,String>();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Read the number of dictionary entries.
|
||||||
|
*/
|
||||||
|
final long nrEntries = leis.readUInt();
|
||||||
|
|
||||||
|
long id = -1;
|
||||||
|
boolean isCorrupted = false;
|
||||||
|
for (int i = 0; i < nrEntries; i++) {
|
||||||
|
String errMsg =
|
||||||
|
"The property set's dictionary contains bogus data. "
|
||||||
|
+ "All dictionary entries starting with the one with ID "
|
||||||
|
+ id + " will be ignored.";
|
||||||
|
|
||||||
|
/* The key. */
|
||||||
|
id = leis.readUInt();
|
||||||
|
|
||||||
|
/* The value (a string). The length is either the
|
||||||
|
* number of (two-byte) characters if the character set is Unicode
|
||||||
|
* or the number of bytes if the character set is not Unicode.
|
||||||
|
* The length includes terminating 0x00 bytes which we have to strip
|
||||||
|
* off to create a Java string. */
|
||||||
|
long sLength = leis.readUInt();
|
||||||
|
|
||||||
|
/* Read the string - Strip 0x00 characters from the end of the string. */
|
||||||
|
int cp = (codepage == -1) ? Property.DEFAULT_CODEPAGE : codepage;
|
||||||
|
int nrBytes = (int)((sLength-1) * (cp == CodePageUtil.CP_UNICODE ? 2 : 1));
|
||||||
|
if (nrBytes > 0xFFFFFF) {
|
||||||
|
LOG.log(POILogger.WARN, errMsg);
|
||||||
|
isCorrupted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
byte buf[] = new byte[nrBytes];
|
||||||
|
leis.readFully(buf, 0, nrBytes);
|
||||||
|
final String str = CodePageUtil.getStringFromCodePage(buf, 0, nrBytes, cp);
|
||||||
|
|
||||||
|
int pad = 1;
|
||||||
|
if (cp == CodePageUtil.CP_UNICODE) {
|
||||||
|
pad = 2+((4 - ((nrBytes+2) & 0x3)) & 0x3);
|
||||||
|
}
|
||||||
|
leis.skip(pad);
|
||||||
|
|
||||||
|
dic.put(id, str);
|
||||||
|
} catch (RuntimeException ex) {
|
||||||
|
LOG.log(POILogger.WARN, errMsg, ex);
|
||||||
|
isCorrupted = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
setDictionary(dic);
|
||||||
|
return !isCorrupted;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -861,48 +871,37 @@ public class Section {
|
|||||||
* @return The number of bytes written
|
* @return The number of bytes written
|
||||||
* @exception IOException if an I/O exception occurs.
|
* @exception IOException if an I/O exception occurs.
|
||||||
*/
|
*/
|
||||||
private static int writeDictionary(final OutputStream out, final Map<Long,String> dictionary, final int codepage)
|
private int writeDictionary(final OutputStream out, final int codepage)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int length = TypeWriter.writeUIntToStream(out, dictionary.size());
|
final byte padding[] = new byte[4];
|
||||||
for (Map.Entry<Long,String> ls : dictionary.entrySet()) {
|
Map<Long,String> dic = getDictionary();
|
||||||
final Long key = ls.getKey();
|
|
||||||
final String value = ls.getValue();
|
LittleEndian.putUInt(dic.size(), out);
|
||||||
|
int length = LittleEndianConsts.INT_SIZE;
|
||||||
|
for (Map.Entry<Long,String> ls : dic.entrySet()) {
|
||||||
|
|
||||||
|
LittleEndian.putUInt(ls.getKey(), out);
|
||||||
|
length += LittleEndianConsts.INT_SIZE;
|
||||||
|
|
||||||
|
String value = ls.getValue()+"\0";
|
||||||
|
LittleEndian.putUInt( value.length(), out );
|
||||||
|
length += LittleEndianConsts.INT_SIZE;
|
||||||
|
|
||||||
|
byte bytes[] = CodePageUtil.getBytesInCodePage(value, codepage);
|
||||||
|
out.write(bytes);
|
||||||
|
length += bytes.length;
|
||||||
|
|
||||||
if (codepage == CodePageUtil.CP_UNICODE) {
|
if (codepage == CodePageUtil.CP_UNICODE) {
|
||||||
/* Write the dictionary item in Unicode. */
|
int pad = (4 - (length & 0x3)) & 0x3;
|
||||||
int sLength = value.length() + 1;
|
out.write(padding, 0, pad);
|
||||||
if ((sLength & 1) == 1) {
|
length += pad;
|
||||||
sLength++;
|
|
||||||
}
|
|
||||||
length += TypeWriter.writeUIntToStream(out, key.longValue());
|
|
||||||
length += TypeWriter.writeUIntToStream(out, sLength);
|
|
||||||
final byte[] ca = CodePageUtil.getBytesInCodePage(value, codepage);
|
|
||||||
for (int j = 2; j < ca.length; j += 2) {
|
|
||||||
out.write(ca[j+1]);
|
|
||||||
out.write(ca[j]);
|
|
||||||
length += 2;
|
|
||||||
}
|
|
||||||
sLength -= value.length();
|
|
||||||
while (sLength > 0) {
|
|
||||||
out.write(0x00);
|
|
||||||
out.write(0x00);
|
|
||||||
length += 2;
|
|
||||||
sLength--;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
/* Write the dictionary item in another codepage than
|
|
||||||
* Unicode. */
|
|
||||||
length += TypeWriter.writeUIntToStream(out, key.longValue());
|
|
||||||
length += TypeWriter.writeUIntToStream(out, value.length() + 1L);
|
|
||||||
final byte[] ba = CodePageUtil.getBytesInCodePage(value, codepage);
|
|
||||||
for (int j = 0; j < ba.length; j++) {
|
|
||||||
out.write(ba[j]);
|
|
||||||
length++;
|
|
||||||
}
|
|
||||||
out.write(0x00);
|
|
||||||
length++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int pad = (4 - (length & 0x3)) & 0x3;
|
||||||
|
out.write(padding, 0, pad);
|
||||||
|
length += pad;
|
||||||
|
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -924,25 +923,27 @@ public class Section {
|
|||||||
*/
|
*/
|
||||||
public void setDictionary(final Map<Long,String> dictionary) throws IllegalPropertySetDataException {
|
public void setDictionary(final Map<Long,String> dictionary) throws IllegalPropertySetDataException {
|
||||||
if (dictionary != null) {
|
if (dictionary != null) {
|
||||||
this.dictionary = dictionary;
|
if (this.dictionary == null) {
|
||||||
|
this.dictionary = new TreeMap<Long,String>();
|
||||||
|
}
|
||||||
|
this.dictionary.putAll(dictionary);
|
||||||
|
|
||||||
|
/* If the codepage property (ID 1) for the strings (keys and values)
|
||||||
|
* used in the dictionary is not yet defined, set it to ISO-8859-1. */
|
||||||
|
int cp = getCodepage();
|
||||||
|
if (cp == -1) {
|
||||||
|
setCodepage(Property.DEFAULT_CODEPAGE);
|
||||||
|
}
|
||||||
|
|
||||||
/* Set the dictionary property (ID 0). Please note that the second
|
/* Set the dictionary property (ID 0). Please note that the second
|
||||||
* parameter in the method call below is unused because dictionaries
|
* parameter in the method call below is unused because dictionaries
|
||||||
* don't have a type. */
|
* don't have a type. */
|
||||||
setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
|
setProperty(PropertyIDMap.PID_DICTIONARY, -1, dictionary);
|
||||||
|
|
||||||
/* If the codepage property (ID 1) for the strings (keys and
|
|
||||||
* values) used in the dictionary is not yet defined, set it to
|
|
||||||
* Unicode. */
|
|
||||||
final Integer codepage = (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
|
|
||||||
if (codepage == null) {
|
|
||||||
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2,
|
|
||||||
Integer.valueOf(CodePageUtil.CP_UNICODE));
|
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* Setting the dictionary to null means to remove property 0.
|
/* Setting the dictionary to null means to remove property 0.
|
||||||
* However, it does not mean to remove property 1 (codepage). */
|
* However, it does not mean to remove property 1 (codepage). */
|
||||||
removeProperty(PropertyIDMap.PID_DICTIONARY);
|
removeProperty(PropertyIDMap.PID_DICTIONARY);
|
||||||
|
this.dictionary = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -951,6 +952,7 @@ public class Section {
|
|||||||
/**
|
/**
|
||||||
* @see Object#hashCode()
|
* @see Object#hashCode()
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
long hashCode = 0;
|
long hashCode = 0;
|
||||||
hashCode += getFormatID().hashCode();
|
hashCode += getFormatID().hashCode();
|
||||||
@ -967,9 +969,11 @@ public class Section {
|
|||||||
/**
|
/**
|
||||||
* @see Object#toString()
|
* @see Object#toString()
|
||||||
*/
|
*/
|
||||||
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuffer b = new StringBuffer();
|
final StringBuffer b = new StringBuffer();
|
||||||
final Property[] pa = getProperties();
|
final Property[] pa = getProperties();
|
||||||
|
b.append("\n\n\n");
|
||||||
b.append(getClass().getName());
|
b.append(getClass().getName());
|
||||||
b.append('[');
|
b.append('[');
|
||||||
b.append("formatID: ");
|
b.append("formatID: ");
|
||||||
@ -981,8 +985,12 @@ public class Section {
|
|||||||
b.append(", size: ");
|
b.append(", size: ");
|
||||||
b.append(getSize());
|
b.append(getSize());
|
||||||
b.append(", properties: [\n");
|
b.append(", properties: [\n");
|
||||||
for (int i = 0; i < pa.length; i++) {
|
int codepage = getCodepage();
|
||||||
b.append(pa[i]);
|
if (codepage == -1) {
|
||||||
|
codepage = Property.DEFAULT_CODEPAGE;
|
||||||
|
}
|
||||||
|
for (Property p : pa) {
|
||||||
|
b.append(p.toString(codepage));
|
||||||
b.append(",\n");
|
b.append(",\n");
|
||||||
}
|
}
|
||||||
b.append(']');
|
b.append(']');
|
||||||
@ -1002,7 +1010,12 @@ public class Section {
|
|||||||
* @return the dictionary or {@code null} if the section does not have
|
* @return the dictionary or {@code null} if the section does not have
|
||||||
* a dictionary.
|
* a dictionary.
|
||||||
*/
|
*/
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
public Map<Long,String> getDictionary() {
|
public Map<Long,String> getDictionary() {
|
||||||
|
if (dictionary == null) {
|
||||||
|
dictionary = (Map<Long,String>) getProperty(PropertyIDMap.PID_DICTIONARY);
|
||||||
|
}
|
||||||
|
|
||||||
return dictionary;
|
return dictionary;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1013,13 +1026,17 @@ public class Section {
|
|||||||
*
|
*
|
||||||
* @return The section's codepage if one is defined, else -1.
|
* @return The section's codepage if one is defined, else -1.
|
||||||
*/
|
*/
|
||||||
public int getCodepage()
|
public int getCodepage() {
|
||||||
{
|
|
||||||
final Integer codepage = (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
|
final Integer codepage = (Integer) getProperty(PropertyIDMap.PID_CODEPAGE);
|
||||||
if (codepage == null) {
|
return (codepage == null) ? -1 : codepage.intValue();
|
||||||
return -1;
|
}
|
||||||
}
|
|
||||||
int cp = codepage.intValue();
|
/**
|
||||||
return cp;
|
* Sets the codepage.
|
||||||
|
*
|
||||||
|
* @param codepage the codepage
|
||||||
|
*/
|
||||||
|
public void setCodepage(final int codepage) {
|
||||||
|
setProperty(PropertyIDMap.PID_CODEPAGE, Variant.VT_I2, codepage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/hpsf/TestBug52372.doc
Normal file
BIN
test-data/hpsf/TestBug52372.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user