Revert enough of the previous changes to restore handling of the commoncrawl-corpus case that is currently failing, add a unit test for this case, and enable one previously disabled unit test

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1781822 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2017-02-05 22:34:40 +00:00
parent 142e52dfba
commit f431473f29
4 changed files with 54 additions and 49 deletions

View File

@ -24,7 +24,7 @@ import org.apache.poi.util.HexDump;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/** /**
* Escher array properties are the most wierd construction ever invented * Escher array properties are the most weird construction ever invented
* with all sorts of special cases. I'm hopeful I've got them all. * with all sorts of special cases. I'm hopeful I've got them all.
*/ */
public final class EscherArrayProperty extends EscherComplexProperty implements Iterable<byte[]> { public final class EscherArrayProperty extends EscherComplexProperty implements Iterable<byte[]> {
@ -43,7 +43,7 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
/** /**
* When reading a property from data stream remember if the complex part is empty and set this flag. * When reading a property from data stream remember if the complex part is empty and set this flag.
*/ */
private boolean emptyComplexPart = false; private boolean emptyComplexPart;
public EscherArrayProperty(short id, byte[] complexData) { public EscherArrayProperty(short id, byte[] complexData) {
super(id, checkComplexData(complexData)); super(id, checkComplexData(complexData));
@ -67,9 +67,13 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
} }
public void setNumberOfElementsInArray(int numberOfElements) { public void setNumberOfElementsInArray(int numberOfElements) {
int expectedArraySize = getArraySizeInBytes(numberOfElements, getSizeOfElements()); int expectedArraySize = numberOfElements * getActualSizeOfElements(getSizeOfElements()) + FIXED_SIZE;
resizeComplexData(expectedArraySize, getComplexData().length); if (expectedArraySize != getComplexData().length) {
LittleEndian.putShort(getComplexData(), 0, (short)numberOfElements); byte[] newArray = new byte[expectedArraySize];
System.arraycopy(getComplexData(), 0, newArray, 0, getComplexData().length);
setComplexData(newArray);
}
LittleEndian.putShort(getComplexData(), 0, (short) numberOfElements);
} }
public int getNumberOfElementsInMemory() { public int getNumberOfElementsInMemory() {
@ -77,8 +81,12 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
} }
public void setNumberOfElementsInMemory(int numberOfElements) { public void setNumberOfElementsInMemory(int numberOfElements) {
int expectedArraySize = getArraySizeInBytes(numberOfElements, getSizeOfElements()); int expectedArraySize = numberOfElements * getActualSizeOfElements(getSizeOfElements()) + FIXED_SIZE;
resizeComplexData(expectedArraySize, expectedArraySize); if (expectedArraySize != getComplexData().length) {
byte[] newArray = new byte[expectedArraySize];
System.arraycopy(getComplexData(), 0, newArray, 0, expectedArraySize);
setComplexData(newArray);
}
LittleEndian.putShort(getComplexData(), 2, (short) numberOfElements); LittleEndian.putShort(getComplexData(), 2, (short) numberOfElements);
} }
@ -89,9 +97,13 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
public void setSizeOfElements(int sizeOfElements) { public void setSizeOfElements(int sizeOfElements) {
LittleEndian.putShort( getComplexData(), 4, (short) sizeOfElements ); LittleEndian.putShort( getComplexData(), 4, (short) sizeOfElements );
int expectedArraySize = getArraySizeInBytes(getNumberOfElementsInArray(), sizeOfElements); int expectedArraySize = getNumberOfElementsInArray() * getActualSizeOfElements(getSizeOfElements()) + FIXED_SIZE;
// Keep just the first 6 bytes. The rest is no good to us anyway. if (expectedArraySize != getComplexData().length) {
resizeComplexData(expectedArraySize, 6); // Keep just the first 6 bytes. The rest is no good to us anyway.
byte[] newArray = new byte[expectedArraySize];
System.arraycopy( getComplexData(), 0, newArray, 0, 6 );
setComplexData(newArray);
}
} }
public byte[] getElement(int index) { public byte[] getElement(int index) {
@ -108,13 +120,13 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
@Override @Override
public String toString() { public String toString() {
StringBuffer results = new StringBuffer(); StringBuilder results = new StringBuilder();
results.append(" {EscherArrayProperty:" + '\n'); results.append(" {EscherArrayProperty:" + '\n');
results.append(" Num Elements: " + getNumberOfElementsInArray() + '\n'); results.append(" Num Elements: ").append(getNumberOfElementsInArray()).append('\n');
results.append(" Num Elements In Memory: " + getNumberOfElementsInMemory() + '\n'); results.append(" Num Elements In Memory: ").append(getNumberOfElementsInMemory()).append('\n');
results.append(" Size of elements: " + getSizeOfElements() + '\n'); results.append(" Size of elements: ").append(getSizeOfElements()).append('\n');
for (int i = 0; i < getNumberOfElementsInArray(); i++) { for (int i = 0; i < getNumberOfElementsInArray(); i++) {
results.append(" Element " + i + ": " + HexDump.toHex(getElement(i)) + '\n'); results.append(" Element ").append(i).append(": ").append(HexDump.toHex(getElement(i))).append('\n');
} }
results.append("}" + '\n'); results.append("}" + '\n');
@ -150,25 +162,23 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
*/ */
public int setArrayData(byte[] data, int offset) { public int setArrayData(byte[] data, int offset) {
if (emptyComplexPart){ if (emptyComplexPart){
resizeComplexData(0, 0); setComplexData(new byte[0]);
return 0; } else {
} short numElements = LittleEndian.getShort(data, offset);
// LittleEndian.getShort(data, offset + 2); // numReserved
short numElements = LittleEndian.getShort(data, offset); short sizeOfElements = LittleEndian.getShort(data, offset + 4);
// LittleEndian.getShort(data, offset + 2); // numReserved
short sizeOfElements = LittleEndian.getShort(data, offset + 4);
// TODO: this part is strange - it doesn't make sense to compare // the code here seems to depend on complexData already being
// the size of the existing data when setting a new data array ... // sized correctly via the constructor
int arraySize = getArraySizeInBytes(numElements, sizeOfElements); int arraySize = getActualSizeOfElements(sizeOfElements) * numElements;
if (arraySize - FIXED_SIZE == getComplexData().length) { if (arraySize == getComplexData().length) {
// The stored data size in the simple block excludes the header size // The stored data size in the simple block excludes the header size
sizeIncludesHeaderSize = false; setComplexData(new byte[arraySize + 6]);
sizeIncludesHeaderSize = false;
}
System.arraycopy(data, offset, getComplexData(), 0, getComplexData().length );
} }
int cpySize = Math.min(arraySize, data.length-offset); return getComplexData().length;
resizeComplexData(cpySize, 0);
System.arraycopy(data, offset, getComplexData(), 0, cpySize);
return cpySize;
} }
/** /**
@ -199,15 +209,10 @@ public final class EscherArrayProperty extends EscherComplexProperty implements
return sizeOfElements; return sizeOfElements;
} }
private static int getArraySizeInBytes(int numberOfElements, int sizeOfElements) {
return numberOfElements * getActualSizeOfElements((short)(sizeOfElements & 0xFFFF)) + FIXED_SIZE;
}
@Override @Override
public Iterator<byte[]> iterator() { public Iterator<byte[]> iterator() {
return new Iterator<byte[]>(){ return new Iterator<byte[]>(){
private int idx = 0; int idx;
@Override @Override
public boolean hasNext() { public boolean hasNext() {
return (idx < getNumberOfElementsInArray()); return (idx < getNumberOfElementsInArray());

View File

@ -28,7 +28,6 @@ import org.apache.poi.util.LittleEndian;
* stored here. * stored here.
*/ */
public class EscherComplexProperty extends EscherProperty { public class EscherComplexProperty extends EscherProperty {
// TODO - make private and final
private byte[] _complexData; private byte[] _complexData;
/** /**
@ -88,23 +87,17 @@ public class EscherComplexProperty extends EscherProperty {
/** /**
* Get the complex data value. * Get the complex data value.
* *
* @return the complex bytes * @return the complex bytes
*/ */
public byte[] getComplexData() { public byte[] getComplexData() {
return _complexData; return _complexData;
} }
protected void resizeComplexData(int newSize, int bytesToKeep) { protected void setComplexData(byte[] _complexData) {
if (newSize == _complexData.length) { this._complexData = _complexData;
return;
}
byte[] newArray = new byte[newSize];
System.arraycopy(_complexData, 0, newArray, 0, Math.min(bytesToKeep, newSize));
_complexData = newArray;
} }
/** /**
* Determine whether this property is equal to another property. * Determine whether this property is equal to another property.
* *

View File

@ -903,6 +903,7 @@ public class TestBugs{
assertEquals(section2NumColumns, section.getNumColumns()); assertEquals(section2NumColumns, section.getNumColumns());
} }
@Test
public void testRegressionIn315beta2() { public void testRegressionIn315beta2() {
HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFile("cap.stanford.edu_profiles_viewbiosketch_facultyid=4009&name=m_maciver.doc"); HWPFDocument hwpfDocument = HWPFTestDataSamples.openSampleFile("cap.stanford.edu_profiles_viewbiosketch_facultyid=4009&name=m_maciver.doc");
assertNotNull(hwpfDocument); assertNotNull(hwpfDocument);
@ -929,4 +930,10 @@ public class TestBugs{
fs.close(); fs.close();
} }
} }
@Test
public void testCommonCrawlRegression() throws IOException {
HWPFDocument document = HWPFTestDataSamples.openSampleFile("ca.kwsymphony.www_education_School_Concert_Seat_Booking_Form_2011-12.doc");
document.close();
}
} }