fixed EscherAggregate to correctly write records with trailing solver container

git-svn-id: https://svn.apache.org/repos/asf/poi/branches/gsoc2012@1350298 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2012-06-14 15:36:34 +00:00
parent 1021f99832
commit 3d077abe2c
6 changed files with 246 additions and 86 deletions

View File

@ -311,7 +311,7 @@ public abstract class EscherRecord {
protected String formatXmlRecordHeader(String className, String recordId, String version, String instance){
StringBuilder builder = new StringBuilder();
builder.append("<").append(className).append(" recordId=\"0x").append(recordId).append("\" version=\"0x")
.append(version).append("\" instance=\"0x").append(instance).append("\">\n");
.append(version).append("\" instance=\"0x").append(instance).append("\" size=\"").append(getRecordSize()).append("\">\n");
return builder.toString();
}

View File

@ -85,4 +85,9 @@ public final class DrawingRecord extends StandardRecord {
return rec;
}
@Override
public String toString() {
return "DrawingRecord["+recordData.length+"]";
}
}

View File

@ -82,9 +82,9 @@ import org.apache.poi.util.POILogger;
* build escher(office art) records tree from this array.
* Each shape in drawing layer matches corresponding ObjRecord
* Each textbox matches corresponding TextObjectRecord
*
* <p/>
* ObjRecord contains information about shape. Thus each ObjRecord corresponds EscherContainerRecord(SPGR)
*
* <p/>
* EscherAggrefate contains also NoteRecords
* NoteRecords must be serial
*
@ -492,51 +492,28 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
pos = offset;
int writtenEscherBytes = 0;
for (int i = 1; i < shapes.size(); i++) {
int endOffset;
if (i == shapes.size()-1){
endOffset = buffer.length - 1;
} else {
endOffset = (Integer) spEndingOffsets.get(i) - 1;
}
int endOffset = (Integer) spEndingOffsets.get(i) - 1;
int startOffset;
if (i == 1)
startOffset = 0;
else
startOffset = (Integer) spEndingOffsets.get(i - 1);
byte[] drawingData = new byte[endOffset - startOffset + 1];
System.arraycopy(buffer, startOffset, drawingData, 0, drawingData.length);
int temp = 0;
pos += writeDataIntoDrawingRecord(0, drawingData, writtenEscherBytes, pos, data, i);
//First record in drawing layer MUST be DrawingRecord
if (writtenEscherBytes + drawingData.length > RecordInputStream.MAX_RECORD_DATA_SIZE && i != 1) {
for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) {
ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
}
} else {
for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) {
if (j == 0) {
DrawingRecord drawing = new DrawingRecord();
drawing.setData(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
} else {
ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
}
}
}
pos += temp;
writtenEscherBytes += drawingData.length;
// Write the matching OBJ record
Record obj = shapeToObj.get(shapes.get(i));
temp = obj.serialize(pos, data);
pos += temp;
pos += obj.serialize(pos, data);
if (i == shapes.size() - 1 && endOffset < buffer.length - 1) {
drawingData = new byte[buffer.length - endOffset - 1];
System.arraycopy(buffer, endOffset + 1, drawingData, 0, drawingData.length);
pos += writeDataIntoDrawingRecord(0, drawingData, writtenEscherBytes, pos, data, i);
}
}
// write records that need to be serialized after all drawing group records
@ -544,13 +521,34 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
Record rec = (Record) tailRec.get(i);
pos += rec.serialize(pos, data);
}
int bytesWritten = pos - offset;
if (bytesWritten != getRecordSize())
throw new RecordFormatException(bytesWritten + " bytes written but getRecordSize() reports " + getRecordSize());
return bytesWritten;
}
private int writeDataIntoDrawingRecord(int temp, byte[] drawingData, int writtenEscherBytes, int pos, byte[] data, int i) {
//First record in drawing layer MUST be DrawingRecord
if (writtenEscherBytes + drawingData.length > RecordInputStream.MAX_RECORD_DATA_SIZE && i != 1) {
for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) {
ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
}
} else {
for (int j = 0; j < drawingData.length; j += RecordInputStream.MAX_RECORD_DATA_SIZE) {
if (j == 0) {
DrawingRecord drawing = new DrawingRecord();
drawing.setData(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
} else {
ContinueRecord drawing = new ContinueRecord(Arrays.copyOfRange(drawingData, j, Math.min(j + RecordInputStream.MAX_RECORD_DATA_SIZE, drawingData.length)));
temp += drawing.serialize(pos + temp, data);
}
}
}
return temp;
}
/**
* How many bytes do the raw escher records contain.
*
@ -591,10 +589,13 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
spEndingOffsets.add(0, 0);
for (int i = 1; i < spEndingOffsets.size(); i++) {
if (spEndingOffsets.get(i) - spEndingOffsets.get(i - 1) <= RecordInputStream.MAX_RECORD_DATA_SIZE){
if (i == spEndingOffsets.size() - 1 && spEndingOffsets.get(i) < pos) {
continueRecordsHeadersSize += 4;
}
if (spEndingOffsets.get(i) - spEndingOffsets.get(i - 1) <= RecordInputStream.MAX_RECORD_DATA_SIZE) {
continue;
}
continueRecordsHeadersSize += ((spEndingOffsets.get(i) - spEndingOffsets.get(i - 1)) / RecordInputStream.MAX_RECORD_DATA_SIZE)*4;
continueRecordsHeadersSize += ((spEndingOffsets.get(i) - spEndingOffsets.get(i - 1)) / RecordInputStream.MAX_RECORD_DATA_SIZE) * 4;
}
int drawingRecordSize = rawEscherSize + (shapeToObj.size()) * 4;
@ -608,13 +609,13 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
Record r = (Record) iterator.next();
tailRecordSize += r.getRecordSize();
}
return drawingRecordSize + objRecordSize + tailRecordSize +continueRecordsHeadersSize;
return drawingRecordSize + objRecordSize + tailRecordSize + continueRecordsHeadersSize;
}
/**
* Associates an escher record to an OBJ record or a TXO record.
*/
Object associateShapeToObjRecord(EscherRecord r, ObjRecord objRecord) {
public Object associateShapeToObjRecord(EscherRecord r, ObjRecord objRecord) {
return shapeToObj.put(r, objRecord);
}
@ -624,6 +625,7 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
public void setPatriarch(HSSFPatriarch patriarch) {
this.patriarch = patriarch;
convertPatriarch(patriarch);
}
/**
@ -1080,12 +1082,12 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
/**
* Returns the mapping of {@link EscherClientDataRecord} and {@link EscherTextboxRecord}
* to their {@link TextObjectRecord} or {@link ObjRecord} .
*
* <p/>
* We need to access it outside of EscherAggregate when building shapes
*
* @return
*/
public Map<EscherRecord, Record> getShapeToObjMapping(){
public Map<EscherRecord, Record> getShapeToObjMapping() {
return Collections.unmodifiableMap(shapeToObj);
}

View File

@ -67,7 +67,7 @@ public class HSSFShapeFactory {
}
Class clazz = shapeTypeToClass.get(type);
if (null == clazz){
System.out.println("No class attached to shape type: "+type);
//System.out.println("No class attached to shape type: "+type);
return new HSSFUnknownShape(spContainer, objRecord);
}
try{

View File

@ -19,16 +19,35 @@ package org.apache.poi.hssf.model;
import junit.framework.TestCase;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherDggRecord;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.record.ContinueRecord;
import org.apache.poi.hssf.record.DrawingRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.EscherAggregate;
import org.apache.poi.hssf.record.NoteRecord;
import org.apache.poi.hssf.record.ObjRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.RecordBase;
import org.apache.poi.hssf.record.RecordFactory;
import org.apache.poi.hssf.record.TextObjectRecord;
import org.apache.poi.hssf.record.WindowTwoRecord;
import org.apache.poi.hssf.record.aggregates.RowRecordsAggregate;
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.hssf.usermodel.HSSFPatriarch;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFTestHelper;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.util.HexRead;
import java.io.*;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @author Yegor Kozlov
@ -36,52 +55,158 @@ import java.util.List;
*/
public class TestDrawingAggregate extends TestCase {
private int spgrCount = 0;
private int spCount = 0;
private int shapeCount = 0;
private int shGroupCount = 0;
/*
* EscherAggregate must have for each SpgrContainer HSSFShapeGroup and for each SpContainer HSSFShape
/**
* information about drawing aggregate in a worksheet
*/
private void checkEscherAndShapesCount(EscherAggregate agg, HSSFSheet sheet) {
/*
HSSFPatriarch patriarch = HSSFTestHelper.createTestPatriarch(sheet, agg);
agg.setPatriarch(patriarch);
EscherAggregate.createShapeTree(EscherAggregate.getMainSpgrContainer(agg), agg.getPatriarch(), agg);
EscherContainerRecord mainContainer = EscherAggregate.getMainSpgrContainer(agg);
calculateShapesCount(agg.getPatriarch());
calculateEscherContainersCount(mainContainer);
private static class DrawingAggregateInfo {
/**
* start and end indices of the aggregate in the worksheet stream
*/
private int startRecordIndex, endRecordIndex;
/**
* the records being aggregated
*/
private List<RecordBase> aggRecords;
assertEquals(spgrCount, shGroupCount);
assertEquals(spCount - spgrCount - 1, shapeCount);
*/
/**
* @return aggregate info or null if the sheet does not contain drawing objects
*/
static DrawingAggregateInfo get(HSSFSheet sheet){
DrawingAggregateInfo info = null;
InternalSheet isheet = HSSFTestHelper.getSheetForTest(sheet);
List<RecordBase> records = isheet.getRecords();
for(int i = 0; i < records.size(); i++){
RecordBase rb = records.get(i);
if((rb instanceof DrawingRecord) && info == null) {
info = new DrawingAggregateInfo();
info.startRecordIndex = i;
info.endRecordIndex = i;
} else if (info != null && (
rb instanceof DrawingRecord
|| rb instanceof ObjRecord
|| rb instanceof TextObjectRecord
|| rb instanceof ContinueRecord
|| rb instanceof NoteRecord
)){
info.endRecordIndex = i;
} else {
if(rb instanceof EscherAggregate)
throw new IllegalStateException("Drawing data already aggregated. " +
"You should cal this method before the first invocation of HSSFSheet#getDrawingPatriarch()");
if (info != null) break;
}
}
if(info != null){
info.aggRecords = new ArrayList<RecordBase>(
records.subList(info.startRecordIndex, info.endRecordIndex + 1));
}
return info;
}
/**
* @return the raw data being aggregated
*/
byte[] getRawBytes(){
ByteArrayOutputStream out = new ByteArrayOutputStream();
for (RecordBase rb : aggRecords) {
Record r = (Record) rb;
try {
out.write(r.serialize());
} catch (IOException e) {
throw new RuntimeException(e);
}
}
return out.toByteArray();
}
}
private void calculateEscherContainersCount(EscherContainerRecord spgr) {
for (EscherRecord record : spgr.getChildRecords()) {
if (EscherContainerRecord.SP_CONTAINER == record.getRecordId()) {
spCount++;
/**
* iterate over all sheets, aggregate drawing records (if there are any)
* and remember information about the aggregated data.
* Then serialize the workbook, read back and assert that the aggregated data is preserved.
*
* The assertion is strict meaning that the drawing data before and after save must be equal.
*/
private static void assertWriteAndReadBack(HSSFWorkbook wb){
// map aggregate info by sheet index
Map<Integer, DrawingAggregateInfo> aggs = new HashMap<Integer, DrawingAggregateInfo>();
for(int i = 0; i < wb.getNumberOfSheets(); i++){
HSSFSheet sheet = wb.getSheetAt(i);
DrawingAggregateInfo info = DrawingAggregateInfo.get(sheet);
if(info != null) {
aggs.put(i, info);
HSSFPatriarch p = sheet.getDrawingPatriarch();
// compare aggregate.serialize() with raw bytes from the record stream
EscherAggregate agg = HSSFTestHelper.getEscherAggregate(p);
byte[] dgBytes1 = info.getRawBytes();
byte[] dgBytes2 = agg.serialize();
assertEquals("different size of raw data ande aggregate.serialize()", dgBytes1.length, dgBytes2.length);
assertTrue("raw drawing data ("+dgBytes1.length+" bytes) and aggregate.serialize() are different.",
Arrays.equals(dgBytes1, dgBytes2));
}
}
if(aggs.size() != 0){
HSSFWorkbook wb2 = HSSFTestDataSamples.writeOutAndReadBack(wb);
for(int i = 0; i < wb2.getNumberOfSheets(); i++){
DrawingAggregateInfo info1 = aggs.get(i);
if(info1 != null) {
HSSFSheet sheet2 = wb2.getSheetAt(i);
DrawingAggregateInfo info2 = DrawingAggregateInfo.get(sheet2);
byte[] dgBytes1 = info1.getRawBytes();
byte[] dgBytes2 = info2.getRawBytes();
assertEquals("different size of drawing data before and after save", dgBytes1.length, dgBytes2.length);
assertTrue("drawing data ("+dgBytes1.length+" bytes) before and after save is different.",
Arrays.equals(dgBytes1, dgBytes2));
}
}
}
}
/**
* test that we correctly read and write drawing aggregates
* in all .xls files in POI test samples
*/
public void testAllTestSamples(){
File[] xls = new File(System.getProperty("POI.testdata.path"), "spreadsheet").listFiles(
new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(".xls");
}
}
);
for(File file : xls) {
HSSFWorkbook wb;
try {
wb = HSSFTestDataSamples.openSampleWorkbook(file.getName());
} catch (Throwable e){
// don't bother about files we cannot read - they are different bugs
// System.out.println("[WARN] Cannot read " + file.getName());
continue;
}
if (EscherContainerRecord.SPGR_CONTAINER == record.getRecordId()) {
spgrCount++;
calculateEscherContainersCount((EscherContainerRecord) record);
try {
assertWriteAndReadBack(wb);
} catch (Throwable e){
//e.printStackTrace();
System.err.println("[ERROR] assertion failed for " + file.getName() + ": " + e.getMessage());
}
}
}
private void calculateShapesCount(HSSFShapeContainer group) {
for (HSSFShape shape : (List<HSSFShape>) group.getChildren()) {
if (shape instanceof HSSFShapeGroup) {
shGroupCount++;
calculateShapesCount((HSSFShapeGroup) shape);
} else {
shapeCount++;
}
}
}
/**
* TODO: figure out why it fails with "RecordFormatException: 0 bytes written but getRecordSize() reports 80"
*/
public void testFailing(){
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("15573.xls");
HSSFSheet sh = wb.getSheetAt(0);
sh.getDrawingPatriarch();
wb = HSSFTestDataSamples.writeOutAndReadBack(wb);
}
private static byte[] toByteArray(List<RecordBase> records) {
ByteArrayOutputStream out = new ByteArrayOutputStream();
@ -96,10 +221,14 @@ public class TestDrawingAggregate extends TestCase {
return out.toByteArray();
}
public void testSolverContainerMustBeSavedDuringSerialization(){
public void testSolverContainerMustBeSavedDuringSerialization() throws IOException{
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("SolverContainerAfterSPGR.xls");
HSSFSheet sh = wb.getSheetAt(0);
InternalSheet ish = HSSFTestHelper.getSheetForTest(sh);
List<RecordBase> records = ish.getRecords();
// records to be aggregated
List<RecordBase> dgRecords = records.subList(19, 22);
byte[] dgBytes = toByteArray(dgRecords);
sh.getDrawingPatriarch();
EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid);
assertEquals(agg.getEscherRecords().get(0).getChildRecords().size(), 3);
@ -112,6 +241,29 @@ public class TestDrawingAggregate extends TestCase {
assertEquals(agg.getEscherRecords().get(0).getChildRecords().size(), 3);
assertEquals(agg.getEscherRecords().get(0).getChild(2).getRecordId(), EscherContainerRecord.SOLVER_CONTAINER);
// collect drawing records into a byte buffer.
agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid);
byte[] dgBytesAfterSave = agg.serialize();
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
}
public void testFileWithTextbox() throws IOException{
HSSFWorkbook wb = HSSFTestDataSamples.openSampleWorkbook("text.xls");
HSSFSheet sh = wb.getSheetAt(0);
InternalSheet ish = HSSFTestHelper.getSheetForTest(sh);
List<RecordBase> records = ish.getRecords();
// records to be aggregated
List<RecordBase> dgRecords = records.subList(19, 23);
byte[] dgBytes = toByteArray(dgRecords);
sh.getDrawingPatriarch();
// collect drawing records into a byte buffer.
EscherAggregate agg = (EscherAggregate) ish.findFirstRecordBySid(EscherAggregate.sid);
byte[] dgBytesAfterSave = agg.serialize();
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
}
/**
@ -172,8 +324,7 @@ public class TestDrawingAggregate extends TestCase {
byte[] dgBytesAfterSave = agg.serialize();
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data brefpore and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
checkEscherAndShapesCount(agg, sh);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
}
/**
@ -241,7 +392,6 @@ public class TestDrawingAggregate extends TestCase {
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
checkEscherAndShapesCount(agg, sh);
}
@ -323,7 +473,6 @@ public class TestDrawingAggregate extends TestCase {
byte[] dgBytesAfterSave = agg.serialize();
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
checkEscherAndShapesCount(agg, sh);
}
@ -382,7 +531,6 @@ public class TestDrawingAggregate extends TestCase {
byte[] dgBytesAfterSave = agg.serialize();
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data before and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
checkEscherAndShapesCount(agg, sh);
}
public void testUnhandledContinue() {
@ -2051,4 +2199,5 @@ public class TestDrawingAggregate extends TestCase {
assertEquals("different size of drawing data before and after save", dgBytes.length, dgBytesAfterSave.length);
assertTrue("drawing data brefpore and after save is different", Arrays.equals(dgBytes, dgBytesAfterSave));
}
}

View File

@ -39,4 +39,8 @@ public class HSSFTestHelper {
public static HSSFPatriarch createTestPatriarch(HSSFSheet sheet, EscherAggregate agg){
return new HSSFPatriarch(sheet, agg);
}
public static EscherAggregate getEscherAggregate(HSSFPatriarch patriarch){
return patriarch._getBoundAggregate();
}
}