Improved handling of byte position sensitive records

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353709 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2005-06-09 15:09:16 +00:00
parent cfc6960898
commit d40aa464fa
11 changed files with 417 additions and 38 deletions

View File

@ -77,14 +77,21 @@ public abstract class Sheet
} else if(records[i+1] instanceof TextBytesAtom) {
TextBytesAtom tba = (TextBytesAtom)records[i+1];
trun = new TextRun(tha,tba);
} else if(records[i+1].getRecordType() == 4001l) {
// StyleTextPropAtom - Safe to ignore
} else if(records[i+1].getRecordType() == 4010l) {
// Safe to ignore
// TextSpecInfoAtom - Safe to ignore
} else {
System.err.println("Found a TextHeaderAtom not followed by a TextBytesAtom or TextCharsAtom: Followed by " + records[i+1].getRecordType());
continue;
}
found.add(trun);
i++;
if(trun != null) {
found.add(trun);
i++;
} else {
// Not a valid one, so skip on to next and look again
}
}
}
}

View File

@ -99,7 +99,7 @@ public class TextRun
public void setText(String s) {
// If size changed, warn
if(s.length() != getText().length()) {
System.err.println("Warning: Your powerpoint file is probably no longer readable by powerpoint, as the text run has changed size!");
System.err.println("Warning: Your powerpoint file may no longer readable by powerpoint, as the text run has changed size!");
}
if(_isUnicode) {

View File

@ -0,0 +1,75 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.io.ByteArrayOutputStream;
/**
* If we come across a record we know has children of (potential)
* interest, but where the record itself is boring, but where other
* records may care about where this one lives, we create one
* of these. It allows us to get at the children, and track where on
* disk this is, but not much else.
* Anything done using this should quite quickly be transitioned to its
* own proper record class!
*
* @author Nick Burch
*/
public class DummyPositionSensitiveRecordWithChildren extends PositionDependentRecordContainer
{
    /** Number of bytes copied from the front of the record as its header */
    private static final int HEADER_LENGTH = 8;

    /** The child records found after the header */
    private Record[] _children;
    /** Verbatim copy of the 8 header bytes, kept so they can be replayed on write */
    private byte[] _header;
    /** Record type, read as an unsigned short from bytes 2-3 of the header */
    private long _type;

    /**
     * Builds a placeholder for a record that is uninteresting in itself,
     * but which has children and whose position matters to other records.
     *
     * @param source the byte array the record lives in
     * @param start  index in <code>source</code> of the first header byte
     * @param len    length of the record, header included
     */
    protected DummyPositionSensitiveRecordWithChildren(byte[] source, int start, int len) {
        // Only the header is retained; the body is handed on to the child parser
        byte[] header = new byte[HEADER_LENGTH];
        System.arraycopy(source, start, header, 0, HEADER_LENGTH);
        this._header = header;
        this._type = LittleEndian.getUShort(header, 2);

        // Everything after the header is treated as child records
        int childrenStart = start + HEADER_LENGTH;
        int childrenLength = len - HEADER_LENGTH;
        this._children = Record.findChildRecords(source, childrenStart, childrenLength);
    }

    /**
     * @return the record type that was read from the header at creation
     */
    public long getRecordType() {
        return this._type;
    }

    /**
     * @return the child records found at creation
     */
    public Record[] getChildRecords() {
        return this._children;
    }

    /**
     * Writes the record back out, passing the first two header bytes, the
     * type and the children to the inherited five-argument writeOut.
     *
     * @param out the stream to write the record to
     * @throws IOException if the underlying write fails
     */
    public void writeOut(OutputStream out) throws IOException {
        byte firstHeaderByte = this._header[0];
        byte secondHeaderByte = this._header[1];
        writeOut(firstHeaderByte, secondHeaderByte, this._type, this._children, out);
    }
}

View File

@ -30,7 +30,7 @@ import java.io.ByteArrayOutputStream;
* @author Nick Burch
*/
public class Notes extends RecordContainer
public class Notes extends PositionDependentRecordContainer
{
private Record[] _children;
private byte[] _header;

View File

@ -21,47 +21,183 @@ package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Enumeration;
import java.util.Hashtable;
/**
* General holder for PersistPtrFullBlock and PersistPtrIncrementalBlock
* records. We need to handle them specially, since we have to go around
* updating UserEditAtoms if they shuffle about on disk
* These hold references to where slides "live". If the position of a slide
* moves, then we have to update all of these. If we come up with a new version
* of a slide, then we have to add one of these to the end of the chain
* (via CurrentUserAtom and UserEditAtom) pointing to the new slide location
*
* @author Nick Burch
*/
public class PersistPtrHolder extends PositionDependentRecordAtom
{
    /** Size in bytes of the record header that precedes the pointer data */
    private static final int HEADER_SIZE = 8;
    /** Number of low bits of an info value that hold the first slide ID of a run */
    private static final int OFFSET_NUMBER_BITS = 20;
    /** Mask selecting those low bits of an info value */
    private static final long OFFSET_NUMBER_MASK = (1L << OFFSET_NUMBER_BITS) - 1;

    /** The 8 header bytes; bytes 4-7 hold the length of the pointer data */
    private byte[] _header;
    /** The data part of the record: runs of (info value, offsets...) */
    private byte[] _ptrData;
    /** Record type, read as an unsigned short from bytes 2-3 of the header */
    private long _type;

    /**
     * Holds the lookup for slides to their position on disk.
     * You always need to check the most recent PersistPtrHolder
     *  that knows about a given slide to find the right location
     */
    private Hashtable _slideLocations;
    /**
     * Holds the lookup from slide id to where their offset is
     *  held inside _ptrData. Used when writing out, and updating
     *  the positions of the slides
     */
    private Hashtable _slideOffsetDataLocation;

    /**
     * Get the list of slides that this PersistPtrHolder knows about.
     * (They will be the keys in the hashtable for looking up the positions
     *  of these slides)
     *
     * @return the known slide IDs, in the hashtable's enumeration order
     */
    public int[] getKnownSlideIDs() {
        int[] ids = new int[_slideLocations.size()];
        Enumeration e = _slideLocations.keys();
        for(int i=0; i<ids.length; i++) {
            Integer id = (Integer)e.nextElement();
            ids[i] = id.intValue();
        }
        return ids;
    }

    /**
     * Get the lookup from slide numbers to byte offsets, for the slides
     *  known about by this PersistPtrHolder.
     *
     * @return the live internal table (Integer slide ID to Integer offset),
     *          not a copy
     */
    public Hashtable getSlideLocationsLookup() {
        return _slideLocations;
    }

    /**
     * Adds a new slide, notes or similar, to be looked up by this.
     * For now, won't look for the most optimal on disk representation:
     *  the slide is always appended as its own run of one entry.
     *
     * @param slideID   the ID of the slide to add
     * @param posOnDisk the byte offset at which that slide lives
     */
    public void addSlideLookup(int slideID, int posOnDisk) {
        // The data grows by 8 bytes: 4 for the info value, 4 for the offset
        int entryStart = _ptrData.length;
        byte[] newPtrData = new byte[entryStart + 8];
        System.arraycopy(_ptrData,0,newPtrData,0,entryStart);

        // Add to the slide location lookup hash
        Integer id = new Integer(slideID);
        _slideLocations.put(id, new Integer(posOnDisk));
        // Also remember where in the data the offset is stored, so that
        //  updateOtherRecordReferences can rewrite it later. The offset
        //  sits straight after the 4 byte info value
        _slideOffsetDataLocation.put(id, new Integer(entryStart + 4));

        // Build the info block
        // Low 20 bits = offset number = slide ID
        // High 12 bits = offset count = 1
        int infoBlock = slideID;
        infoBlock += (1 << OFFSET_NUMBER_BITS);

        // Write out the data for this
        LittleEndian.putInt(newPtrData,entryStart,infoBlock);
        LittleEndian.putInt(newPtrData,entryStart+4,posOnDisk);

        // Save the new ptr data
        _ptrData = newPtrData;

        // Update the atom header with the new data length
        LittleEndian.putInt(_header,4,newPtrData.length);
    }

    /**
     * Create a new holder for a PersistPtr record
     *
     * @param source the byte array the record lives in
     * @param start  index in <code>source</code> of the first header byte
     * @param len    length of the record, header included; values below
     *                8 are treated as 8
     */
    protected PersistPtrHolder(byte[] source, int start, int len) {
        // Sanity Checking - the length includes the header, so
        //  it can never be less than the header size
        if(len < HEADER_SIZE) { len = HEADER_SIZE; }

        // Store where we are found on disk
        myLastOnDiskOffset = start;

        // Grab the header, and the record type from within it
        _header = new byte[HEADER_SIZE];
        System.arraycopy(source,start,_header,0,HEADER_SIZE);
        _type = LittleEndian.getUShort(_header,2);

        // Try to make sense of the data part:
        // Data part is made up of a number of these sets:
        //   32 bit info value
        //      12 bits count of # of entries
        //      base number for these entries
        //   count * 32 bit offsets
        // Repeat as many times as you have data
        _slideLocations = new Hashtable();
        _slideOffsetDataLocation = new Hashtable();
        _ptrData = new byte[len-HEADER_SIZE];
        System.arraycopy(source,start+HEADER_SIZE,_ptrData,0,_ptrData.length);

        int pos = 0;
        while(pos < _ptrData.length) {
            // Grab the info field
            long info = LittleEndian.getUInt(_ptrData,pos);

            // Low 20 bits = offset number
            // High 12 bits = offset count
            // Mask rather than subtract a shifted int, which would
            //  overflow for counts of 2048 and above
            int offset_count = (int)(info >> OFFSET_NUMBER_BITS);
            int offset_no = (int)(info & OFFSET_NUMBER_MASK);

            // Wind on by the 4 byte info header
            pos += 4;

            // Grab the offsets for each of the sheets
            for(int i=0; i<offset_count; i++) {
                int sheet_no = offset_no + i;
                long sheet_offset = LittleEndian.getUInt(_ptrData,pos);
                _slideLocations.put(new Integer(sheet_no), new Integer((int)sheet_offset));
                _slideOffsetDataLocation.put(new Integer(sheet_no), new Integer(pos));

                // Wind on by 4 bytes per sheet found
                pos += 4;
            }
        }
    }

    /**
     * Return the value we were given at creation, be it 6001 or 6002
     */
    public long getRecordType() { return _type; }

    /**
     * At write-out time, update the references to the sheets to their
     *  new positions
     *
     * @param oldToNewReferencesLookup lookup from old on-disk offset
     *         (Integer) to new on-disk offset (Integer)
     * @throws RuntimeException if a known slide's old position has no
     *          entry in the lookup
     */
    public void updateOtherRecordReferences(Hashtable oldToNewReferencesLookup) {
        int[] slideIDs = getKnownSlideIDs();

        // Loop over all the slides we know about
        // Find where they used to live, and where they now live
        // Then, update the right bit of _ptrData with their new location
        for(int i=0; i<slideIDs.length; i++) {
            Integer id = new Integer(slideIDs[i]);
            Integer oldPos = (Integer)_slideLocations.get(id);
            Integer newPos = (Integer)oldToNewReferencesLookup.get(oldPos);
            if(newPos == null) {
                throw new RuntimeException("Couldn't find the new location of the \"slide\" that used to be at " + oldPos);
            }

            // Write out the new location
            Integer dataOffset = (Integer)_slideOffsetDataLocation.get(id);
            LittleEndian.putInt(_ptrData,dataOffset.intValue(),newPos.intValue());

            // Update our hashtable (put replaces the previous mapping)
            _slideLocations.put(id,newPos);
        }
    }

    /**
     * Write the contents of the record back, so it can be written
     *  to disk. The header is written first, then the pointer data.
     *
     * @param out the stream to write the record to
     * @throws IOException if the underlying write fails
     */
    public void writeOut(OutputStream out) throws IOException {
        out.write(_header);
        out.write(_ptrData);
    }
}

View File

@ -0,0 +1,52 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.util.Hashtable;
/**
* Records which either care about where they are on disk, or have other
* records who care about where they are, will implement this interface.
* Normally, they'll subclass PositionDependentRecordAtom or
* PositionDependentRecordContainer, which will do the work of providing
* the setting and updating interfaces for them.
* This is a special (and dangerous) kind of Record. When created, they
* need to be pinged with their current location. When written out, they
* need to be given their new location, and offered the list of records
* which have changed their location.
*
* @author Nick Burch
*/
public interface PositionDependentRecord
{
    /**
     * Reports where this record was on disk as of the last write out.
     *
     * @return the record's last known on-disk offset
     */
    int getLastOnDiskOffset();

    /**
     * Tells the record where on disk it now lives, following a write out.
     * Use with care...
     *
     * @param offset the record's new on-disk offset
     */
    void setLastOnDiskOffset(int offset);

    /**
     * Hands the record the list of records whose location changed
     *  as part of the write out.
     *
     * @param oldToNewReferencesLookup the lookup of changed locations
     */
    void updateOtherRecordReferences(Hashtable oldToNewReferencesLookup);
}

View File

@ -17,6 +17,7 @@
package org.apache.poi.hslf.record;
import java.util.Hashtable;
/**
* A special (and dangerous) kind of Record Atom that cares about where
@ -26,7 +27,7 @@ package org.apache.poi.hslf.record;
* @author Nick Burch
*/
public abstract class PositionDependentRecordAtom extends RecordAtom
public abstract class PositionDependentRecordAtom extends RecordAtom implements PositionDependentRecord
{
/** Our location on the disk, as of the last write out */
protected int myLastOnDiskOffset;
@ -38,7 +39,15 @@ public abstract class PositionDependentRecordAtom extends RecordAtom
* Update the Record's idea of where on disk it lives, after a write out.
* Use with care...
*/
public void setLastOnDiskOffet(int offset) {
public void setLastOnDiskOffset(int offset) {
myLastOnDiskOffset = offset;
}
/**
* Offer the record the list of records that have changed their
* location as part of the writeout.
* Allows records to update their internal pointers to other records
* locations
*/
public abstract void updateOtherRecordReferences(Hashtable oldToNewReferencesLookup);
}

View File

@ -0,0 +1,53 @@
/* ====================================================================
Copyright 2002-2004 Apache Software Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.util.Hashtable;
/**
* A special (and dangerous) kind of Record Container, for which other
* Atoms care about where this one lives on disk.
* Will track its position on disk.
*
* @author Nick Burch
*/
public abstract class PositionDependentRecordContainer extends RecordContainer implements PositionDependentRecord
{
    /** Where this record was on disk, as of the last write out */
    protected int myLastOnDiskOffset;

    /**
     * Reports where this record was on disk as of the last write out.
     *
     * @return the stored on-disk offset
     */
    public int getLastOnDiskOffset() {
        return this.myLastOnDiskOffset;
    }

    /**
     * Tells the record where on disk it now lives, following a write out.
     * Use with care...
     *
     * @param offset the record's new on-disk offset
     */
    public void setLastOnDiskOffset(int offset) {
        this.myLastOnDiskOffset = offset;
    }

    /**
     * Containers hold no pointers to other records, so there is
     *  nothing to adjust when other records move about.
     *
     * @param oldToNewReferencesLookup the lookup of changed locations; unused
     */
    public void updateOtherRecordReferences(Hashtable oldToNewReferencesLookup) {
        // Deliberately a no-op
    }
}

View File

@ -122,71 +122,98 @@ public abstract class Record
* (not including the size of the header), this code assumes you're
* passing in corrected lengths
*/
protected static Record createRecordForType(long type, byte[] b, int start, int len) {
public static Record createRecordForType(long type, byte[] b, int start, int len) {
Record toReturn = null;
// Default is to use UnknownRecordPlaceholder
// When you create classes for new Records, add them here
switch((int)type) {
// Document
case 1000:
return new DummyRecordWithChildren(b,start,len);
toReturn = new DummyPositionSensitiveRecordWithChildren(b,start,len);
break;
// "Slide"
case 1006:
return new Slide(b,start,len);
toReturn = new Slide(b,start,len);
break;
// "SlideAtom"
case 1007:
return new SlideAtom(b,start,len);
toReturn = new SlideAtom(b,start,len);
break;
// "Notes"
case 1008:
return new Notes(b,start,len);
toReturn = new Notes(b,start,len);
break;
// "NotesAtom" (Details on Notes sheets)
case 1009:
return new NotesAtom(b,start,len);
toReturn = new NotesAtom(b,start,len);
break;
// "SlidePersistAtom" (Details on text for a sheet)
case 1011:
return new SlidePersistAtom(b,start,len);
toReturn = new SlidePersistAtom(b,start,len);
break;
// MainMaster (MetaSheet lives inside the PPDrawing inside this)
case 1016:
return new DummyRecordWithChildren(b,start,len);
toReturn = new DummyPositionSensitiveRecordWithChildren(b,start,len);
break;
// PPDrawing (MetaSheet lives inside this)
case 1036:
return new PPDrawing(b,start,len);
toReturn = new PPDrawing(b,start,len);
break;
// TextHeaderAtom (Holds details on following text)
case 3999:
return new TextHeaderAtom(b,start,len);
toReturn = new TextHeaderAtom(b,start,len);
break;
// TextCharsAtom (Text in Unicode format)
case 4000:
return new TextCharsAtom(b,start,len);
toReturn = new TextCharsAtom(b,start,len);
break;
// TextByteAtom (Text in ascii format)
case 4008:
return new TextBytesAtom(b,start,len);
toReturn = new TextBytesAtom(b,start,len);
break;
// SlideListWithText (Many Sheets live inside here)
case 4080:
return new SlideListWithText(b,start,len);
toReturn = new SlideListWithText(b,start,len);
break;
// UserEditAtom (Holds pointers, last viewed etc)
case 4085:
return new UserEditAtom(b,start,len);
toReturn = new UserEditAtom(b,start,len);
break;
// PersistPtrFullBlock (Don't know what it holds, but do care about where it lives)
case 6001:
return new PersistPtrHolder(b,start,len);
toReturn = new PersistPtrHolder(b,start,len);
break;
// PersistPtrIncrementalBlock (Don't know what it holds, but do care about where it lives)
case 6002:
return new PersistPtrHolder(b,start,len);
toReturn = new PersistPtrHolder(b,start,len);
break;
default:
return new UnknownRecordPlaceholder(b,start,len);
toReturn = new UnknownRecordPlaceholder(b,start,len);
break;
}
// If it's a position aware record, tell it where it is
if(toReturn instanceof PositionDependentRecord) {
PositionDependentRecord pdr = (PositionDependentRecord)toReturn;
pdr.setLastOnDiskOffset(start);
}
// Return the record
return toReturn;
}
}

View File

@ -30,7 +30,7 @@ import java.io.ByteArrayOutputStream;
* @author Nick Burch
*/
public class Slide extends RecordContainer
public class Slide extends PositionDependentRecordContainer
{
private Record[] _children;
private byte[] _header;

View File

@ -21,6 +21,7 @@ package org.apache.poi.hslf.record;
import org.apache.poi.util.LittleEndian;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Hashtable;
/**
* A UserEdit Atom (type 4085). Holds information which bits of the file
@ -75,9 +76,6 @@ public class UserEditAtom extends PositionDependentRecordAtom
// Sanity Checking
if(len < 34) { len = 34; }
// Store where we currently live on disk
myLastOnDiskOffset = start;
// Get the header
_header = new byte[8];
System.arraycopy(source,start,_header,0,8);
@ -118,6 +116,28 @@ public class UserEditAtom extends PositionDependentRecordAtom
*/
public long getRecordType() { return _type; }
/**
* At write-out time, update the references to PersistPtrs and
* other UserEditAtoms to point to their new positions
*/
public void updateOtherRecordReferences(Hashtable oldToNewReferencesLookup) {
    // Re-point our link to the preceding UserEditAtom. An offset of zero
    //  is left alone (presumably it means there is no preceding one)
    if(lastUserEditAtomOffset != 0) {
        Integer oldEditKey = new Integer(lastUserEditAtomOffset);
        Integer movedEditPos = (Integer)oldToNewReferencesLookup.get(oldEditKey);
        if(movedEditPos == null) {
            throw new RuntimeException("Couldn't find the new location of the UserEditAtom that used to be at " + lastUserEditAtomOffset);
        }
        lastUserEditAtomOffset = movedEditPos.intValue();
    }

    // Re-point our link to the PersistPtr; this one is always looked up
    Integer oldPtrKey = new Integer(persistPointersOffset);
    Integer movedPtrPos = (Integer)oldToNewReferencesLookup.get(oldPtrKey);
    if(movedPtrPos == null) {
        throw new RuntimeException("Couldn't find the new location of the PersistPtr that used to be at " + persistPointersOffset);
    }
    persistPointersOffset = movedPtrPos.intValue();
}
/**
* Write the contents of the record back, so it can be written
* to disk