Improved handling of byte-level, position-sensitive records

QuickButGreedyTextExtractor - gets all the text in a file, fast


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353708 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2005-06-09 15:07:56 +00:00
parent ed83ff62b1
commit cfc6960898

View File

@ -243,57 +243,50 @@ public class HSLFSlideShow
writePropertySet("\005DocumentSummaryInformation",dsInf,outFS); writePropertySet("\005DocumentSummaryInformation",dsInf,outFS);
} }
// Need to take special care of PersistPtrHolder and UserEditAtoms
// Store where they used to be, and where they are now // For position dependent records, hold where they were and now are
Hashtable persistPtrHolderPos = new Hashtable(); // As we go along, update, and hand over, to any Position Dependent
Hashtable userEditAtomsPos = new Hashtable(); // records we happen across
int lastUserEditAtomPos = -1; Hashtable oldToNewPositions = new Hashtable();
// Write ourselves out // Write ourselves out
ByteArrayOutputStream baos = new ByteArrayOutputStream(); ByteArrayOutputStream baos = new ByteArrayOutputStream();
for(int i=0; i<_records.length; i++) { for(int i=0; i<_records.length; i++) {
// If it's a special record, record where it was and now is // For now, we're only handling PositionDependentRecords that
if(_records[i] instanceof PersistPtrHolder) { // happen at the top level.
// Update position // In future, we'll need to handle them everywhere, but that's
PersistPtrHolder pph = (PersistPtrHolder)_records[i]; // a bit trickier
int oldPos = pph.getLastOnDiskOffset(); if(_records[i] instanceof PositionDependentRecord) {
PositionDependentRecord pdr = (PositionDependentRecord)_records[i];
int oldPos = pdr.getLastOnDiskOffset();
int newPos = baos.size(); int newPos = baos.size();
pph.setLastOnDiskOffet(newPos); pdr.setLastOnDiskOffset(newPos);
persistPtrHolderPos.put(new Integer(oldPos),new Integer(newPos)); oldToNewPositions.put(new Integer(oldPos),new Integer(newPos));
} pdr.updateOtherRecordReferences(oldToNewPositions);
if(_records[i] instanceof UserEditAtom) {
// Update position
UserEditAtom uea = (UserEditAtom)_records[i];
int oldPos = uea.getLastOnDiskOffset();
int newPos = baos.size();
lastUserEditAtomPos = newPos;
uea.setLastOnDiskOffet(newPos);
userEditAtomsPos.put(new Integer(oldPos),new Integer(newPos));
// Update internal positions
if(uea.getLastUserEditAtomOffset() != 0) {
Integer ueNewPos = (Integer)userEditAtomsPos.get( new Integer( uea.getLastUserEditAtomOffset() ) );
uea.setLastUserEditAtomOffset(ueNewPos.intValue());
}
if(uea.getPersistPointersOffset() != 0) {
Integer ppNewPos = (Integer)persistPtrHolderPos.get( new Integer( uea.getPersistPointersOffset() ) );
uea.setPersistPointersOffset(ppNewPos.intValue());
}
} }
// Finally, write out // Finally, write out
_records[i].writeOut(baos); _records[i].writeOut(baos);
} }
// Update our cached copy of the bytes that make up the PPT stream,
// taking a snapshot of everything the records wrote into baos above
_docstream = baos.toByteArray();
// Write the PPT stream into the POIFS layer
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
outFS.createDocument(bais,"PowerPoint Document"); outFS.createDocument(bais,"PowerPoint Document");
// Update and write out the Current User atom // Update and write out the Current User atom
if(lastUserEditAtomPos != -1) { int oldLastUserEditAtomPos = (int)currentUser.getCurrentEditOffset();
currentUser.setCurrentEditOffset(lastUserEditAtomPos); Integer newLastUserEditAtomPos = (Integer)oldToNewPositions.get(new Integer(oldLastUserEditAtomPos));
if(newLastUserEditAtomPos == null) {
throw new RuntimeException("Couldn't find the new location of the UserEditAtom that used to be at " + oldLastUserEditAtomPos);
} }
currentUser.setCurrentEditOffset(newLastUserEditAtomPos.intValue());
currentUser.writeToFS(outFS); currentUser.writeToFS(outFS);
// Send the POIFSFileSystem object out
// Send the POIFSFileSystem object out to the underlying stream
outFS.writeFilesystem(out); outFS.writeFilesystem(out);
} }