Use the new "most recent important records" record list when creating notes and slides. This makes the code much cleaner, and means that older versions of slides are now handled in a much better manner.

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353733 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2005-06-26 19:05:07 +00:00
parent c6ad45cdf8
commit 1b45e20595

View File

@ -59,29 +59,9 @@ public class SlideShow
// Friendly objects for people to deal with // Friendly objects for people to deal with
private Slide[] _slides; private Slide[] _slides;
private Notes[] _notes; private Notes[] _notes;
// MetaSheets (eg masters) not yet supported
// private MetaSheets[] _msheets; // private MetaSheets[] _msheets;
/**
 * Command-line helper: opens the PowerPoint file named by the first
 * argument and prints, slide by slide, every text run it can find.
 *
 * @param args command-line arguments; <code>args[0]</code> is the ppt file to read
 * @throws IOException if loading the slideshow fails with an I/O error
 */
public static void main(String[] args) throws IOException
{
	// Without a file name there is nothing to do; say so rather than
	// failing with a bare ArrayIndexOutOfBoundsException on args[0]
	if(args.length < 1) {
		System.err.println("Usage: SlideShow <file.ppt>");
		return;
	}

	// Low level record parsing first, then the friendly model on top of it
	HSLFSlideShow basefoo = new HSLFSlideShow(args[0]);
	SlideShow foo = new SlideShow(basefoo);

	Slide[] slides = foo.getSlides();
	for(int i=0; i<slides.length; i++) {
		Slide slide = slides[i];
		System.out.println("*Slide " + slide.getSheetNumber() + ":");

		// Dump each run's type followed by its text
		TextRun[] runs = slide.getTextRuns();
		for(int j=0; j<runs.length; j++) {
			TextRun run = runs[j];
			System.out.println(" * Text run " + run.getRunType());
			System.out.println("\n" + run.getText() + "\n");
		}
	}
}
/** /**
* Constructs a Powerpoint document from the underlying * Constructs a Powerpoint document from the underlying
@ -103,6 +83,7 @@ public class SlideShow
buildSlidesAndNotes(); buildSlidesAndNotes();
} }
/** /**
* Use the PersistPtrHolder entries to figure out what is * Use the PersistPtrHolder entries to figure out what is
* the "most recent" version of all the core records * the "most recent" version of all the core records
@ -189,80 +170,75 @@ public class SlideShow
Vector notesV = new Vector(10); Vector notesV = new Vector(10);
// For holding the Meta Sheet Records // For holding the Meta Sheet Records
Vector metaSheetsV = new Vector(10); Vector metaSheetsV = new Vector(10);
// For holding Document Records
Vector documentsV = new Vector(10);
// For holding SlideListWithText Records // For holding SlideListWithText Records
Vector slwtV = new Vector(10); Vector slwtV = new Vector(10);
// For holding the Document record we're going to use
Record documentRecord = null;
// Look for Notes, Slides and Documents // Look for Notes, Slides and Documents
for(int i=0; i<_records.length; i++) { for(int i=0; i<_mostRecentCoreRecords.length; i++) {
if(_records[i] instanceof org.apache.poi.hslf.record.Notes) { if(_mostRecentCoreRecords[i] instanceof org.apache.poi.hslf.record.Notes) {
notesV.add(_records[i]); notesV.add(_mostRecentCoreRecords[i]);
} }
if(_records[i] instanceof org.apache.poi.hslf.record.Slide) { if(_mostRecentCoreRecords[i] instanceof org.apache.poi.hslf.record.Slide) {
slidesV.add(_records[i]); slidesV.add(_mostRecentCoreRecords[i]);
} }
if(_records[i].getRecordType() == 1000l) { if(_records[i].getRecordType() == 1000l) {
documentsV.add(_records[i]); documentRecord = _mostRecentCoreRecords[i];
} }
} }
// Also look for SlideListWithTexts in Documents // Now look for SlideListWithTexts in the most up-to-date Document Record
// //
// Need to get the SlideAtomsSets for all of these. Then, query the // Need to get the SlideAtomsSets for all of these. Then, query the
// SlidePersistAtom, and group stuff together between SLWT blocks // SlidePersistAtom, and group stuff together between SLWT blocks
// based on the refID/slideID. Finally, build up a list of all the // based on the refID/slideID
// SlideAtomsSets for a given refID / slideID, and pass them on to
// the Slide when creating
// //
// If a notes sheet exists, can normally match the Notes sheet ID // If a notes sheet exists, can normally match the Notes sheet ID
// to the slide ID in the SlidePersistAtom. Since there isn't always, // to the slide ID in the SlidePersistAtom. Since there isn't always,
// and we can't find the ID in the slide, just order on the slide ID, // and we can't find the ID in the slide, just order on the slide ID,
// and hand off to the Slides in turn. // and hand off to the Slides in turn.
// (Based on output from dev.SLWTTextListing and dev.SlideAndNotesAtomListing) // (Based on output from dev.SLWTTextListing and dev.SlideAndNotesAtomListing)
// //
// There is often duplicate text, especially for the first few // We're trusting that the ordering of slides from the persistence
// Slides. Currently, it's up to the Slide model code to detect // layer will match the ordering found here. However, we should
// and ignore those // really find a PPT file with random sheets inserted to check with
//
// There shouldn't be any text duplication - only using the most
// recent Document record's SLWTs should see to that
for(int i=0; i<documentsV.size(); i++) { Record[] docChildren = documentRecord.getChildRecords();
Record docRecord = (Record)documentsV.get(i); for(int i=0; i<docChildren.length; i++) {
Record[] docChildren = docRecord.getChildRecords(); if(docChildren[i] instanceof SlideListWithText) {
for(int j=0; j<docChildren.length; j++) { slwtV.add(docChildren[i]);
if(docChildren[j] instanceof SlideListWithText) {
//System.out.println("Found SLWT in document " + i);
//System.out.println(" Has " + docChildren[j].getChildRecords().length + " children");
slwtV.add(docChildren[j]);
}
} }
} }
// For now, grab out all the sets of Atoms in the SlideListWithText's // For now, grab out all the sets of Atoms in the SlideListWithText's
// Only store those which aren't empty // Only store those which aren't empty
// Also, get the list of IDs while we're at it
HashSet uniqueSlideIDs = new HashSet();
Vector setsV = new Vector(); Vector setsV = new Vector();
for(int i=0; i<slwtV.size(); i++) { for(int i=0; i<slwtV.size(); i++) {
SlideListWithText slwt = (SlideListWithText)slwtV.get(i); SlideListWithText slwt = (SlideListWithText)slwtV.get(i);
SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets(); SlideAtomsSet[] thisSets = slwt.getSlideAtomsSets();
for(int j=0; j<thisSets.length; j++) { for(int j=0; j<thisSets.length; j++) {
setsV.add(thisSets[j]); SlideAtomsSet thisSet = thisSets[j];
setsV.add(thisSet);
int id = thisSet.getSlidePersistAtom().getSlideIdentifier();
Integer idI = new Integer(id);
if(! uniqueSlideIDs.contains(idI) ) {
uniqueSlideIDs.add(idI);
} else {
System.err.println("** WARNING - Found two SlideAtomsSets for a given slide (" + id + ") - only using the first one **");
}
} }
} }
// Now, sort the SlideAtomSets together into groups for the same slide ID, // Now, order the SlideAtomSets by their slide's ID
// and order them by the slide ID
// Find the unique IDs
HashSet uniqueSlideIDs = new HashSet();
for(int i=0; i<setsV.size(); i++) {
SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i);
int id = thisSet.getSlidePersistAtom().getSlideIdentifier();
Integer idI = new Integer(id);
if(! uniqueSlideIDs.contains(idI) ) {
uniqueSlideIDs.add(idI);
}
}
int[] slideIDs = new int[uniqueSlideIDs.size()]; int[] slideIDs = new int[uniqueSlideIDs.size()];
int pos = 0; int pos = 0;
for(Iterator getIDs = uniqueSlideIDs.iterator(); getIDs.hasNext(); pos++) { for(Iterator getIDs = uniqueSlideIDs.iterator(); getIDs.hasNext(); pos++) {
@ -272,7 +248,7 @@ public class SlideShow
// Sort // Sort
Arrays.sort(slideIDs); Arrays.sort(slideIDs);
// Group // Group
Vector[] sortedSetsV = new Vector[slideIDs.length]; SlideAtomsSet[] slideAtomSets = new SlideAtomsSet[slideIDs.length];
for(int i=0; i<setsV.size(); i++) { for(int i=0; i<setsV.size(); i++) {
SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i); SlideAtomsSet thisSet = (SlideAtomsSet)setsV.get(i);
int id = thisSet.getSlidePersistAtom().getSlideIdentifier(); int id = thisSet.getSlidePersistAtom().getSlideIdentifier();
@ -280,11 +256,11 @@ public class SlideShow
for(int j=0; j<slideIDs.length; j++) { for(int j=0; j<slideIDs.length; j++) {
if(slideIDs[j] == id) { arrayPos = j; } if(slideIDs[j] == id) { arrayPos = j; }
} }
if(sortedSetsV[arrayPos] == null) { sortedSetsV[arrayPos] = new Vector(); } slideAtomSets[arrayPos] = thisSet;
sortedSetsV[arrayPos].add(thisSet);
} }
// ******************* Do the real model layer creation **************** // ******************* Do the real model layer creation ****************
@ -302,42 +278,28 @@ public class SlideShow
// Grab the slide Record // Grab the slide Record
org.apache.poi.hslf.record.Slide slideRecord = (org.apache.poi.hslf.record.Slide)slidesV.get(i); org.apache.poi.hslf.record.Slide slideRecord = (org.apache.poi.hslf.record.Slide)slidesV.get(i);
// Decide if we've got a SlideAtomSet to use
SlideAtomsSet atomSet = null;
if(i < slideAtomSets.length) {
atomSet = slideAtomSets[i];
}
// Do they have a Notes? // Do they have a Notes?
Notes thisNotes = null; Notes thisNotes = null;
// Find their SlideAtom, and use this to check for a Notes // Find their SlideAtom, and use this to check for a Notes
Record[] slideRecordChildren = slideRecord.getChildRecords(); SlideAtom sa = slideRecord.getSlideAtom();
for(int j=0; j<slideRecordChildren.length; j++) { int notesID = sa.getNotesID();
if(slideRecordChildren[j] instanceof SlideAtom) { if(notesID != 0) {
SlideAtom sa = (SlideAtom)slideRecordChildren[j]; for(int k=0; k<_notes.length; k++) {
int notesID = sa.getNotesID(); if(_notes[k].getSheetNumber() == notesID) {
if(notesID != 0) { thisNotes = _notes[k];
for(int k=0; k<_notes.length; k++) {
if(_notes[k].getSheetNumber() == notesID) {
thisNotes = _notes[k];
}
}
} }
} }
} }
// Grab the (hopefully) corresponding block of Atoms // Create the Slide model layer
SlideAtomsSet[] sets; _slides[i] = new Slide(slideRecord,thisNotes,atomSet);
if(sortedSetsV.length > i) {
Vector thisSetsV = sortedSetsV[i];
sets = new SlideAtomsSet[thisSetsV.size()];
for(int j=0; j<sets.length; j++) {
sets[j] = (SlideAtomsSet)thisSetsV.get(j);
}
//System.out.println("For slide " + i + ", found " + sets.length + " Sets of text");
} else {
// Didn't find enough SlideAtomSets to give any to this sheet
sets = new SlideAtomsSet[0];
}
// Create the Slide model layer
_slides[i] = new Slide(slideRecord,thisNotes,sets);
} }
} }