Hopefully fix bug 39228 - big overhaul of how slides and notes are built, based on Yegor's discoveries. Hopefully, we now return the right slide #, and in the right order

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@417541 13f79535-47bb-0310-9956-ffa450edef68
2006-06-27 18:15:32 +00:00 · 2006-06-27 18:15:32 +00:00 · 7175573d2f
commit 7175573d2f
parent 8514bc913e
4 changed files with 218 additions and 156 deletions
--- a/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java
@ -73,9 +73,18 @@ public abstract class Sheet
  public SlideShow getSlideShow() { return _slideShow; }
  
  /**
-   * Set the SlideShow we're attached to
+   * Set the SlideShow we're attached to.
+   * Also passes it on to our child TextRuns (and, via them, their RichTextRuns)
   */
-  public void setSlideShow(SlideShow ss) { _slideShow = ss; }
+  public void setSlideShow(SlideShow ss) { 
+	  _slideShow = ss;
+	  TextRun[] trs = getTextRuns();
+	  if(trs != null) {
+		  for(int i=0; i<trs.length; i++) {
+			  trs[i].supplySlideShow(_slideShow);
+		  }
+	  }
+  }

  
  /**
--- a/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java
@ -25,6 +25,7 @@ import java.util.Vector;
 import org.apache.poi.hslf.record.*;
 import org.apache.poi.hslf.record.StyleTextPropAtom.TextPropCollection;
 import org.apache.poi.hslf.usermodel.RichTextRun;
+import org.apache.poi.hslf.usermodel.SlideShow;
 import org.apache.poi.util.StringUtil;

 /**
@ -45,6 +46,7 @@ public class TextRun
 	protected StyleTextPropAtom _styleAtom;
 	protected boolean _isUnicode;
 	protected RichTextRun[] _rtRuns;
+	private SlideShow slideShow;

 	/**
 	* Constructs a Text Run from a Unicode text block
@ -500,4 +502,17 @@ public class TextRun
 	public void setRunType(int type) {
 		_headerAtom.setTextType(type);
 	}
+	
+	/**
+	 * Supply the SlideShow we belong to.
+	 * Also passes it on to our child RichTextRuns
+	 */
+	public void supplySlideShow(SlideShow ss) {
+		slideShow = ss;
+		if(_rtRuns != null) {
+			for(int i=0; i<_rtRuns.length; i++) {
+				_rtRuns[i].supplySlideShow(slideShow);
+			}
+		}
+	}
 } 
--- a/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java
+++ b/src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java
@ -247,19 +247,34 @@ public class SlideShow
 		}
 	}
  }
+  
+  	/**
+  	 * For a given SlideAtomsSet, return the core record, based on the refID from the
+  	 *  SlidePersistAtom
+  	 */
+	private Record getCoreRecordForSAS(SlideAtomsSet sas) {
+		SlidePersistAtom spa = sas.getSlidePersistAtom();
+		int refID = spa.getRefID();
+		return getCoreRecordForRefID(refID);
+	}
+  
+	/**
+   	 * For a given refID (the internal, 0 based numbering scheme), return the
+	 *  core record
+	 * @param refID the refID
+	 */
+	private Record getCoreRecordForRefID(int refID) {
+		Integer coreRecordId = (Integer)
+			_sheetIdToCoreRecordsLookup.get(new Integer(refID));
+		Record r = _mostRecentCoreRecords[coreRecordId.intValue()];
+		return r;
+	}

  /**
   * Build up model level Slide and Notes objects, from the underlying
   *  records.
   */
  private void buildSlidesAndNotes() {
-    // For holding the Slide Records
-    Vector slidesV = new Vector(10);
-    // For holding the Notes Records
-    Vector notesV = new Vector(10);
-    // For holding the Meta Sheet Records
-    Vector metaSheetsV = new Vector(10);
-	  
 	// Ensure we really found a Document record earlier
 	// If we didn't, then the file is probably corrupt
 	if(_documentRecord == null) {
@ -269,167 +284,102 @@ public class SlideShow

 	// Fetch the SlideListWithTexts in the most up-to-date Document Record
 	//
-	// Then, use this to find the Slide records, and also the Notes record
-	//  for each Slide (if it has one)
+	// As far as we understand it:
+	//  * The first SlideListWithText will contain a SlideAtomsSet
+	//     for each of the master slides
+	//  * The second SlideListWithText will contain a SlideAtomsSet
+	//     for each of the slides, in their current order
+	//    These SlideAtomsSets will normally contain text
+	//  * The third SlideListWithText (if present), will contain a
+	//     SlideAtomsSet for each Notes
+	//    These SlideAtomsSets will not normally contain text
 	//
-	// The following matching algorithm is based on looking at the output
-	//  of org.apache.poi.hslf.dev.SlideIdListing on a number of files:
-	//
-	// 1) Get the SlideAtomSets from the SlideListWithTexts of the most
-	//     up-to-date Document
-	// 2) Get the SlidePersistAtoms from all of these
-	// 3) Get the RefId, which corresponds to a "sheet ID" from the
-	//    PersistPtr Stuff
-	// 4) Grab the record at that ID, and see if it's a slide or a notes
-	// 5) Build a mapping between the SlideIdentifier ID and the RefId
-	//     for both slides and notes
-	// 6) Loop over all the slides
-	// 7) Look each slide's SlideAtom to see if it has associated Notes - 
-	//     if it does, the ID will be SlideIdentifier for those notes
-	//     (Note: might not be the same as the SlideIdentifier of the Slide)
-	// 8) Generate the model representations, giving them the matching
-	//     slide atom sets, IDs etc
+	// Having identified the masters, slides and notes + their orders,
+	//  we have to go and find their matching records
+	// We always use the latest versions of these records, and use the
+	//  SlideAtom/NotesAtom to match them with the SlideAtomsSet

-	SlideListWithText[] slwts = _documentRecord.getSlideListWithTexts();
+	SlideListWithText masterSLWT = _documentRecord.getMasterSlideListWithText();
+	SlideListWithText slidesSLWT = _documentRecord.getSlideSlideListWithText();
+	SlideListWithText notesSLWT  = _documentRecord.getNotesSlideListWithText();
 	
-	// To hold the lookup from SlideIdentifier IDs to RefIDs
-	Hashtable slideSlideIdToRefid = new Hashtable();
-	Hashtable notesSlideIdToRefid = new Hashtable();
-	// To hold the lookup from SlideIdentifier IDs to SlideAtomsSets
-	Hashtable slideSlideIdToSlideAtomsSet = new Hashtable();
-	Hashtable notesSlideIdToSlideAtomsSet = new Hashtable();
-	
-	// Loop over all the SlideListWithTexts, getting their 
-	//  SlideAtomSets
-	for(int i=0; i<slwts.length; i++) {
-		SlideAtomsSet[] sas = slwts[i].getSlideAtomsSets();
-		for(int j=0; j<sas.length; j++) {
-			// What does this SlidePersistAtom point to?
-			SlidePersistAtom spa = sas[j].getSlidePersistAtom();
-			Integer slideIdentifier = new Integer( spa.getSlideIdentifier() );
-			Integer slideRefId = new Integer( spa.getRefID() ); 
+	// Start by finding the notes records to go with the entries in
+	//  notesSLWT
+	org.apache.poi.hslf.record.Notes[] notesRecords;
+	SlideAtomsSet[] notesSets = new SlideAtomsSet[0];
+	Hashtable slideIdToNotes = new Hashtable();
+	if(notesSLWT == null) {
+		// None
+		notesRecords = new org.apache.poi.hslf.record.Notes[0]; 
+	} else {
+		// Match up the records and the SlideAtomSets
+		notesSets = notesSLWT.getSlideAtomsSets();
+		notesRecords = new org.apache.poi.hslf.record.Notes[notesSets.length];
+		for(int i=0; i<notesSets.length; i++) {
+			// Get the right core record
+			Record r = getCoreRecordForSAS(notesSets[i]);
 			
-			// Grab the record it points to
-			Integer coreRecordId = (Integer)
-				_sheetIdToCoreRecordsLookup.get(slideRefId);
-			Record r = _mostRecentCoreRecords[coreRecordId.intValue()];
-			
-			// Add the IDs to the appropriate lookups
-			if(r instanceof org.apache.poi.hslf.record.Slide) {
-				slideSlideIdToRefid.put( slideIdentifier, slideRefId );
-				// Save the SlideAtomsSet
-				slideSlideIdToSlideAtomsSet.put( slideIdentifier, sas[j] );
-			} else if(r instanceof org.apache.poi.hslf.record.Notes) {
-				notesSlideIdToRefid.put( slideIdentifier, slideRefId );
-				// Save the SlideAtomsSet
-				notesSlideIdToSlideAtomsSet.put( slideIdentifier, sas[j] );
-			} else if(r.getRecordType() == RecordTypes.MainMaster.typeID) {
-				// Skip for now, we don't do Master slides yet
+			// Ensure it really is a notes record
+			if(r instanceof org.apache.poi.hslf.record.Notes) {
+				notesRecords[i] = (org.apache.poi.hslf.record.Notes)r;
 			} else {
-				throw new IllegalStateException("SlidePersistAtom had a RefId that pointed to something other than a Slide or a Notes, was a " + r + " with type " + r.getRecordType());
+				System.err.println("A Notes SlideAtomSet at " + i + " said its record was at refID " + notesSets[i].getSlidePersistAtom().getRefID() + ", but that was actually a " + r);
+			}
+			
+			// Record the match between slide id and these notes
+			SlidePersistAtom spa = notesSets[i].getSlidePersistAtom();
+			Integer slideId = new Integer(spa.getSlideIdentifier());
+			slideIdToNotes.put(slideId, new Integer(i));
+		}
+	}
+	
+	// Now, do the same thing for our slides
+	org.apache.poi.hslf.record.Slide[] slidesRecords;
+	SlideAtomsSet[] slidesSets = new SlideAtomsSet[0];
+	if(slidesSLWT == null) {
+		// None
+		slidesRecords = new org.apache.poi.hslf.record.Slide[0]; 
+	} else {
+		// Match up the records and the SlideAtomSets
+		slidesSets = slidesSLWT.getSlideAtomsSets();
+		slidesRecords = new org.apache.poi.hslf.record.Slide[slidesSets.length];
+		for(int i=0; i<slidesSets.length; i++) {
+			// Get the right core record
+			Record r = getCoreRecordForSAS(slidesSets[i]);
+			
+			// Ensure it really is a slide record
+			if(r instanceof org.apache.poi.hslf.record.Slide) {
+				slidesRecords[i] = (org.apache.poi.hslf.record.Slide)r;
+			} else {
+				System.err.println("A Slide SlideAtomSet at " + i + " said its record was at refID " + slidesSets[i].getSlidePersistAtom().getRefID() + ", but that was actually a " + r);
 			}
 		}
 	}
 	
-	// Now, create a model representation of a slide for each
-	//  slide + slideatomset we found
-	// Do it in order of the SlideIdentifiers
-	int[] slideIDs = new int[slideSlideIdToRefid.size()];
-	int pos = 0;
-	Enumeration e = slideSlideIdToRefid.keys();
-	while(e.hasMoreElements()) {
-		Integer id = (Integer)e.nextElement();
-		slideIDs[pos] = id.intValue();
-		pos++;
-	}
-	// Sort
-	Arrays.sort(slideIDs);
-	
-	// Create
-	for(int i=0; i<slideIDs.length; i++) {
-		// Build up the list of all the IDs we might want to use
-		int slideIdentifier = slideIDs[i];
-		Integer slideIdentifierI = new Integer(slideIdentifier);
-		int slideNumber = (i+1);
-		Integer slideRefI = (Integer)slideSlideIdToRefid.get(slideIdentifierI); 
-		Integer slideCoreRecNumI = (Integer)_sheetIdToCoreRecordsLookup.get(slideRefI);
-		int slideCoreRecNum = slideCoreRecNumI.intValue();
-		
-		// Fetch the Slide record
-		org.apache.poi.hslf.record.Slide s = (org.apache.poi.hslf.record.Slide)
-			_mostRecentCoreRecords[slideCoreRecNum];
-		
-		// Do we have a notes for this slide?
-		org.apache.poi.hslf.record.Notes n = null;
-		if(s.getSlideAtom().getNotesID() > 0) {
-			// Get the SlideIdentifier of the Notes
-			// (Note - might not be the same as the SlideIdentifier of the Slide)
-			int notesSlideIdentifier = s.getSlideAtom().getNotesID();
-			Integer notesSlideIdentifierI = new Integer(notesSlideIdentifier);
-			
-			// Grab the notes record
-			Integer notesRefI = (Integer)notesSlideIdToRefid.get(notesSlideIdentifierI);
-			Integer notesCoreRecNum = (Integer)_sheetIdToCoreRecordsLookup.get(notesRefI);
-			n = (org.apache.poi.hslf.record.Notes)
-				_mostRecentCoreRecords[notesCoreRecNum.intValue()];
-		}
-		
-		// Grab the matching SlideAtomSet 
-		SlideAtomsSet sas = (SlideAtomsSet)
-			slideSlideIdToSlideAtomsSet.get(slideIdentifierI);
-		
-		// Build the notes model, if there's notes
-		Notes notes = null;
-		if(n != null) {
-			// TODO: Use this
-			SlideAtomsSet nsas = (SlideAtomsSet)
-				notesSlideIdToSlideAtomsSet.get(slideIdentifierI);
-			
-			// Create the model view of the notes
-			notes = new Notes(n);
-			notesV.add(notes);
-		}
-		
-		// Build the slide model
-		Slide slide = new Slide(s, notes, sas, slideIdentifier, slideNumber);
-		slidesV.add(slide);
-	}
-	
-	// ******************* Finish up ****************
-
-	// Finish setting up the notes
-	_notes = new Notes[notesV.size()];
+	// Finally, generate model objects for everything
+	// Notes first
+	_notes = new Notes[notesRecords.length];
 	for(int i=0; i<_notes.length; i++) {
-		_notes[i] = (Notes)notesV.get(i);
+		_notes[i] = new Notes(notesRecords[i]);
 		_notes[i].setSlideShow(this);
-		
-		// Now supply ourselves to all the rich text runs
-		//  of this note's TextRuns
-		TextRun[] trs = _notes[i].getTextRuns(); 
-		for(int j=0; j<trs.length; j++) {
-			RichTextRun[] rtrs = trs[j].getRichTextRuns();
-			for(int k=0; k<rtrs.length; k++) {
-				rtrs[k].supplySlideShow(this);
-			}
-		}
 	}
-
-
-	// Create our Slides
-	_slides = new Slide[slidesV.size()];
+	// Then slides
+	_slides = new Slide[slidesRecords.length];
 	for(int i=0; i<_slides.length; i++) {
-		_slides[i] = (Slide)slidesV.get(i);
-		_slides[i].setSlideShow(this);
-
-		// Now supply ourselves to all the rich text runs
-		//  of this slide's TextRuns
-		TextRun[] trs = _slides[i].getTextRuns(); 
-		for(int j=0; j<trs.length; j++) {
-			RichTextRun[] rtrs = trs[j].getRichTextRuns();
-			for(int k=0; k<rtrs.length; k++) {
-				rtrs[k].supplySlideShow(this);
-			}
+		SlideAtomsSet sas = slidesSets[i];
+		int slideIdentifier = sas.getSlidePersistAtom().getSlideIdentifier();
+		Integer slideIdentifierI = new Integer(slideIdentifier);
+		
+		// Do we have a notes for this?
+		Notes notes = null;
+		if(slideIdToNotes.containsKey(slideIdentifierI)) {
+			Integer notesPos = (Integer)slideIdToNotes.get(slideIdentifierI);
+			notes = _notes[notesPos.intValue()];
 		}
+		
+		// Now, build our slide
+		_slides[i] = new Slide(slidesRecords[i], notes, sas, slideIdentifier, (i+1));
+		_slides[i].setSlideShow(this);
 	}
  }

--- a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestSlideOrdering.java
+++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestSlideOrdering.java
@ -0,0 +1,88 @@
+
+/* ====================================================================
+   Copyright 2002-2004   Apache Software Foundation
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+        
+
+
+package org.apache.poi.hslf.usermodel;
+
+
+import junit.framework.TestCase;
+import org.apache.poi.hslf.*;
+import org.apache.poi.hslf.model.*;
+
+/**
+ * Tests that SlideShow returns Sheets in the right order
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestSlideOrdering extends TestCase {
+	// Simple slideshow, record order matches slide order
+	private SlideShow ssA;
+	// Complex slideshow, record order doesn't match slide order
+	private SlideShow ssB;
+
+    public TestSlideOrdering() throws Exception {
+		String dirname = System.getProperty("HSLF.testdata.path");
+		
+		String filenameA = dirname + "/basic_test_ppt_file.ppt";
+		HSLFSlideShow hssA = new HSLFSlideShow(filenameA);
+		ssA = new SlideShow(hssA);
+		
+		String filenameB = dirname + "/incorrect_slide_order.ppt";
+		HSLFSlideShow hssB = new HSLFSlideShow(filenameB);
+		ssB = new SlideShow(hssB);
+    }
+
+    /**
+     * Test the simple case - record order matches slide order
+     */
+    public void testSimpleCase() throws Exception {
+    	assertEquals(2, ssA.getSlides().length);
+    	
+    	Slide s1 = ssA.getSlides()[0];
+    	Slide s2 = ssA.getSlides()[1];
+    	
+    	String[] firstTRs = new String[] {
+    			"This is a test title",
+    			"This is the title on page 2"
+    	};
+    	
+    	assertEquals(firstTRs[0], s1.getTextRuns()[0].getText());
+    	assertEquals(firstTRs[1], s2.getTextRuns()[0].getText());
+    }
+
+    /**
+     * Test the complex case - record order differs from slide order
+     */
+    public void testComplexCase() throws Exception {
+    	assertEquals(3, ssB.getSlides().length);
+    	
+    	Slide s1 = ssB.getSlides()[0];
+    	Slide s2 = ssB.getSlides()[1];
+    	Slide s3 = ssB.getSlides()[2];
+    	
+    	String[] firstTRs = new String[] {
+    			"Slide 1",
+    			"Slide 2",
+    			"Slide 3"
+    	};
+    	
+    	assertEquals(firstTRs[0], s1.getTextRuns()[0].getText());
+    	assertEquals(firstTRs[1], s2.getTextRuns()[0].getText());
+    	assertEquals(firstTRs[2], s3.getTextRuns()[0].getText());
+    }
+}