#56260 Partial fix for a slide with a TextHeaderAtom but no other atoms related to it, which is followed by another TextHeaderAtom straight away

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1577537 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2014-03-14 13:59:24 +00:00
parent 32c5314e25
commit adb98e1073
3 changed files with 89 additions and 7 deletions

View File

@ -17,16 +17,34 @@
package org.apache.poi.hslf.model;
import org.apache.poi.ddf.*;
import org.apache.poi.hslf.record.*;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import java.awt.Graphics2D;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.awt.*;
import org.apache.poi.ddf.EscherContainerRecord;
import org.apache.poi.ddf.EscherDgRecord;
import org.apache.poi.ddf.EscherDggRecord;
import org.apache.poi.ddf.EscherRecord;
import org.apache.poi.hslf.record.CString;
import org.apache.poi.hslf.record.ColorSchemeAtom;
import org.apache.poi.hslf.record.EscherTextboxWrapper;
import org.apache.poi.hslf.record.OEPlaceholderAtom;
import org.apache.poi.hslf.record.PPDrawing;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.RecordContainer;
import org.apache.poi.hslf.record.RecordTypes;
import org.apache.poi.hslf.record.RoundTripHFPlaceholder12;
import org.apache.poi.hslf.record.SheetContainer;
import org.apache.poi.hslf.record.StyleTextProp9Atom;
import org.apache.poi.hslf.record.StyleTextPropAtom;
import org.apache.poi.hslf.record.TextBytesAtom;
import org.apache.poi.hslf.record.TextCharsAtom;
import org.apache.poi.hslf.record.TextHeaderAtom;
import org.apache.poi.hslf.record.TextRulerAtom;
import org.apache.poi.hslf.usermodel.SlideShow;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* This class defines the common format of "Sheets" in a powerpoint
@ -182,6 +200,7 @@ public abstract class Sheet {
// Is there a StyleTextPropAtom after the Text Atom?
// TODO Do we need to check for text ones two away as well?
// TODO Refactor this to happen later in this for loop
if (i < (records.length - 2)) {
next = records[i+2];
if (next instanceof StyleTextPropAtom) {
@ -191,6 +210,16 @@ public abstract class Sheet {
// See what follows the TextHeaderAtom
next = records[i+1];
// Is it one we ignore and check the one after that?
if (i < records.length - 2) {
// TODO MasterTextPropAtom
if (next instanceof TextRulerAtom) {
next = records[i+2];
}
}
// Is it one we need to record?
if (next instanceof TextCharsAtom) {
TextCharsAtom tca = (TextCharsAtom)next;
trun = new TextRun(tha, tca, stpa);
@ -199,6 +228,11 @@ public abstract class Sheet {
trun = new TextRun(tha, tba, stpa);
} else if (next instanceof StyleTextPropAtom) {
stpa = (StyleTextPropAtom)next;
} else if (next instanceof TextHeaderAtom) {
// Seems to be a mostly, but not completely deleted block of
// text. Only the header remains, which isn't useful alone
// Skip on to the next TextHeaderAtom
continue;
} else if (next.getRecordType() == (long)RecordTypes.TextSpecInfoAtom.typeID ||
next.getRecordType() == (long)RecordTypes.BaseTextPropAtom.typeID) {
// Safe to ignore these ones

View File

@ -22,8 +22,10 @@ import java.util.LinkedList;
import java.util.List;
import org.apache.poi.hslf.model.textproperties.TextPropCollection;
import org.apache.poi.hslf.record.PPDrawing;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.RecordContainer;
import org.apache.poi.hslf.record.SlideListWithText;
import org.apache.poi.hslf.record.StyleTextProp9Atom;
import org.apache.poi.hslf.record.StyleTextPropAtom;
import org.apache.poi.hslf.record.TextBytesAtom;
@ -656,6 +658,14 @@ public final class TextRun
protected void setIndex(int id) {
    // Store the supplied index; isDrawingBased() treats -1 as "drawing based"
    this.slwtIndex = id;
}
/**
 * Reports whether this Text Run comes from a {@link PPDrawing}
 * rather than from the {@link SlideListWithText}.
 *
 * @return <code>true</code> if the stored SLWT index is -1,
 *  <code>false</code> for any other index value
 */
public boolean isDrawingBased() {
    final boolean indexIsMinusOne = slwtIndex == -1;
    return indexIsMinusOne;
}
/**
* Returns the array of all hyperlinks in this text run

View File

@ -51,6 +51,11 @@ import org.apache.poi.hslf.model.TextBox;
import org.apache.poi.hslf.model.TextRun;
import org.apache.poi.hslf.model.TextShape;
import org.apache.poi.hslf.model.TitleMaster;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.SlideListWithText;
import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
import org.apache.poi.hslf.record.TextHeaderAtom;
import org.junit.Test;
/**
@ -502,4 +507,37 @@ public final class TestBugs {
assertTrue("No Exceptions while reading headers", true);
}
/**
 * Bug 56260: the first slide's records start with two
 * TextHeaderAtoms directly next to each other. Only one
 * non-drawing text run is expected for that slide.
 */
@Test
public void bug56260() throws Exception {
    // Load the 56260.ppt test file and check the overall slide count
    File pptFile = _slTests.getFile("56260.ppt");
    HSLFSlideShow hslf = new HSLFSlideShow(pptFile.getAbsolutePath());
    SlideShow show = new SlideShow(hslf);
    Slide[] slides = show.getSlides();
    assertEquals(13, slides.length);

    // Count the TextHeaderAtoms in the first slide's atoms set
    Document document = show.getDocumentRecord();
    SlideListWithText slwt = document.getSlideSlideListWithText();
    SlideAtomsSet firstSet = slwt.getSlideAtomsSets()[0];
    int headerCount = 0;
    for (Record record : firstSet.getSlideRecords()) {
        if (record instanceof TextHeaderAtom) {
            headerCount++;
        }
    }
    assertEquals(2, headerCount);

    // The two headers must be the first two records, back to back
    assertEquals(TextHeaderAtom.class, firstSet.getSlideRecords()[0].getClass());
    assertEquals(TextHeaderAtom.class, firstSet.getSlideRecords()[1].getClass());

    // Count the text runs that are not drawing (textbox) based;
    // the header-only one is expected to have been skipped
    int slideRunCount = 0;
    for (TextRun run : slides[0].getTextRuns()) {
        if (run.isDrawingBased()) {
            continue;
        }
        slideRunCount++;
    }
    assertEquals(1, slideRunCount);
}
}