Support for getting OLE object data in PowerPointExtractor, see Bugzilla 47456
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@791241 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
efc5431030
commit
9c5c51ad29
@ -33,6 +33,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.5-beta7" date="2009-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
|
||||
|
@ -21,14 +21,12 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hslf.HSLFSlideShow;
|
||||
import org.apache.poi.hslf.model.Comment;
|
||||
import org.apache.poi.hslf.model.HeadersFooters;
|
||||
import org.apache.poi.hslf.model.Notes;
|
||||
import org.apache.poi.hslf.model.Slide;
|
||||
import org.apache.poi.hslf.model.TextRun;
|
||||
import org.apache.poi.hslf.model.*;
|
||||
import org.apache.poi.hslf.usermodel.SlideShow;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
@ -151,7 +149,24 @@ public final class PowerPointExtractor extends POIOLE2TextExtractor {
|
||||
return getText(false,true);
|
||||
}
|
||||
|
||||
/**
|
||||
public List<OLEShape> getOLEShapes() {
|
||||
List<OLEShape> list = new ArrayList<OLEShape>();
|
||||
|
||||
for (int i = 0; i < _slides.length; i++) {
|
||||
Slide slide = _slides[i];
|
||||
|
||||
Shape[] shapes = slide.getShapes();
|
||||
for (int j = 0; j < shapes.length; j++) {
|
||||
if (shapes[j] instanceof OLEShape) {
|
||||
list.add((OLEShape) shapes[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches text from the slideshow, be it slide text or note text.
|
||||
* Because the final block of text in a TextRun normally has its
|
||||
* last \n stripped, we add it back
|
||||
|
@ -18,11 +18,16 @@
|
||||
package org.apache.poi.hslf.extractor;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hslf.HSLFSlideShow;
|
||||
import org.apache.poi.hslf.model.OLEShape;
|
||||
import org.apache.poi.hslf.usermodel.SlideShow;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
@ -167,51 +172,30 @@ public final class TestExtractor extends TestCase {
|
||||
|
||||
/**
|
||||
* A PowerPoint file with embedded PowerPoint files
|
||||
* TODO - figure out how to handle this, as ppt
|
||||
* appears to embed not as ole2 streams
|
||||
*/
|
||||
public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
|
||||
String filename3 = pdirname + "/ppt_with_embeded.ppt";
|
||||
POIFSFileSystem fs = new POIFSFileSystem(
|
||||
new FileInputStream(filename3)
|
||||
);
|
||||
HSLFSlideShow ss;
|
||||
|
||||
DirectoryNode dirA = (DirectoryNode)
|
||||
fs.getRoot().getEntry("MBD0000A3B6");
|
||||
DirectoryNode dirB = (DirectoryNode)
|
||||
fs.getRoot().getEntry("MBD0000A3B3");
|
||||
|
||||
assertNotNull(dirA.getEntry("PowerPoint Document"));
|
||||
assertNotNull(dirB.getEntry("PowerPoint Document"));
|
||||
|
||||
// Check the first file
|
||||
ss = new HSLFSlideShow(dirA, fs);
|
||||
ppe = new PowerPointExtractor(ss);
|
||||
assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
|
||||
ppe.getText(true, false)
|
||||
);
|
||||
|
||||
// And the second
|
||||
ss = new HSLFSlideShow(dirB, fs);
|
||||
ppe = new PowerPointExtractor(ss);
|
||||
assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
|
||||
ppe.getText(true, false)
|
||||
);
|
||||
|
||||
|
||||
// Check the master doc two ways
|
||||
ss = new HSLFSlideShow(fs.getRoot(), fs);
|
||||
ppe = new PowerPointExtractor(ss);
|
||||
assertEquals("I have embeded files in me\n",
|
||||
ppe.getText(true, false)
|
||||
);
|
||||
|
||||
ss = new HSLFSlideShow(fs);
|
||||
ppe = new PowerPointExtractor(ss);
|
||||
assertEquals("I have embeded files in me\n",
|
||||
ppe.getText(true, false)
|
||||
);
|
||||
public void testExtractFromOwnEmbeded() throws Exception {
|
||||
String path = pdirname + "/ppt_with_embeded.ppt";
|
||||
ppe = new PowerPointExtractor(path);
|
||||
List<OLEShape> shapes = ppe.getOLEShapes();
|
||||
assertEquals("Expected 6 ole shapes in " + path, 6, shapes.size());
|
||||
int num_ppt = 0, num_doc = 0, num_xls = 0;
|
||||
for(OLEShape ole : shapes) {
|
||||
String name = ole.getInstanceName();
|
||||
InputStream data = ole.getObjectData().getData();
|
||||
if ("Worksheet".equals(name)) {
|
||||
HSSFWorkbook wb = new HSSFWorkbook(data);
|
||||
num_xls++;
|
||||
} else if ("Document".equals(name)) {
|
||||
HWPFDocument doc = new HWPFDocument(data);
|
||||
num_doc++;
|
||||
} else if ("Presentation".equals(name)) {
|
||||
num_ppt++;
|
||||
SlideShow ppt = new SlideShow(data);
|
||||
}
|
||||
}
|
||||
assertEquals("Expected 2 embedded Word Documents", 2, num_doc);
|
||||
assertEquals("Expected 2 embedded Excel Spreadsheets", 2, num_xls);
|
||||
assertEquals("Expected 2 embedded PowerPoint Presentations", 2, num_ppt);
|
||||
}
|
||||
|
||||
/**
|
||||
|
Loading…
Reference in New Issue
Block a user