BUG-59302 --add minimal support for VBAMacro extraction to HSLF; credit to Andreas Beeker for this patch. Problems are mine.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1765696 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2016-10-19 18:44:46 +00:00
parent c0a5f568da
commit 5958cb4840
9 changed files with 371 additions and 6 deletions

View File

@ -46,8 +46,15 @@ import org.apache.poi.util.IOUtils;
import org.apache.poi.util.RLEDecompressingInputStream;
/**
* Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
* and returns them.
* </p>
* <p>
* <b>NOTE:</b> This does not read macros from .ppt files.
* See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF() in the scratchpad
* module for an example of how to do this. Patches that make macro
* extraction from .ppt more elegant are welcomed!
* </p>
*
* @since 3.15-beta2
*/

View File

@ -0,0 +1,84 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.util.LittleEndian;
/**
* A container record that specifies information about the document and document display settings.
*/
public final class DocInfoListContainer extends RecordContainer {
private byte[] _header;
private static long _type = RecordTypes.List.typeID;
// Links to our more interesting children
/**
* Set things up, and find our more interesting children
*/
protected DocInfoListContainer(byte[] source, int start, int len) {
// Grab the header
_header = new byte[8];
System.arraycopy(source,start,_header,0,8);
// Find our children
_children = Record.findChildRecords(source,start+8,len-8);
findInterestingChildren();
}
/**
* Go through our child records, picking out the ones that are
* interesting, and saving those for use by the easy helper
* methods.
*/
private void findInterestingChildren() {
}
/**
* Create a new DocInfoListContainer, with blank fields - not yet supported
*/
private DocInfoListContainer() {
_header = new byte[8];
_children = new Record[0];
// Setup our header block
_header[0] = 0x0f; // We are a container record
LittleEndian.putShort(_header, 2, (short)_type);
// Setup our child records
findInterestingChildren();
}
/**
* We are of type 0x7D0
*/
public long getRecordType() { return _type; }
/**
* Write the contents of the record back, so it can be written
* to disk
*/
public void writeOut(OutputStream out) throws IOException {
writeOut(_header[0],_header[1],_type,_children,out);
}
}

View File

@ -143,6 +143,15 @@ public class ExOleObjStg extends RecordAtom implements PositionDependentRecord,
return RecordTypes.ExOleObjStg.typeID;
}
/**
 * Reads the record instance out of the record header.
 * <p>
 * The first two header bytes are read as one unsigned short; its lowest
 * four bits are shifted out and the remaining bits are returned.
 * </p>
 *
 * @return record instance
 */
public int getRecordInstance() {
    final int versionAndInstance = LittleEndian.getUShort(_header, 0);
    return versionAndInstance >>> 4;
}
/**
* Write the contents of the record back, so it can be written
* to disk.

View File

@ -47,8 +47,8 @@ public enum RecordTypes {
ViewInfo(1020,null),
ViewInfoAtom(1021,null),
SlideViewInfoAtom(1022,null),
VBAInfo(1023,null),
VBAInfoAtom(1024,null),
VBAInfo(1023,VBAInfoContainer.class),
VBAInfoAtom(1024,VBAInfoAtom.class),
SSDocInfoAtom(1025,null),
Summary(1026,null),
DocRoutingSlip(1030,null),
@ -63,7 +63,7 @@ public enum RecordTypes {
NamedShowSlides(1042,null),
SheetProperties(1044,null),
RoundTripCustomTableStyles12Atom(1064,null),
List(2000,null),
List(2000,DocInfoListContainer.class),
FontCollection(2005,FontCollection.class),
BookmarkCollection(2019,null),
SoundCollection(2020,SoundCollection.class),

View File

@ -0,0 +1,118 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.util.LittleEndian;
/**
* An atom record that specifies a reference to the VBA project storage.
*/
public final class VBAInfoAtom extends RecordAtom {
private static final long _type = RecordTypes.VBAInfoAtom.typeID;
/**
* Record header.
*/
private byte[] _header;
/**
* Record data.
*/
private long persistIdRef;
private boolean hasMacros;
private long version;
/**
* Constructs an empty atom - not yet supported
*/
private VBAInfoAtom() {
_header = new byte[8];
// TODO: fix me
LittleEndian.putUInt(_header, 0, _type);
persistIdRef = 0;
hasMacros = true;
version = 2;
}
/**
* Constructs the vba atom record from its source data.
*
* @param source the source data as a byte array.
* @param start the start offset into the byte array.
* @param len the length of the slice in the byte array.
*/
public VBAInfoAtom(byte[] source, int start, int len) {
// Get the header.
_header = new byte[8];
System.arraycopy(source,start,_header,0,8);
// Get the record data.
persistIdRef = LittleEndian.getUInt(source, start+8);
hasMacros = (LittleEndian.getUInt(source, start+12) == 1);
version = LittleEndian.getUInt(source, start+16);
}
/**
* Gets the record type.
* @return the record type.
*/
public long getRecordType() { return _type; }
/**
* Write the contents of the record back, so it can be written
* to disk
*
* @param out the output stream to write to.
* @throws java.io.IOException if an error occurs.
*/
public void writeOut(OutputStream out) throws IOException {
out.write(_header);
LittleEndian.putUInt(persistIdRef, out);
LittleEndian.putUInt(hasMacros ? 1 : 0, out);
LittleEndian.putUInt(version, out);
}
public long getPersistIdRef() {
return persistIdRef;
}
public void setPersistIdRef(long persistIdRef) {
this.persistIdRef = persistIdRef;
}
public boolean isHasMacros() {
return hasMacros;
}
public void setHasMacros(boolean hasMacros) {
this.hasMacros = hasMacros;
}
public long getVersion() {
return version;
}
public void setVersion(long version) {
this.version = version;
}
}

View File

@ -0,0 +1,87 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hslf.record;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.util.LittleEndian;
/**
* A container record that specifies VBA information for the document.
*/
public final class VBAInfoContainer extends RecordContainer {
private byte[] _header;
private static long _type = RecordTypes.VBAInfo.typeID;
// Links to our more interesting children
/**
* Set things up, and find our more interesting children
*/
protected VBAInfoContainer(byte[] source, int start, int len) {
// Grab the header
_header = new byte[8];
System.arraycopy(source, start, _header, 0, 8);
// Find our children
_children = Record.findChildRecords(source, start + 8, len - 8);
findInterestingChildren();
}
/**
* Go through our child records, picking out the ones that are
* interesting, and saving those for use by the easy helper
* methods.
*/
private void findInterestingChildren() {
}
/**
* Create a new VBAInfoContainer, with blank fields - not yet supported
*/
private VBAInfoContainer() {
_header = new byte[8];
_children = new Record[0];
// Setup our header block
_header[0] = 0x0f; // We are a container record
LittleEndian.putShort(_header, 2, (short) _type);
// Setup our child records
findInterestingChildren();
}
/**
* We are of type 0x3FF
*/
public long getRecordType() {
return _type;
}
/**
* Write the contents of the record back, so it can be written
* to disk
*/
public void writeOut(OutputStream out) throws IOException {
writeOut(_header[0], _header[1], _type, _children, out);
}
}

View File

@ -17,6 +17,7 @@
package org.apache.poi.hslf.usermodel;
import static org.apache.poi.POITestCase.assertContains;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
@ -30,7 +31,9 @@ import java.awt.geom.Path2D;
import java.awt.geom.Rectangle2D;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.text.AttributedCharacterIterator;
@ -43,6 +46,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.poi.POIDataSamples;
import org.apache.poi.ddf.AbstractEscherOptRecord;
import org.apache.poi.ddf.EscherArrayProperty;
import org.apache.poi.ddf.EscherColorRef;
@ -51,12 +55,18 @@ import org.apache.poi.hslf.HSLFTestDataSamples;
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hslf.model.HeadersFooters;
import org.apache.poi.hslf.record.DocInfoListContainer;
import org.apache.poi.hslf.record.Document;
import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.RecordTypes;
import org.apache.poi.hslf.record.SlideListWithText;
import org.apache.poi.hslf.record.SlideListWithText.SlideAtomsSet;
import org.apache.poi.hslf.record.TextHeaderAtom;
import org.apache.poi.hslf.record.VBAInfoAtom;
import org.apache.poi.hslf.record.VBAInfoContainer;
import org.apache.poi.hssf.usermodel.DummyGraphics2d;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.macros.VBAMacroReader;
import org.apache.poi.sl.draw.DrawFactory;
import org.apache.poi.sl.draw.DrawPaint;
import org.apache.poi.sl.draw.DrawTextParagraph;
@ -72,6 +82,7 @@ import org.apache.poi.sl.usermodel.TextBox;
import org.apache.poi.sl.usermodel.TextParagraph;
import org.apache.poi.sl.usermodel.TextParagraph.TextAlign;
import org.apache.poi.sl.usermodel.TextRun;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
import org.apache.poi.util.Units;
@ -948,4 +959,51 @@ public final class TestBugs {
ppt2.close();
}
@Test
public void bug59302() throws IOException {
    // Macro extraction from PPT: the first sample is checked for two modules
    final Map<String, String> twoModules = getMacrosFromHSLF("59302.ppt");
    assertNotNull("couldn't find macros", twoModules);

    final String secondModule = twoModules.get("Module2");
    assertNotNull("couldn't find second module", secondModule);
    assertContains(secondModule, "newMacro in Module2");

    final String firstModule = twoModules.get("Module1");
    assertNotNull("couldn't find first module", firstModule);
    assertContains(firstModule, "Italicize");

    // The second sample is only checked for its first module
    final Map<String, String> simple = getMacrosFromHSLF("SimpleMacro.ppt");
    final String simpleModule = simple.get("Module1");
    assertNotNull(simpleModule);
    assertContains(simpleModule, "This is a macro slideshow");
}
/**
 * Reads the VBA macros out of a .ppt sample file.
 * It isn't pretty, but it works...
 * <p>
 * The persist id of the VBA storage is looked up via the document's
 * List / VBAInfo / VBAInfoAtom records; the embedded object with that
 * persist id is then handed to a {@link VBAMacroReader}.
 * </p>
 *
 * @param fileName name of the file in the slideshow sample data
 * @return the macros as returned by {@link VBAMacroReader#readMacros()}, or
 *         {@code null} if one of the records is missing or no embedded
 *         object matches the persist id
 * @throws IOException if the file cannot be read
 */
private Map<String, String> getMacrosFromHSLF(String fileName) throws IOException {
    InputStream is = null;
    NPOIFSFileSystem npoifs = null;
    HSLFSlideShow ppt = null;
    try {
        is = new FileInputStream(POIDataSamples.getSlideShowInstance().getFile(fileName));
        npoifs = new NPOIFSFileSystem(is);
        //TODO: should we run the VBAMacroReader on this npoifs?
        //TBD: We know that ppt typically don't store macros in the regular place,
        //but _can_ they?
        ppt = new HSLFSlideShow(npoifs);

        //get macro persist id
        DocInfoListContainer list = (DocInfoListContainer)ppt.getDocumentRecord().findFirstOfType(RecordTypes.List.typeID);
        if (list == null) {
            return null;
        }
        VBAInfoContainer vbaInfo = (VBAInfoContainer)list.findFirstOfType(RecordTypes.VBAInfo.typeID);
        if (vbaInfo == null) {
            return null;
        }
        VBAInfoAtom vbaAtom = (VBAInfoAtom)vbaInfo.findFirstOfType(RecordTypes.VBAInfoAtom.typeID);
        if (vbaAtom == null) {
            return null;
        }
        long persistId = vbaAtom.getPersistIdRef();

        for (HSLFObjectData objData : ppt.getEmbeddedObjects()) {
            if (objData.getExOleObjStg().getPersistId() == persistId) {
                VBAMacroReader reader = new VBAMacroReader(objData.getData());
                try {
                    return reader.readMacros();
                } finally {
                    // the reader would otherwise stay open
                    IOUtils.closeQuietly(reader);
                }
            }
        }
    } finally {
        // close the slideshow as well, not only the streams underneath it
        IOUtils.closeQuietly(ppt);
        IOUtils.closeQuietly(npoifs);
        IOUtils.closeQuietly(is);
    }
    return null;
}
}

View File

@ -87,7 +87,8 @@ public class TestVBAMacroReader {
public void XSSFfromStream() throws Exception {
fromStream(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
}
@Ignore("bug 59302: Found 0 macros")
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
@Test
public void HSLFfromStream() throws Exception {
fromStream(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");
@ -123,7 +124,8 @@ public class TestVBAMacroReader {
public void XSSFfromFile() throws Exception {
fromFile(POIDataSamples.getSpreadSheetInstance(), "SimpleMacro.xlsm");
}
@Ignore("bug 59302: Found 0 macros")
@Ignore("bug 59302: Found 0 macros; See org.apache.poi.hslf.usermodel.TestBugs.getMacrosFromHSLF()" +
"for an example of how to get macros out of ppt. TODO: make integration across file formats more elegant")
@Test
public void HSLFfromFile() throws Exception {
fromFile(POIDataSamples.getSlideShowInstance(), "SimpleMacro.ppt");

Binary file not shown.