VBA extraction support from bug #52949 from Barry Lagerweij
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738418 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2a0ed81538
commit
4abcc6626a
188
src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java
Normal file
188
src/java/org/apache/poi/poifs/macros/VBAMacroExtractor.java
Normal file
@ -0,0 +1,188 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.poifs.macros;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.PushbackInputStream;
|
||||
import java.nio.charset.Charset;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
|
||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.RLEDecompressingInputStream;
|
||||
|
||||
/**
|
||||
* This class is able to extract the source of all VBA Modules of an Excel file.
|
||||
*/
|
||||
public class VBAMacroExtractor {
|
||||
|
||||
/**
|
||||
* Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files.
|
||||
*
|
||||
* @param in
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public Map<String, String> extractMacros(InputStream in) throws IOException {
|
||||
PushbackInputStream bpin = new PushbackInputStream(in, 2);
|
||||
byte[] header = new byte[2];
|
||||
if (bpin.read(header) != 2) {
|
||||
throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");
|
||||
}
|
||||
bpin.unread(header);
|
||||
if (header[0] == 'P' && header[1] == 'K') {
|
||||
ZipInputStream zis = new ZipInputStream(bpin);
|
||||
ZipEntry zipEntry;
|
||||
while ((zipEntry = zis.getNextEntry()) != null) {
|
||||
if ("xl/vbaProject.bin".equals(zipEntry.getName())) {
|
||||
try {
|
||||
return extractMacrosFromPOIFSInputStream(zis);
|
||||
} finally {
|
||||
zis.closeEntry();
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
} else {
|
||||
return extractMacrosFromPOIFSInputStream(bpin);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or
|
||||
* vbaProject.bin from OOXML files)
|
||||
*
|
||||
* @param in
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {
|
||||
class Module {
|
||||
|
||||
Integer offset;
|
||||
byte[] buf;
|
||||
}
|
||||
class ModuleMap extends HashMap<String, Module> {
|
||||
|
||||
Charset charset = Charset.forName("Cp1252"); // default charset
|
||||
}
|
||||
try {
|
||||
final ModuleMap modules = new ModuleMap();
|
||||
POIFSReader dirReader = new POIFSReader();
|
||||
dirReader.registerListener(new POIFSReaderListener() {
|
||||
|
||||
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
|
||||
try {
|
||||
String name = event.getName();
|
||||
if (event.getPath().toString().endsWith("\\VBA")) {
|
||||
if ("dir".equals(name)) {
|
||||
// process DIR
|
||||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());
|
||||
String streamName = null;
|
||||
while (true) {
|
||||
int id = in.readShort();
|
||||
if (id == -1 || id == 0x0010) {
|
||||
break; // EOF or TERMINATOR
|
||||
}
|
||||
int len = in.readInt();
|
||||
switch (id) {
|
||||
case 0x0009: // PROJECTVERSION
|
||||
in.skip(6);
|
||||
break;
|
||||
case 0x0003: // PROJECTCODEPAGE
|
||||
int codepage = in.readShort();
|
||||
modules.charset = Charset.forName("Cp" + codepage);
|
||||
break;
|
||||
case 0x001A: // STREAMNAME
|
||||
byte[] streamNameBuf = new byte[len];
|
||||
int count = in.read(streamNameBuf);
|
||||
streamName = new String(streamNameBuf, 0, count, modules.charset);
|
||||
break;
|
||||
case 0x0031: // MODULEOFFSET
|
||||
int moduleOffset = in.readInt();
|
||||
Module module = modules.get(streamName);
|
||||
if (module != null) {
|
||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
|
||||
module.buf, moduleOffset, module.buf.length - moduleOffset));
|
||||
IOUtils.copy(stream, out);
|
||||
stream.close();
|
||||
out.close();
|
||||
module.buf = out.toByteArray();
|
||||
} else {
|
||||
module = new Module();
|
||||
module.offset = moduleOffset;
|
||||
modules.put(streamName, module);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
in.skip(len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
|
||||
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
||||
Module module = modules.get(name);
|
||||
final DocumentInputStream stream = event.getStream();
|
||||
final InputStream in;
|
||||
if (module == null) {
|
||||
// no DIR stream with offsets yet, so store the compressed bytes for later
|
||||
module = new Module();
|
||||
modules.put(name, module);
|
||||
in = stream;
|
||||
} else {
|
||||
// we know the offset already, so decompress immediately on-the-fly
|
||||
stream.skip(module.offset);
|
||||
in = new RLEDecompressingInputStream(stream);
|
||||
}
|
||||
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
IOUtils.copy(in, out);
|
||||
in.close();
|
||||
out.close();
|
||||
module.buf = out.toByteArray();
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
});
|
||||
dirReader.read(in);
|
||||
Map<String, String> moduleSources = new HashMap<String, String>();
|
||||
for (Map.Entry<String, Module> entry : modules.entrySet()) {
|
||||
Module module = entry.getValue();
|
||||
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
|
||||
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
|
||||
}
|
||||
}
|
||||
return moduleSources;
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
}
|
273
src/java/org/apache/poi/util/RLEDecompressingInputStream.java
Normal file
273
src/java/org/apache/poi/util/RLEDecompressingInputStream.java
Normal file
@ -0,0 +1,273 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
 * Wrapper of InputStream which provides Run Length Encoding (RLE)
 * decompression on the fly. Uses MS-OVBA decompression algorithm. See
 * http://download.microsoft.com/download/2/4/8/24862317-78F0-4C4B-B355-C7B2C1D997DB/[MS-OVBA].pdf
 * <p>
 * The compressed data consists of a signature byte (0x01) followed by chunks.
 * Each chunk is decoded completely into an internal buffer, from which the
 * read methods are served.
 */
public class RLEDecompressingInputStream extends InputStream {

    /**
     * Bitmasks for performance
     */
    private static final int[] POWER2 = new int[] {
        0x0001, // 0
        0x0002, // 1
        0x0004, // 2
        0x0008, // 3
        0x0010, // 4
        0x0020, // 5
        0x0040, // 6
        0x0080, // 7
        0x0100, // 8
        0x0200, // 9
        0x0400, // 10
        0x0800, // 11
        0x1000, // 12
        0x2000, // 13
        0x4000, // 14
        0x8000  // 15
    };

    /** the wrapped inputstream */
    private InputStream in;

    /** a byte buffer with size 4096 for storing a single chunk */
    private byte[] buf;

    /** the current position in the byte buffer for reading */
    private int pos;

    /** the number of bytes in the byte buffer, or -1 once the end of the stream was reached */
    private int len;

    /**
     * Creates a new wrapper RLE Decompression InputStream. The signature byte
     * and the first chunk are read from <code>in</code> right away.
     *
     * @param in the stream holding the compressed data
     * @throws IOException if reading from <code>in</code> fails
     * @throws IllegalArgumentException if the data does not start with the signature byte 0x01
     */
    public RLEDecompressingInputStream(InputStream in) throws IOException {
        this.in = in;
        buf = new byte[4096];
        pos = 0;
        int header = in.read();
        if (header != 0x01) {
            throw new IllegalArgumentException(String.format("Header byte 0x01 expected, received 0x%02X", header & 0xFF));
        }
        len = readChunk();
    }

    /**
     * Reads the next decompressed byte.
     *
     * @return the byte as a value in the range 0..255, or -1 at the end of the stream
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        if (len == -1) {
            return -1;
        }
        // loop instead of a single check: a chunk may decode to zero bytes
        while (pos >= len) {
            if ((len = readChunk()) == -1) {
                return -1;
            }
        }
        // mask to honour the InputStream contract; a plain (signed) byte would
        // turn 0xFF into -1, i.e. a bogus end-of-stream marker
        return buf[pos++] & 0xFF;
    }

    @Override
    public int read(byte[] b) throws IOException {
        return read(b, 0, b.length);
    }

    /**
     * Reads up to <code>l</code> decompressed bytes, crossing chunk boundaries as needed.
     *
     * @param b the destination array
     * @param off the index in <code>b</code> of the first byte to write
     * @param l the number of bytes to read
     * @return <code>l</code>, or the smaller number of bytes that could be read
     *         before the end of the stream, or -1 if nothing could be read
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override
    public int read(byte[] b, int off, int l) throws IOException {
        if (len == -1) {
            return -1;
        }
        int offset = off;
        int length = l;
        while (length > 0) {
            if (pos >= len) {
                if ((len = readChunk()) == -1) {
                    return offset > off ? offset - off : -1;
                }
            }
            int c = Math.min(length, len - pos);
            System.arraycopy(buf, pos, b, offset, c);
            pos += c;
            length -= c;
            offset += c;
        }
        return l;
    }

    /**
     * Skips <code>n</code> decompressed bytes, crossing chunk boundaries as needed.
     *
     * @param n the number of bytes to skip
     * @return <code>n</code>, or -1 if the end of the stream was reached before
     *         <code>n</code> bytes could be skipped
     * @throws IOException if reading from the wrapped stream fails
     */
    @Override
    public long skip(long n) throws IOException {
        long length = n;
        while (length > 0) {
            if (pos >= len) {
                if ((len = readChunk()) == -1) {
                    return -1;
                }
            }
            // clamp to what is still left to skip, not to the total request,
            // otherwise too much is skipped after crossing a chunk boundary
            int c = (int) Math.min(length, len - pos);
            pos += c;
            length -= c;
        }
        return n;
    }

    /**
     * @return the number of bytes left in the currently buffered chunk
     */
    @Override
    public int available() {
        return (len > 0 ? len - pos : 0);
    }

    /**
     * Closes the wrapped stream.
     */
    @Override
    public void close() throws IOException {
        in.close();
    }

    /**
     * Reads a single chunk from the underlying inputstream and decodes it into
     * the buffer. The read position is reset to the start of the buffer.
     *
     * @return the number of decoded bytes now in the buffer, or -1 at the end of the stream
     * @throws IOException if reading from the wrapped stream fails
     * @throws IllegalArgumentException if the chunk header carries a wrong signature
     * @throws IllegalStateException if a raw chunk is shorter than its header announces
     */
    private int readChunk() throws IOException {
        pos = 0;
        int w = readShort(in);
        if (w == -1) {
            return -1;
        }
        int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
        if ((w & 0x7000) != 0x3000) {
            // report exactly the bits that were tested
            throw new IllegalArgumentException(String.format("Chunksize header A should be 0x3000, received 0x%04X", w & 0x7000));
        }
        boolean rawChunk = (w & 0x8000) == 0;
        if (rawChunk) {
            // a single read() may deliver less than requested, so read until the chunk is complete
            int filled = 0;
            while (filled < chunkSize) {
                int count = in.read(buf, filled, chunkSize - filled);
                if (count <= 0) {
                    break;
                }
                filled += count;
            }
            if (filled < chunkSize) {
                throw new IllegalStateException(String.format("Not enough bytes read, expected %d", chunkSize));
            }
            return chunkSize;
        } else {
            int inOffset = 0;
            int outOffset = 0;
            while (inOffset < chunkSize) {
                // one flag byte describes the following (up to) 8 tokens, lowest bit first
                int tokenFlags = in.read();
                inOffset++;
                if (tokenFlags == -1) {
                    break;
                }
                for (int n = 0; n < 8; n++) {
                    if (inOffset >= chunkSize) {
                        break;
                    }
                    if ((tokenFlags & POWER2[n]) == 0) {
                        // literal
                        final int b = in.read();
                        if (b == -1) {
                            return -1;
                        }
                        buf[outOffset++] = (byte) b;
                        inOffset++;
                    } else {
                        // compressed token
                        int token = readShort(in);
                        if (token == -1) {
                            return -1;
                        }
                        inOffset += 2;
                        // the split between offset bits and length bits depends on
                        // how much of the chunk has been decoded so far
                        int copyLenBits = getCopyLenBits(outOffset - 1);
                        int copyOffset = (token >> (copyLenBits)) + 1;
                        int copyLen = (token & (POWER2[copyLenBits] - 1)) + 3;
                        int startPos = outOffset - copyOffset;
                        int endPos = startPos + copyLen;
                        // byte-wise on purpose: source and destination may overlap
                        for (int i = startPos; i < endPos; i++) {
                            buf[outOffset++] = buf[i];
                        }
                    }
                }
            }
            return outOffset;
        }
    }

    /**
     * Helper method to determine how many bits in the CopyToken are used for the CopyLength.
     *
     * @param offset the index of the last byte decoded so far within the current chunk
     * @return the number of length bits, between 4 and 12
     */
    static int getCopyLenBits(int offset) {
        for (int n = 11; n >= 4; n--) {
            if ((offset & POWER2[n]) != 0) {
                return 15 - n;
            }
        }
        return 12;
    }

    /**
     * Convenience method for read a 2-bytes short in little endian encoding.
     *
     * @return the value in the range 0..65535, or -1 if the end of the stream was reached
     * @throws IOException if reading from the wrapped stream fails
     */
    public int readShort() throws IOException {
        return readShort(this);
    }

    /**
     * Convenience method for read a 4-bytes int in little endian encoding.
     *
     * @return the value, or -1 if the end of the stream was reached (which
     *         cannot be told apart from the regular value 0xFFFFFFFF)
     * @throws IOException if reading from the wrapped stream fails
     */
    public int readInt() throws IOException {
        return readInt(this);
    }

    /**
     * Reads a little endian 2-bytes short from the given stream.
     *
     * @return the value in the range 0..65535, or -1 if the stream ended early
     */
    private int readShort(InputStream stream) throws IOException {
        int b0, b1;
        if ((b0 = stream.read()) == -1) {
            return -1;
        }
        if ((b1 = stream.read()) == -1) {
            return -1;
        }
        return (b0 & 0xFF) | ((b1 & 0xFF) << 8);
    }

    /**
     * Reads a little endian 4-bytes int from the given stream.
     *
     * @return the value, or -1 if the stream ended early
     */
    private int readInt(InputStream stream) throws IOException {
        int b0, b1, b2, b3;
        if ((b0 = stream.read()) == -1) {
            return -1;
        }
        if ((b1 = stream.read()) == -1) {
            return -1;
        }
        if ((b2 = stream.read()) == -1) {
            return -1;
        }
        if ((b3 = stream.read()) == -1) {
            return -1;
        }
        return (b0 & 0xFF) | ((b1 & 0xFF) << 8) | ((b2 & 0xFF) << 16) | ((b3 & 0xFF) << 24);
    }
}
|
Loading…
Reference in New Issue
Block a user