Make the extractor exactly that, powered by the reader #52949
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738429 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2185e91883
commit
e8e1a294e6
@ -17,172 +17,73 @@
|
|||||||
|
|
||||||
package org.apache.poi.poifs.macros;
|
package org.apache.poi.poifs.macros;
|
||||||
|
|
||||||
import java.io.ByteArrayInputStream;
|
import java.io.File;
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.OutputStreamWriter;
|
||||||
import java.io.PushbackInputStream;
|
|
||||||
import java.nio.charset.Charset;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.zip.ZipEntry;
|
|
||||||
import java.util.zip.ZipInputStream;
|
|
||||||
|
|
||||||
import org.apache.poi.poifs.eventfilesystem.POIFSReader;
|
import org.apache.poi.util.StringUtil;
|
||||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
|
|
||||||
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
|
|
||||||
import org.apache.poi.poifs.filesystem.DocumentInputStream;
|
|
||||||
import org.apache.poi.util.IOUtils;
|
|
||||||
import org.apache.poi.util.RLEDecompressingInputStream;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class is able to extract the source of all VBA Modules of an Excel file.
|
* This class extracts out the source of all VBA Modules of an office file,
|
||||||
|
* both OOXML and OLE2/POIFS, eg XLSM or DOC
|
||||||
*/
|
*/
|
||||||
public class VBAMacroExtractor {
|
public class VBAMacroExtractor {
|
||||||
|
public static void main(String args[]) throws IOException {
|
||||||
|
if (args.length == 0) {
|
||||||
|
System.err.println("Use:");
|
||||||
|
System.err.println(" VBAMacroExtractor <office.doc> [output]");
|
||||||
|
System.err.println("");
|
||||||
|
System.err.println("If an output directory is given, macros are written there");
|
||||||
|
System.err.println("Otherwise they are output to the screen");
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
File input = new File(args[0]);
|
||||||
* Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files.
|
File output = null;
|
||||||
*
|
if (args.length > 1) {
|
||||||
* @param in
|
output = new File(args[1]);
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public Map<String, String> extractMacros(InputStream in) throws IOException {
|
|
||||||
PushbackInputStream bpin = new PushbackInputStream(in, 2);
|
|
||||||
byte[] header = new byte[2];
|
|
||||||
if (bpin.read(header) != 2) {
|
|
||||||
throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");
|
|
||||||
}
|
|
||||||
bpin.unread(header);
|
|
||||||
if (header[0] == 'P' && header[1] == 'K') {
|
|
||||||
ZipInputStream zis = new ZipInputStream(bpin);
|
|
||||||
ZipEntry zipEntry;
|
|
||||||
while ((zipEntry = zis.getNextEntry()) != null) {
|
|
||||||
if ("xl/vbaProject.bin".equals(zipEntry.getName())) {
|
|
||||||
try {
|
|
||||||
return extractMacrosFromPOIFSInputStream(zis);
|
|
||||||
} finally {
|
|
||||||
zis.closeEntry();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
return extractMacrosFromPOIFSInputStream(bpin);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VBAMacroExtractor extract = new VBAMacroExtractor();
|
||||||
|
extract.extract(input, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
public void extract(File input, File outputDir) throws IOException {
|
||||||
* Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or
|
if (! input.exists()) throw new FileNotFoundException(input.toString());
|
||||||
* vbaProject.bin from OOXML files)
|
System.err.print("Extracting VBA Macros from " + input + " to ");
|
||||||
*
|
if (outputDir != null) {
|
||||||
* @param in
|
if (! outputDir.exists()) outputDir.mkdir();
|
||||||
* @return
|
System.err.println(outputDir);
|
||||||
* @throws IOException
|
} else {
|
||||||
*/
|
System.err.println("STDOUT");
|
||||||
public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {
|
|
||||||
class Module {
|
|
||||||
|
|
||||||
Integer offset;
|
|
||||||
byte[] buf;
|
|
||||||
}
|
}
|
||||||
class ModuleMap extends HashMap<String, Module> {
|
|
||||||
|
|
||||||
Charset charset = Charset.forName("Cp1252"); // default charset
|
VBAMacroReader reader = new VBAMacroReader(input);
|
||||||
}
|
Map<String,String> macros = reader.readMacros();
|
||||||
try {
|
reader.close();
|
||||||
final ModuleMap modules = new ModuleMap();
|
|
||||||
POIFSReader dirReader = new POIFSReader();
|
|
||||||
dirReader.registerListener(new POIFSReaderListener() {
|
|
||||||
|
|
||||||
public void processPOIFSReaderEvent(POIFSReaderEvent event) {
|
final String divider = "---------------------------------------";
|
||||||
try {
|
for (String macro : macros.keySet()) {
|
||||||
String name = event.getName();
|
if (outputDir == null) {
|
||||||
if (event.getPath().toString().endsWith("\\VBA")) {
|
System.out.println(divider);
|
||||||
if ("dir".equals(name)) {
|
System.out.println(macro);
|
||||||
// process DIR
|
System.out.println("");
|
||||||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream());
|
System.out.println(macros.get(macro));
|
||||||
String streamName = null;
|
} else {
|
||||||
while (true) {
|
File out = new File(outputDir, macro + ".vba");
|
||||||
int id = in.readShort();
|
FileOutputStream fout = new FileOutputStream(out);
|
||||||
if (id == -1 || id == 0x0010) {
|
OutputStreamWriter fwriter = new OutputStreamWriter(fout, StringUtil.UTF8);
|
||||||
break; // EOF or TERMINATOR
|
fwriter.write(macros.get(macro));
|
||||||
}
|
fwriter.close();
|
||||||
int len = in.readInt();
|
fout.close();
|
||||||
switch (id) {
|
System.out.println("Extracted " + out);
|
||||||
case 0x0009: // PROJECTVERSION
|
|
||||||
in.skip(6);
|
|
||||||
break;
|
|
||||||
case 0x0003: // PROJECTCODEPAGE
|
|
||||||
int codepage = in.readShort();
|
|
||||||
modules.charset = Charset.forName("Cp" + codepage);
|
|
||||||
break;
|
|
||||||
case 0x001A: // STREAMNAME
|
|
||||||
byte[] streamNameBuf = new byte[len];
|
|
||||||
int count = in.read(streamNameBuf);
|
|
||||||
streamName = new String(streamNameBuf, 0, count, modules.charset);
|
|
||||||
break;
|
|
||||||
case 0x0031: // MODULEOFFSET
|
|
||||||
int moduleOffset = in.readInt();
|
|
||||||
Module module = modules.get(streamName);
|
|
||||||
if (module != null) {
|
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
|
|
||||||
module.buf, moduleOffset, module.buf.length - moduleOffset));
|
|
||||||
IOUtils.copy(stream, out);
|
|
||||||
stream.close();
|
|
||||||
out.close();
|
|
||||||
module.buf = out.toByteArray();
|
|
||||||
} else {
|
|
||||||
module = new Module();
|
|
||||||
module.offset = moduleOffset;
|
|
||||||
modules.put(streamName, module);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
in.skip(len);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
|
|
||||||
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
|
||||||
Module module = modules.get(name);
|
|
||||||
final DocumentInputStream stream = event.getStream();
|
|
||||||
final InputStream in;
|
|
||||||
if (module == null) {
|
|
||||||
// no DIR stream with offsets yet, so store the compressed bytes for later
|
|
||||||
module = new Module();
|
|
||||||
modules.put(name, module);
|
|
||||||
in = stream;
|
|
||||||
} else {
|
|
||||||
// we know the offset already, so decompress immediately on-the-fly
|
|
||||||
stream.skip(module.offset);
|
|
||||||
in = new RLEDecompressingInputStream(stream);
|
|
||||||
}
|
|
||||||
final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
|
||||||
IOUtils.copy(in, out);
|
|
||||||
in.close();
|
|
||||||
out.close();
|
|
||||||
module.buf = out.toByteArray();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
dirReader.read(in);
|
|
||||||
Map<String, String> moduleSources = new HashMap<String, String>();
|
|
||||||
for (Map.Entry<String, Module> entry : modules.entrySet()) {
|
|
||||||
Module module = entry.getValue();
|
|
||||||
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
|
|
||||||
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return moduleSources;
|
}
|
||||||
} catch (IOException e) {
|
if (outputDir == null) {
|
||||||
e.printStackTrace();
|
System.out.println(divider);
|
||||||
throw e;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user