Make the extractor exactly that, powered by the reader #52949

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1738429 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2016-04-10 12:59:38 +00:00
parent 2185e91883
commit e8e1a294e6

View File

@ -17,172 +17,73 @@
package org.apache.poi.poifs.macros; package org.apache.poi.poifs.macros;
import java.io.ByteArrayInputStream; import java.io.File;
import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.OutputStreamWriter;
import java.io.PushbackInputStream;
import java.nio.charset.Charset;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.poi.poifs.eventfilesystem.POIFSReader; import org.apache.poi.util.StringUtil;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.RLEDecompressingInputStream;
/** /**
* This class is able to extract the source of all VBA Modules of an Excel file. * This class extracts out the source of all VBA Modules of an office file,
* both OOXML and OLE2/POIFS, eg XLSM or DOC
*/ */
public class VBAMacroExtractor { public class VBAMacroExtractor {
public static void main(String args[]) throws IOException {
/** if (args.length == 0) {
* Extract macros from XLSM or XLS file. Automatically detects ZIP (XLSM, DOCX, etc) files. System.err.println("Use:");
* System.err.println(" VBAMacroExtractor <office.doc> [output]");
* @param in System.err.println("");
* @return System.err.println("If an output directory is given, macros are written there");
* @throws IOException System.err.println("Otherwise they are output to the screen");
*/ System.exit(1);
public Map<String, String> extractMacros(InputStream in) throws IOException {
PushbackInputStream bpin = new PushbackInputStream(in, 2);
byte[] header = new byte[2];
if (bpin.read(header) != 2) {
throw new IllegalArgumentException("Invalid InputStream: cannot read 2 bytes");
} }
bpin.unread(header);
if (header[0] == 'P' && header[1] == 'K') { File input = new File(args[0]);
ZipInputStream zis = new ZipInputStream(bpin); File output = null;
ZipEntry zipEntry; if (args.length > 1) {
while ((zipEntry = zis.getNextEntry()) != null) { output = new File(args[1]);
if ("xl/vbaProject.bin".equals(zipEntry.getName())) {
try {
return extractMacrosFromPOIFSInputStream(zis);
} finally {
zis.closeEntry();
}
}
}
return null;
} else {
return extractMacrosFromPOIFSInputStream(bpin);
} }
VBAMacroExtractor extract = new VBAMacroExtractor();
extract.extract(input, output);
} }
/** public void extract(File input, File outputDir) throws IOException {
* Extracts all macros from all modules of the provided input stream. The stream is assumed to be in POIFS format (i.e. XLS file itself or if (! input.exists()) throw new FileNotFoundException(input.toString());
* vbaProject.bin from OOXML files) System.err.print("Extracting VBA Macros from " + input + " to ");
* if (outputDir != null) {
* @param in if (! outputDir.exists()) outputDir.mkdir();
* @return System.err.println(outputDir);
* @throws IOException } else {
*/ System.err.println("STDOUT");
public Map<String, String> extractMacrosFromPOIFSInputStream(InputStream in) throws IOException {
class Module {
Integer offset;
byte[] buf;
} }
class ModuleMap extends HashMap<String, Module> {
VBAMacroReader reader = new VBAMacroReader(input);
Charset charset = Charset.forName("Cp1252"); // default charset Map<String,String> macros = reader.readMacros();
} reader.close();
try {
final ModuleMap modules = new ModuleMap(); final String divider = "---------------------------------------";
POIFSReader dirReader = new POIFSReader(); for (String macro : macros.keySet()) {
dirReader.registerListener(new POIFSReaderListener() { if (outputDir == null) {
System.out.println(divider);
public void processPOIFSReaderEvent(POIFSReaderEvent event) { System.out.println(macro);
try { System.out.println("");
String name = event.getName(); System.out.println(macros.get(macro));
if (event.getPath().toString().endsWith("\\VBA")) { } else {
if ("dir".equals(name)) { File out = new File(outputDir, macro + ".vba");
// process DIR FileOutputStream fout = new FileOutputStream(out);
RLEDecompressingInputStream in = new RLEDecompressingInputStream(event.getStream()); OutputStreamWriter fwriter = new OutputStreamWriter(fout, StringUtil.UTF8);
String streamName = null; fwriter.write(macros.get(macro));
while (true) { fwriter.close();
int id = in.readShort(); fout.close();
if (id == -1 || id == 0x0010) { System.out.println("Extracted " + out);
break; // EOF or TERMINATOR
}
int len = in.readInt();
switch (id) {
case 0x0009: // PROJECTVERSION
in.skip(6);
break;
case 0x0003: // PROJECTCODEPAGE
int codepage = in.readShort();
modules.charset = Charset.forName("Cp" + codepage);
break;
case 0x001A: // STREAMNAME
byte[] streamNameBuf = new byte[len];
int count = in.read(streamNameBuf);
streamName = new String(streamNameBuf, 0, count, modules.charset);
break;
case 0x0031: // MODULEOFFSET
int moduleOffset = in.readInt();
Module module = modules.get(streamName);
if (module != null) {
ByteArrayOutputStream out = new ByteArrayOutputStream();
RLEDecompressingInputStream stream = new RLEDecompressingInputStream(new ByteArrayInputStream(
module.buf, moduleOffset, module.buf.length - moduleOffset));
IOUtils.copy(stream, out);
stream.close();
out.close();
module.buf = out.toByteArray();
} else {
module = new Module();
module.offset = moduleOffset;
modules.put(streamName, module);
}
break;
default:
in.skip(len);
break;
}
}
} else if (!name.startsWith("__SRP") && !name.startsWith("_VBA_PROJECT")) {
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
Module module = modules.get(name);
final DocumentInputStream stream = event.getStream();
final InputStream in;
if (module == null) {
// no DIR stream with offsets yet, so store the compressed bytes for later
module = new Module();
modules.put(name, module);
in = stream;
} else {
// we know the offset already, so decompress immediately on-the-fly
stream.skip(module.offset);
in = new RLEDecompressingInputStream(stream);
}
final ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(in, out);
in.close();
out.close();
module.buf = out.toByteArray();
}
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
});
dirReader.read(in);
Map<String, String> moduleSources = new HashMap<String, String>();
for (Map.Entry<String, Module> entry : modules.entrySet()) {
Module module = entry.getValue();
if (module.buf != null && module.buf.length > 0) { // Skip empty modules
moduleSources.put(entry.getKey(), new String(module.buf, modules.charset));
}
} }
return moduleSources; }
} catch (IOException e) { if (outputDir == null) {
e.printStackTrace(); System.out.println(divider);
throw e;
} }
} }
} }