60279 -- back off to brute-force search for macro content if offset information is not correct
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1808301 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5ef2f1636c
commit
4852228f3f
@ -43,7 +43,9 @@ import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
import org.apache.poi.util.HexDump;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.RLEDecompressingInputStream;
|
||||
import org.apache.poi.util.StringUtil;
|
||||
|
||||
/**
|
||||
* <p>Finds all VBA Macros in an office file (OLE2/POIFS and OOXML/OPC),
|
||||
@ -61,9 +63,7 @@ import org.apache.poi.util.RLEDecompressingInputStream;
|
||||
public class VBAMacroReader implements Closeable {
|
||||
protected static final String VBA_PROJECT_OOXML = "vbaProject.bin";
|
||||
protected static final String VBA_PROJECT_POIFS = "VBA";
|
||||
// FIXME: When minimum supported version is Java 7, replace with java.nio.charset.StandardCharsets.UTF_16LE
|
||||
private static final Charset UTF_16LE = Charset.forName("UTF-16LE");
|
||||
|
||||
|
||||
private NPOIFSFileSystem fs;
|
||||
|
||||
public VBAMacroReader(InputStream rstream) throws IOException {
|
||||
@ -145,7 +145,7 @@ public class VBAMacroReader implements Closeable {
|
||||
}
|
||||
}
|
||||
protected static class ModuleMap extends HashMap<String, Module> {
|
||||
Charset charset = Charset.forName("Cp1252"); // default charset
|
||||
Charset charset = StringUtil.WIN_1252; // default charset
|
||||
}
|
||||
|
||||
/**
|
||||
@ -172,20 +172,7 @@ public class VBAMacroReader implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
|
||||
*
|
||||
* @param stream the inputstream to read from
|
||||
* @param length number of bytes to read from stream
|
||||
* @param charset the character set encoding of the bytes in the stream
|
||||
* @return a java String in the supplied character set
|
||||
* @throws IOException If reading from the stream fails
|
||||
*/
|
||||
private static String readString(InputStream stream, int length, Charset charset) throws IOException {
|
||||
byte[] buffer = new byte[length];
|
||||
int count = stream.read(buffer);
|
||||
return new String(buffer, 0, count, charset);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* reads module from DIR node in input stream and adds it to the modules map for decompression later
|
||||
@ -199,7 +186,7 @@ public class VBAMacroReader implements Closeable {
|
||||
* @param modules a map to store the modules
|
||||
* @throws IOException If reading data from the stream or from modules fails
|
||||
*/
|
||||
private static void readModule(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
|
||||
private static void readModuleMetadataFromDirStream(RLEDecompressingInputStream in, String streamName, ModuleMap modules) throws IOException {
|
||||
int moduleOffset = in.readInt();
|
||||
Module module = modules.get(streamName);
|
||||
if (module == null) {
|
||||
@ -218,27 +205,57 @@ public class VBAMacroReader implements Closeable {
|
||||
}
|
||||
}
|
||||
|
||||
private static void readModule(DocumentInputStream dis, String name, ModuleMap modules) throws IOException {
|
||||
private static void readModuleFromDocumentStream(DocumentNode documentNode, String name, ModuleMap modules) throws IOException {
|
||||
Module module = modules.get(name);
|
||||
// TODO Refactor this to fetch dir then do the rest
|
||||
if (module == null) {
|
||||
// no DIR stream with offsets yet, so store the compressed bytes for later
|
||||
module = new Module();
|
||||
modules.put(name, module);
|
||||
module.read(dis);
|
||||
InputStream dis = new DocumentInputStream(documentNode);
|
||||
try {
|
||||
module.read(dis);
|
||||
} finally {
|
||||
dis.close();
|
||||
}
|
||||
} else if (module.buf == null) { //if we haven't already read the bytes for the module keyed off this name...
|
||||
|
||||
if (module.offset == null) {
|
||||
//This should not happen. bug 59858
|
||||
throw new IOException("Module offset for '" + name + "' was never read.");
|
||||
}
|
||||
// we know the offset already, so decompress immediately on-the-fly
|
||||
long skippedBytes = dis.skip(module.offset);
|
||||
if (skippedBytes != module.offset) {
|
||||
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
|
||||
|
||||
//try the general case, where module.offset is accurate
|
||||
InputStream decompressed = null;
|
||||
InputStream compressed = new DocumentInputStream(documentNode);
|
||||
try {
|
||||
// we know the offset already, so decompress immediately on-the-fly
|
||||
long skippedBytes = compressed.skip(module.offset);
|
||||
if (skippedBytes != module.offset) {
|
||||
throw new IOException("tried to skip " + module.offset + " bytes, but actually skipped " + skippedBytes + " bytes");
|
||||
}
|
||||
decompressed = new RLEDecompressingInputStream(compressed);
|
||||
module.read(decompressed);
|
||||
return;
|
||||
} catch (IllegalArgumentException e) {
|
||||
} catch (IllegalStateException e) {
|
||||
} finally {
|
||||
IOUtils.closeQuietly(compressed);
|
||||
IOUtils.closeQuietly(decompressed);
|
||||
}
|
||||
|
||||
//bad module.offset, try brute force
|
||||
compressed = new DocumentInputStream(documentNode);
|
||||
byte[] decompressedBytes = null;
|
||||
try {
|
||||
decompressedBytes = findCompressedStreamWBruteForce(compressed);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(compressed);
|
||||
}
|
||||
|
||||
if (decompressedBytes != null) {
|
||||
module.read(new ByteArrayInputStream(decompressedBytes));
|
||||
}
|
||||
InputStream stream = new RLEDecompressingInputStream(dis);
|
||||
module.read(stream);
|
||||
stream.close();
|
||||
}
|
||||
|
||||
}
|
||||
@ -249,7 +266,7 @@ public class VBAMacroReader implements Closeable {
|
||||
* @throws IOException If skipping would exceed the available data or skipping did not work.
|
||||
*/
|
||||
private static void trySkip(InputStream in, long n) throws IOException {
|
||||
long skippedBytes = in.skip(n);
|
||||
long skippedBytes = IOUtils.skipFully(in, n);
|
||||
if (skippedBytes != n) {
|
||||
if (skippedBytes < 0) {
|
||||
throw new IOException(
|
||||
@ -258,33 +275,18 @@ public class VBAMacroReader implements Closeable {
|
||||
} else {
|
||||
throw new IOException(
|
||||
"Tried skipping " + n + " bytes, but only " + skippedBytes + " bytes were skipped. "
|
||||
+ "This should never happen.");
|
||||
+ "This should never happen with a non-corrupt file.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
||||
private static final int EOF = -1;
|
||||
private static final int VERSION_INDEPENDENT_TERMINATOR = 0x0010;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int VERSION_DEPENDENT_TERMINATOR = 0x002B;
|
||||
private static final int PROJECTVERSION = 0x0009;
|
||||
private static final int PROJECTCODEPAGE = 0x0003;
|
||||
private static final int STREAMNAME = 0x001A;
|
||||
private static final int MODULEOFFSET = 0x0031;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int MODULETYPE_PROCEDURAL = 0x0021;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int MODULETYPE_DOCUMENT_CLASS_OR_DESIGNER = 0x0022;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int PROJECTLCID = 0x0002;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int MODULE_NAME = 0x0019;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int MODULE_NAME_UNICODE = 0x0047;
|
||||
@SuppressWarnings("unused")
|
||||
private static final int MODULE_DOC_STRING = 0x001c;
|
||||
private static final int STREAMNAME_RESERVED = 0x0032;
|
||||
private static final int PROJECT_CONSTANTS_RESERVED = 0x003C;
|
||||
private static final int HELP_FILE_PATH_RESERVED = 0x003D;
|
||||
private static final int REFERENCE_NAME_RESERVED = 0x003E;
|
||||
private static final int DOC_STRING_RESERVED = 0x0040;
|
||||
private static final int MODULE_DOCSTRING_RESERVED = 0x0048;
|
||||
|
||||
/**
|
||||
* Reads VBA Project modules from a VBA Project directory located at
|
||||
@ -293,76 +295,330 @@ public class VBAMacroReader implements Closeable {
|
||||
* @since 3.15-beta2
|
||||
*/
|
||||
protected void readMacros(DirectoryNode macroDir, ModuleMap modules) throws IOException {
|
||||
//bug59858 shows that dirstream may not be in this directory (\MBD00082648\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
||||
//but may be in another directory (\_VBA_PROJECT_CUR\VBA ENTRY NAME)
|
||||
//process the dirstream first -- "dir" is case insensitive
|
||||
for (String entryName : macroDir.getEntryNames()) {
|
||||
if ("dir".equalsIgnoreCase(entryName)) {
|
||||
processDirStream(macroDir.getEntry(entryName), modules);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (Entry entry : macroDir) {
|
||||
if (! (entry instanceof DocumentNode)) { continue; }
|
||||
|
||||
String name = entry.getName();
|
||||
DocumentNode document = (DocumentNode)entry;
|
||||
DocumentInputStream dis = new DocumentInputStream(document);
|
||||
try {
|
||||
if ("dir".equalsIgnoreCase(name)) {
|
||||
// process DIR
|
||||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
|
||||
String streamName = null;
|
||||
int recordId = 0;
|
||||
try {
|
||||
while (true) {
|
||||
recordId = in.readShort();
|
||||
if (EOF == recordId
|
||||
|| VERSION_INDEPENDENT_TERMINATOR == recordId) {
|
||||
break;
|
||||
}
|
||||
int recordLength = in.readInt();
|
||||
switch (recordId) {
|
||||
case PROJECTVERSION:
|
||||
trySkip(in, 6);
|
||||
break;
|
||||
case PROJECTCODEPAGE:
|
||||
int codepage = in.readShort();
|
||||
modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
|
||||
break;
|
||||
case STREAMNAME:
|
||||
streamName = readString(in, recordLength, modules.charset);
|
||||
int reserved = in.readShort();
|
||||
if (reserved != STREAMNAME_RESERVED) {
|
||||
throw new IOException("Expected x0032 after stream name before Unicode stream name, but found: "+
|
||||
Integer.toHexString(reserved));
|
||||
}
|
||||
int unicodeNameRecordLength = in.readInt();
|
||||
readUnicodeString(in, unicodeNameRecordLength);
|
||||
// do something with this at some point
|
||||
break;
|
||||
case MODULEOFFSET:
|
||||
readModule(in, streamName, modules);
|
||||
break;
|
||||
default:
|
||||
trySkip(in, recordLength);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new IOException(
|
||||
"Error occurred while reading macros at section id "
|
||||
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
|
||||
}
|
||||
finally {
|
||||
in.close();
|
||||
}
|
||||
} else if (!startsWithIgnoreCase(name, "__SRP")
|
||||
|
||||
if (! "dir".equalsIgnoreCase(name) && !startsWithIgnoreCase(name, "__SRP")
|
||||
&& !startsWithIgnoreCase(name, "_VBA_PROJECT")) {
|
||||
// process module, skip __SRP and _VBA_PROJECT since these do not contain macros
|
||||
readModule(dis, name, modules);
|
||||
}
|
||||
}
|
||||
finally {
|
||||
dis.close();
|
||||
readModuleFromDocumentStream(document, name, modules);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private enum RecordType {
|
||||
// Constants from MS-OVBA: https://msdn.microsoft.com/en-us/library/office/cc313094(v=office.12).aspx
|
||||
MODULE_OFFSET(0x0031),
|
||||
PROJECT_SYS_KIND(0x01),
|
||||
PROJECT_LCID(0x0002),
|
||||
PROJECT_LCID_INVOKE(0x14),
|
||||
PROJECT_CODEPAGE(0x0003),
|
||||
PROJECT_NAME(0x04),
|
||||
PROJECT_DOC_STRING(0x05),
|
||||
PROJECT_HELP_FILE_PATH(0x06),
|
||||
PROJECT_HELP_CONTEXT(0x07, 8),
|
||||
PROJECT_LIB_FLAGS(0x08),
|
||||
PROJECT_VERSION(0x09, 10),
|
||||
PROJECT_CONSTANTS(0x0C),
|
||||
PROJECT_MODULES(0x0F),
|
||||
DIR_STREAM_TERMINATOR(0x10),
|
||||
PROJECT_COOKIE(0x13),
|
||||
MODULE_NAME(0x19),
|
||||
MODULE_NAME_UNICODE(0x47),
|
||||
MODULE_STREAM_NAME(0x1A),
|
||||
MODULE_DOC_STRING(0x1C),
|
||||
MODULE_HELP_CONTEXT(0x1E),
|
||||
MODULE_COOKIE(0x2c),
|
||||
MODULE_TYPE_PROCEDURAL(0x21, 4),
|
||||
MODULE_TYPE_OTHER(0x22, 4),
|
||||
MODULE_PRIVATE(0x28, 4),
|
||||
REFERENCE_NAME(0x16),
|
||||
REFERENCE_REGISTERED(0x0D),
|
||||
REFERENCE_PROJECT(0x0E),
|
||||
REFERENCE_CONTROL_A(0x2F),
|
||||
|
||||
//according to the spec, REFERENCE_CONTROL_B(0x33) should have the
|
||||
//same structure as REFERENCE_CONTROL_A(0x2F).
|
||||
//However, it seems to have the int(length) record structure that most others do.
|
||||
//See 59830.xls for this record.
|
||||
REFERENCE_CONTROL_B(0x33),
|
||||
//REFERENCE_ORIGINAL(0x33),
|
||||
|
||||
|
||||
MODULE_TERMINATOR(0x002B),
|
||||
EOF(-1),
|
||||
UNKNOWN(-2);
|
||||
|
||||
|
||||
private final int VARIABLE_LENGTH = -1;
|
||||
private final int id;
|
||||
private final int constantLength;
|
||||
|
||||
RecordType(int id) {
|
||||
this.id = id;
|
||||
this.constantLength = VARIABLE_LENGTH;
|
||||
}
|
||||
|
||||
RecordType(int id, int constantLength) {
|
||||
this.id = id;
|
||||
this.constantLength = constantLength;
|
||||
}
|
||||
|
||||
int getConstantLength() {
|
||||
return constantLength;
|
||||
}
|
||||
|
||||
static RecordType lookup(int id) {
|
||||
for (RecordType type : RecordType.values()) {
|
||||
if (type.id == id) {
|
||||
return type;
|
||||
}
|
||||
}
|
||||
return UNKNOWN;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private enum DIR_STATE {
|
||||
INFORMATION_RECORD,
|
||||
REFERENCES_RECORD,
|
||||
MODULES_RECORD
|
||||
}
|
||||
|
||||
private static class ASCIIUnicodeStringPair {
|
||||
private final String ascii;
|
||||
private final String unicode;
|
||||
|
||||
ASCIIUnicodeStringPair(String ascii, String unicode) {
|
||||
this.ascii = ascii;
|
||||
this.unicode = unicode;
|
||||
}
|
||||
|
||||
private String getAscii() {
|
||||
return ascii;
|
||||
}
|
||||
|
||||
private String getUnicode() {
|
||||
return unicode;
|
||||
}
|
||||
}
|
||||
|
||||
private void processDirStream(Entry dir, ModuleMap modules) throws IOException {
|
||||
DocumentNode dirDocumentNode = (DocumentNode)dir;
|
||||
DocumentInputStream dis = new DocumentInputStream(dirDocumentNode);
|
||||
DIR_STATE dirState = DIR_STATE.INFORMATION_RECORD;
|
||||
try {
|
||||
RLEDecompressingInputStream in = new RLEDecompressingInputStream(dis);
|
||||
String streamName = null;
|
||||
int recordId = 0;
|
||||
boolean inReferenceTwiddled = false;
|
||||
try {
|
||||
while (true) {
|
||||
recordId = in.readShort();
|
||||
if (recordId == -1) {
|
||||
break;
|
||||
}
|
||||
RecordType type = RecordType.lookup(recordId);
|
||||
|
||||
if (type.equals(RecordType.EOF) || type.equals(RecordType.DIR_STREAM_TERMINATOR)) {
|
||||
break;
|
||||
}
|
||||
switch (type) {
|
||||
case PROJECT_VERSION:
|
||||
trySkip(in, RecordType.PROJECT_VERSION.getConstantLength());
|
||||
break;
|
||||
case PROJECT_CODEPAGE:
|
||||
in.readInt();//record size must == 4
|
||||
int codepage = in.readShort();
|
||||
modules.charset = Charset.forName(CodePageUtil.codepageToEncoding(codepage, true));
|
||||
break;
|
||||
case MODULE_STREAM_NAME:
|
||||
ASCIIUnicodeStringPair pair = readStringPair(in, modules.charset, STREAMNAME_RESERVED);
|
||||
streamName = pair.getAscii();
|
||||
break;
|
||||
case PROJECT_DOC_STRING:
|
||||
readStringPair(in, modules.charset, DOC_STRING_RESERVED);
|
||||
break;
|
||||
case PROJECT_HELP_FILE_PATH:
|
||||
readStringPair(in, modules.charset, HELP_FILE_PATH_RESERVED);
|
||||
break;
|
||||
case PROJECT_CONSTANTS:
|
||||
readStringPair(in, modules.charset, PROJECT_CONSTANTS_RESERVED);
|
||||
break;
|
||||
case REFERENCE_NAME:
|
||||
if (dirState.equals(DIR_STATE.INFORMATION_RECORD)) {
|
||||
dirState = DIR_STATE.REFERENCES_RECORD;
|
||||
}
|
||||
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
|
||||
break;
|
||||
case MODULE_DOC_STRING :
|
||||
int modDocStringLength = in.readInt();
|
||||
readString(in, modDocStringLength, modules.charset);
|
||||
int modDocStringReserved = in.readShort();
|
||||
if (modDocStringReserved != MODULE_DOCSTRING_RESERVED) {
|
||||
throw new IOException("Expected x003C after stream name before Unicode stream name, but found: " +
|
||||
Integer.toHexString(modDocStringReserved));
|
||||
}
|
||||
int unicodeModDocStringLength = in.readInt();
|
||||
readUnicodeString(in, unicodeModDocStringLength);
|
||||
// do something with this at some point
|
||||
break;
|
||||
case MODULE_OFFSET:
|
||||
int modOffsetSz = in.readInt();
|
||||
//should be 4
|
||||
readModuleMetadataFromDirStream(in, streamName, modules);
|
||||
break;
|
||||
case PROJECT_MODULES:
|
||||
dirState = DIR_STATE.MODULES_RECORD;
|
||||
in.readInt();//size must == 2
|
||||
in.readShort();//number of modules
|
||||
break;
|
||||
case REFERENCE_CONTROL_A:
|
||||
int szTwiddled = in.readInt();
|
||||
trySkip(in, szTwiddled);
|
||||
int nextRecord = in.readShort();
|
||||
//reference name is optional!
|
||||
if (nextRecord == RecordType.REFERENCE_NAME.id) {
|
||||
readStringPair(in, modules.charset, REFERENCE_NAME_RESERVED);
|
||||
nextRecord = in.readShort();
|
||||
}
|
||||
if (nextRecord != 0x30) {
|
||||
throw new IOException("Expected 0x30 as Reserved3 in a ReferenceControl record");
|
||||
}
|
||||
int szExtended = in.readInt();
|
||||
trySkip(in, szExtended);
|
||||
break;
|
||||
case MODULE_TERMINATOR:
|
||||
int endOfModulesReserved = in.readInt();
|
||||
//must be 0;
|
||||
break;
|
||||
default:
|
||||
if (type.getConstantLength() > -1) {
|
||||
trySkip(in, type.getConstantLength());
|
||||
} else {
|
||||
int recordLength = in.readInt();
|
||||
trySkip(in, recordLength);
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
} catch (final IOException e) {
|
||||
throw new IOException(
|
||||
"Error occurred while reading macros at section id "
|
||||
+ recordId + " (" + HexDump.shortToHex(recordId) + ")", e);
|
||||
} finally {
|
||||
in.close();
|
||||
}
|
||||
} finally {
|
||||
dis.close();
|
||||
}
|
||||
}
|
||||
|
||||
private ASCIIUnicodeStringPair readStringPair(RLEDecompressingInputStream in, Charset charset, int reservedByte) throws IOException {
|
||||
int nameLength = in.readInt();
|
||||
String ascii = readString(in, nameLength, charset);
|
||||
int reserved = in.readShort();
|
||||
if (reserved != reservedByte) {
|
||||
throw new IOException("Expected "+Integer.toHexString(reservedByte)+ "after name before Unicode name, but found: " +
|
||||
Integer.toHexString(reserved));
|
||||
}
|
||||
int unicodeNameRecordLength = in.readInt();
|
||||
String unicode = readUnicodeString(in, unicodeNameRecordLength);
|
||||
return new ASCIIUnicodeStringPair(ascii, unicode);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Read <tt>length</tt> bytes of MBCS (multi-byte character set) characters from the stream
|
||||
*
|
||||
* @param stream the inputstream to read from
|
||||
* @param length number of bytes to read from stream
|
||||
* @param charset the character set encoding of the bytes in the stream
|
||||
* @return a java String in the supplied character set
|
||||
* @throws IOException If reading from the stream fails
|
||||
*/
|
||||
private static String readString(InputStream stream, int length, Charset charset) throws IOException {
|
||||
byte[] buffer = IOUtils.safelyAllocate(length, 20000);
|
||||
int bytesRead = IOUtils.readFully(stream, buffer);
|
||||
if (bytesRead != length) {
|
||||
throw new IOException("Tried to read: "+length +
|
||||
", but could only read: "+bytesRead);
|
||||
}
|
||||
return new String(buffer, 0, length, charset);
|
||||
}
|
||||
|
||||
private String readUnicodeString(RLEDecompressingInputStream in, int unicodeNameRecordLength) throws IOException {
|
||||
byte[] buffer = new byte[unicodeNameRecordLength];
|
||||
IOUtils.readFully(in, buffer);
|
||||
return new String(buffer, UTF_16LE);
|
||||
byte[] buffer = IOUtils.safelyAllocate(unicodeNameRecordLength, 20000);
|
||||
int bytesRead = IOUtils.readFully(in, buffer);
|
||||
if (bytesRead != unicodeNameRecordLength) {
|
||||
|
||||
}
|
||||
return new String(buffer, StringUtil.UTF16LE);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sometimes the offset record in the dirstream is incorrect, but the macro can still be found.
|
||||
* This will try to find the the first RLEDecompressing stream that starts with "Attribute".
|
||||
* This relies on some, er, heuristics, admittedly.
|
||||
*
|
||||
* @param is full module inputstream to read
|
||||
* @return uncompressed bytes if found, <code>null</code> otherwise
|
||||
* @throws IOException for a true IOException copying the is to a byte array
|
||||
*/
|
||||
private static byte[] findCompressedStreamWBruteForce(InputStream is) throws IOException {
|
||||
//buffer to memory for multiple tries
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
IOUtils.copy(is, bos);
|
||||
byte[] compressed = bos.toByteArray();
|
||||
byte[] decompressed = null;
|
||||
for (int i = 0; i < compressed.length; i++) {
|
||||
if (compressed[i] == 0x01 && i < compressed.length-1) {
|
||||
int w = LittleEndian.getUShort(compressed, i+1);
|
||||
if (w <= 0 || (w & 0x7000) != 0x3000) {
|
||||
continue;
|
||||
}
|
||||
decompressed = tryToDecompress(new ByteArrayInputStream(compressed, i, compressed.length - i));
|
||||
if (decompressed != null) {
|
||||
if (decompressed.length > 9) {
|
||||
//this is a complete hack. The challenge is that there
|
||||
//can be many 0 length or junk streams that are uncompressed
|
||||
//look in the first 20 characters for "Attribute"
|
||||
int firstX = Math.min(20, decompressed.length);
|
||||
String start = new String(decompressed, 0, firstX, StringUtil.WIN_1252);
|
||||
if (start.contains("Attribute")) {
|
||||
return decompressed;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return decompressed;
|
||||
}
|
||||
|
||||
private static byte[] tryToDecompress(InputStream is) {
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
try {
|
||||
IOUtils.copy(new RLEDecompressingInputStream(is), bos);
|
||||
} catch (IllegalArgumentException e){
|
||||
return null;
|
||||
} catch (IllegalStateException e) {
|
||||
return null;
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
}
|
||||
return bos.toByteArray();
|
||||
}
|
||||
}
|
||||
|
@ -156,7 +156,7 @@ public class RLEDecompressingInputStream extends InputStream {
|
||||
private int readChunk() throws IOException {
|
||||
pos = 0;
|
||||
int w = readShort(in);
|
||||
if (w == -1) {
|
||||
if (w == -1 || w == 0) {
|
||||
return -1;
|
||||
}
|
||||
int chunkSize = (w & 0x0FFF) + 1; // plus 3 bytes minus 2 for the length
|
||||
|
@ -33,6 +33,7 @@ import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.apache.poi.POITestCase.assertContains;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertFalse;
|
||||
import static org.junit.Assert.assertNotNull;
|
||||
|
||||
@ -251,6 +252,7 @@ public class TestVBAMacroReader {
|
||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("59830.xls");
|
||||
VBAMacroReader r = new VBAMacroReader(f);
|
||||
Map<String, String> macros = r.readMacros();
|
||||
assertEquals(29, macros.size());
|
||||
assertNotNull(macros.get("Module20"));
|
||||
assertContains(macros.get("Module20"), "here start of superscripting");
|
||||
r.close();
|
||||
@ -261,6 +263,7 @@ public class TestVBAMacroReader {
|
||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("59858.xls");
|
||||
VBAMacroReader r = new VBAMacroReader(f);
|
||||
Map<String, String> macros = r.readMacros();
|
||||
assertEquals(11, macros.size());
|
||||
assertNotNull(macros.get("Sheet4"));
|
||||
assertContains(macros.get("Sheet4"), "intentional constituent");
|
||||
r.close();
|
||||
@ -271,6 +274,7 @@ public class TestVBAMacroReader {
|
||||
File f = POIDataSamples.getDocumentInstance().getFile("60158.docm");
|
||||
VBAMacroReader r = new VBAMacroReader(f);
|
||||
Map<String, String> macros = r.readMacros();
|
||||
assertEquals(2, macros.size());
|
||||
assertNotNull(macros.get("NewMacros"));
|
||||
assertContains(macros.get("NewMacros"), "' dirty");
|
||||
r.close();
|
||||
@ -282,8 +286,24 @@ public class TestVBAMacroReader {
|
||||
File f = POIDataSamples.getSpreadSheetInstance().getFile("60273.xls");
|
||||
VBAMacroReader r = new VBAMacroReader(f);
|
||||
Map<String, String> macros = r.readMacros();
|
||||
assertEquals(2, macros.size());
|
||||
assertNotNull(macros.get("Module1"));
|
||||
assertContains(macros.get("Module1"), "9/8/2004");
|
||||
r.close();
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void bug60279() throws IOException {
|
||||
File f = POIDataSamples.getDocumentInstance().getFile("60279.doc");
|
||||
VBAMacroReader r = new VBAMacroReader(f);
|
||||
Map<String, String> macros = r.readMacros();
|
||||
assertEquals(1, macros.size());
|
||||
String content = macros.get("ThisDocument");
|
||||
assertContains(content, "Attribute VB_Base = \"1Normal.ThisDocument\"");
|
||||
assertContains(content, "Attribute VB_Customizable = True");
|
||||
r.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
BIN
test-data/document/60279.doc
Normal file
BIN
test-data/document/60279.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user