#61162 - En-/decryption support for HWPF

Adds decryption for Binary RC4 and CryptoAPI encrypted documents (XOR obfuscation is not supported yet)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1797837 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2017-06-06 22:21:11 +00:00
parent 70db823b18
commit 2b4f944883
9 changed files with 378 additions and 200 deletions

View File

@ -195,7 +195,7 @@ public abstract class POIDocument implements Closeable {
NPOIFSFileSystem encPoifs = null; NPOIFSFileSystem encPoifs = null;
String step = "getting"; String step = "getting";
try { try {
if (encryptionInfo != null) { if (encryptionInfo != null && encryptionInfo.isDocPropsEncrypted()) {
step = "getting encrypted"; step = "getting encrypted";
String encryptedStream = null; String encryptedStream = null;
for (String s : encryptedStreamNames) { for (String s : encryptedStreamNames) {

View File

@ -32,7 +32,11 @@ public final class Biff8EncryptionKey {
* @param password pass <code>null</code> to clear user password (and use default) * @param password pass <code>null</code> to clear user password (and use default)
*/ */
public static void setCurrentUserPassword(String password) { public static void setCurrentUserPassword(String password) {
_userPasswordTLS.set(password); if (password == null) {
_userPasswordTLS.remove();
} else {
_userPasswordTLS.set(password);
}
} }
/** /**

View File

@ -122,8 +122,11 @@ public class EncryptionInfo implements Cloneable {
} else if ( } else if (
2 <= versionMajor && versionMajor <= 4 2 <= versionMajor && versionMajor <= 4
&& versionMinor == 2) { && versionMinor == 2) {
encryptionMode = (preferredEncryptionMode == cryptoAPI) ? cryptoAPI : standard;
encryptionFlags = dis.readInt(); encryptionFlags = dis.readInt();
encryptionMode = (
preferredEncryptionMode == cryptoAPI
|| !flagAES.isSet(encryptionFlags))
? cryptoAPI : standard;
} else if ( } else if (
versionMajor == agile.versionMajor versionMajor == agile.versionMajor
&& versionMinor == agile.versionMinor){ && versionMinor == agile.versionMinor){
@ -268,6 +271,14 @@ public class EncryptionInfo implements Cloneable {
return encryptionMode; return encryptionMode;
} }
/**
 * Tells where the property sets of an encrypted document are kept.
 *
 * @return {@code true} if Summary / Document Summary are encrypted and stored in the
 *         {@code EncryptedStream} stream; {@code false} if the summaries aren't encrypted
 *         and are located in their usual streams
 */
public boolean isDocPropsEncrypted() {
    final boolean docPropsFlagSet = flagDocProps.isSet(getEncryptionFlags());
    return !docPropsFlagSet;
}
@Override @Override
public EncryptionInfo clone() throws CloneNotSupportedException { public EncryptionInfo clone() throws CloneNotSupportedException {
EncryptionInfo other = (EncryptionInfo)super.clone(); EncryptionInfo other = (EncryptionInfo)super.clone();

View File

@ -51,9 +51,9 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
super(stream, size, chunkSize); super(stream, size, chunkSize);
} }
public BinaryRC4CipherInputStream(InputStream stream) public BinaryRC4CipherInputStream(InputStream stream, int size, int initialPos)
throws GeneralSecurityException { throws GeneralSecurityException {
super(stream, Integer.MAX_VALUE, chunkSize); super(stream, size, chunkSize, initialPos);
} }
} }
@ -141,7 +141,7 @@ public class BinaryRC4Decryptor extends Decryptor implements Cloneable {
@Override @Override
public InputStream getDataStream(InputStream stream, int size, int initialPos) public InputStream getDataStream(InputStream stream, int size, int initialPos)
throws IOException, GeneralSecurityException { throws IOException, GeneralSecurityException {
return new BinaryRC4CipherInputStream(stream); return new BinaryRC4CipherInputStream(stream, size, initialPos);
} }

View File

@ -18,6 +18,7 @@
package org.apache.poi.hwpf; package org.apache.poi.hwpf;
import java.io.ByteArrayInputStream; import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -25,9 +26,29 @@ import java.io.OutputStream;
import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.model.*; import org.apache.poi.hwpf.model.BookmarksTables;
import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.ComplexFileTable;
import org.apache.poi.hwpf.model.DocumentProperties;
import org.apache.poi.hwpf.model.EscherRecordHolder;
import org.apache.poi.hwpf.model.FSPADocumentPart;
import org.apache.poi.hwpf.model.FSPATable;
import org.apache.poi.hwpf.model.FieldsTables;
import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.model.NoteType;
import org.apache.poi.hwpf.model.NotesTables;
import org.apache.poi.hwpf.model.PAPBinTable;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.RevisionMarkAuthorTable;
import org.apache.poi.hwpf.model.SavedByTable;
import org.apache.poi.hwpf.model.SectionTable;
import org.apache.poi.hwpf.model.SinglentonTextPiece;
import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.model.SubdocumentType;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.usermodel.Bookmarks; import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.BookmarksImpl; import org.apache.poi.hwpf.usermodel.BookmarksImpl;
import org.apache.poi.hwpf.usermodel.Field; import org.apache.poi.hwpf.usermodel.Field;
@ -40,13 +61,12 @@ import org.apache.poi.hwpf.usermodel.OfficeDrawings;
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl; import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.EntryUtils; import org.apache.poi.poifs.filesystem.EntryUtils;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
/** /**
@ -59,8 +79,6 @@ public final class HWPFDocument extends HWPFDocumentCore {
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable"; private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
private static final String STREAM_DATA = "Data"; private static final String STREAM_DATA = "Data";
private static final String STREAM_TABLE_0 = "0Table";
private static final String STREAM_TABLE_1 = "1Table";
/** table stream buffer*/ /** table stream buffer*/
protected byte[] _tableStream; protected byte[] _tableStream;
@ -178,11 +196,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
} }
// use the fib to determine the name of the table stream. // use the fib to determine the name of the table stream.
String name = STREAM_TABLE_0; String name = (_fib.getFibBase().isFWhichTblStm()) ? STREAM_TABLE_1 : STREAM_TABLE_0;
if (_fib.getFibBase().isFWhichTblStm())
{
name = STREAM_TABLE_1;
}
// Grab the table stream. // Grab the table stream.
if (!directory.hasEntry(name)) { if (!directory.hasEntry(name)) {
@ -190,25 +204,12 @@ public final class HWPFDocument extends HWPFDocumentCore {
} }
// read in the table stream. // read in the table stream.
InputStream is = directory.createDocumentInputStream(name); _tableStream = getDocumentEntryBytes(name, _fib.getFibBase().getLKey(), Integer.MAX_VALUE);
_tableStream = IOUtils.toByteArray(is);
is.close();
_fib.fillVariableFields(_mainStream, _tableStream); _fib.fillVariableFields(_mainStream, _tableStream);
// read in the data stream. // read in the data stream.
InputStream dis = null; _dataStream = directory.hasEntry(STREAM_DATA) ? getDocumentEntryBytes(STREAM_DATA, 0, Integer.MAX_VALUE) : new byte[0];
try {
DocumentEntry dataProps = (DocumentEntry)directory.getEntry(STREAM_DATA);
dis = directory.createDocumentInputStream(STREAM_DATA);
_dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
} catch(IOException e) {
_dataStream = new byte[0];
} finally {
if (dis != null) {
dis.close();
}
}
// Get the cp of the start of text in the main stream // Get the cp of the start of text in the main stream
// The latest spec doc says this is always zero! // The latest spec doc says this is always zero!
@ -233,8 +234,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/ */
boolean preserveBinTables = false; boolean preserveBinTables = false;
try { try {
preserveBinTables = Boolean.parseBoolean( System preserveBinTables = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
.getProperty( PROPERTY_PRESERVE_BIN_TABLES ) );
} catch ( Exception exc ) { } catch ( Exception exc ) {
// ignore; // ignore;
} }
@ -250,8 +250,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/ */
boolean preserveTextTable = false; boolean preserveTextTable = false;
try { try {
preserveTextTable = Boolean.parseBoolean( System preserveTextTable = Boolean.parseBoolean( System.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
.getProperty( PROPERTY_PRESERVE_TEXT_TABLE ) );
} catch ( Exception exc ) { } catch ( Exception exc ) {
// ignore; // ignore;
} }
@ -612,8 +611,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException { private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
// initialize our streams for writing. // initialize our streams for writing.
HWPFFileSystem docSys = new HWPFFileSystem(); HWPFFileSystem docSys = new HWPFFileSystem();
HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT); ByteArrayOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1); ByteArrayOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
//HWPFOutputStream dataStream = docSys.getStream("Data"); //HWPFOutputStream dataStream = docSys.getStream("Data");
int tableOffset = 0; int tableOffset = 0;
@ -630,13 +629,13 @@ public final class HWPFDocument extends HWPFDocumentCore {
// it after we write everything else. // it after we write everything else.
byte[] placeHolder = new byte[fibSize]; byte[] placeHolder = new byte[fibSize];
wordDocumentStream.write(placeHolder); wordDocumentStream.write(placeHolder);
int mainOffset = wordDocumentStream.getOffset(); int mainOffset = wordDocumentStream.size();
// write out the StyleSheet. // write out the StyleSheet.
_fib.setFcStshf(tableOffset); _fib.setFcStshf(tableOffset);
_ss.writeTo(tableStream); _ss.writeTo(tableStream);
_fib.setLcbStshf(tableStream.getOffset() - tableOffset); _fib.setLcbStshf(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
// get fcMin and fcMac because we will be writing the actual text with the // get fcMin and fcMac because we will be writing the actual text with the
// complex table. // complex table.
@ -654,9 +653,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the Complex table, includes text. // write out the Complex table, includes text.
_fib.setFcClx(tableOffset); _fib.setFcClx(tableOffset);
_cft.writeTo(wordDocumentStream, tableStream); _cft.writeTo(wordDocumentStream, tableStream);
_fib.setLcbClx(tableStream.getOffset() - tableOffset); _fib.setLcbClx(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
int fcMac = wordDocumentStream.getOffset(); int fcMac = wordDocumentStream.size();
/* /*
* dop (document properties record) Written immediately after the end of * dop (document properties record) Written immediately after the end of
@ -670,8 +669,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the DocumentProperties. // write out the DocumentProperties.
_fib.setFcDop(tableOffset); _fib.setFcDop(tableOffset);
_dop.writeTo(tableStream); _dop.writeTo(tableStream);
_fib.setLcbDop(tableStream.getOffset() - tableOffset); _fib.setLcbDop(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plcfBkmkf (table recording beginning CPs of bookmarks) Written * plcfBkmkf (table recording beginning CPs of bookmarks) Written
@ -683,7 +682,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null ) if ( _bookmarksTables != null )
{ {
_bookmarksTables.writePlcfBkmkf( _fib, tableStream ); _bookmarksTables.writePlcfBkmkf( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
/* /*
@ -696,7 +695,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null ) if ( _bookmarksTables != null )
{ {
_bookmarksTables.writePlcfBkmkl( _fib, tableStream ); _bookmarksTables.writePlcfBkmkl( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
/* /*
@ -710,8 +709,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the CHPBinTable. // write out the CHPBinTable.
_fib.setFcPlcfbteChpx(tableOffset); _fib.setFcPlcfbteChpx(tableOffset);
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable()); _cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset); _fib.setLcbPlcfbteChpx(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the * plcfbtePapx (bin table for PAP FKPs) Written immediately after the
@ -724,8 +723,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the PAPBinTable. // write out the PAPBinTable.
_fib.setFcPlcfbtePapx(tableOffset); _fib.setFcPlcfbtePapx(tableOffset);
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable()); _pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset); _fib.setLcbPlcfbtePapx(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plcfendRef (endnote reference position table) Written immediately * plcfendRef (endnote reference position table) Written immediately
@ -739,7 +738,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/ */
_endnotesTables.writeRef( _fib, tableStream ); _endnotesTables.writeRef( _fib, tableStream );
_endnotesTables.writeTxt( _fib, tableStream ); _endnotesTables.writeTxt( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plcffld*** (table of field positions and statuses for annotation * plcffld*** (table of field positions and statuses for annotation
@ -753,7 +752,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _fieldsTables != null ) if ( _fieldsTables != null )
{ {
_fieldsTables.write( _fib, tableStream ); _fieldsTables.write( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
/* /*
@ -768,7 +767,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
*/ */
_footnotesTables.writeRef( _fib, tableStream ); _footnotesTables.writeRef( _fib, tableStream );
_footnotesTables.writeTxt( _fib, tableStream ); _footnotesTables.writeTxt( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plcfsed (section table) Written immediately after the previously * plcfsed (section table) Written immediately after the previously
@ -781,8 +780,8 @@ public final class HWPFDocument extends HWPFDocumentCore {
// write out the SectionTable. // write out the SectionTable.
_fib.setFcPlcfsed(tableOffset); _fib.setFcPlcfsed(tableOffset);
_st.writeTo(wordDocumentStream, tableStream); _st.writeTo(wordDocumentStream, tableStream);
_fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset); _fib.setLcbPlcfsed(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
// write out the list tables // write out the list tables
if ( _lt != null ) if ( _lt != null )
@ -800,7 +799,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 25 of 210 * Specification; Page 25 of 210
*/ */
_lt.writeListDataTo( _fib, tableStream ); _lt.writeListDataTo( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
/* /*
* plflfo (more list formats) Written immediately after the end of * plflfo (more list formats) Written immediately after the end of
@ -814,7 +813,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
* Specification; Page 26 of 210 * Specification; Page 26 of 210
*/ */
_lt.writeListOverridesTo( _fib, tableStream ); _lt.writeListOverridesTo( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
/* /*
@ -827,7 +826,7 @@ public final class HWPFDocument extends HWPFDocumentCore {
if ( _bookmarksTables != null ) if ( _bookmarksTables != null )
{ {
_bookmarksTables.writeSttbfBkmk( _fib, tableStream ); _bookmarksTables.writeSttbfBkmk( _fib, tableStream );
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
/* /*
@ -843,9 +842,9 @@ public final class HWPFDocument extends HWPFDocumentCore {
{ {
_fib.setFcSttbSavedBy(tableOffset); _fib.setFcSttbSavedBy(tableOffset);
_sbt.writeTo(tableStream); _sbt.writeTo(tableStream);
_fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset); _fib.setLcbSttbSavedBy(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
// write out the revision mark authors table. // write out the revision mark authors table.
@ -853,21 +852,21 @@ public final class HWPFDocument extends HWPFDocumentCore {
{ {
_fib.setFcSttbfRMark(tableOffset); _fib.setFcSttbfRMark(tableOffset);
_rmat.writeTo(tableStream); _rmat.writeTo(tableStream);
_fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset); _fib.setLcbSttbfRMark(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
} }
// write out the FontTable. // write out the FontTable.
_fib.setFcSttbfffn(tableOffset); _fib.setFcSttbfffn(tableOffset);
_ft.writeTo(tableStream); _ft.writeTo(tableStream);
_fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset); _fib.setLcbSttbfffn(tableStream.size() - tableOffset);
tableOffset = tableStream.getOffset(); tableOffset = tableStream.size();
// set some variables in the FileInformationBlock. // set some variables in the FileInformationBlock.
_fib.getFibBase().setFcMin(fcMin); _fib.getFibBase().setFcMin(fcMin);
_fib.getFibBase().setFcMac(fcMac); _fib.getFibBase().setFcMac(fcMac);
_fib.setCbMac(wordDocumentStream.getOffset()); _fib.setCbMac(wordDocumentStream.size());
// make sure that the table, doc and data streams use big blocks. // make sure that the table, doc and data streams use big blocks.
byte[] mainBuf = wordDocumentStream.toByteArray(); byte[] mainBuf = wordDocumentStream.toByteArray();

View File

@ -17,13 +17,19 @@
package org.apache.poi.hwpf; package org.apache.poi.hwpf;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.PushbackInputStream; import java.io.PushbackInputStream;
import java.security.GeneralSecurityException;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.POIDocument; import org.apache.poi.POIDocument;
import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.model.CHPBinTable; import org.apache.poi.hwpf.model.CHPBinTable;
import org.apache.poi.hwpf.model.FibBase;
import org.apache.poi.hwpf.model.FileInformationBlock; import org.apache.poi.hwpf.model.FileInformationBlock;
import org.apache.poi.hwpf.model.FontTable; import org.apache.poi.hwpf.model.FontTable;
import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.model.ListTables;
@ -34,145 +40,242 @@ import org.apache.poi.hwpf.model.TextPieceTable;
import org.apache.poi.hwpf.usermodel.ObjectPoolImpl; import org.apache.poi.hwpf.usermodel.ObjectPoolImpl;
import org.apache.poi.hwpf.usermodel.ObjectsPool; import org.apache.poi.hwpf.usermodel.ObjectsPool;
import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.crypt.ChunkedCipherInputStream;
import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.crypt.EncryptionInfo;
import org.apache.poi.poifs.crypt.EncryptionMode;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.BoundedInputStream;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.Internal; import org.apache.poi.util.Internal;
import org.apache.poi.util.LittleEndianByteArrayInputStream;
/** /**
* This class holds much of the core of a Word document, but * This class holds much of the core of a Word document, but
* without some of the table structure information. * without some of the table structure information.
* You generally want to work with one of * You generally want to work with one of
* {@link HWPFDocument} or {@link HWPFOldDocument} * {@link HWPFDocument} or {@link HWPFOldDocument}
*/ */
public abstract class HWPFDocumentCore extends POIDocument public abstract class HWPFDocumentCore extends POIDocument {
{
protected static final String STREAM_OBJECT_POOL = "ObjectPool"; protected static final String STREAM_OBJECT_POOL = "ObjectPool";
protected static final String STREAM_WORD_DOCUMENT = "WordDocument"; protected static final String STREAM_WORD_DOCUMENT = "WordDocument";
protected static final String STREAM_TABLE_0 = "0Table";
protected static final String STREAM_TABLE_1 = "1Table";
/** Holds OLE2 objects */ private static final int FIB_BASE_LEN = 68;
protected ObjectPoolImpl _objectPool;
/** The FIB */ /** Holds OLE2 objects */
protected FileInformationBlock _fib; protected ObjectPoolImpl _objectPool;
/** Holds styles for this document.*/ /** The FIB */
protected StyleSheet _ss; protected FileInformationBlock _fib;
/** Contains formatting properties for text*/ /** Holds styles for this document.*/
protected CHPBinTable _cbt; protected StyleSheet _ss;
/** Contains formatting properties for paragraphs*/ /** Contains formatting properties for text*/
protected PAPBinTable _pbt; protected CHPBinTable _cbt;
/** Contains formatting properties for sections.*/ /** Contains formatting properties for paragraphs*/
protected SectionTable _st; protected PAPBinTable _pbt;
/** Holds fonts for this document.*/ /** Contains formatting properties for sections.*/
protected FontTable _ft; protected SectionTable _st;
/** Hold list tables */ /** Holds fonts for this document.*/
protected ListTables _lt; protected FontTable _ft;
/** main document stream buffer*/ /** Hold list tables */
protected byte[] _mainStream; protected ListTables _lt;
protected HWPFDocumentCore() /** main document stream buffer*/
{ protected byte[] _mainStream;
super((DirectoryNode)null);
}
/** private EncryptionInfo _encryptionInfo;
* Takes an InputStream, verifies that it's not RTF or PDF, builds a
* POIFSFileSystem from it, and returns that.
*/
public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes
PushbackInputStream pis = new PushbackInputStream(istream,6);
byte[] first6 = IOUtils.toByteArray(pis, 6);
// Does it start with {\rtf ? If so, it's really RTF protected HWPFDocumentCore() {
if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r' super((DirectoryNode)null);
&& first6[3] == 't' && first6[4] == 'f') { }
throw new IllegalArgumentException("The document is really a RTF file");
} else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
throw new IllegalArgumentException("The document is really a PDF file");
}
// OK, so it's neither RTF nor PDF /**
// Open a POIFSFileSystem on the (pushed back) stream * Takes an InputStream, verifies that it's not RTF or PDF, builds a
pis.unread(first6); * POIFSFileSystem from it, and returns that.
return new POIFSFileSystem(pis); */
} public static POIFSFileSystem verifyAndBuildPOIFS(InputStream istream) throws IOException {
// Open a PushbackInputStream, so we can peek at the first few bytes
PushbackInputStream pis = new PushbackInputStream(istream,6);
byte[] first6 = IOUtils.toByteArray(pis, 6);
/** // Does it start with {\rtf ? If so, it's really RTF
* This constructor loads a Word document from an InputStream. if(first6[0] == '{' && first6[1] == '\\' && first6[2] == 'r'
* && first6[3] == 't' && first6[4] == 'f') {
* @param istream The InputStream that contains the Word document. throw new IllegalArgumentException("The document is really a RTF file");
* @throws IOException If there is an unexpected IOException from the passed } else if(first6[0] == '%' && first6[1] == 'P' && first6[2] == 'D' && first6[3] == 'F' ) {
* in InputStream. throw new IllegalArgumentException("The document is really a PDF file");
*/ }
public HWPFDocumentCore(InputStream istream) throws IOException
{
//do Ole stuff
this( verifyAndBuildPOIFS(istream) );
}
/** // OK, so it's neither RTF nor PDF
* This constructor loads a Word document from a POIFSFileSystem // Open a POIFSFileSystem on the (pushed back) stream
* pis.unread(first6);
* @param pfilesystem The POIFSFileSystem that contains the Word document. return new POIFSFileSystem(pis);
* @throws IOException If there is an unexpected IOException from the passed }
* in POIFSFileSystem.
*/
public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException
{
this(pfilesystem.getRoot());
}
/** /**
* This constructor loads a Word document from a specific point * This constructor loads a Word document from an InputStream.
* in a POIFSFileSystem, probably not the default. *
* Used typically to open embeded documents. * @param istream The InputStream that contains the Word document.
* * @throws IOException If there is an unexpected IOException from the passed
* @param directory The DirectoryNode that contains the Word document. * in InputStream.
* @throws IOException If there is an unexpected IOException from the passed */
* in POIFSFileSystem. public HWPFDocumentCore(InputStream istream) throws IOException {
*/ //do Ole stuff
public HWPFDocumentCore(DirectoryNode directory) throws IOException { this( verifyAndBuildPOIFS(istream) );
// Sort out the hpsf properties }
super(directory);
// read in the main stream. /**
DocumentEntry documentProps = (DocumentEntry)directory.getEntry("WordDocument"); * This constructor loads a Word document from a POIFSFileSystem
DocumentInputStream dis = null; *
try { * @param pfilesystem The POIFSFileSystem that contains the Word document.
dis = directory.createDocumentInputStream(STREAM_WORD_DOCUMENT); * @throws IOException If there is an unexpected IOException from the passed
_mainStream = IOUtils.toByteArray(dis, documentProps.getSize()); * in POIFSFileSystem.
} finally { */
if (dis != null) { public HWPFDocumentCore(POIFSFileSystem pfilesystem) throws IOException {
dis.close(); this(pfilesystem.getRoot());
}
/**
 * Loads a Word document that lives at a specific directory of a
 * POIFSFileSystem, which need not be the root. This is typically
 * used to open embedded documents.
 *
 * @param directory The DirectoryNode that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in POIFSFileSystem.
 */
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
    // The superclass sorts out the hpsf properties
    super(directory);

    // Read in the main stream; the first FIB_BASE_LEN bytes are taken over without decryption
    _mainStream = getDocumentEntryBytes(STREAM_WORD_DOCUMENT, FIB_BASE_LEN, Integer.MAX_VALUE);
    _fib = new FileInformationBlock(_mainStream);

    // The object pool is optional - pass null when the directory has no such entry
    final DirectoryEntry poolEntry = directory.hasEntry(STREAM_OBJECT_POOL)
        ? (DirectoryEntry) directory.getEntry(STREAM_OBJECT_POOL)
        : null;
    _objectPool = new ObjectPoolImpl(poolEntry);
}
/**
 * Looks up a named property set, taking the encryption settings of
 * this document into account when there are any.
 *
 * @param setName The property to read
 * @return The value of the given property or null if it wasn't found.
 */
@Override
protected PropertySet getPropertySet(String setName) {
    final EncryptionInfo encInfo;
    try {
        encInfo = getEncryptionInfo();
    } catch (IOException e) {
        // the overridden signature doesn't allow checked exceptions
        throw new RuntimeException(e);
    }

    if (encInfo != null) {
        return super.getPropertySet(setName, encInfo);
    }
    return super.getPropertySet(setName);
}
/**
 * Determines the encryption settings of this document. The result is cached
 * once the password has been verified successfully.
 * <p>
 * The password is taken from {@link Biff8EncryptionKey#getCurrentUserPassword()};
 * if none is set, {@link Decryptor#DEFAULT_PASSWORD} is tried instead.
 *
 * @return the encryption info, or {@code null} if the FIB base doesn't mark the document as encrypted
 * @throws IOException if a needed stream can't be read or the password verification
 *         fails with a {@link GeneralSecurityException}
 * @throws EncryptedDocumentException if the password doesn't match
 */
protected EncryptionInfo getEncryptionInfo() throws IOException {
    if (_encryptionInfo != null) {
        return _encryptionInfo;
    }

    // Create our FIB base, and check for the doc being encrypted.
    // If the main stream hasn't been loaded yet, read just the FIB base without decryption (-1)
    byte[] fibBaseBytes = (_mainStream != null) ? _mainStream : getDocumentEntryBytes(STREAM_WORD_DOCUMENT, -1, FIB_BASE_LEN);
    FibBase fibBase = new FibBase( fibBaseBytes, 0 );
    if (!fibBase.isFEncrypted()) {
        return null;
    }

    // The first lKey bytes of the table stream are read undecrypted and parsed as the encryption header
    String tableStrmName = fibBase.isFWhichTblStm() ? STREAM_TABLE_1 : STREAM_TABLE_0;
    byte[] tableStream = getDocumentEntryBytes(tableStrmName, -1, fibBase.getLKey());
    LittleEndianByteArrayInputStream leis = new LittleEndianByteArrayInputStream(tableStream);
    // Only XOR obfuscation is requested explicitly, otherwise the mode is derived from the header
    EncryptionMode em = fibBase.isFObfuscated() ? EncryptionMode.xor : null;
    EncryptionInfo ei = new EncryptionInfo(leis, em);
    Decryptor dec = ei.getDecryptor();
    // NOTE(review): 512 is presumably the cipher block size used by Word streams - confirm against [MS-DOC]
    dec.setChunkSize(512);
    try {
        String pass = Biff8EncryptionKey.getCurrentUserPassword();
        if (pass == null) {
            pass = Decryptor.DEFAULT_PASSWORD;
        }
        if (!dec.verifyPassword(pass)) {
            throw new EncryptedDocumentException("document is encrypted, password is invalid - use Biff8EncryptionKey.setCurrentUserPassword() to set password before opening");
        }
    } catch (GeneralSecurityException e) {
        throw new IOException(e.getMessage(), e);
    }

    // Cache only after the password has been accepted
    _encryptionInfo = ei;
    return ei;
}
/**
 * Reads OLE Stream into byte array - if an {@link EncryptionInfo} is available,
 * decrypt the bytes starting at encryptionOffset. If encryptionOffset = -1, then do not try
 * to decrypt the bytes
 *
 * @param name the name of the stream
 * @param encryptionOffset the offset from which to start decrypting, use {@code -1} for no decryption
 * @param len length of the bytes to be read, use {@link Integer#MAX_VALUE} for all bytes
 * @return the read bytes
 * @throws IOException if the stream can't be found or read, or wrapping a
 *         {@link GeneralSecurityException} raised while setting up decryption
 */
protected byte[] getDocumentEntryBytes(String name, int encryptionOffset, int len) throws IOException {
    DirectoryNode dir = getDirectory();
    DocumentEntry documentProps = (DocumentEntry)dir.getEntry(name);
    DocumentInputStream dis = dir.createDocumentInputStream(documentProps);
    InputStream is = dis;
    try {
        // The encryption info is resolved inside the try block, so the already opened
        // stream is closed if the lookup fails, e.g. because of an invalid password
        EncryptionInfo ei = (encryptionOffset > -1) ? getEncryptionInfo() : null;
        int streamSize = documentProps.getSize();
        ByteArrayOutputStream bos = new ByteArrayOutputStream(Math.min(streamSize,len));

        if (ei != null) {
            try {
                Decryptor dec = ei.getDecryptor();
                is = dec.getDataStream(dis, streamSize, 0);
                if (encryptionOffset > 0) {
                    // the leading encryptionOffset bytes are fetched via readPlain
                    // and copied to the result as they are
                    ChunkedCipherInputStream cis = (ChunkedCipherInputStream)is;
                    byte plain[] = new byte[encryptionOffset];
                    cis.readPlain(plain, 0, encryptionOffset);
                    bos.write(plain);
                }
            } catch (GeneralSecurityException e) {
                throw new IOException(e.getMessage(), e);
            }
        }

        // This simplifies a few combinations, so we actually always try to copy len bytes
        // regardless if encryptionOffset is greater than 0
        if (len < Integer.MAX_VALUE) {
            is = new BoundedInputStream(is, len);
        }
        IOUtils.copy(is, bos);
        return bos.toByteArray();
    } finally {
        // close the (possibly wrapping) stream as well as the raw document stream
        IOUtils.closeQuietly(is);
        IOUtils.closeQuietly(dis);
    }
}
} }
// Create our FIB, and check for the doc being encrypted
_fib = new FileInformationBlock(_mainStream);
DirectoryEntry objectPoolEntry; /**
try {
objectPoolEntry = (DirectoryEntry) directory
.getEntry(STREAM_OBJECT_POOL);
} catch (FileNotFoundException exc) {
objectPoolEntry = null;
}
_objectPool = new ObjectPoolImpl(objectPoolEntry);
}
/**
* Returns the range which covers the whole of the document, but excludes * Returns the range which covers the whole of the document, but excludes
* any headers and footers. * any headers and footers.
*/ */
@ -198,43 +301,35 @@ public abstract class HWPFDocumentCore extends POIDocument
@Internal @Internal
public abstract StringBuilder getText(); public abstract StringBuilder getText();
public CHPBinTable getCharacterTable() public CHPBinTable getCharacterTable() {
{ return _cbt;
return _cbt; }
}
public PAPBinTable getParagraphTable() public PAPBinTable getParagraphTable() {
{ return _pbt;
return _pbt; }
}
public SectionTable getSectionTable() public SectionTable getSectionTable() {
{ return _st;
return _st; }
}
public StyleSheet getStyleSheet() public StyleSheet getStyleSheet() {
{ return _ss;
return _ss; }
}
public ListTables getListTables() public ListTables getListTables() {
{ return _lt;
return _lt; }
}
public FontTable getFontTable() public FontTable getFontTable() {
{ return _ft;
return _ft; }
}
public FileInformationBlock getFileInformationBlock() public FileInformationBlock getFileInformationBlock() {
{ return _fib;
return _fib; }
}
public ObjectsPool getObjectsPool() public ObjectsPool getObjectsPool() {
{
return _objectPool; return _objectPool;
} }
@ -244,4 +339,4 @@ public abstract class HWPFDocumentCore extends POIDocument
public byte[] getMainStream() { public byte[] getMainStream() {
return _mainStream; return _mainStream;
} }
} }

View File

@ -0,0 +1,69 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.junit.AfterClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameter;
import org.junit.runners.Parameterized.Parameters;
/**
 * Parameterized test which opens password protected Word sample files and
 * compares the extracted text with the expected content.
 * <p>
 * The password is handed over via
 * {@link Biff8EncryptionKey#setCurrentUserPassword(String)} and is reset
 * once all parameter sets have been processed.
 */
@RunWith(Parameterized.class)
public class HWPFTestEncryption {
    /** Resets the user password, so it isn't picked up by tests running afterwards. */
    @AfterClass
    public static void clearPass() {
        Biff8EncryptionKey.setCurrentUserPassword(null);
    }

    /** name of the sample file to open */
    @Parameter(value = 0)
    public String file;

    /** password set before the sample file is opened */
    @Parameter(value = 1)
    public String password;

    /** expected (trimmed) text of the document */
    @Parameter(value = 2)
    public String expected;

    /**
     * @return the parameter sets, each consisting of file name, password and expected text
     */
    @Parameters(name="{0}")
    public static Collection<String[]> data() {
        return Arrays.asList(
            new String[]{ "password_tika_binaryrc4.doc", "tika", "This is an encrypted Word 2007 File." },
            new String[]{ "password_password_cryptoapi.doc", "password", "This is a test" }
        );
    }

    /**
     * Opens the sample file with the given password and verifies the extracted text.
     * The extractor and the document are closed even if the extraction or the
     * assertion fails.
     */
    @Test
    public void extract() throws IOException {
        Biff8EncryptionKey.setCurrentUserPassword(password);
        HWPFDocument docD = HWPFTestDataSamples.openSampleFile(file);
        try {
            WordExtractor we = new WordExtractor(docD);
            try {
                String actual = we.getText().trim();
                assertEquals(expected, actual);
            } finally {
                we.close();
            }
        } finally {
            docD.close();
        }
    }
}

Binary file not shown.

Binary file not shown.