1019 lines
35 KiB
Java
1019 lines
35 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.hwpf;
|
|
|
|
import java.io.ByteArrayInputStream;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
import java.io.OutputStream;
|
|
|
|
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
|
import org.apache.poi.hpsf.SummaryInformation;
|
|
import org.apache.poi.hwpf.model.*;
|
|
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
|
import org.apache.poi.hwpf.usermodel.Bookmarks;
|
|
import org.apache.poi.hwpf.usermodel.BookmarksImpl;
|
|
import org.apache.poi.hwpf.usermodel.Field;
|
|
import org.apache.poi.hwpf.usermodel.Fields;
|
|
import org.apache.poi.hwpf.usermodel.FieldsImpl;
|
|
import org.apache.poi.hwpf.usermodel.HWPFList;
|
|
import org.apache.poi.hwpf.usermodel.Notes;
|
|
import org.apache.poi.hwpf.usermodel.NotesImpl;
|
|
import org.apache.poi.hwpf.usermodel.OfficeDrawings;
|
|
import org.apache.poi.hwpf.usermodel.OfficeDrawingsImpl;
|
|
import org.apache.poi.hwpf.usermodel.Range;
|
|
import org.apache.poi.poifs.common.POIFSConstants;
|
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
|
import org.apache.poi.poifs.filesystem.DocumentEntry;
|
|
import org.apache.poi.poifs.filesystem.Entry;
|
|
import org.apache.poi.poifs.filesystem.EntryUtils;
|
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|
import org.apache.poi.util.IOUtils;
|
|
import org.apache.poi.util.Internal;
|
|
|
|
/**
|
|
*
|
|
* This class acts as the bucket that we throw all of the Word data structures
|
|
* into.
|
|
*/
|
|
public final class HWPFDocument extends HWPFDocumentCore {
|
|
/*package*/ static final String PROPERTY_PRESERVE_BIN_TABLES = "org.apache.poi.hwpf.preserveBinTables";
|
|
private static final String PROPERTY_PRESERVE_TEXT_TABLE = "org.apache.poi.hwpf.preserveTextTable";
|
|
|
|
private static final String STREAM_DATA = "Data";
|
|
private static final String STREAM_TABLE_0 = "0Table";
|
|
private static final String STREAM_TABLE_1 = "1Table";
|
|
|
|
/** table stream buffer*/
|
|
protected byte[] _tableStream;
|
|
|
|
/** data stream buffer*/
|
|
protected byte[] _dataStream;
|
|
|
|
/** Document wide Properties*/
|
|
protected DocumentProperties _dop;
|
|
|
|
/** Contains text of the document wrapped in an obfuscated Word data
|
|
* structure*/
|
|
protected ComplexFileTable _cft;
|
|
|
|
/** Contains text buffer linked directly to single-piece document text piece */
|
|
protected StringBuilder _text;
|
|
|
|
/** Holds the save history for this document. */
|
|
protected SavedByTable _sbt;
|
|
|
|
/** Holds the revision mark authors for this document. */
|
|
protected RevisionMarkAuthorTable _rmat;
|
|
|
|
/** Holds FSPA (shape) information for the header part of the document */
|
|
private FSPATable _fspaHeaders;
|
|
|
|
/** Holds FSPA (shape) information for the main part of the document */
|
|
private FSPATable _fspaMain;
|
|
|
|
/** Escher Drawing Group information */
|
|
protected EscherRecordHolder _escherRecordHolder;
|
|
|
|
/** Holds pictures table */
|
|
protected PicturesTable _pictures;
|
|
|
|
/** Holds Office Art objects */
|
|
protected OfficeDrawingsImpl _officeDrawingsHeaders;
|
|
|
|
/** Holds Office Art objects */
|
|
protected OfficeDrawingsImpl _officeDrawingsMain;
|
|
|
|
/** Holds the bookmarks tables */
|
|
protected BookmarksTables _bookmarksTables;
|
|
|
|
/** Holds the bookmarks */
|
|
protected Bookmarks _bookmarks;
|
|
|
|
/** Holds the ending notes tables */
|
|
protected NotesTables _endnotesTables = new NotesTables( NoteType.ENDNOTE );
|
|
|
|
/** Holds the endnotes */
|
|
protected Notes _endnotes = new NotesImpl( _endnotesTables );
|
|
|
|
/** Holds the footnotes tables */
|
|
protected NotesTables _footnotesTables = new NotesTables( NoteType.FOOTNOTE );
|
|
|
|
/** Holds the footnotes */
|
|
protected Notes _footnotes = new NotesImpl( _footnotesTables );
|
|
|
|
/** Holds the fields PLCFs */
|
|
protected FieldsTables _fieldsTables;
|
|
|
|
/** Holds the fields */
|
|
protected Fields _fields;
|
|
|
|
/**
 * Creates a document without loading anything from a file system.
 * The text buffer starts out holding a single carriage return.
 */
protected HWPFDocument() {
    super();
    _text = new StringBuilder("\r");
}
|
|
|
|
/**
 * Loads a Word document from an {@link InputStream}.
 *
 * <p>The stream is first turned into a POIFS file system by
 * {@code verifyAndBuildPOIFS}; loading then continues in the
 * {@link POIFSFileSystem} constructor.
 *
 * @param istream The InputStream that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in InputStream.
 */
public HWPFDocument(InputStream istream) throws IOException {
    // the OLE2 container handling happens inside verifyAndBuildPOIFS
    this(verifyAndBuildPOIFS(istream));
}
|
|
|
|
/**
 * Loads a Word document from the root directory of a POIFSFileSystem.
 *
 * @param pfilesystem The POIFSFileSystem that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in POIFSFileSystem.
 */
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException {
    this(pfilesystem.getRoot());
}
|
|
|
|
/**
 * Loads a Word document from a specific point in a POIFSFileSystem,
 * probably not the default. Used typically to open embedded documents.
 *
 * <p>The superclass constructor loads the main stream and the FIB (and
 * handles the HPSF bits). This constructor then reads the table stream
 * selected by the FIB and the optional data stream, and builds the
 * document structures from them.
 *
 * <p>Two system properties influence loading:
 * <ul>
 * <li>{@code org.apache.poi.hwpf.preserveBinTables} - when true, the
 * PAPX/CHPX structures are kept as read from the file instead of being
 * rebuilt, so text may be missing from output and text order may be
 * corrupted;</li>
 * <li>{@code org.apache.poi.hwpf.preserveTextTable} - when true, the text
 * piece table is kept as read instead of being replaced with a single
 * text piece; changing the text in this mode leads to unpredictable
 * behavior.</li>
 * </ul>
 *
 * @param directory The DirectoryNode that contains the Word document.
 * @throws IOException If there is an unexpected IOException from the passed
 *         in POIFSFileSystem.
 * @throws OldWordFileFormatException if the FIB reports an nFib below 106
 * @throws IllegalStateException if the table stream named by the FIB is
 *         not present in {@code directory}
 */
public HWPFDocument(DirectoryNode directory) throws IOException
{
    // Load the main stream and FIB
    // Also handles HPSF bits
    super(directory);

    // Is this document too old for us?
    if (_fib.getFibBase().getNFib() < 106) {
        throw new OldWordFileFormatException("The document is too old - Word 95 or older. Try HWPFOldDocument instead?");
    }

    // use the fib to determine the name of the table stream.
    String name = STREAM_TABLE_0;
    if (_fib.getFibBase().isFWhichTblStm()) {
        name = STREAM_TABLE_1;
    }

    // Grab the table stream.
    if (!directory.hasEntry(name)) {
        throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
    }

    // read in the table stream; close it even when reading fails
    InputStream is = directory.createDocumentInputStream(name);
    try {
        _tableStream = IOUtils.toByteArray(is);
    } finally {
        is.close();
    }

    _fib.fillVariableFields(_mainStream, _tableStream);

    // read in the data stream; any IOException while doing so
    // leaves the document with an empty data stream instead of failing
    InputStream dis = null;
    try {
        DocumentEntry dataProps = (DocumentEntry) directory.getEntry(STREAM_DATA);
        dis = directory.createDocumentInputStream(STREAM_DATA);
        _dataStream = IOUtils.toByteArray(dis, dataProps.getSize());
    } catch (IOException e) {
        _dataStream = new byte[0];
    } finally {
        if (dis != null) {
            dis.close();
        }
    }

    // Get the cp of the start of text in the main stream
    // The latest spec doc says this is always zero!
    int fcMin = 0;

    // Start to load up our standard structures.
    _dop = new DocumentProperties(_tableStream, _fib.getFcDop(), _fib.getLcbDop());
    _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
    TextPieceTable textPieceTable = _cft.getTextPieceTable();

    // Now load the rest of the properties, which need to be adjusted
    // for where text really begins
    _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), textPieceTable);
    _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), textPieceTable);

    _text = textPieceTable.getText();

    // Unless asked to preserve the PAPX/CHPX structure from the file,
    // rebuild both bin tables against the loaded text.
    if (!isSystemPropertyEnabled(PROPERTY_PRESERVE_BIN_TABLES)) {
        _cbt.rebuild(_cft);
        _pbt.rebuild(_text, _cft);
    }

    // Unless text rebuilding is disabled, replace the loaded piece table
    // with a fresh one holding a single text piece over the whole text.
    if (!isSystemPropertyEnabled(PROPERTY_PRESERVE_TEXT_TABLE)) {
        _cft = new ComplexFileTable();
        textPieceTable = _cft.getTextPieceTable();
        final TextPiece textPiece = new SinglentonTextPiece(_text);
        textPieceTable.add(textPiece);
        _text = textPiece.getStringBuilder();
    }

    // Read FSPA and Escher information
    _fspaHeaders = new FSPATable(_tableStream, _fib, FSPADocumentPart.HEADER);
    _fspaMain = new FSPATable(_tableStream, _fib, FSPADocumentPart.MAIN);

    if (_fib.getFcDggInfo() != 0) {
        _escherRecordHolder = new EscherRecordHolder(_tableStream, _fib.getFcDggInfo(), _fib.getLcbDggInfo());
    } else {
        _escherRecordHolder = new EscherRecordHolder();
    }

    // read in the pictures stream
    _pictures = new PicturesTable(this, _dataStream, _mainStream, _fspaMain, _escherRecordHolder);

    // And escher pictures
    _officeDrawingsHeaders = new OfficeDrawingsImpl(_fspaHeaders, _escherRecordHolder, _mainStream);
    _officeDrawingsMain = new OfficeDrawingsImpl(_fspaMain, _escherRecordHolder, _mainStream);

    _st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, textPieceTable, _fib.getSubdocumentTextStreamLength(SubdocumentType.MAIN));
    _ss = new StyleSheet(_tableStream, _fib.getFcStshf());
    _ft = new FontTable(_tableStream, _fib.getFcSttbfffn(), _fib.getLcbSttbfffn());

    // The optional tables below are only created when the FIB gives
    // both a non-zero offset and a non-zero length for them.
    int listOffset = _fib.getFcPlfLst();
    if (listOffset != 0 && _fib.getLcbPlfLst() != 0) {
        _lt = new ListTables(_tableStream, listOffset, _fib.getFcPlfLfo(), _fib.getLcbPlfLfo());
    }

    int sbtOffset = _fib.getFcSttbSavedBy();
    int sbtLength = _fib.getLcbSttbSavedBy();
    if (sbtOffset != 0 && sbtLength != 0) {
        _sbt = new SavedByTable(_tableStream, sbtOffset, sbtLength);
    }

    int rmarkOffset = _fib.getFcSttbfRMark();
    int rmarkLength = _fib.getLcbSttbfRMark();
    if (rmarkOffset != 0 && rmarkLength != 0) {
        _rmat = new RevisionMarkAuthorTable(_tableStream, rmarkOffset, rmarkLength);
    }

    _bookmarksTables = new BookmarksTables(_tableStream, _fib);
    _bookmarks = new BookmarksImpl(_bookmarksTables);

    _endnotesTables = new NotesTables(NoteType.ENDNOTE, _tableStream, _fib);
    _endnotes = new NotesImpl(_endnotesTables);
    _footnotesTables = new NotesTables(NoteType.FOOTNOTE, _tableStream, _fib);
    _footnotes = new NotesImpl(_footnotesTables);

    _fieldsTables = new FieldsTables(_tableStream, _fib);
    _fields = new FieldsImpl(_fieldsTables);
}

/**
 * Reads a boolean-valued system property, falling back to {@code false}
 * when the property is unset or may not be read.
 *
 * @param key name of the system property
 * @return the property value parsed with {@link Boolean#parseBoolean(String)}
 */
private static boolean isSystemPropertyEnabled(String key)
{
    try {
        return Boolean.parseBoolean(System.getProperty(key));
    } catch (SecurityException exc) {
        // reading the property was denied - use the default
        return false;
    }
}
|
|
|
|
@Internal
|
|
public TextPieceTable getTextTable()
|
|
{
|
|
return _cft.getTextPieceTable();
|
|
}
|
|
|
|
@Internal
|
|
@Override
|
|
public StringBuilder getText()
|
|
{
|
|
return _text;
|
|
}
|
|
|
|
public DocumentProperties getDocProperties()
|
|
{
|
|
return _dop;
|
|
}
|
|
|
|
public Range getOverallRange() {
|
|
return new Range(0, _text.length(), this);
|
|
}
|
|
|
|
/**
|
|
* Returns the range which covers the whole of the document, but excludes
|
|
* any headers and footers.
|
|
*/
|
|
public Range getRange()
|
|
{
|
|
// // First up, trigger a full-recalculate
|
|
// // Needed in case of deletes etc
|
|
// getOverallRange();
|
|
//
|
|
// if ( getFileInformationBlock().isFComplex() )
|
|
// {
|
|
// /*
|
|
// * Page 31:
|
|
// *
|
|
// * main document must be found by examining the piece table entries
|
|
// * from the 0th piece table entry from the piece table entry that
|
|
// * describes cp=fib.ccpText.
|
|
// */
|
|
// // TODO: review
|
|
// return new Range( _cpSplit.getMainDocumentStart(),
|
|
// _cpSplit.getMainDocumentEnd(), this );
|
|
// }
|
|
//
|
|
// /*
|
|
// * Page 31:
|
|
// *
|
|
// * "In a non-complex file, this means text of the: main document
|
|
// begins
|
|
// * at fib.fcMin in the file and continues through
|
|
// * fib.fcMin+fib.ccpText."
|
|
// */
|
|
// int bytesStart = getFileInformationBlock().getFcMin();
|
|
//
|
|
// int charsStart = getTextTable().getCharIndex( bytesStart );
|
|
// int charsEnd = charsStart
|
|
// + getFileInformationBlock().getSubdocumentTextStreamLength(
|
|
// SubdocumentType.MAIN );
|
|
|
|
// it seems much simpler -- sergey
|
|
return getRange(SubdocumentType.MAIN);
|
|
}
|
|
|
|
private Range getRange( SubdocumentType subdocument )
|
|
{
|
|
int startCp = 0;
|
|
for ( SubdocumentType previos : SubdocumentType.ORDERED )
|
|
{
|
|
int length = getFileInformationBlock()
|
|
.getSubdocumentTextStreamLength( previos );
|
|
if ( subdocument == previos )
|
|
return new Range( startCp, startCp + length, this );
|
|
startCp += length;
|
|
}
|
|
throw new UnsupportedOperationException(
|
|
"Subdocument type not supported: " + subdocument );
|
|
}
|
|
|
|
/**
|
|
* Returns the {@link Range} which covers all the Footnotes.
|
|
*
|
|
* @return the {@link Range} which covers all the Footnotes.
|
|
*/
|
|
public Range getFootnoteRange()
|
|
{
|
|
return getRange( SubdocumentType.FOOTNOTE );
|
|
}
|
|
|
|
/**
|
|
* Returns the {@link Range} which covers all endnotes.
|
|
*
|
|
* @return the {@link Range} which covers all endnotes.
|
|
*/
|
|
public Range getEndnoteRange()
|
|
{
|
|
return getRange( SubdocumentType.ENDNOTE );
|
|
}
|
|
|
|
/**
|
|
* Returns the {@link Range} which covers all annotations.
|
|
*
|
|
* @return the {@link Range} which covers all annotations.
|
|
*/
|
|
public Range getCommentsRange()
|
|
{
|
|
return getRange( SubdocumentType.ANNOTATION );
|
|
}
|
|
|
|
/**
|
|
* Returns the {@link Range} which covers all textboxes.
|
|
*
|
|
* @return the {@link Range} which covers all textboxes.
|
|
*/
|
|
public Range getMainTextboxRange()
|
|
{
|
|
return getRange( SubdocumentType.TEXTBOX );
|
|
}
|
|
|
|
/**
|
|
* Returns the range which covers all "Header Stories".
|
|
* A header story contains a header, footer, end note
|
|
* separators and footnote separators.
|
|
*/
|
|
public Range getHeaderStoryRange() {
|
|
return getRange( SubdocumentType.HEADER );
|
|
}
|
|
|
|
/**
|
|
* Returns the character length of a document.
|
|
* @return the character length of a document
|
|
*/
|
|
public int characterLength()
|
|
{
|
|
return _text.length();
|
|
}
|
|
|
|
/**
|
|
* Gets a reference to the saved -by table, which holds the save history for the document.
|
|
*
|
|
* @return the saved-by table.
|
|
*/
|
|
@Internal
|
|
public SavedByTable getSavedByTable()
|
|
{
|
|
return _sbt;
|
|
}
|
|
|
|
/**
|
|
* Gets a reference to the revision mark author table, which holds the revision mark authors for the document.
|
|
*
|
|
* @return the saved-by table.
|
|
*/
|
|
@Internal
|
|
public RevisionMarkAuthorTable getRevisionMarkAuthorTable()
|
|
{
|
|
return _rmat;
|
|
}
|
|
|
|
/**
|
|
* @return PicturesTable object, that is able to extract images from this document
|
|
*/
|
|
public PicturesTable getPicturesTable() {
|
|
return _pictures;
|
|
}
|
|
|
|
@Internal
|
|
public EscherRecordHolder getEscherRecordHolder() {
|
|
return _escherRecordHolder;
|
|
}
|
|
|
|
public OfficeDrawings getOfficeDrawingsHeaders()
|
|
{
|
|
return _officeDrawingsHeaders;
|
|
}
|
|
|
|
public OfficeDrawings getOfficeDrawingsMain()
|
|
{
|
|
return _officeDrawingsMain;
|
|
}
|
|
|
|
/**
|
|
* @return user-friendly interface to access document bookmarks
|
|
*/
|
|
public Bookmarks getBookmarks()
|
|
{
|
|
return _bookmarks;
|
|
}
|
|
|
|
/**
|
|
* @return user-friendly interface to access document endnotes
|
|
*/
|
|
public Notes getEndnotes()
|
|
{
|
|
return _endnotes;
|
|
}
|
|
|
|
/**
|
|
* @return user-friendly interface to access document footnotes
|
|
*/
|
|
public Notes getFootnotes()
|
|
{
|
|
return _footnotes;
|
|
}
|
|
|
|
/**
|
|
* @return FieldsTables object, that is able to extract fields descriptors from this document
|
|
* @deprecated POI 3.8.
|
|
*/
|
|
@Deprecated
|
|
@Internal
|
|
public FieldsTables getFieldsTables() {
|
|
return _fieldsTables;
|
|
}
|
|
|
|
/**
|
|
* Returns user-friendly interface to access document {@link Field}s
|
|
*
|
|
* @return user-friendly interface to access document {@link Field}s
|
|
*/
|
|
public Fields getFields()
|
|
{
|
|
return _fields;
|
|
}
|
|
|
|
/**
|
|
* Write out the word file that is represented by this class, to the
|
|
* currently open {@link File}, via the writeable {@link POIFSFileSystem}
|
|
* it was opened as.
|
|
*
|
|
* <p>This will fail (with an {@link IllegalStateException} if the
|
|
* Document was opened read-only, opened from an {@link InputStream}
|
|
* instead of a File, or if this is not the root document. For those cases,
|
|
* you must use {@link #write(OutputStream)} or {@link #write(File)} to
|
|
* write to a brand new document.
|
|
*
|
|
* @since 3.15
|
|
*/
|
|
@Override
|
|
public void write() throws IOException {
|
|
validateInPlaceWritePossible();
|
|
|
|
// Update the Document+Properties streams in the file
|
|
write(getDirectory().getFileSystem(), false);
|
|
|
|
// Sync with the File on disk
|
|
getDirectory().getFileSystem().writeFilesystem();
|
|
}
|
|
|
|
/**
|
|
* Writes out the word file that is represented by an instance of this class.
|
|
*
|
|
* If the {@link File} exists, it will be replaced, otherwise a new one
|
|
* will be created
|
|
*
|
|
* @param newFile The File to write to.
|
|
* @throws IOException If there is an unexpected IOException from writing
|
|
* to the File.
|
|
*
|
|
* @since 3.15 beta 3
|
|
*/
|
|
@Override
|
|
public void write(File newFile) throws IOException {
|
|
NPOIFSFileSystem pfs = POIFSFileSystem.create(newFile);
|
|
write(pfs, true);
|
|
pfs.writeFilesystem();
|
|
}
|
|
|
|
/**
|
|
* Writes out the word file that is represented by an instance of this class.
|
|
*
|
|
* For better performance when writing to files, use {@link #write(File)}.
|
|
* If {@code stream} has a high cost/latency associated with each written byte,
|
|
* consider wrapping the OutputStream in a {@link java.io.BufferedOutputStream}
|
|
* to improve write performance.
|
|
*
|
|
* @param out The OutputStream to write to.
|
|
* @throws IOException If there is an unexpected IOException from the passed
|
|
* in OutputStream.
|
|
*/
|
|
public void write(OutputStream out) throws IOException {
|
|
NPOIFSFileSystem pfs = new NPOIFSFileSystem();
|
|
write(pfs, true);
|
|
pfs.writeFilesystem( out );
|
|
}
|
|
private void write(NPOIFSFileSystem pfs, boolean copyOtherEntries) throws IOException {
|
|
// initialize our streams for writing.
|
|
HWPFFileSystem docSys = new HWPFFileSystem();
|
|
HWPFOutputStream wordDocumentStream = docSys.getStream(STREAM_WORD_DOCUMENT);
|
|
HWPFOutputStream tableStream = docSys.getStream(STREAM_TABLE_1);
|
|
//HWPFOutputStream dataStream = docSys.getStream("Data");
|
|
int tableOffset = 0;
|
|
|
|
// FileInformationBlock fib = (FileInformationBlock)_fib.clone();
|
|
// clear the offsets and sizes in our FileInformationBlock.
|
|
_fib.clearOffsetsSizes();
|
|
|
|
// determine the FileInformationBLock size
|
|
int fibSize = _fib.getSize();
|
|
fibSize += POIFSConstants.SMALLER_BIG_BLOCK_SIZE -
|
|
(fibSize % POIFSConstants.SMALLER_BIG_BLOCK_SIZE);
|
|
|
|
// preserve space for the FileInformationBlock because we will be writing
|
|
// it after we write everything else.
|
|
byte[] placeHolder = new byte[fibSize];
|
|
wordDocumentStream.write(placeHolder);
|
|
int mainOffset = wordDocumentStream.getOffset();
|
|
|
|
// write out the StyleSheet.
|
|
_fib.setFcStshf(tableOffset);
|
|
_ss.writeTo(tableStream);
|
|
_fib.setLcbStshf(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
// get fcMin and fcMac because we will be writing the actual text with the
|
|
// complex table.
|
|
int fcMin = mainOffset;
|
|
|
|
/*
|
|
* clx (encoding of the sprm lists for a complex file and piece table
|
|
* for a any file) Written immediately after the end of the previously
|
|
* recorded structure. This is recorded in all Word documents
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 23 of 210
|
|
*/
|
|
|
|
// write out the Complex table, includes text.
|
|
_fib.setFcClx(tableOffset);
|
|
_cft.writeTo(wordDocumentStream, tableStream);
|
|
_fib.setLcbClx(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
int fcMac = wordDocumentStream.getOffset();
|
|
|
|
/*
|
|
* dop (document properties record) Written immediately after the end of
|
|
* the previously recorded structure. This is recorded in all Word
|
|
* documents
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 23 of 210
|
|
*/
|
|
|
|
// write out the DocumentProperties.
|
|
_fib.setFcDop(tableOffset);
|
|
_dop.writeTo(tableStream);
|
|
_fib.setLcbDop(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plcfBkmkf (table recording beginning CPs of bookmarks) Written
|
|
* immediately after the sttbfBkmk, if the document contains bookmarks.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
if ( _bookmarksTables != null )
|
|
{
|
|
_bookmarksTables.writePlcfBkmkf( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
/*
|
|
* plcfBkmkl (table recording limit CPs of bookmarks) Written
|
|
* immediately after the plcfBkmkf, if the document contains bookmarks.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
if ( _bookmarksTables != null )
|
|
{
|
|
_bookmarksTables.writePlcfBkmkl( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
/*
|
|
* plcfbteChpx (bin table for CHP FKPs) Written immediately after the
|
|
* previously recorded table. This is recorded in all Word documents.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
|
|
// write out the CHPBinTable.
|
|
_fib.setFcPlcfbteChpx(tableOffset);
|
|
_cbt.writeTo(wordDocumentStream, tableStream, fcMin, _cft.getTextPieceTable());
|
|
_fib.setLcbPlcfbteChpx(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plcfbtePapx (bin table for PAP FKPs) Written immediately after the
|
|
* plcfbteChpx. This is recorded in all Word documents.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
|
|
// write out the PAPBinTable.
|
|
_fib.setFcPlcfbtePapx(tableOffset);
|
|
_pbt.writeTo(wordDocumentStream, tableStream, _cft.getTextPieceTable());
|
|
_fib.setLcbPlcfbtePapx(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plcfendRef (endnote reference position table) Written immediately
|
|
* after the previously recorded table if the document contains endnotes
|
|
*
|
|
* plcfendTxt (endnote text position table) Written immediately after
|
|
* the plcfendRef if the document contains endnotes
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
_endnotesTables.writeRef( _fib, tableStream );
|
|
_endnotesTables.writeTxt( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plcffld*** (table of field positions and statuses for annotation
|
|
* subdocument) Written immediately after the previously recorded table,
|
|
* if the ******* subdocument contains fields.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
|
|
if ( _fieldsTables != null )
|
|
{
|
|
_fieldsTables.write( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
/*
|
|
* plcffndRef (footnote reference position table) Written immediately
|
|
* after the stsh if the document contains footnotes
|
|
*
|
|
* plcffndTxt (footnote text position table) Written immediately after
|
|
* the plcffndRef if the document contains footnotes
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 24 of 210
|
|
*/
|
|
_footnotesTables.writeRef( _fib, tableStream );
|
|
_footnotesTables.writeTxt( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plcfsed (section table) Written immediately after the previously
|
|
* recorded table. Recorded in all Word documents
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 25 of 210
|
|
*/
|
|
|
|
// write out the SectionTable.
|
|
_fib.setFcPlcfsed(tableOffset);
|
|
_st.writeTo(wordDocumentStream, tableStream);
|
|
_fib.setLcbPlcfsed(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
// write out the list tables
|
|
if ( _lt != null )
|
|
{
|
|
/*
|
|
* plcflst (list formats) Written immediately after the end of the
|
|
* previously recorded, if there are any lists defined in the
|
|
* document. This begins with a short count of LSTF structures
|
|
* followed by those LSTF structures. This is immediately followed
|
|
* by the allocated data hanging off the LSTFs. This data consists
|
|
* of the array of LVLs for each LSTF. (Each LVL consists of an LVLF
|
|
* followed by two grpprls and an XST.)
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 25 of 210
|
|
*/
|
|
_lt.writeListDataTo( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
/*
|
|
* plflfo (more list formats) Written immediately after the end of
|
|
* the plcflst and its accompanying data, if there are any lists
|
|
* defined in the document. This consists first of a PL of LFO
|
|
* records, followed by the allocated data (if any) hanging off the
|
|
* LFOs. The allocated data consists of the array of LFOLVLFs for
|
|
* each LFO (and each LFOLVLF is immediately followed by some LVLs).
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 26 of 210
|
|
*/
|
|
_lt.writeListOverridesTo( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
/*
|
|
* sttbfBkmk (table of bookmark name strings) Written immediately after
|
|
* the previously recorded table, if the document contains bookmarks.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 27 of 210
|
|
*/
|
|
if ( _bookmarksTables != null )
|
|
{
|
|
_bookmarksTables.writeSttbfBkmk( _fib, tableStream );
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
/*
|
|
* sttbSavedBy (last saved by string table) Written immediately after
|
|
* the previously recorded table.
|
|
*
|
|
* Microsoft Office Word 97-2007 Binary File Format (.doc)
|
|
* Specification; Page 27 of 210
|
|
*/
|
|
|
|
// write out the saved-by table.
|
|
if (_sbt != null)
|
|
{
|
|
_fib.setFcSttbSavedBy(tableOffset);
|
|
_sbt.writeTo(tableStream);
|
|
_fib.setLcbSttbSavedBy(tableStream.getOffset() - tableOffset);
|
|
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
// write out the revision mark authors table.
|
|
if (_rmat != null)
|
|
{
|
|
_fib.setFcSttbfRMark(tableOffset);
|
|
_rmat.writeTo(tableStream);
|
|
_fib.setLcbSttbfRMark(tableStream.getOffset() - tableOffset);
|
|
|
|
tableOffset = tableStream.getOffset();
|
|
}
|
|
|
|
// write out the FontTable.
|
|
_fib.setFcSttbfffn(tableOffset);
|
|
_ft.writeTo(tableStream);
|
|
_fib.setLcbSttbfffn(tableStream.getOffset() - tableOffset);
|
|
tableOffset = tableStream.getOffset();
|
|
|
|
// set some variables in the FileInformationBlock.
|
|
_fib.getFibBase().setFcMin(fcMin);
|
|
_fib.getFibBase().setFcMac(fcMac);
|
|
_fib.setCbMac(wordDocumentStream.getOffset());
|
|
|
|
// make sure that the table, doc and data streams use big blocks.
|
|
byte[] mainBuf = wordDocumentStream.toByteArray();
|
|
if (mainBuf.length < 4096)
|
|
{
|
|
byte[] tempBuf = new byte[4096];
|
|
System.arraycopy(mainBuf, 0, tempBuf, 0, mainBuf.length);
|
|
mainBuf = tempBuf;
|
|
}
|
|
|
|
// Table1 stream will be used
|
|
_fib.getFibBase().setFWhichTblStm( true );
|
|
|
|
// write out the FileInformationBlock.
|
|
//_fib.serialize(mainBuf, 0);
|
|
_fib.writeTo(mainBuf, tableStream);
|
|
|
|
byte[] tableBuf = tableStream.toByteArray();
|
|
if (tableBuf.length < 4096)
|
|
{
|
|
byte[] tempBuf = new byte[4096];
|
|
System.arraycopy(tableBuf, 0, tempBuf, 0, tableBuf.length);
|
|
tableBuf = tempBuf;
|
|
}
|
|
|
|
byte[] dataBuf = _dataStream;
|
|
if (dataBuf == null)
|
|
{
|
|
dataBuf = new byte[4096];
|
|
}
|
|
if (dataBuf.length < 4096)
|
|
{
|
|
byte[] tempBuf = new byte[4096];
|
|
System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
|
|
dataBuf = tempBuf;
|
|
}
|
|
|
|
// Create a new document preserving order of entries / Update existing
|
|
boolean docWritten = false;
|
|
boolean dataWritten = false;
|
|
boolean objectPoolWritten = false;
|
|
boolean tableWritten = false;
|
|
boolean propertiesWritten = false;
|
|
for (Entry entry : getDirectory()) {
|
|
if ( entry.getName().equals( STREAM_WORD_DOCUMENT ) )
|
|
{
|
|
if ( !docWritten )
|
|
{
|
|
write(pfs, mainBuf, STREAM_WORD_DOCUMENT);
|
|
docWritten = true;
|
|
}
|
|
}
|
|
else if ( entry.getName().equals( STREAM_OBJECT_POOL ) )
|
|
{
|
|
if ( !objectPoolWritten )
|
|
{
|
|
if ( copyOtherEntries ) {
|
|
_objectPool.writeTo( pfs.getRoot() );
|
|
} else {
|
|
// Object pool is already there, no need to change/copy
|
|
}
|
|
objectPoolWritten = true;
|
|
}
|
|
}
|
|
else if ( entry.getName().equals( STREAM_TABLE_0 )
|
|
|| entry.getName().equals( STREAM_TABLE_1 ) )
|
|
{
|
|
if ( !tableWritten )
|
|
{
|
|
write(pfs, tableBuf, STREAM_TABLE_1);
|
|
tableWritten = true;
|
|
}
|
|
}
|
|
else if ( entry.getName().equals(
|
|
SummaryInformation.DEFAULT_STREAM_NAME )
|
|
|| entry.getName().equals(
|
|
DocumentSummaryInformation.DEFAULT_STREAM_NAME ) )
|
|
{
|
|
if ( !propertiesWritten )
|
|
{
|
|
writeProperties( pfs );
|
|
propertiesWritten = true;
|
|
}
|
|
}
|
|
else if ( entry.getName().equals( STREAM_DATA ) )
|
|
{
|
|
if ( !dataWritten )
|
|
{
|
|
write(pfs, dataBuf, STREAM_DATA);
|
|
dataWritten = true;
|
|
}
|
|
}
|
|
else if ( copyOtherEntries )
|
|
{
|
|
EntryUtils.copyNodeRecursively( entry, pfs.getRoot() );
|
|
}
|
|
}
|
|
|
|
if ( !docWritten )
|
|
write(pfs, mainBuf, STREAM_WORD_DOCUMENT);
|
|
if ( !tableWritten )
|
|
write(pfs, tableBuf, STREAM_TABLE_1);
|
|
if ( !propertiesWritten )
|
|
writeProperties( pfs );
|
|
if ( !dataWritten )
|
|
write(pfs, dataBuf, STREAM_DATA);
|
|
if ( !objectPoolWritten && copyOtherEntries )
|
|
_objectPool.writeTo( pfs.getRoot() );
|
|
|
|
/*
|
|
* since we updated all references in FIB and etc, using new arrays to
|
|
* access data
|
|
*/
|
|
replaceDirectory(pfs.getRoot());
|
|
this._tableStream = tableStream.toByteArray();
|
|
this._dataStream = dataBuf;
|
|
}
|
|
private static void write(NPOIFSFileSystem pfs, byte[] data, String name) throws IOException {
|
|
pfs.createOrUpdateDocument(new ByteArrayInputStream(data), name);
|
|
}
|
|
|
|
@Internal
|
|
public byte[] getDataStream()
|
|
{
|
|
return _dataStream;
|
|
}
|
|
@Internal
|
|
public byte[] getTableStream()
|
|
{
|
|
return _tableStream;
|
|
}
|
|
|
|
public int registerList( HWPFList list )
|
|
{
|
|
if ( _lt == null )
|
|
{
|
|
_lt = new ListTables();
|
|
}
|
|
return _lt.addList( list.getListData(), list.getLFO(),
|
|
list.getLFOData() );
|
|
}
|
|
|
|
public void delete(int start, int length)
|
|
{
|
|
Range r = new Range(start, start + length, this);
|
|
r.delete();
|
|
}
|
|
}
|