add initial support for fast-saved files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-12 00:40:27 +00:00
parent 5fb7c13c38
commit 75090630ff
7 changed files with 161 additions and 27 deletions

View File

@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore
// Now load the rest of the properties, which need to be adjusted
// for where text really begin
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true);
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
// Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());

View File

@ -29,6 +29,8 @@ import java.util.Set;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
@ -58,20 +60,21 @@ public class CHPBinTable
* Constructor used to read a binTable in from a Word document.
*
* @deprecated Use
* {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)}
* {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
* instead
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin, TextPieceTable tpt )
{
this( documentStream, tableStream, offset, size, tpt, true );
this( documentStream, tableStream, offset, size, null, tpt, true );
}
/**
* Constructor used to read a binTable in from a Word document.
*/
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces )
int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
boolean ignoreChpxWithoutTextPieces )
{
/*
* Page 35:
@ -105,6 +108,58 @@ public class CHPBinTable
}
}
if ( complexFileTable != null )
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
continue;
int igrpprl = prm.getIgrpprl();
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
{
logger.log( POILogger.WARN, textPiece
+ "'s PRM references to unknown grpprl" );
continue;
}
boolean hasChp = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
.hasNext(); )
{
SprmOperation sprmOperation = iterator.next();
if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
{
hasChp = true;
break;
}
}
if ( hasChp )
{
SprmBuffer newSprmBuffer;
try
{
newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
}
catch ( CloneNotSupportedException e )
{
// shall not happen
throw new Error( e );
}
CHPX chpx = new CHPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer );
_textRuns.add( chpx );
}
}
}
// rebuild document paragraphs structure
StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() )

View File

@ -18,9 +18,13 @@
package org.apache.poi.hwpf.model;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.io.*;
public final class ComplexFileTable
{
@ -30,6 +34,8 @@ public final class ComplexFileTable
protected TextPieceTable _tpt;
private SprmBuffer[] _grpprls;
public ComplexFileTable()
{
_tpt = new TextPieceTable();
@ -39,12 +45,20 @@ public final class ComplexFileTable
{
//skips through the prms before we reach the piece table. These contain data
//for actual fast saved files
while (tableStream[offset] == GRPPRL_TYPE)
{
offset++;
int skip = LittleEndian.getShort(tableStream, offset);
offset += LittleEndian.SHORT_SIZE + skip;
}
List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
while ( tableStream[offset] == GRPPRL_TYPE )
{
offset++;
int size = LittleEndian.getShort( tableStream, offset );
offset += LittleEndian.SHORT_SIZE;
byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
offset += size;
SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
sprmBuffers.add( sprmBuffer );
}
this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
{
throw new IOException("The text piece table is corrupted");
@ -59,6 +73,11 @@ public final class ComplexFileTable
return _tpt;
}
/**
 * Returns the grpprl SPRM buffers that were read ahead of the text piece
 * table. A text piece's complex PRM refers to one of these buffers by its
 * index in the returned array.
 *
 * @return the grpprl buffers in the order they were read, or an empty array
 *         when none were loaded; never {@code null}
 */
public SprmBuffer[] getGrpprls()
{
    // _grpprls is only assigned while parsing a table stream; an instance
    // that was not built from a stream would otherwise hand null to callers
    // that immediately read .length on the result
    if ( _grpprls == null )
    {
        return new SprmBuffer[0];
    }
    return _grpprls;
}
public void writeTo(HWPFFileSystem sys)
throws IOException
{

View File

@ -26,6 +26,8 @@ import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
@ -62,12 +64,12 @@ public class PAPBinTable
byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt )
{
this( documentStream, tableStream, dataStream, offset, size, tpt, true );
this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
}
public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, TextPieceTable tpt,
boolean ignorePapxWithoutTextPieces )
byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt;
@ -94,6 +96,50 @@ public class PAPBinTable
}
}
if ( complexFileTable != null )
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding PAPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
continue;
int igrpprl = prm.getIgrpprl();
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
{
logger.log( POILogger.WARN, textPiece
+ "'s PRM references to unknown grpprl" );
continue;
}
boolean hasPap = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
.hasNext(); )
{
SprmOperation sprmOperation = iterator.next();
if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
{
hasPap = true;
break;
}
}
if ( hasPap )
{
SprmBuffer newSprmBuffer = new SprmBuffer(2);
newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer, dataStream );
_paragraphs.add( papx );
}
}
}
// rebuild document paragraphs structure
StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() )
@ -144,11 +190,6 @@ public class PAPBinTable
if ( fChar == 13 || fChar == 7 || fChar == 12 )
break;
}
// if ( papx.getStart() <= charIndex && charIndex <
// papx.getEnd() )
// {
// papxs.add( papx );
// }
}
if ( papxs.size() == 0 )
@ -178,10 +219,21 @@ public class PAPBinTable
}
}
SprmBuffer sprmBuffer = new SprmBuffer( 2 );
SprmBuffer sprmBuffer = null;
for ( PAPX papx : papxs )
{
sprmBuffer.append( papx.getGrpprl(), 2 );
if ( sprmBuffer == null )
try
{
sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
}
catch ( CloneNotSupportedException e )
{
// can't happen
throw new Error( e );
}
else
sprmBuffer.append( papx.getGrpprl(), 2 );
}
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
dataStream );

View File

@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
Integer.valueOf( startAt ), "; ",
Integer.valueOf( endAt ),
") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" );
+ "and will be skipped\n\tSkipped SPRM: "
+ new SprmBuffer( getGrpprl( x ), 2 ) );
_papxList.add( null );
continue;
}
@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
return _papxList.get(index);
}
/**
 * Exposes the PAPX entries of this formatted disk page as a read-only list.
 * The result is a view over the internal list rather than a copy. Entries
 * may be {@code null}: a null placeholder is stored for a PAPX that was
 * skipped because it has no corresponding text pieces.
 *
 * @return unmodifiable view of the PAPX list of this page
 */
public List<PAPX> getPAPXs()
{
    final List<PAPX> readOnlyView = Collections.unmodifiableList( _papxList );
    return readOnlyView;
}
/**
* Gets the papx grpprl for the paragraph at index in this fkp.
*

View File

@ -46,7 +46,7 @@ public final class TestCHPBinTable
byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin();
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false);
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
@ -57,7 +57,7 @@ public final class TestCHPBinTable
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false);
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _cHPBinTable._textRuns;
ArrayList newTextRuns = newBinTable._textRuns;

View File

@ -40,7 +40,7 @@ public final class TestPAPBinTable
byte[] mainStream = _hWPFDocFixture._mainStream;
byte[] tableStream = _hWPFDocFixture._tableStream;
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false);
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem();
@ -51,7 +51,7 @@ public final class TestPAPBinTable
byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false);
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs();