add initial support for fast-saved files

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145410 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-12 00:40:27 +00:00
parent 5fb7c13c38
commit 75090630ff
7 changed files with 161 additions and 27 deletions

View File

@ -216,8 +216,8 @@ public final class HWPFDocument extends HWPFDocumentCore
// Now load the rest of the properties, which need to be adjusted // Now load the rest of the properties, which need to be adjusted
// for where text really begin // for where text really begin
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _tpt, true); _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), _cft, _tpt, true);
_pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _tpt, true); _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), _cft, _tpt, true);
// Read FSPA and Escher information // Read FSPA and Escher information
_fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces()); _fspa = new FSPATable(_tableStream, _fib.getFcPlcspaMom(), _fib.getLcbPlcspaMom(), getTextTable().getTextPieces());

View File

@ -29,6 +29,8 @@ import java.util.Set;
import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
@ -58,20 +60,21 @@ public class CHPBinTable
* Constructor used to read a binTable in from a Word document. * Constructor used to read a binTable in from a Word document.
* *
* @deprecated Use * @deprecated Use
* {@link #CHPBinTable(byte[],byte[],int,int,TextPieceTable,boolean)} * {@link #CHPBinTable(byte[],byte[],int,int,ComplexFileTable,TextPieceTable, boolean)}
* instead * instead
*/ */
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin, TextPieceTable tpt ) int size, int fcMin, TextPieceTable tpt )
{ {
this( documentStream, tableStream, offset, size, tpt, true ); this( documentStream, tableStream, offset, size, null, tpt, true );
} }
/** /**
* Constructor used to read a binTable in from a Word document. * Constructor used to read a binTable in from a Word document.
*/ */
public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset, public CHPBinTable( byte[] documentStream, byte[] tableStream, int offset,
int size, TextPieceTable tpt, boolean ignoreChpxWithoutTextPieces ) int size, ComplexFileTable complexFileTable, TextPieceTable tpt,
boolean ignoreChpxWithoutTextPieces )
{ {
/* /*
* Page 35: * Page 35:
@ -105,6 +108,58 @@ public class CHPBinTable
} }
} }
if ( complexFileTable != null )
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding CHPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
continue;
int igrpprl = prm.getIgrpprl();
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
{
logger.log( POILogger.WARN, textPiece
+ "'s PRM references to unknown grpprl" );
continue;
}
boolean hasChp = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
.hasNext(); )
{
SprmOperation sprmOperation = iterator.next();
if ( sprmOperation.getType() == SprmOperation.TYPE_CHP )
{
hasChp = true;
break;
}
}
if ( hasChp )
{
SprmBuffer newSprmBuffer;
try
{
newSprmBuffer = (SprmBuffer) sprmBuffer.clone();
}
catch ( CloneNotSupportedException e )
{
// shall not happen
throw new Error( e );
}
CHPX chpx = new CHPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer );
_textRuns.add( chpx );
}
}
}
// rebuild document paragraphs structure // rebuild document paragraphs structure
StringBuilder docText = new StringBuilder(); StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() ) for ( TextPiece textPiece : tpt.getTextPieces() )

View File

@ -18,9 +18,13 @@
package org.apache.poi.hwpf.model; package org.apache.poi.hwpf.model;
import java.io.IOException; import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.io.*;
public final class ComplexFileTable public final class ComplexFileTable
{ {
@ -30,6 +34,8 @@ public final class ComplexFileTable
protected TextPieceTable _tpt; protected TextPieceTable _tpt;
private SprmBuffer[] _grpprls;
public ComplexFileTable() public ComplexFileTable()
{ {
_tpt = new TextPieceTable(); _tpt = new TextPieceTable();
@ -39,12 +45,20 @@ public final class ComplexFileTable
{ {
//skips through the prms before we reach the piece table. These contain data //skips through the prms before we reach the piece table. These contain data
//for actual fast saved files //for actual fast saved files
while (tableStream[offset] == GRPPRL_TYPE) List<SprmBuffer> sprmBuffers = new LinkedList<SprmBuffer>();
{ while ( tableStream[offset] == GRPPRL_TYPE )
offset++; {
int skip = LittleEndian.getShort(tableStream, offset); offset++;
offset += LittleEndian.SHORT_SIZE + skip; int size = LittleEndian.getShort( tableStream, offset );
} offset += LittleEndian.SHORT_SIZE;
byte[] bs = LittleEndian.getByteArray( tableStream, offset, size );
offset += size;
SprmBuffer sprmBuffer = new SprmBuffer( bs, false, 0 );
sprmBuffers.add( sprmBuffer );
}
this._grpprls = sprmBuffers.toArray( new SprmBuffer[sprmBuffers.size()] );
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE) if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
{ {
throw new IOException("The text piece table is corrupted"); throw new IOException("The text piece table is corrupted");
@ -59,6 +73,11 @@ public final class ComplexFileTable
return _tpt; return _tpt;
} }
/**
 * Returns the grpprl (SPRM group) buffers that were read from the table
 * stream ahead of the text piece table, in the order they were encountered.
 * Text piece PRMs index into this array.
 * <p>
 * The no-arg constructor does not populate the grpprl array, so an empty
 * array is returned in that case instead of <code>null</code>; callers index
 * into the result and read its length without a null check.
 * <p>
 * When grpprls are present the internal array itself is returned, not a copy.
 *
 * @return the grpprl buffers, never <code>null</code>
 */
public SprmBuffer[] getGrpprls()
{
    if ( _grpprls == null )
    {
        // nothing was read (e.g. table created via the no-arg constructor)
        return new SprmBuffer[0];
    }
    return _grpprls;
}
public void writeTo(HWPFFileSystem sys) public void writeTo(HWPFFileSystem sys)
throws IOException throws IOException
{ {

View File

@ -26,6 +26,8 @@ import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFFileSystem; import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream; import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
@ -62,12 +64,12 @@ public class PAPBinTable
byte[] dataStream, int offset, int size, int fcMin, byte[] dataStream, int offset, int size, int fcMin,
TextPieceTable tpt ) TextPieceTable tpt )
{ {
this( documentStream, tableStream, dataStream, offset, size, tpt, true ); this( documentStream, tableStream, dataStream, offset, size, null, tpt, true );
} }
public PAPBinTable( byte[] documentStream, byte[] tableStream, public PAPBinTable( byte[] documentStream, byte[] tableStream,
byte[] dataStream, int offset, int size, TextPieceTable tpt, byte[] dataStream, int offset, int size, ComplexFileTable complexFileTable,
boolean ignorePapxWithoutTextPieces ) TextPieceTable tpt, boolean ignorePapxWithoutTextPieces )
{ {
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
this.tpt = tpt; this.tpt = tpt;
@ -94,6 +96,50 @@ public class PAPBinTable
} }
} }
if ( complexFileTable != null )
{
SprmBuffer[] sprmBuffers = complexFileTable.getGrpprls();
// adding PAPX from fast-saved SPRMs
for ( TextPiece textPiece : tpt.getTextPieces() )
{
PropertyModifier prm = textPiece.getPieceDescriptor().getPrm();
if ( !prm.isComplex() )
continue;
int igrpprl = prm.getIgrpprl();
if ( igrpprl < 0 || igrpprl >= sprmBuffers.length )
{
logger.log( POILogger.WARN, textPiece
+ "'s PRM references to unknown grpprl" );
continue;
}
boolean hasPap = false;
SprmBuffer sprmBuffer = sprmBuffers[igrpprl];
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator
.hasNext(); )
{
SprmOperation sprmOperation = iterator.next();
if ( sprmOperation.getType() == SprmOperation.TYPE_PAP )
{
hasPap = true;
break;
}
}
if ( hasPap )
{
SprmBuffer newSprmBuffer = new SprmBuffer(2);
newSprmBuffer.append( sprmBuffer.toByteArray() );
PAPX papx = new PAPX( textPiece.getStart(),
textPiece.getEnd(), newSprmBuffer, dataStream );
_paragraphs.add( papx );
}
}
}
// rebuild document paragraphs structure // rebuild document paragraphs structure
StringBuilder docText = new StringBuilder(); StringBuilder docText = new StringBuilder();
for ( TextPiece textPiece : tpt.getTextPieces() ) for ( TextPiece textPiece : tpt.getTextPieces() )
@ -144,11 +190,6 @@ public class PAPBinTable
if ( fChar == 13 || fChar == 7 || fChar == 12 ) if ( fChar == 13 || fChar == 7 || fChar == 12 )
break; break;
} }
// if ( papx.getStart() <= charIndex && charIndex <
// papx.getEnd() )
// {
// papxs.add( papx );
// }
} }
if ( papxs.size() == 0 ) if ( papxs.size() == 0 )
@ -178,10 +219,21 @@ public class PAPBinTable
} }
} }
SprmBuffer sprmBuffer = new SprmBuffer( 2 ); SprmBuffer sprmBuffer = null;
for ( PAPX papx : papxs ) for ( PAPX papx : papxs )
{ {
sprmBuffer.append( papx.getGrpprl(), 2 ); if ( sprmBuffer == null )
try
{
sprmBuffer = (SprmBuffer) papx.getSprmBuf().clone();
}
catch ( CloneNotSupportedException e )
{
// can't happen
throw new Error( e );
}
else
sprmBuffer.append( papx.getGrpprl(), 2 );
} }
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer, PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
dataStream ); dataStream );

View File

@ -19,8 +19,10 @@ package org.apache.poi.hwpf.model;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List; import java.util.List;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger; import org.apache.poi.util.POILogger;
@ -90,7 +92,8 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
Integer.valueOf( startAt ), "; ", Integer.valueOf( startAt ), "; ",
Integer.valueOf( endAt ), Integer.valueOf( endAt ),
") (bytes) doesn't have corresponding text pieces " ") (bytes) doesn't have corresponding text pieces "
+ "and will be skipped" ); + "and will be skipped\n\tSkipped SPRM: "
+ new SprmBuffer( getGrpprl( x ), 2 ) );
_papxList.add( null ); _papxList.add( null );
continue; continue;
} }
@ -150,6 +153,11 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
return _papxList.get(index); return _papxList.get(index);
} }
/**
 * Exposes every PAPX slot of this formatted disk page as a read-only list.
 * <p>
 * The list is an unmodifiable view over the internal list rather than a
 * copy. Entries may be <code>null</code>: a <code>null</code> placeholder is
 * stored for each PAPX that was skipped because it has no corresponding
 * text pieces.
 *
 * @return unmodifiable view of the PAPX list, possibly containing
 *         <code>null</code> elements
 */
public List<PAPX> getPAPXs()
{
    final List<PAPX> readOnlyView = Collections.unmodifiableList( _papxList );
    return readOnlyView;
}
/** /**
* Gets the papx grpprl for the paragraph at index in this fkp. * Gets the papx grpprl for the paragraph at index in this fkp.
* *

View File

@ -46,7 +46,7 @@ public final class TestCHPBinTable
byte[] tableStream = _hWPFDocFixture._tableStream; byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin(); int fcMin = fib.getFcMin();
_cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), fakeTPT, false); _cHPBinTable = new CHPBinTable(mainStream, tableStream, fib.getFcPlcfbteChpx(), fib.getLcbPlcfbteChpx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
@ -57,7 +57,7 @@ public final class TestCHPBinTable
byte[] newTableStream = tableOut.toByteArray(); byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray();
CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, fakeTPT, false); CHPBinTable newBinTable = new CHPBinTable(newMainStream, newTableStream, 0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _cHPBinTable._textRuns; ArrayList oldTextRuns = _cHPBinTable._textRuns;
ArrayList newTextRuns = newBinTable._textRuns; ArrayList newTextRuns = newBinTable._textRuns;

View File

@ -40,7 +40,7 @@ public final class TestPAPBinTable
byte[] mainStream = _hWPFDocFixture._mainStream; byte[] mainStream = _hWPFDocFixture._mainStream;
byte[] tableStream = _hWPFDocFixture._tableStream; byte[] tableStream = _hWPFDocFixture._tableStream;
_pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fakeTPT, false); _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), null, fakeTPT, false);
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
@ -51,7 +51,7 @@ public final class TestPAPBinTable
byte[] newTableStream = tableOut.toByteArray(); byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, fakeTPT, false); PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, null, fakeTPT, false);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs();