369 lines
12 KiB
Java
369 lines
12 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.hwpf.model;
|
|
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.Arrays;
|
|
import java.util.Collections;
|
|
import java.util.List;
|
|
|
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
|
import org.apache.poi.util.Internal;
|
|
import org.apache.poi.util.LittleEndian;
|
|
|
|
/**
|
|
* Represents a PAP FKP. The style properties for paragraph and character runs
|
|
* are stored in fkps. There are PAP fkps for paragraph properties and CHP fkps
|
|
* for character run properties. The first part of the fkp for both CHP and PAP
|
|
* fkps consists of an array of 4 byte int offsets in the main stream for that
|
|
* Paragraph's or Character run's text. The ending offset is the next
|
|
* value in the array. For example, if an fkp has X paragraphs
* stored in it then there are (X + 1) 4-byte ints in the beginning array. The
* number X is determined by the last byte in a 512-byte fkp.
|
|
*
|
|
* CHP and PAP fkps also store the compressed styles(grpprl) that correspond to
|
|
* the offsets on the front of the fkp. The offset of the grpprls is determined
|
|
* differently for CHP fkps and PAP fkps.
|
|
*
|
|
* @author Ryan Ackley
|
|
*/
|
|
@Internal
|
|
public final class PAPFormattedDiskPage extends FormattedDiskPage {
|
|
private static final int BX_SIZE = 13;
|
|
private static final int FC_SIZE = 4;
|
|
|
|
private ArrayList<PAPX> _papxList = new ArrayList<PAPX>();
|
|
private ArrayList<PAPX> _overFlow;
|
|
|
|
/**
 * Creates an empty page. The argument is not used; this constructor only
 * delegates to {@link #PAPFormattedDiskPage()}.
 *
 * @param dataStream ignored
 * @deprecated Use {@link #PAPFormattedDiskPage()} instead
 */
@Deprecated
public PAPFormattedDiskPage( byte[] dataStream )
{
    this();
}
|
|
|
|
/**
 * Creates an empty page with no PAPXs. Entries are expected to be queued
 * afterwards with {@link #fill(List)}.
 */
public PAPFormattedDiskPage()
{
    super();
}
|
|
|
|
/**
 * Creates a PAPFormattedDiskPage from a 512 byte array. Delegates to the
 * four-argument constructor; {@code fcMin} is not used.
 *
 * @param documentStream the stream containing the page
 * @param dataStream passed through to every created {@link PAPX}
 * @param offset offset of the page inside {@code documentStream}
 * @param fcMin ignored
 * @param tpt translator between byte offsets and character indexes
 * @deprecated Use
 *             {@link #PAPFormattedDiskPage(byte[], byte[], int, CharIndexTranslator)}
 *             instead
 */
@Deprecated
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
        int offset, int fcMin, TextPieceTable tpt )
{
    this( documentStream, dataStream, offset, tpt );
}
|
|
|
|
/**
|
|
* Creates a PAPFormattedDiskPage from a 512 byte array
|
|
*/
|
|
public PAPFormattedDiskPage( byte[] documentStream, byte[] dataStream,
|
|
int offset, CharIndexTranslator translator )
|
|
{
|
|
super( documentStream, offset );
|
|
for ( int x = 0; x < _crun; x++ )
|
|
{
|
|
int bytesStartAt = getStart( x );
|
|
int bytesEndAt = getEnd( x );
|
|
|
|
// int charStartAt = translator.getCharIndex( bytesStartAt );
|
|
// int charEndAt = translator.getCharIndex( bytesEndAt, charStartAt
|
|
// );
|
|
// PAPX papx = new PAPX( charStartAt, charEndAt, getGrpprl( x ),
|
|
// getParagraphHeight( x ), dataStream );
|
|
// _papxList.add( papx );
|
|
|
|
for ( int[] range : translator.getCharIndexRanges( bytesStartAt,
|
|
bytesEndAt ) )
|
|
{
|
|
PAPX papx = new PAPX( range[0], range[1], getGrpprl( x ),
|
|
getParagraphHeight( x ), dataStream );
|
|
_papxList.add( papx );
|
|
}
|
|
}
|
|
_fkp = null;
|
|
}
|
|
|
|
/**
|
|
* Fills the queue for writing.
|
|
*
|
|
* @param filler a List of PAPXs
|
|
*/
|
|
public void fill(List<PAPX> filler)
|
|
{
|
|
_papxList.addAll(filler);
|
|
}
|
|
|
|
/**
|
|
* Used when writing out a Word docunment. This method is part of a sequence
|
|
* that is necessary because there is no easy and efficient way to
|
|
* determine the number PAPX's that will fit into one FKP. THe sequence is
|
|
* as follows:
|
|
*
|
|
* fill()
|
|
* toByteArray()
|
|
* getOverflow()
|
|
*
|
|
* @return The remaining PAPXs that didn't fit into this FKP.
|
|
*/
|
|
ArrayList<PAPX> getOverflow()
|
|
{
|
|
return _overFlow;
|
|
}
|
|
|
|
/**
|
|
* Gets the PAPX at index.
|
|
* @param index The index to get the PAPX for.
|
|
* @return The PAPX at index.
|
|
*/
|
|
public PAPX getPAPX(int index)
|
|
{
|
|
return _papxList.get(index);
|
|
}
|
|
|
|
public List<PAPX> getPAPXs()
|
|
{
|
|
return Collections.unmodifiableList( _papxList );
|
|
}
|
|
|
|
/**
|
|
* Gets the papx grpprl for the paragraph at index in this fkp.
|
|
*
|
|
* @param index The index of the papx to get.
|
|
* @return a papx grpprl.
|
|
*/
|
|
protected byte[] getGrpprl(int index)
|
|
{
|
|
int papxOffset = 2 * LittleEndian.getUByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
|
|
int size = 2 * LittleEndian.getUByte(_fkp, _offset + papxOffset);
|
|
if(size == 0)
|
|
{
|
|
size = 2 * LittleEndian.getUByte(_fkp, _offset + ++papxOffset);
|
|
}
|
|
else
|
|
{
|
|
size--;
|
|
}
|
|
|
|
byte[] papx = new byte[size];
|
|
System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size);
|
|
return papx;
|
|
}
|
|
|
|
/**
|
|
* Creates a byte array representation of this data structure. Suitable for
|
|
* writing to a Word document.
|
|
*
|
|
* @param dataStream required if PAPX is too big to fit in FKP
|
|
*
|
|
* @return A byte array representing this data structure.
|
|
* @throws IOException
|
|
* if an I/O error occurs.
|
|
*/
|
|
protected byte[] toByteArray( HWPFOutputStream dataStream,
|
|
CharIndexTranslator translator ) throws IOException
|
|
{
|
|
byte[] buf = new byte[512];
|
|
int size = _papxList.size();
|
|
int grpprlOffset = 0;
|
|
int bxOffset = 0;
|
|
int fcOffset = 0;
|
|
byte[] lastGrpprl = new byte[0];
|
|
|
|
// total size is currently the size of one FC
|
|
int totalSize = FC_SIZE;
|
|
|
|
int index = 0;
|
|
for ( ; index < size; index++ )
|
|
{
|
|
byte[] grpprl = _papxList.get( index ).getGrpprl();
|
|
int grpprlLength = grpprl.length;
|
|
|
|
// is grpprl huge?
|
|
if ( grpprlLength > 488 )
|
|
{
|
|
grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
|
|
}
|
|
|
|
// check to see if we have enough room for an FC, a BX, and the
|
|
// grpprl
|
|
// and the 1 byte size of the grpprl.
|
|
int addition = 0;
|
|
if ( !Arrays.equals( grpprl, lastGrpprl ) )
|
|
{
|
|
addition = ( FC_SIZE + BX_SIZE + grpprlLength + 1 );
|
|
}
|
|
else
|
|
{
|
|
addition = ( FC_SIZE + BX_SIZE );
|
|
}
|
|
|
|
totalSize += addition;
|
|
|
|
// if size is uneven we will have to add one so the first grpprl
|
|
// falls
|
|
// on a word boundary
|
|
if ( totalSize > 511 + ( index % 2 ) )
|
|
{
|
|
totalSize -= addition;
|
|
break;
|
|
}
|
|
|
|
// grpprls must fall on word boundaries
|
|
if ( grpprlLength % 2 > 0 )
|
|
{
|
|
totalSize += 1;
|
|
}
|
|
else
|
|
{
|
|
totalSize += 2;
|
|
}
|
|
lastGrpprl = grpprl;
|
|
}
|
|
|
|
// see if we couldn't fit some
|
|
if ( index != size )
|
|
{
|
|
_overFlow = new ArrayList<PAPX>();
|
|
_overFlow.addAll( _papxList.subList( index, size ) );
|
|
}
|
|
|
|
// index should equal number of papxs that will be in this fkp now.
|
|
buf[511] = (byte) index;
|
|
|
|
bxOffset = ( FC_SIZE * index ) + FC_SIZE;
|
|
grpprlOffset = 511;
|
|
|
|
PAPX papx = null;
|
|
lastGrpprl = new byte[0];
|
|
for ( int x = 0; x < index; x++ )
|
|
{
|
|
papx = _papxList.get( x );
|
|
byte[] phe = papx.getParagraphHeight().toByteArray();
|
|
byte[] grpprl = papx.getGrpprl();
|
|
|
|
// is grpprl huge?
|
|
if ( grpprl.length > 488 )
|
|
{
|
|
// if so do we have storage at getHugeGrpprlOffset()
|
|
// int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
|
|
// if ( hugeGrpprlOffset == -1 ) // then we have no storage...
|
|
// {
|
|
// throw new UnsupportedOperationException(
|
|
// "This Paragraph has no dataStream storage." );
|
|
// }
|
|
// we have some storage...
|
|
|
|
// get the size of the existing storage
|
|
// int maxHugeGrpprlSize = LittleEndian.getUShort( dataStream,
|
|
// hugeGrpprlOffset );
|
|
//
|
|
// if ( maxHugeGrpprlSize < grpprl.length - 2 )
|
|
// { // grpprl.length-2 because we don't store the istd
|
|
// throw new UnsupportedOperationException(
|
|
// "This Paragraph's dataStream storage is too small." );
|
|
// }
|
|
|
|
// store grpprl at hugeGrpprlOffset
|
|
// grpprl.length-2 because we don't store the istd
|
|
// System.arraycopy( grpprl, 2, dataStream, hugeGrpprlOffset +
|
|
// 2,
|
|
// grpprl.length - 2 );
|
|
// LittleEndian.putUShort( dataStream, hugeGrpprlOffset,
|
|
// grpprl.length - 2 );
|
|
|
|
byte[] hugePapx = new byte[grpprl.length - 2];
|
|
System.arraycopy( grpprl, 2, hugePapx, 0, grpprl.length - 2 );
|
|
int dataStreamOffset = dataStream.getOffset();
|
|
dataStream.write( hugePapx );
|
|
|
|
// grpprl = grpprl containing only a sprmPHugePapx2
|
|
int istd = LittleEndian.getUShort( grpprl, 0 );
|
|
|
|
grpprl = new byte[8];
|
|
LittleEndian.putUShort( grpprl, 0, istd );
|
|
LittleEndian.putUShort( grpprl, 2, 0x6646 ); // sprmPHugePapx2
|
|
LittleEndian.putInt( grpprl, 4, dataStreamOffset );
|
|
}
|
|
|
|
boolean same = Arrays.equals( lastGrpprl, grpprl );
|
|
if ( !same )
|
|
{
|
|
grpprlOffset -= ( grpprl.length + ( 2 - grpprl.length % 2 ) );
|
|
grpprlOffset -= ( grpprlOffset % 2 );
|
|
}
|
|
// LittleEndian.putInt( buf, fcOffset, papx.getStartBytes() );
|
|
LittleEndian.putInt( buf, fcOffset,
|
|
translator.getByteIndex( papx.getStart() ) );
|
|
buf[bxOffset] = (byte) ( grpprlOffset / 2 );
|
|
System.arraycopy( phe, 0, buf, bxOffset + 1, phe.length );
|
|
|
|
/*
|
|
* refer to the section on PAPX in the spec. Places a size on the
|
|
* front of the PAPX. Has to do with how the grpprl stays on word
|
|
* boundaries.
|
|
*/
|
|
if ( !same )
|
|
{
|
|
int copyOffset = grpprlOffset;
|
|
if ( ( grpprl.length % 2 ) > 0 )
|
|
{
|
|
buf[copyOffset++] = (byte) ( ( grpprl.length + 1 ) / 2 );
|
|
}
|
|
else
|
|
{
|
|
buf[++copyOffset] = (byte) ( ( grpprl.length ) / 2 );
|
|
copyOffset++;
|
|
}
|
|
System.arraycopy( grpprl, 0, buf, copyOffset, grpprl.length );
|
|
lastGrpprl = grpprl;
|
|
}
|
|
|
|
bxOffset += BX_SIZE;
|
|
fcOffset += FC_SIZE;
|
|
|
|
}
|
|
|
|
// LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
|
|
LittleEndian.putInt( buf, fcOffset,
|
|
translator.getByteIndex( papx.getEnd() ) );
|
|
return buf;
|
|
}
|
|
|
|
/**
|
|
* Used to get the ParagraphHeight of a PAPX at a particular index.
|
|
* @param index
|
|
* @return The ParagraphHeight
|
|
*/
|
|
private ParagraphHeight getParagraphHeight(int index)
|
|
{
|
|
int pheOffset = _offset + 1 + (((_crun + 1) * 4) + (index * 13));
|
|
|
|
ParagraphHeight phe = new ParagraphHeight(_fkp, pheOffset);
|
|
|
|
return phe;
|
|
}
|
|
}
|