work in progress

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353156 13f79535-47bb-0310-9956-ffa450edef68
Ryan Ackley 2003-06-24 11:32:30 +00:00
parent ba62292d38
commit dd56e7c40b
13 changed files with 385 additions and 81 deletions

View File

@@ -57,15 +57,20 @@
package org.apache.poi.hwpf.model.hdftypes;
import java.util.ArrayList;
import java.io.OutputStream;
import java.io.IOException;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.io.*;
public class CHPBinTable
{
ArrayList _textRuns;
ArrayList _textRuns = new ArrayList();
public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size)
public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin)
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
@@ -78,7 +83,7 @@ public class CHPBinTable
int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream,
pageOffset);
pageOffset, fcMin);
int fkpSize = cfkp.size();
@@ -89,4 +94,63 @@
}
}
public void writeTo(HWPFFileSystem sys, int fcMin)
throws IOException
{
HWPFOutputStream docStream = sys.getStream("WordDocument");
OutputStream tableStream = sys.getStream("1Table");
PlexOfCps binTable = new PlexOfCps(4);
// each FKP must start on a 512 byte page.
int docOffset = docStream.getOffset();
int mod = docOffset % POIFSConstants.BIG_BLOCK_SIZE;
if (mod != 0)
{
byte[] padding = new byte[POIFSConstants.BIG_BLOCK_SIZE - mod];
docStream.write(padding);
}
// get the page number for the first fkp
docOffset = docStream.getOffset();
int pageNum = docOffset/POIFSConstants.BIG_BLOCK_SIZE;
// get the ending fc
int endingFc = ((PropertyNode)_textRuns.get(_textRuns.size() - 1)).getEnd();
endingFc += fcMin;
ArrayList overflow = _textRuns;
byte[] intHolder = new byte[4];
do
{
PropertyNode startingProp = (PropertyNode)overflow.get(0);
int start = startingProp.getStart() + fcMin;
CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage();
cfkp.fill(overflow);
byte[] bufFkp = cfkp.toByteArray(fcMin);
docStream.write(bufFkp);
overflow = cfkp.getOverflow();
int end = endingFc;
if (overflow != null)
{
end = ((PropertyNode)overflow.get(0)).getEnd();
}
LittleEndian.putInt(intHolder, pageNum++);
binTable.addProperty(new PropertyNode(start, end, intHolder));
}
while (overflow != null);
tableStream.write(binTable.toByteArray());
}
}
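The write path above pads the WordDocument stream so every FKP starts on a 512-byte block. A minimal, standalone sketch of that alignment arithmetic (the class and method names here are illustrative, not POI API):

class FkpAlignmentSketch
{
    static final int BIG_BLOCK_SIZE = 512;

    // Padding needed so the next FKP starts on a 512-byte boundary,
    // mirroring the mod / padding logic in writeTo above.
    static int paddingFor(int docOffset)
    {
        int mod = docOffset % BIG_BLOCK_SIZE;
        return (mod == 0) ? 0 : BIG_BLOCK_SIZE - mod;
    }

    public static void main(String[] args)
    {
        System.out.println(paddingFor(1300)); // 236, so the next FKP starts at 1536
        System.out.println(paddingFor(1024)); // 0, already block-aligned
    }
}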

View File

@@ -53,6 +53,7 @@
*/
package org.apache.poi.hwpf.model.hdftypes;
import java.util.List;
import java.util.ArrayList;
import org.apache.poi.util.LittleEndian;
@@ -80,19 +81,24 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
private ArrayList _chpxList = new ArrayList();
private ArrayList _overFlow;
public CHPFormattedDiskPage()
{
}
/**
* This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array
* read from a Word file).
*
* @param fkp The 512 byte array to read data from
*/
public CHPFormattedDiskPage(byte[] documentStream, int offset)
public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin)
{
super(documentStream, offset);
for (int x = 0; x < _crun; x++)
{
_chpxList.add(new CHPX(getStart(x), getEnd(x), getGrpprl(x)));
_chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x)));
}
}
@@ -101,6 +107,16 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
return (CHPX)_chpxList.get(index);
}
public void fill(List filler)
{
_chpxList.addAll(filler);
}
public ArrayList getOverflow()
{
return _overFlow;
}
/**
* Gets the chpx for the character run at index in this fkp.
*
@@ -117,15 +133,15 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
return new byte[0];
}
int size = LittleEndian.getUnsignedByte(_fkp, chpxOffset);
int size = LittleEndian.getUnsignedByte(_fkp, _offset + chpxOffset);
byte[] chpx = new byte[size];
System.arraycopy(_fkp, ++chpxOffset, chpx, 0, size);
System.arraycopy(_fkp, _offset + ++chpxOffset, chpx, 0, size);
return chpx;
}
protected byte[] toByteArray()
protected byte[] toByteArray(int fcMin)
{
byte[] buf = new byte[512];
int size = _chpxList.size();
@@ -177,7 +193,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
chpx = (CHPX)_chpxList.get(x);
byte[] grpprl = chpx.getGrpprl();
LittleEndian.putInt(buf, fcOffset, chpx.getStart());
LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin);
buf[offsetOffset] = (byte)(grpprlOffset/2);
System.arraycopy(grpprl, 0, buf, grpprlOffset, grpprl.length);
@@ -185,8 +201,8 @@ public class CHPFormattedDiskPage extends FormattedDiskPage
offsetOffset += 1;
fcOffset += FC_SIZE;
}
// put the last papx's end in
LittleEndian.putInt(buf, fcOffset, chpx.getEnd());
// put the last chpx's end in
LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin);
return buf;
}

View File

@@ -59,23 +59,27 @@ package org.apache.poi.hwpf.model.hdftypes;
import java.io.IOException;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.io.*;
public class ComplexFileTable
{
private static final byte GRPPRL_TYPE = 1;
private static final byte TEXT_PIECE_TABLE_TYPE = 2;
TextPieceTable _tpt;
public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset) throws IOException
public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin) throws IOException
{
//skips through the prms before we reach the piece table. These contain data
//for actual fast saved files
while (tableStream[offset] == 1)
while (tableStream[offset] == GRPPRL_TYPE)
{
offset++;
int skip = LittleEndian.getShort(tableStream, offset);
offset += 2 + skip;
offset += LittleEndian.SHORT_SIZE + skip;
}
if(tableStream[offset] != 2)
if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE)
{
throw new IOException("The text piece table is corrupted");
}
@@ -83,8 +87,25 @@ public class ComplexFileTable
{
int pieceTableSize = LittleEndian.getInt(tableStream, ++offset);
offset += LittleEndian.INT_SIZE;
_tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize);
_tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin);
}
}
public void writeTo(HWPFFileSystem sys)
throws IOException
{
HWPFOutputStream docStream = sys.getStream("WordDocument");
HWPFOutputStream tableStream = sys.getStream("1Table");
tableStream.write(TEXT_PIECE_TABLE_TYPE);
byte[] table = _tpt.writeTo(docStream);
byte[] numHolder = new byte[LittleEndian.INT_SIZE];
LittleEndian.putInt(numHolder, table.length);
tableStream.write(numHolder);
tableStream.write(table);
}
}
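As written, the complex file table entry in the 1Table stream is a single type byte (TEXT_PIECE_TABLE_TYPE, value 2), a four-byte little-endian length, and then the piece table bytes returned by TextPieceTable.writeTo. A small sketch of that framing using only java.io (illustrative, not the HWPF API):

import java.io.ByteArrayOutputStream;
import java.io.IOException;

class ComplexTableFramingSketch
{
    // Layout: [type byte = 2][int32 length, little-endian][piece table bytes]
    static byte[] frame(byte[] pieceTable) throws IOException
    {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        out.write(2);                     // TEXT_PIECE_TABLE_TYPE
        int len = pieceTable.length;
        out.write(len & 0xff);            // four-byte little-endian length
        out.write((len >> 8) & 0xff);
        out.write((len >> 16) & 0xff);
        out.write((len >> 24) & 0xff);
        out.write(pieceTable);            // the piece table itself
        return out.toByteArray();
    }
}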

View File

@@ -58,35 +58,20 @@
package org.apache.poi.hwpf.model.hdftypes;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.model.hdftypes.definitions.DOPAbstractType;
/**
* Comment me
*
* @author Ryan Ackley
*/
public class DocumentProperties implements HDFType
public class DocumentProperties extends DOPAbstractType
{
public boolean _fFacingPages;
public int _fpc;
public int _epc;
public int _rncFtn;
public int _nFtn;
public int _rncEdn;
public int _nEdn;
public DocumentProperties(byte[] dopArray)
public DocumentProperties(byte[] tableStream, int offset)
{
_fFacingPages = (dopArray[0] & 0x1) > 0;
_fpc = (dopArray[0] & 0x60) >> 5;
short num = LittleEndian.getShort(dopArray, 2);
_rncFtn = (num & 0x3);
_nFtn = (short)(num & 0xfffc) >> 2;
num = LittleEndian.getShort(dopArray, 52);
_rncEdn = num & 0x3;
_nEdn = (short)(num & 0xfffc) >> 2;
num = LittleEndian.getShort(dopArray, 54);
_epc = num & 0x3;
super.fillFields(tableStream, (short)0, offset);
}
}

View File

@@ -82,6 +82,12 @@ public abstract class FormattedDiskPage
protected int _crun;
protected int _offset;
public FormattedDiskPage()
{
}
/**
* Uses a 512-byte array to create a FKP
*/
@@ -89,6 +95,7 @@ public abstract class FormattedDiskPage
{
_crun = LittleEndian.getUnsignedByte(documentStream, offset + 511);
_fkp = documentStream;
_offset = offset;
}
/**
* Used to get a text offset corresponding to a grpprl in this fkp.

View File

@@ -56,15 +56,20 @@
package org.apache.poi.hwpf.model.hdftypes;
import java.util.ArrayList;
import java.io.IOException;
import java.io.OutputStream;
import org.apache.poi.hwpf.model.io.*;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
public class PAPBinTable
{
ArrayList _paragraphs;
ArrayList _paragraphs = new ArrayList();
public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size)
public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin)
{
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
@@ -77,7 +82,7 @@ public class PAPBinTable
int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
pageOffset);
pageOffset, fcMin);
int fkpSize = pfkp.size();
@@ -88,5 +93,61 @@ public class PAPBinTable
}
}
public void writeTo(HWPFFileSystem sys, int fcMin)
throws IOException
{
HWPFOutputStream docStream = sys.getStream("WordDocument");
OutputStream tableStream = sys.getStream("1Table");
PlexOfCps binTable = new PlexOfCps(4);
// each FKP must start on a 512 byte page.
int docOffset = docStream.getOffset();
int mod = docOffset % POIFSConstants.BIG_BLOCK_SIZE;
if (mod != 0)
{
byte[] padding = new byte[POIFSConstants.BIG_BLOCK_SIZE - mod];
docStream.write(padding);
}
// get the page number for the first fkp
docOffset = docStream.getOffset();
int pageNum = docOffset/POIFSConstants.BIG_BLOCK_SIZE;
// get the ending fc
int endingFc = ((PropertyNode)_paragraphs.get(_paragraphs.size() - 1)).getEnd();
endingFc += fcMin;
ArrayList overflow = _paragraphs;
byte[] intHolder = new byte[4];
do
{
PropertyNode startingProp = (PropertyNode)overflow.get(0);
int start = startingProp.getStart() + fcMin;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage();
pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(fcMin);
docStream.write(bufFkp);
overflow = pfkp.getOverflow();
int end = endingFc;
if (overflow != null)
{
end = ((PropertyNode)overflow.get(0)).getEnd();
}
LittleEndian.putInt(intHolder, pageNum++);
binTable.addProperty(new PropertyNode(start, end, intHolder));
}
while (overflow != null);
tableStream.write(binTable.toByteArray());
}
}

View File

@@ -57,6 +57,7 @@ import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
import java.util.ArrayList;
import java.util.List;
/**
* Represents a PAP FKP. The style properties for paragraph and character runs
@@ -83,22 +84,38 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
private ArrayList _papxList = new ArrayList();
private ArrayList _overFlow;
public PAPFormattedDiskPage()
{
}
/**
* Creates a PAPFormattedDiskPage from a 512 byte array
*
* @param fkp a 512 byte array.
*/
public PAPFormattedDiskPage(byte[] documentStream, int offset)
public PAPFormattedDiskPage(byte[] documentStream, int offset, int fcMin)
{
super(documentStream, offset);
for (int x = 0; x < _crun; x++)
{
_papxList.add(new PAPX(getStart(x), getEnd(x), getGrpprl(x), getParagraphHeight(x)));
_papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x)));
}
_fkp = null;
}
public void fill(List filler)
{
_papxList.addAll(filler);
}
public ArrayList getOverflow()
{
return _overFlow;
}
public PAPX getPAPX(int index)
{
return (PAPX)_papxList.get(index);
@@ -113,10 +130,10 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
protected byte[] getGrpprl(int index)
{
int papxOffset = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE)));
int size = 2 * LittleEndian.getUnsignedByte(_fkp, papxOffset);
int size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + papxOffset);
if(size == 0)
{
size = 2 * LittleEndian.getUnsignedByte(_fkp, ++papxOffset);
size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + ++papxOffset);
}
else
{
@@ -124,11 +141,11 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
}
byte[] papx = new byte[size];
System.arraycopy(_fkp, ++papxOffset, papx, 0, size);
System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size);
return papx;
}
protected byte[] toByteArray()
protected byte[] toByteArray(int fcMin)
{
byte[] buf = new byte[512];
int size = _papxList.size();
@@ -144,8 +161,9 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
{
int grpprlLength = ((PAPX)_papxList.get(index)).getGrpprl().length;
// check to see if we have enough room for an FC, a BX, and the grpprl.
totalSize += (FC_SIZE + BX_SIZE + grpprlLength);
// check to see if we have enough room for an FC, a BX, and the grpprl
// and the 1 byte size of the grpprl.
totalSize += (FC_SIZE + BX_SIZE + grpprlLength + 1);
// if size is uneven we will have to add one so the first grpprl falls
// on a word boundary
if (totalSize > 511 + (index % 2))
@@ -159,6 +177,10 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
{
totalSize += 1;
}
else
{
totalSize += 2;
}
}
// see if we couldn't fit some
@@ -181,17 +203,29 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
byte[] phe = papx.getParagraphHeight().toByteArray();
byte[] grpprl = papx.getGrpprl();
LittleEndian.putInt(buf, fcOffset, papx.getStart());
LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin);
buf[bxOffset] = (byte)(grpprlOffset/2);
System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
// refer to the section on PAPX in the spec. Places a size on the front
// of the PAPX. Has to do with how the grpprl stays on word
// boundaries.
if ((grpprl.length % 2) > 0)
{
buf[grpprlOffset++] = (byte)((grpprl.length + 1)/2);
}
else
{
buf[++grpprlOffset] = (byte)((grpprl.length)/2);
grpprlOffset++;
}
System.arraycopy(grpprl, 0, buf, grpprlOffset, grpprl.length);
grpprlOffset += grpprl.length + (grpprl.length % 2);
bxOffset += BX_SIZE;
fcOffset += FC_SIZE;
}
// put the last papx's end in
LittleEndian.putInt(buf, fcOffset, papx.getEnd());
LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin);
return buf;
}
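The size-byte handling above is subtle: an odd-length grpprl is preceded directly by a byte holding (length + 1) / 2, while an even-length grpprl gets a pad byte first and a byte holding length / 2, which is how the grpprl stays word-aligned per the comment in toByteArray. A one-method sketch of just that value (illustrative only):

class PapxSizeByteSketch
{
    // Size byte written in front of a PAPX grpprl, following the
    // odd / even branches in toByteArray above.
    static int sizeByte(int grpprlLength)
    {
        if ((grpprlLength % 2) > 0)
        {
            return (grpprlLength + 1) / 2;   // odd: size byte sits right before the grpprl
        }
        return grpprlLength / 2;             // even: a pad byte precedes the size byte
    }
}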

View File

@@ -63,9 +63,9 @@ public class PieceDescriptor
{
short descriptor;
BitField fNoParaLast = new BitField(0x01);
BitField fPaphNil = new BitField(0x02);
BitField fCopied = new BitField(0x04);
private static BitField fNoParaLast = new BitField(0x01);
private static BitField fPaphNil = new BitField(0x02);
private static BitField fCopied = new BitField(0x04);
int fc;
short prm;
boolean unicode;
@@ -98,6 +98,11 @@ public class PieceDescriptor
return fc;
}
public void setFilePosition(int pos)
{
fc = pos;
}
public boolean isUnicode()
{
return unicode;

View File

@@ -64,8 +64,8 @@ package org.apache.poi.hwpf.model.hdftypes;
public class PropertyNode implements Comparable
{
private byte[] _buf;
private int _fcStart;
private int _fcEnd;
private int _cpStart;
private int _cpEnd;
/**
* @param fcStart The start of the text for this property.
@@ -74,8 +74,8 @@ public class PropertyNode implements Comparable
*/
public PropertyNode(int fcStart, int fcEnd, byte[] buf)
{
_fcStart = fcStart;
_fcEnd = fcEnd;
_cpStart = fcStart;
_cpEnd = fcEnd;
_buf = buf;
}
/**
@@ -83,14 +83,14 @@ public class PropertyNode implements Comparable
*/
public int getStart()
{
return _fcStart;
return _cpStart;
}
/**
* @return The offset of the end of this property's text.
*/
public int getEnd()
{
return _fcEnd;
return _cpEnd;
}
/**
* @return This property's property in compressed form.
@@ -104,12 +104,12 @@ public class PropertyNode implements Comparable
*/
public int compareTo(Object o)
{
int fcEnd = ((PropertyNode)o).getEnd();
if(_fcEnd == fcEnd)
int cpEnd = ((PropertyNode)o).getEnd();
if(_cpEnd == cpEnd)
{
return 0;
}
else if(_fcEnd < fcEnd)
else if(_cpEnd < cpEnd)
{
return -1;
}
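Because PropertyNode orders itself by ending character position, callers can sort runs and paragraphs with a plain Collections.sort. A short usage sketch with hypothetical ranges:

import java.util.ArrayList;
import java.util.Collections;
import org.apache.poi.hwpf.model.hdftypes.PropertyNode;

class PropertyNodeSortSketch
{
    public static void main(String[] args)
    {
        ArrayList nodes = new ArrayList();
        nodes.add(new PropertyNode(10, 20, new byte[0]));
        nodes.add(new PropertyNode(0, 10, new byte[0]));
        Collections.sort(nodes);   // ordered by ending CP: [0,10) before [10,20)
    }
}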

View File

@@ -67,10 +67,10 @@ public class SectionTable
{
private static final int SED_SIZE = 12;
private ArrayList _sections;
private ArrayList _sections = new ArrayList();
public SectionTable(byte[] documentStream, byte[] tableStream, int offset,
int size)
int size, int fcMin)
{
PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
@@ -86,7 +86,7 @@ public class SectionTable
// check for the optimization
if (fileOffset == 0xffffffff)
{
_sections.add(new SEPX(sed, node.getStart(), node.getEnd(), new byte[0]));
_sections.add(new SEPX(sed, node.getStart() - fcMin, node.getEnd() - fcMin, new byte[0]));
}
else
{
@@ -95,12 +95,12 @@ public class SectionTable
byte[] buf = new byte[sepxSize];
fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
_sections.add(new SEPX(sed, node.getStart(), node.getEnd(), buf));
_sections.add(new SEPX(sed, node.getStart() - fcMin, node.getEnd() - fcMin, buf));
}
}
}
public void writeTo(HWPFFileSystem sys)
public void writeTo(HWPFFileSystem sys, int fcMin)
throws IOException
{
HWPFOutputStream docStream = sys.getStream("WordDocument");
@@ -128,7 +128,7 @@ public class SectionTable
sed.setFc(offset);
// add the section descriptor bytes to the PlexOfCps.
PropertyNode property = new PropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray());
PropertyNode property = new PropertyNode(sepx.getStart() - fcMin, sepx.getEnd() - fcMin, sed.toByteArray());
plex.addProperty(property);
offset = docStream.getOffset();

View File

@@ -202,4 +202,67 @@ public class StyleDescription implements HDFType
// {
// _chp = chp;
// }
public byte[] toByteArray()
{
// size equals 8 bytes for known variables plus 2 bytes for name length plus
// name length * 2 plus 2 bytes for null plus upx's preceded by length
int size = 8 + 2 + ((_name.length() + 1) * 2);
//only worry about papx and chpx for upxs
if(_styleTypeCode.getValue(_infoShort2) == PARAGRAPH_STYLE)
{
size += _papx.length + 2 + (_papx.length % 2);
size += _chpx.length + 2;
}
else if (_styleTypeCode.getValue(_infoShort2) == CHARACTER_STYLE)
{
size += _chpx.length + 2;
}
byte[] buf = new byte[size];
int offset = 0;
LittleEndian.putShort(buf, offset, _infoShort);
offset += LittleEndian.SHORT_SIZE;
LittleEndian.putShort(buf, offset, _infoShort2);
offset += LittleEndian.SHORT_SIZE;
LittleEndian.putShort(buf, offset, _bchUpe);
offset += LittleEndian.SHORT_SIZE;
LittleEndian.putShort(buf, offset, _infoShort3);
offset += LittleEndian.SHORT_SIZE;
char[] letters = _name.toCharArray();
LittleEndian.putShort(buf, offset, (short)letters.length);
offset += LittleEndian.SHORT_SIZE;
for (int x = 0; x < letters.length; x++)
{
LittleEndian.putShort(buf, offset, (short)letters[x]);
offset += LittleEndian.SHORT_SIZE;
}
// get past the null delimiter for the name.
offset += LittleEndian.SHORT_SIZE;
//only worry about papx and chpx for upxs
if(_styleTypeCode.getValue(_infoShort2) == PARAGRAPH_STYLE)
{
LittleEndian.putShort(buf, offset, (short)_papx.length);
offset += LittleEndian.SHORT_SIZE;
System.arraycopy(_papx, 0, buf, offset, _papx.length);
offset += _papx.length + (_papx.length % 2);
LittleEndian.putShort(buf, offset, (short)_chpx.length);
offset += LittleEndian.SHORT_SIZE;
System.arraycopy(_chpx, 0, buf, offset, _chpx.length);
offset += _chpx.length;
}
else if (_styleTypeCode.getValue(_infoShort2) == CHARACTER_STYLE)
{
LittleEndian.putShort(buf, offset, (short)_chpx.length);
offset += LittleEndian.SHORT_SIZE;
System.arraycopy(_chpx, 0, buf, offset, _chpx.length);
offset += _chpx.length;
}
return buf;
}
}
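As a worked instance of the size formula at the top of toByteArray (the style name and grpprl lengths below are hypothetical):

class StyleDescriptionSizeSketch
{
    public static void main(String[] args)
    {
        // base: 8 known bytes + 2-byte name length + (name chars + null terminator) * 2
        int base = 8 + 2 + (("Normal".length() + 1) * 2);   // 8 + 2 + 14 = 24
        // a paragraph style with a 7-byte papx and a 4-byte chpx:
        int size = base
                 + 7 + 2 + (7 % 2)                          // papx + 2-byte length + pad = 34
                 + 4 + 2;                                   // chpx + 2-byte length = 40
        System.out.println(size);                           // prints 40
    }
}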

View File

@@ -66,6 +66,7 @@ public class TextPiece extends PropertyNode implements Comparable
{
private boolean _usesUnicode;
private int _length;
private PieceDescriptor _pd;
/**
* @param start Offset in main document stream.
@@ -73,12 +74,11 @@
* does not necessarily refer to 1 byte.
* @param unicode true if this text is unicode.
*/
public TextPiece(int start, int length, boolean unicode)
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd)
{
super(start, start + length, null);
_usesUnicode = unicode;
_length = length;
super(start, end, text);
_usesUnicode = pd.isUnicode();
_length = end - start;
}
/**
* @return If this text piece uses unicode
@@ -87,4 +87,9 @@
{
return _usesUnicode;
}
public PieceDescriptor getPieceDescriptor()
{
return _pd;
}
}

View File

@@ -18,7 +18,7 @@
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
@@ -58,20 +58,29 @@ package org.apache.poi.hwpf.model.hdftypes;
import java.io.UnsupportedEncodingException;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.poi.hwpf.model.io.*;
public class TextPieceTable
{
StringBuffer _text = new StringBuffer();
ArrayList _textPieces = new ArrayList();
public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size)
public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset,
int size, int fcMin)
throws UnsupportedEncodingException
{
// get our plex of PieceDescriptors
PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size, PieceDescriptor.getSizeInBytes());
int multiple = 2;
int length = pieceTable.length();
PieceDescriptor[] pieces = new PieceDescriptor[length];
// iterate through piece descriptors raw bytes and create
// PieceDescriptor objects
for (int x = 0; x < length; x++)
{
PropertyNode node = pieceTable.getProperty(x);
@@ -81,28 +90,62 @@
{
multiple = 1;
}
}
// using the PieceDescriptors, build our list of TextPieces.
for (int x = 0; x < pieces.length; x++)
{
int start = pieces[x].getFilePosition();
PropertyNode node = pieceTable.getProperty(x);
int textSize = node.getEnd() - node.getStart();
int nodeStart = node.getStart() - fcMin;
int nodeEnd = node.getEnd() - fcMin;
int textSize = nodeEnd - nodeStart;
boolean unicode = pieces[x].isUnicode();
String toStr = null;
if (unicode)
{
toStr = new String(documentStream, start, length * multiple, "UTF-16LE");
byte[] buf = new byte[textSize * multiple];
System.arraycopy(documentStream, start, buf, 0, textSize * multiple);
_textPieces.add(new TextPiece(nodeStart, nodeEnd, buf, pieces[x]));
}
else
{
toStr = new String(documentStream, start, length, "ISO-8859-1");
byte[] buf = new byte[textSize];
System.arraycopy(documentStream, start, buf, 0, textSize);
_textPieces.add(new TextPiece(nodeStart, nodeEnd, buf, pieces[x]));
}
_text.append(toStr);
}
}
public byte[] writeTo(HWPFOutputStream docStream)
throws IOException
{
PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes());
int fcMin = docStream.getOffset();
int size = _textPieces.size();
for (int x = 0; x < size; x++)
{
TextPiece next = (TextPiece)_textPieces.get(x);
PieceDescriptor pd = next.getPieceDescriptor();
// set the text piece position to the current docStream offset.
pd.setFilePosition(docStream.getOffset());
// write the text to the docstream and save the piece descriptor to the
// plex which will be written later to the tableStream.
docStream.write(next.getBuf());
textPlex.addProperty(new PropertyNode(next.getStart() + fcMin,
next.getEnd() + fcMin,
pd.toByteArray()));
}
return textPlex.toByteArray();
}
}
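The fcMin handling is symmetric across this commit: constructors subtract fcMin so pieces and runs are kept zero-based in memory, and writeTo re-bases them by the WordDocument offset where the text is written back out. A tiny sketch of that mapping (names are illustrative):

class FcMinSketch
{
    // Read path: position in the original file to an internal, zero-based position.
    static int toInternal(int filePosition, int fcMin)
    {
        return filePosition - fcMin;
    }

    // Write path: internal position to a position in the new WordDocument stream.
    static int toFile(int internalPosition, int newFcMin)
    {
        return internalPosition + newFcMin;
    }

    public static void main(String[] args)
    {
        // e.g. fcMin = 1024: file range [1024, 1536) maps to internal [0, 512)
        System.out.println(toInternal(1024, 1024));   // 0
        System.out.println(toFile(512, 1024));        // 1536
    }
}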