From dd56e7c40b5f90a35fa6b6759efb4e0c2425b40f Mon Sep 17 00:00:00 2001 From: Said Ryan Ackley Date: Tue, 24 Jun 2003 11:32:30 +0000 Subject: [PATCH] work in progress git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353156 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hwpf/model/hdftypes/CHPBinTable.java | 70 ++++++++++++++++++- .../model/hdftypes/CHPFormattedDiskPage.java | 32 ++++++--- .../hwpf/model/hdftypes/ComplexFileTable.java | 31 ++++++-- .../model/hdftypes/DocumentProperties.java | 25 ++----- .../model/hdftypes/FormattedDiskPage.java | 7 ++ .../poi/hwpf/model/hdftypes/PAPBinTable.java | 67 +++++++++++++++++- .../model/hdftypes/PAPFormattedDiskPage.java | 56 ++++++++++++--- .../hwpf/model/hdftypes/PieceDescriptor.java | 11 ++- .../poi/hwpf/model/hdftypes/PropertyNode.java | 18 ++--- .../poi/hwpf/model/hdftypes/SectionTable.java | 12 ++-- .../hwpf/model/hdftypes/StyleDescription.java | 63 +++++++++++++++++ .../poi/hwpf/model/hdftypes/TextPiece.java | 15 ++-- .../hwpf/model/hdftypes/TextPieceTable.java | 59 +++++++++++++--- 13 files changed, 385 insertions(+), 81 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPBinTable.java index e8a8f6f50..1e199a9b6 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPBinTable.java @@ -57,15 +57,20 @@ package org.apache.poi.hwpf.model.hdftypes; import java.util.ArrayList; +import java.io.OutputStream; +import java.io.IOException; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; +import org.apache.poi.hwpf.model.io.*; + public class CHPBinTable { - ArrayList _textRuns; + ArrayList _textRuns = new ArrayList(); - public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size) + public CHPBinTable(byte[] documentStream, byte[] tableStream, int offset, + int size, int fcMin) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); @@ -78,7 +83,7 @@ public class CHPBinTable int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(documentStream, - pageOffset); + pageOffset, fcMin); int fkpSize = cfkp.size(); @@ -89,4 +94,63 @@ public class CHPBinTable } } + public void writeTo(HWPFFileSystem sys, int fcMin) + throws IOException + { + + HWPFOutputStream docStream = sys.getStream("WordDocument"); + OutputStream tableStream = sys.getStream("1Table"); + + PlexOfCps binTable = new PlexOfCps(4); + + // each FKP must start on a 512 byte page. + int docOffset = docStream.getOffset(); + int mod = docOffset % POIFSConstants.BIG_BLOCK_SIZE; + if (mod != 0) + { + byte[] padding = new byte[POIFSConstants.BIG_BLOCK_SIZE - mod]; + docStream.write(padding); + } + + // get the page number for the first fkp + docOffset = docStream.getOffset(); + int pageNum = docOffset/POIFSConstants.BIG_BLOCK_SIZE; + + // get the ending fc + int endingFc = ((PropertyNode)_textRuns.get(_textRuns.size() - 1)).getEnd(); + endingFc += fcMin; + + + ArrayList overflow = _textRuns; + byte[] intHolder = new byte[4]; + do + { + PropertyNode startingProp = (PropertyNode)overflow.get(0); + int start = startingProp.getStart() + fcMin; + + CHPFormattedDiskPage cfkp = new CHPFormattedDiskPage(); + cfkp.fill(overflow); + + byte[] bufFkp = cfkp.toByteArray(fcMin); + docStream.write(bufFkp); + overflow = cfkp.getOverflow(); + + int end = endingFc; + if (overflow != null) + { + end = ((PropertyNode)overflow.get(0)).getEnd(); + } + + LittleEndian.putInt(intHolder, pageNum++); + binTable.addProperty(new PropertyNode(start, end, intHolder)); + + } + while (overflow != null); + tableStream.write(binTable.toByteArray()); + } + + + + + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPFormattedDiskPage.java index 2b2754119..e34067655 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/CHPFormattedDiskPage.java @@ -53,6 +53,7 @@ */ package org.apache.poi.hwpf.model.hdftypes; +import java.util.List; import java.util.ArrayList; import org.apache.poi.util.LittleEndian; @@ -80,19 +81,24 @@ public class CHPFormattedDiskPage extends FormattedDiskPage private ArrayList _chpxList = new ArrayList(); private ArrayList _overFlow; + + public CHPFormattedDiskPage() + { + } + /** * This constructs a CHPFormattedDiskPage from a raw fkp (512 byte array * read from a Word file). * * @param fkp The 512 byte array to read data from */ - public CHPFormattedDiskPage(byte[] documentStream, int offset) + public CHPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) { super(documentStream, offset); for (int x = 0; x < _crun; x++) { - _chpxList.add(new CHPX(getStart(x), getEnd(x), getGrpprl(x))); + _chpxList.add(new CHPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x))); } } @@ -101,6 +107,16 @@ public class CHPFormattedDiskPage extends FormattedDiskPage return (CHPX)_chpxList.get(index); } + public void fill(List filler) + { + _chpxList.addAll(filler); + } + + public ArrayList getOverflow() + { + return _overFlow; + } + /** * Gets the chpx for the character run at index in this fkp. * @@ -117,15 +133,15 @@ public class CHPFormattedDiskPage extends FormattedDiskPage return new byte[0]; } - int size = LittleEndian.getUnsignedByte(_fkp, chpxOffset); + int size = LittleEndian.getUnsignedByte(_fkp, _offset + chpxOffset); byte[] chpx = new byte[size]; - System.arraycopy(_fkp, ++chpxOffset, chpx, 0, size); + System.arraycopy(_fkp, _offset + ++chpxOffset, chpx, 0, size); return chpx; } - protected byte[] toByteArray() + protected byte[] toByteArray(int fcMin) { byte[] buf = new byte[512]; int size = _chpxList.size(); @@ -177,7 +193,7 @@ public class CHPFormattedDiskPage extends FormattedDiskPage chpx = (CHPX)_chpxList.get(x); byte[] grpprl = chpx.getGrpprl(); - LittleEndian.putInt(buf, fcOffset, chpx.getStart()); + LittleEndian.putInt(buf, fcOffset, chpx.getStart() + fcMin); buf[offsetOffset] = (byte)(grpprlOffset/2); System.arraycopy(grpprl, 0, buf, grpprlOffset, grpprl.length); @@ -185,8 +201,8 @@ public class CHPFormattedDiskPage extends FormattedDiskPage offsetOffset += 1; fcOffset += FC_SIZE; } - // put the last papx's end in - LittleEndian.putInt(buf, fcOffset, chpx.getEnd()); + // put the last chpx's end in + LittleEndian.putInt(buf, fcOffset, chpx.getEnd() + fcMin); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/ComplexFileTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/ComplexFileTable.java index 644fd54cf..63a87f62a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/ComplexFileTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/ComplexFileTable.java @@ -59,23 +59,27 @@ package org.apache.poi.hwpf.model.hdftypes; import java.io.IOException; import org.apache.poi.util.LittleEndian; +import org.apache.poi.hwpf.model.io.*; public class ComplexFileTable { + private static final byte GRPPRL_TYPE = 1; + private static final byte TEXT_PIECE_TABLE_TYPE = 2; + TextPieceTable _tpt; - public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset) throws IOException + public ComplexFileTable(byte[] documentStream, byte[] tableStream, int offset, int fcMin) throws IOException { //skips through the prms before we reach the piece table. These contain data //for actual fast saved files - while (tableStream[offset] == 1) + while (tableStream[offset] == GRPPRL_TYPE) { offset++; int skip = LittleEndian.getShort(tableStream, offset); - offset += 2 + skip; + offset += LittleEndian.SHORT_SIZE + skip; } - if(tableStream[offset] != 2) + if(tableStream[offset] != TEXT_PIECE_TABLE_TYPE) { throw new IOException("The text piece table is corrupted"); } @@ -83,8 +87,25 @@ public class ComplexFileTable { int pieceTableSize = LittleEndian.getInt(tableStream, ++offset); offset += LittleEndian.INT_SIZE; - _tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize); + _tpt = new TextPieceTable(documentStream, tableStream, offset, pieceTableSize, fcMin); } } + public void writeTo(HWPFFileSystem sys) + throws IOException + { + HWPFOutputStream docStream = sys.getStream("WordDocument"); + HWPFOutputStream tableStream = sys.getStream("1Table"); + + tableStream.write(TEXT_PIECE_TABLE_TYPE); + + byte[] table = _tpt.writeTo(docStream); + + byte[] numHolder = new byte[LittleEndian.INT_SIZE]; + LittleEndian.putInt(numHolder, table.length); + tableStream.write(numHolder); + tableStream.write(table); + + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/DocumentProperties.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/DocumentProperties.java index 0e3d48a5e..dc196417b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/DocumentProperties.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/DocumentProperties.java @@ -58,35 +58,20 @@ package org.apache.poi.hwpf.model.hdftypes; import org.apache.poi.util.LittleEndian; +import org.apache.poi.hwpf.model.hdftypes.definitions.DOPAbstractType; + /** * Comment me * * @author Ryan Ackley */ -public class DocumentProperties implements HDFType +public class DocumentProperties extends DOPAbstractType { - public boolean _fFacingPages; - public int _fpc; - public int _epc; - public int _rncFtn; - public int _nFtn; - public int _rncEdn; - public int _nEdn; - public DocumentProperties(byte[] dopArray) + public DocumentProperties(byte[] tableStream, int offset) { - _fFacingPages = (dopArray[0] & 0x1) > 0; - _fpc = (dopArray[0] & 0x60) >> 5; - - short num = LittleEndian.getShort(dopArray, 2); - _rncFtn = (num & 0x3); - _nFtn = (short)(num & 0xfffc) >> 2; - num = LittleEndian.getShort(dopArray, 52); - _rncEdn = num & 0x3; - _nEdn = (short)(num & 0xfffc) >> 2; - num = LittleEndian.getShort(dopArray, 54); - _epc = num & 0x3; + super.fillFields(tableStream, (short)0, offset); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/FormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/FormattedDiskPage.java index 3971cc9d2..e2e892323 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/FormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/FormattedDiskPage.java @@ -82,6 +82,12 @@ public abstract class FormattedDiskPage protected int _crun; protected int _offset; + + public FormattedDiskPage() + { + + } + /** * Uses a 512-byte array to create a FKP */ @@ -89,6 +95,7 @@ public abstract class FormattedDiskPage { _crun = LittleEndian.getUnsignedByte(documentStream, offset + 511); _fkp = documentStream; + _offset = offset; } /** * Used to get a text offset corresponding to a grpprl in this fkp. diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPBinTable.java index 2e7e50890..882266237 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPBinTable.java @@ -56,15 +56,20 @@ package org.apache.poi.hwpf.model.hdftypes; import java.util.ArrayList; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.poi.hwpf.model.io.*; import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; public class PAPBinTable { - ArrayList _paragraphs; + ArrayList _paragraphs = new ArrayList(); - public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset, int size) + public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset, + int size, int fcMin) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); @@ -77,7 +82,7 @@ public class PAPBinTable int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, - pageOffset); + pageOffset, fcMin); int fkpSize = pfkp.size(); @@ -88,5 +93,61 @@ public class PAPBinTable } } + public void writeTo(HWPFFileSystem sys, int fcMin) + throws IOException + { + + HWPFOutputStream docStream = sys.getStream("WordDocument"); + OutputStream tableStream = sys.getStream("1Table"); + + PlexOfCps binTable = new PlexOfCps(4); + + // each FKP must start on a 512 byte page. + int docOffset = docStream.getOffset(); + int mod = docOffset % POIFSConstants.BIG_BLOCK_SIZE; + if (mod != 0) + { + byte[] padding = new byte[POIFSConstants.BIG_BLOCK_SIZE - mod]; + docStream.write(padding); + } + + // get the page number for the first fkp + docOffset = docStream.getOffset(); + int pageNum = docOffset/POIFSConstants.BIG_BLOCK_SIZE; + + // get the ending fc + int endingFc = ((PropertyNode)_paragraphs.get(_paragraphs.size() - 1)).getEnd(); + endingFc += fcMin; + + + ArrayList overflow = _paragraphs; + byte[] intHolder = new byte[4]; + do + { + PropertyNode startingProp = (PropertyNode)overflow.get(0); + int start = startingProp.getStart() + fcMin; + + PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(); + pfkp.fill(overflow); + + byte[] bufFkp = pfkp.toByteArray(fcMin); + docStream.write(bufFkp); + overflow = pfkp.getOverflow(); + + int end = endingFc; + if (overflow != null) + { + end = ((PropertyNode)overflow.get(0)).getEnd(); + } + + LittleEndian.putInt(intHolder, pageNum++); + binTable.addProperty(new PropertyNode(start, end, intHolder)); + + } + while (overflow != null); + tableStream.write(binTable.toByteArray()); + } + + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPFormattedDiskPage.java index e054ddbd5..f23234a43 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PAPFormattedDiskPage.java @@ -57,6 +57,7 @@ import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.util.LittleEndian; import java.util.ArrayList; +import java.util.List; /** * Represents a PAP FKP. The style properties for paragraph and character runs @@ -83,22 +84,38 @@ public class PAPFormattedDiskPage extends FormattedDiskPage private ArrayList _papxList = new ArrayList(); private ArrayList _overFlow; + + public PAPFormattedDiskPage() + { + + } + /** * Creates a PAPFormattedDiskPage from a 512 byte array * * @param fkp a 512 byte array. */ - public PAPFormattedDiskPage(byte[] documentStream, int offset) + public PAPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) { super(documentStream, offset); for (int x = 0; x < _crun; x++) { - _papxList.add(new PAPX(getStart(x), getEnd(x), getGrpprl(x), getParagraphHeight(x))); + _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x))); } _fkp = null; } + public void fill(List filler) + { + _papxList.addAll(filler); + } + + public ArrayList getOverflow() + { + return _overFlow; + } + public PAPX getPAPX(int index) { return (PAPX)_papxList.get(index); @@ -113,10 +130,10 @@ public class PAPFormattedDiskPage extends FormattedDiskPage protected byte[] getGrpprl(int index) { int papxOffset = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + (((_crun + 1) * FC_SIZE) + (index * BX_SIZE))); - int size = 2 * LittleEndian.getUnsignedByte(_fkp, papxOffset); + int size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + papxOffset); if(size == 0) { - size = 2 * LittleEndian.getUnsignedByte(_fkp, ++papxOffset); + size = 2 * LittleEndian.getUnsignedByte(_fkp, _offset + ++papxOffset); } else { @@ -124,11 +141,11 @@ public class PAPFormattedDiskPage extends FormattedDiskPage } byte[] papx = new byte[size]; - System.arraycopy(_fkp, ++papxOffset, papx, 0, size); + System.arraycopy(_fkp, _offset + ++papxOffset, papx, 0, size); return papx; } - protected byte[] toByteArray() + protected byte[] toByteArray(int fcMin) { byte[] buf = new byte[512]; int size = _papxList.size(); @@ -144,8 +161,9 @@ public class PAPFormattedDiskPage extends FormattedDiskPage { int grpprlLength = ((PAPX)_papxList.get(index)).getGrpprl().length; - // check to see if we have enough room for an FC, a BX, and the grpprl. - totalSize += (FC_SIZE + BX_SIZE + grpprlLength); + // check to see if we have enough room for an FC, a BX, and the grpprl + // and the 1 byte size of the grpprl. + totalSize += (FC_SIZE + BX_SIZE + grpprlLength + 1); // if size is uneven we will have to add one so the first grpprl falls // on a word boundary if (totalSize > 511 + (index % 2)) @@ -159,6 +177,10 @@ public class PAPFormattedDiskPage extends FormattedDiskPage { totalSize += 1; } + else + { + totalSize += 2; + } } // see if we couldn't fit some @@ -181,17 +203,29 @@ public class PAPFormattedDiskPage extends FormattedDiskPage byte[] phe = papx.getParagraphHeight().toByteArray(); byte[] grpprl = papx.getGrpprl(); - LittleEndian.putInt(buf, fcOffset, papx.getStart()); + LittleEndian.putInt(buf, fcOffset, papx.getStart() + fcMin); buf[bxOffset] = (byte)(grpprlOffset/2); System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length); + + // refer to the section on PAPX in the spec. Places a size on the front + // of the PAPX. Has to do with how the grpprl stays on word + // boundaries. + if ((grpprl.length % 2) > 0) + { + buf[grpprlOffset++] = (byte)((grpprl.length + 1)/2); + } + else + { + buf[++grpprlOffset] = (byte)((grpprl.length)/2); + grpprlOffset++; + } System.arraycopy(grpprl, 0, buf, grpprlOffset, grpprl.length); - grpprlOffset += grpprl.length + (grpprl.length % 2); bxOffset += BX_SIZE; fcOffset += FC_SIZE; } // put the last papx's end in - LittleEndian.putInt(buf, fcOffset, papx.getEnd()); + LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PieceDescriptor.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PieceDescriptor.java index a18005030..582692324 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PieceDescriptor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PieceDescriptor.java @@ -63,9 +63,9 @@ public class PieceDescriptor { short descriptor; - BitField fNoParaLast = new BitField(0x01); - BitField fPaphNil = new BitField(0x02); - BitField fCopied = new BitField(0x04); + private static BitField fNoParaLast = new BitField(0x01); + private static BitField fPaphNil = new BitField(0x02); + private static BitField fCopied = new BitField(0x04); int fc; short prm; boolean unicode; @@ -98,6 +98,11 @@ public class PieceDescriptor return fc; } + public void setFilePosition(int pos) + { + fc = pos; + } + public boolean isUnicode() { return unicode; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PropertyNode.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PropertyNode.java index a68222577..c7255e6b6 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PropertyNode.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/PropertyNode.java @@ -64,8 +64,8 @@ package org.apache.poi.hwpf.model.hdftypes; public class PropertyNode implements Comparable { private byte[] _buf; - private int _fcStart; - private int _fcEnd; + private int _cpStart; + private int _cpEnd; /** * @param fcStart The start of the text for this property. @@ -74,8 +74,8 @@ public class PropertyNode implements Comparable */ public PropertyNode(int fcStart, int fcEnd, byte[] buf) { - _fcStart = fcStart; - _fcEnd = fcEnd; + _cpStart = fcStart; + _cpEnd = fcEnd; _buf = buf; } /** @@ -83,14 +83,14 @@ public class PropertyNode implements Comparable */ public int getStart() { - return _fcStart; + return _cpStart; } /** * @retrun The offset of the end of this property's text. */ public int getEnd() { - return _fcEnd; + return _cpEnd; } /** * @return This property's property in copmpressed form. @@ -104,12 +104,12 @@ public class PropertyNode implements Comparable */ public int compareTo(Object o) { - int fcEnd = ((PropertyNode)o).getEnd(); - if(_fcEnd == fcEnd) + int cpEnd = ((PropertyNode)o).getEnd(); + if(_cpEnd == cpEnd) { return 0; } - else if(_fcEnd < fcEnd) + else if(_cpEnd < cpEnd) { return -1; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/SectionTable.java index f2e8108a8..9ea315730 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/SectionTable.java @@ -67,10 +67,10 @@ public class SectionTable { private static final int SED_SIZE = 12; - private ArrayList _sections; + private ArrayList _sections = new ArrayList(); public SectionTable(byte[] documentStream, byte[] tableStream, int offset, - int size) + int size, int fcMin) { PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE); @@ -86,7 +86,7 @@ public class SectionTable // check for the optimization if (fileOffset == 0xffffffff) { - _sections.add(new SEPX(sed, node.getStart(), node.getEnd(), new byte[0])); + _sections.add(new SEPX(sed, node.getStart() - fcMin, node.getEnd() - fcMin, new byte[0])); } else { @@ -95,12 +95,12 @@ public class SectionTable byte[] buf = new byte[sepxSize]; fileOffset += LittleEndian.SHORT_SIZE; System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); - _sections.add(new SEPX(sed, node.getStart(), node.getEnd(), buf)); + _sections.add(new SEPX(sed, node.getStart() - fcMin, node.getEnd() - fcMin, buf)); } } } - public void writeTo(HWPFFileSystem sys) + public void writeTo(HWPFFileSystem sys, int fcMin) throws IOException { HWPFOutputStream docStream = sys.getStream("WordDocument"); @@ -128,7 +128,7 @@ public class SectionTable sed.setFc(offset); // add the section descriptor bytes to the PlexOfCps. - PropertyNode property = new PropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray()); + PropertyNode property = new PropertyNode(sepx.getStart() - fcMin, sepx.getEnd() - fcMin, sed.toByteArray()); plex.addProperty(property); offset = docStream.getOffset(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/StyleDescription.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/StyleDescription.java index 4fc63703a..263e80441 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/StyleDescription.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/StyleDescription.java @@ -202,4 +202,67 @@ public class StyleDescription implements HDFType // { // _chp = chp; // } + public byte[] toByteArray() + { + // size equals 8 bytes for known variables plus 2 bytes for name length plus + // name length * 2 plus 2 bytes for null plus upx's preceded by length + int size = 8 + 2 + ((_name.length() + 1) * 2); + + //only worry about papx and chpx for upxs + if(_styleTypeCode.getValue(_infoShort2) == PARAGRAPH_STYLE) + { + size += _papx.length + 2 + (_papx.length % 2); + size += _chpx.length + 2; + } + else if (_styleTypeCode.getValue(_infoShort2) == CHARACTER_STYLE) + { + size += _chpx.length + 2; + } + + byte[] buf = new byte[size]; + + int offset = 0; + LittleEndian.putShort(buf, offset, _infoShort); + offset += LittleEndian.SHORT_SIZE; + LittleEndian.putShort(buf, offset, _infoShort2); + offset += LittleEndian.SHORT_SIZE; + LittleEndian.putShort(buf, offset, _bchUpe); + offset += LittleEndian.SHORT_SIZE; + LittleEndian.putShort(buf, offset, _infoShort3); + offset += LittleEndian.SHORT_SIZE; + + char[] letters = _name.toCharArray(); + LittleEndian.putShort(buf, offset, (short)letters.length); + offset += LittleEndian.SHORT_SIZE; + for (int x = 0; x < letters.length; x++) + { + LittleEndian.putShort(buf, offset, (short)letters[x]); + offset += LittleEndian.SHORT_SIZE; + } + // get past the null delimiter for the name. + offset += LittleEndian.SHORT_SIZE; + + //only worry about papx and chpx for upxs + if(_styleTypeCode.getValue(_infoShort2) == PARAGRAPH_STYLE) + { + LittleEndian.putShort(buf, offset, (short)_papx.length); + offset += LittleEndian.SHORT_SIZE; + System.arraycopy(_papx, 0, buf, offset, _papx.length); + offset += _papx.length + (_papx.length % 2); + + LittleEndian.putShort(buf, offset, (short)_chpx.length); + offset += LittleEndian.SHORT_SIZE; + System.arraycopy(_chpx, 0, buf, offset, _chpx.length); + offset += _chpx.length; + } + else if (_styleTypeCode.getValue(_infoShort2) == CHARACTER_STYLE) + { + LittleEndian.putShort(buf, offset, (short)_chpx.length); + offset += LittleEndian.SHORT_SIZE; + System.arraycopy(_chpx, 0, buf, offset, _chpx.length); + offset += _chpx.length; + } + + return buf; + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPiece.java index de840221c..91308c2b1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPiece.java @@ -66,6 +66,7 @@ public class TextPiece extends PropertyNode implements Comparable { private boolean _usesUnicode; private int _length; + private PieceDescriptor _pd; /** * @param start Offset in main document stream. @@ -73,12 +74,11 @@ public class TextPiece extends PropertyNode implements Comparable * does not necessarily refer to 1 byte. * @param unicode true if this text is unicode. */ - public TextPiece(int start, int length, boolean unicode) + public TextPiece(int start, int end, byte[] text, PieceDescriptor pd) { - super(start, start + length, null); - _usesUnicode = unicode; - _length = length; - + super(start, end, text); + _usesUnicode = pd.isUnicode(); + _length = end - start; } /** * @return If this text piece uses unicode @@ -87,4 +87,9 @@ public class TextPiece extends PropertyNode implements Comparable { return _usesUnicode; } + + public PieceDescriptor getPieceDescriptor() + { + return _pd; + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPieceTable.java index 97373bb5b..257df115f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/hdftypes/TextPieceTable.java @@ -18,7 +18,7 @@ * distribution. * * 3. The end-user documentation included with the redistribution, - * if any, must include the following acknowledgment: + * if any, must include the following acknowledgment: * "This product includes software developed by the * Apache Software Foundation (http://www.apache.org/)." * Alternately, this acknowledgment may appear in the software itself, @@ -58,20 +58,29 @@ package org.apache.poi.hwpf.model.hdftypes; import java.io.UnsupportedEncodingException; +import java.io.IOException; + +import java.util.ArrayList; + +import org.apache.poi.hwpf.model.io.*; public class TextPieceTable { - StringBuffer _text = new StringBuffer(); + ArrayList _textPieces = new ArrayList(); - public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, int size) + public TextPieceTable(byte[] documentStream, byte[] tableStream, int offset, + int size, int fcMin) throws UnsupportedEncodingException { + // get our plex of PieceDescriptors PlexOfCps pieceTable = new PlexOfCps(tableStream, offset, size, PieceDescriptor.getSizeInBytes()); int multiple = 2; int length = pieceTable.length(); PieceDescriptor[] pieces = new PieceDescriptor[length]; + // iterate through piece descriptors raw bytes and create + // PieceDescriptor objects for (int x = 0; x < length; x++) { PropertyNode node = pieceTable.getProperty(x); @@ -81,28 +90,62 @@ public class TextPieceTable { multiple = 1; } - } + // using the PieceDescriptors, build our list of TextPieces. for (int x = 0; x < pieces.length; x++) { int start = pieces[x].getFilePosition(); PropertyNode node = pieceTable.getProperty(x); - int textSize = node.getEnd() - node.getStart(); + int nodeStart = node.getStart() - fcMin; + int nodeEnd = node.getEnd() - fcMin; + int textSize = nodeEnd - nodeStart; boolean unicode = pieces[x].isUnicode(); String toStr = null; if (unicode) { - toStr = new String(documentStream, start, length * multiple, "UTF-16LE"); + byte[] buf = new byte[textSize * multiple]; + System.arraycopy(documentStream, start, buf, 0, textSize * multiple); + _textPieces.add(new TextPiece(nodeStart, nodeEnd, buf, pieces[x])); } else { - toStr = new String(documentStream, start, length, "ISO-8859-1"); + byte[] buf = new byte[textSize]; + System.arraycopy(documentStream, start, buf, 0, textSize); + _textPieces.add(new TextPiece(nodeStart, nodeEnd, buf, pieces[x])); } - _text.append(toStr); } + } + + public byte[] writeTo(HWPFOutputStream docStream) + throws IOException + { + + PlexOfCps textPlex = new PlexOfCps(PieceDescriptor.getSizeInBytes()); + int fcMin = docStream.getOffset(); + + int size = _textPieces.size(); + for (int x = 0; x < size; x++) + { + TextPiece next = (TextPiece)_textPieces.get(x); + PieceDescriptor pd = next.getPieceDescriptor(); + + // set the text piece position to the current docStream offset. + pd.setFilePosition(docStream.getOffset()); + + // write the text to the docstream and save the piece descriptor to the + // plex which will be written later to the tableStream. + docStream.write(next.getBuf()); + textPlex.addProperty(new PropertyNode(next.getStart() + fcMin, + next.getEnd() + fcMin, + pd.toByteArray())); + + } + + return textPlex.toByteArray(); } + }