diff --git a/src/scratchpad/examples/src/org/apache/poi/hwpf/Word2Forrest.java b/src/scratchpad/examples/src/org/apache/poi/hwpf/Word2Forrest.java new file mode 100644 index 000000000..0b9f443f8 --- /dev/null +++ b/src/scratchpad/examples/src/org/apache/poi/hwpf/Word2Forrest.java @@ -0,0 +1,209 @@ +package org.apache.poi.hwpf; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.usermodel.*; +import org.apache.poi.hwpf.model.*; + +import java.io.*; + +public class Word2Forrest +{ + Writer _out; + HWPFDocument _doc; + + public Word2Forrest(HWPFDocument doc, OutputStream stream) + throws IOException, UnsupportedEncodingException + { + OutputStreamWriter out = new OutputStreamWriter (stream, "UTF-8"); + _out = out; + _doc = doc; + + init (); + openDocument (); + openBody (); + + Range r = doc.getRange (); + StyleSheet styleSheet = doc.getStyleSheet (); + + int sectionLevel = 0; + int lenParagraph = r.numParagraphs (); + boolean inCode = false; + for (int x = 0; x < lenParagraph; x++) + { + Paragraph p = r.getParagraph (x); + String text = p.text (); + if (text.trim ().length () == 0) + { + continue; + } + StyleDescription paragraphStyle = styleSheet.getStyleDescription (p. 
+ getStyleIndex ()); + String styleName = paragraphStyle.getName(); + if (styleName.startsWith ("Heading")) + { + if (inCode) + { + closeSource(); + inCode = false; + } + + int headerLevel = Integer.parseInt (styleName.substring (8)); + if (headerLevel > sectionLevel) + { + openSection (); + } + else + { + for (int y = 0; y < (sectionLevel - headerLevel) + 1; y++) + { + closeSection (); + } + openSection (); + } + sectionLevel = headerLevel; + openTitle (); + writePlainText (text); + closeTitle (); + } + else + { + int cruns = p.numCharacterRuns (); + CharacterRun run = p.getCharacterRun (0); + int ftcAscii = run.getFontNameIndex (); + String fontName = doc.getFontTable().getMainFont(ftcAscii); + if (fontName.startsWith ("Courier")) + { + if (!inCode) + { + openSource (); + inCode = true; + } + writePlainText (p.text()); + } + else + { + if (inCode) + { + inCode = false; + closeSource(); + } + openParagraph(); + writePlainText(p.text()); + closeParagraph(); + } + } + } + for (int x = 0; x < sectionLevel; x++) + { + closeSection(); + } + closeBody(); + closeDocument(); + _out.flush(); + + } + + public void init () + throws IOException + { + _out.write ("\r\n"); + _out.write ("\r\n"); + } + + public void openDocument () + throws IOException + { + _out.write ("\r\n"); + } + public void closeDocument () + throws IOException + { + _out.write ("\r\n"); + } + + + public void openBody () + throws IOException + { + _out.write ("\r\n"); + } + + public void closeBody () + throws IOException + { + _out.write ("\r\n"); + } + + + public void openSection () + throws IOException + { + _out.write ("
"); + + } + + public void closeSection () + throws IOException + { + _out.write ("
"); + + } + + public void openTitle () + throws IOException + { + _out.write (""); + } + + public void closeTitle () + throws IOException + { + _out.write (""); + } + + public void writePlainText (String text) + throws IOException + { + _out.write (text); + } + + public void openParagraph () + throws IOException + { + _out.write ("

"); + } + + public void closeParagraph () + throws IOException + { + _out.write ("

"); + } + + public void openSource () + throws IOException + { + _out.write (""); + } + + + public static void main(String[] args) + { + try + { + OutputStream out = new FileOutputStream("c:\\test.xml"); + + new Word2Forrest(new HWPFDocument(new FileInputStream(args[0])), out); + out.close(); + } + catch (Throwable t) + { + t.printStackTrace(); + } + + } +} \ No newline at end of file diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 8af48b793..da505430c 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -96,6 +96,9 @@ public class HWPFDocument /** table stream buffer*/ private byte[] _tableStream; + /** data stream buffer*/ + private byte[] _dataStream; + /** Document wide Properties*/ private DocumentProperties _dop; @@ -157,6 +160,19 @@ public class HWPFDocument _fib.fillVariableFields(_mainStream, _tableStream); + // read in the data stream. 
+ try + { + DocumentEntry dataProps = + (DocumentEntry) _filesystem.getRoot().getEntry("Data"); + _dataStream = new byte[dataProps.getSize()]; + _filesystem.createDocumentInputStream("Data").read(_dataStream); + } + catch(java.io.FileNotFoundException e) + { + _dataStream = new byte[0]; + } + // get the start of text in the main stream int fcMin = _fib.getFcMin(); @@ -165,7 +181,7 @@ public class HWPFDocument _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); _tpt = _cft.getTextPieceTable(); _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin); - _pbt = new PAPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin); + _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin); // Word XP puts in a zero filled buffer in front of the text and it screws // up my system for offsets. This is an adjustment. @@ -187,8 +203,13 @@ public class HWPFDocument _lt = new ListTables(_tableStream, _fib.getFcPlcfLst(), _fib.getFcPlfLfo()); } - int x = 0; - + PlexOfCps plc = new PlexOfCps(_tableStream, _fib.getFcPlcffldMom(), _fib.getLcbPlcffldMom(), 2); + for (int x = 0; x < plc.length(); x++) + { + GenericPropertyNode node = plc.getProperty(x); + byte[] fld = node.getBytes(); + int breakpoint = 0; + } } public StyleSheet getStyleSheet() @@ -198,7 +219,11 @@ public class HWPFDocument public Range getRange() { - return new Range(0, _fib.getFcMac() - _fib.getFcMin(), this); + // hack to get the ending cp of the document, Have to revisit this. 
+ java.util.List paragraphs = _pbt.getParagraphs(); + PAPX p = (PAPX)paragraphs.get(paragraphs.size() - 1); + + return new Range(0, p.getEnd(), this); } public ListTables getListTables() @@ -219,6 +244,7 @@ public class HWPFDocument HWPFFileSystem docSys = new HWPFFileSystem(); HWPFOutputStream mainStream = docSys.getStream("WordDocument"); HWPFOutputStream tableStream = docSys.getStream("1Table"); + HWPFOutputStream dataStream = docSys.getStream("Data"); int tableOffset = 0; // FileInformationBlock fib = (FileInformationBlock)_fib.clone(); @@ -302,7 +328,7 @@ public class HWPFDocument _fib.setFcMac(fcMac); _fib.setCbMac(mainStream.getOffset()); - // make sure that the table and doc stream use big blocks. + // make sure that the table, doc and data streams use big blocks. byte[] mainBuf = mainStream.toByteArray(); if (mainBuf.length < 4096) { @@ -323,11 +349,20 @@ public class HWPFDocument tableBuf = tempBuf; } + byte[] dataBuf = _dataStream; + if (dataBuf.length < 4096) + { + byte[] tempBuf = new byte[4096]; + System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length); + dataBuf = tempBuf; + } + // spit out the Word document. POIFSFileSystem pfs = new POIFSFileSystem(); pfs.createDocument(new ByteArrayInputStream(mainBuf), "WordDocument"); pfs.createDocument(new ByteArrayInputStream(tableBuf), "1Table"); + pfs.createDocument(new ByteArrayInputStream(dataBuf), "Data"); pfs.writeFilesystem(out); } @@ -352,6 +387,11 @@ public class HWPFDocument return _cft.getTextPieceTable(); } + public byte[] getDataStream() + { + return _dataStream; + } + public int registerList(List list) { if (_lt == null) @@ -361,6 +401,11 @@ public class HWPFDocument return _lt.addList(list.getListData(), list.getOverride()); } + public FontTable getFontTable() + { + return _ft; + } + /** * Takes two arguments, 1) name of the Word file to read in 2) location to * write it out at. 
@@ -373,15 +418,8 @@ public class HWPFDocument { HWPFDocument doc = new HWPFDocument(new FileInputStream(args[0])); Range r = doc.getRange(); - TableIterator ti = new TableIterator(r); - while (ti.hasNext()) - { - Table t = ti.next(); - int x = 0; - } - - - + String str = r.text(); + int x = 0; // CharacterRun run = new CharacterRun(); // run.setBold(true); // run.setItalic(true); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/Ffn.java b/src/scratchpad/src/org/apache/poi/hwpf/model/Ffn.java index 2793daeeb..e4ca163c1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/Ffn.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/Ffn.java @@ -67,121 +67,119 @@ import java.util.Arrays; */ public class Ffn { - private int field_1_cbFfnM1;//total length of FFN - 1. - private byte field_2; - private static BitField _prq = new BitField(0x0003);// pitch request - private static BitField _fTrueType = new BitField(0x0004);// when 1, font is a TrueType font - private static BitField _ff = new BitField(0x0070); - private short field_3_wWeight;// base weight of font - private byte field_4_chs;// character set identifier - private byte field_5_ixchSzAlt; // index into ffn.szFfn to the name of + private int _cbFfnM1;//total length of FFN - 1. + private byte _info; + private static BitField _prq = new BitField(0x0003);// pitch request + private static BitField _fTrueType = new BitField(0x0004);// when 1, font is a TrueType font + private static BitField _ff = new BitField(0x0070); + private short _wWeight;// base weight of font + private byte _chs;// character set identifier + private byte _ixchSzAlt; // index into ffn.szFfn to the name of // the alternate font - private byte [] field_6_panose = new byte[10];//???? - private byte [] field_7_fontSig = new byte[24];//???? + private byte [] _panose = new byte[10];//???? + private byte [] _fontSig = new byte[24];//???? 
// zero terminated string that records name of font, cuurently not // supporting Extended chars - private char [] field_8_xszFfn; + private char [] _xszFfn; // extra facilitator members - private int xszFfnLength; + private int _xszFfnLength; public Ffn(byte[] buf, int offset) { - int offsetTmp = offset; + int offsetTmp = offset; - field_1_cbFfnM1 = LittleEndian.getUnsignedByte(buf,offset); + _cbFfnM1 = LittleEndian.getUnsignedByte(buf,offset); offset += LittleEndian.BYTE_SIZE; - field_2 = buf[offset]; + _info = buf[offset]; offset += LittleEndian.BYTE_SIZE; - field_3_wWeight = LittleEndian.getShort(buf, offset); + _wWeight = LittleEndian.getShort(buf, offset); offset += LittleEndian.SHORT_SIZE; - field_4_chs = buf[offset]; + _chs = buf[offset]; offset += LittleEndian.BYTE_SIZE; - field_5_ixchSzAlt = buf[offset]; + _ixchSzAlt = buf[offset]; offset += LittleEndian.BYTE_SIZE; // read panose and fs so we can write them back out. - System.arraycopy(buf, offset, field_6_panose, 0, field_6_panose.length); - offset += field_6_panose.length; - System.arraycopy(buf, offset, field_7_fontSig, 0, field_7_fontSig.length); - offset += field_7_fontSig.length; + System.arraycopy(buf, offset, _panose, 0, _panose.length); + offset += _panose.length; + System.arraycopy(buf, offset, _fontSig, 0, _fontSig.length); + offset += _fontSig.length; - offsetTmp = offset - offsetTmp; - xszFfnLength = this.getSize() - offsetTmp; - field_8_xszFfn = new char[xszFfnLength]; + offsetTmp = offset - offsetTmp; + _xszFfnLength = this.getSize() - offsetTmp; + _xszFfn = new char[_xszFfnLength]; - for(int i = 0; i < xszFfnLength; i++) + for(int i = 0; i < _xszFfnLength; i++) { - field_8_xszFfn[i] = (char)LittleEndian.getUnsignedByte(buf, offset); - offset += LittleEndian.BYTE_SIZE; + _xszFfn[i] = (char)LittleEndian.getShort(buf, offset); + offset += LittleEndian.SHORT_SIZE; } } - public int getField_1_cbFfnM1() + public int get_cbFfnM1() { - return field_1_cbFfnM1; + return _cbFfnM1; } - public byte 
getField_2() + public short getWeight() { - return field_2; + return _wWeight; } - public short getField_3_wWeight() + public byte getChs() { - return field_3_wWeight; + return _chs; } - public byte getField_4_chs() + public byte [] getPanose() { - return field_4_chs; + return _panose; } - public byte getField_5_ixchSzAlt() + public byte [] getFontSig() { - return field_5_ixchSzAlt; - } - - public byte [] getField_6_panose() - { - return field_6_panose; - } - - public byte [] getField_7_fontSig() - { - return field_7_fontSig; - } - - public char [] getField_8_xszFfn() - { - return field_8_xszFfn; + return _fontSig; } public int getSize() { - return (field_1_cbFfnM1 + 1); + return (_cbFfnM1 + 1); } - public char [] getMainFontName() + public String getMainFontName() { - char [] temp = new char[field_5_ixchSzAlt]; - System.arraycopy(field_8_xszFfn,0,temp,0,temp.length); - return temp; + int index = 0; + for (;index < _xszFfnLength; index++) + { + if (_xszFfn[index] == '\0') + { + break; + } + } + return new String(_xszFfn, 0, index); } - public char [] getAltFontName() + public String getAltFontName() { - char [] temp = new char[xszFfnLength - field_5_ixchSzAlt]; - System.arraycopy(field_8_xszFfn, field_5_ixchSzAlt, temp, 0, temp.length); - return temp; + int index = _ixchSzAlt; + for (;index < _xszFfnLength; index++) + { + if (_xszFfn[index] == '\0') + { + break; + } + } + return new String(_xszFfn, _ixchSzAlt, index); + } - public void setField_1_cbFfnM1(int field_1_cbFfnM1) + public void set_cbFfnM1(int _cbFfnM1) { - this.field_1_cbFfnM1 = field_1_cbFfnM1; + this._cbFfnM1 = _cbFfnM1; } // changed protected to public @@ -190,25 +188,25 @@ public class Ffn int offset = 0; byte[] buf = new byte[this.getSize()]; - buf[offset] = (byte)field_1_cbFfnM1; + buf[offset] = (byte)_cbFfnM1; offset += LittleEndian.BYTE_SIZE; - buf[offset] = field_2; + buf[offset] = _info; offset += LittleEndian.BYTE_SIZE; - LittleEndian.putShort(buf, offset, field_3_wWeight); + 
LittleEndian.putShort(buf, offset, _wWeight); offset += LittleEndian.SHORT_SIZE; - buf[offset] = field_4_chs; + buf[offset] = _chs; offset += LittleEndian.BYTE_SIZE; - buf[offset] = field_5_ixchSzAlt; + buf[offset] = _ixchSzAlt; offset += LittleEndian.BYTE_SIZE; - System.arraycopy(field_6_panose,0,buf, offset,field_6_panose.length); - offset += field_6_panose.length; - System.arraycopy(field_7_fontSig,0,buf, offset, field_7_fontSig.length); - offset += field_7_fontSig.length; + System.arraycopy(_panose,0,buf, offset,_panose.length); + offset += _panose.length; + System.arraycopy(_fontSig,0,buf, offset, _fontSig.length); + offset += _fontSig.length; - for(int i = 0; i < field_8_xszFfn.length; i++) + for(int i = 0; i < _xszFfn.length; i++) { - buf[offset] = (byte)field_8_xszFfn[i]; + buf[offset] = (byte)_xszFfn[i]; offset += LittleEndian.BYTE_SIZE; } @@ -220,21 +218,21 @@ public class Ffn { boolean retVal = true; - if (((Ffn)o).getField_1_cbFfnM1() == field_1_cbFfnM1) + if (((Ffn)o).get_cbFfnM1() == _cbFfnM1) { - if(((Ffn)o).getField_2() == field_2) + if(((Ffn)o)._info == _info) { - if(((Ffn)o).getField_3_wWeight() == field_3_wWeight) + if(((Ffn)o)._wWeight == _wWeight) { - if(((Ffn)o).getField_4_chs() == field_4_chs) + if(((Ffn)o)._chs == _chs) { - if(((Ffn)o).getField_5_ixchSzAlt() == field_5_ixchSzAlt) + if(((Ffn)o)._ixchSzAlt == _ixchSzAlt) { - if(Arrays.equals(((Ffn)o).getField_6_panose(),field_6_panose)) + if(Arrays.equals(((Ffn)o)._panose,_panose)) { - if(Arrays.equals(((Ffn)o).getField_7_fontSig(),field_7_fontSig)) + if(Arrays.equals(((Ffn)o)._fontSig,_fontSig)) { - if(!(Arrays.equals(((Ffn)o).getField_8_xszFfn(),field_8_xszFfn))) + if(!(Arrays.equals(((Ffn)o)._xszFfn,_xszFfn))) retVal = false; } else diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FieldDescriptor.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FieldDescriptor.java new file mode 100644 index 000000000..4dab881d2 --- /dev/null +++ 
b/src/scratchpad/src/org/apache/poi/hwpf/model/FieldDescriptor.java @@ -0,0 +1,21 @@ +package org.apache.poi.hwpf.model; + +import org.apache.poi.util.BitField; + +public class FieldDescriptor +{ + byte _fieldBoundaryType; + byte _info; + private final static BitField fZombieEmbed = new BitField(0x02); + private final static BitField fResultDiry = new BitField(0x04); + private final static BitField fResultEdited = new BitField(0x08); + private final static BitField fLocked = new BitField(0x10); + private final static BitField fPrivateResult = new BitField(0x20); + private final static BitField fNested = new BitField(0x40); + private final static BitField fHasSep = new BitField(0x80); + + + public FieldDescriptor() + { + } +} \ No newline at end of file diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java index 15bc73ee7..7b40b827e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FileInformationBlock.java @@ -96,6 +96,7 @@ public class FileInformationBlock extends FIBAbstractType fieldSet.add(new Integer(FIBFieldHandler.PLCFSED)); fieldSet.add(new Integer(FIBFieldHandler.PLCFLST)); fieldSet.add(new Integer(FIBFieldHandler.PLFLFO)); + fieldSet.add(new Integer(FIBFieldHandler.PLCFFLDMOM)); fieldSet.add(new Integer(FIBFieldHandler.STTBFFFN)); fieldSet.add(new Integer(FIBFieldHandler.MODIFIED)); @@ -317,6 +318,16 @@ public class FileInformationBlock extends FIBAbstractType _fieldHandler.clearFields(); } + public int getFcPlcffldMom() + { + return _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDMOM); + } + + public int getLcbPlcffldMom() + { + return _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDMOM); + } + public void writeTo (byte[] mainStream, HWPFOutputStream tableStream) throws IOException { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java 
b/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java index b0e19c8f9..6f0507f8a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FontTable.java @@ -119,7 +119,7 @@ public class FontTable return lcbSttbfffn; } - public char [] getMainFont(int chpFtc ) + public String getMainFont(int chpFtc ) { if(chpFtc >= _stringCount) { @@ -130,7 +130,7 @@ public class FontTable return _fontNames[chpFtc].getMainFontName(); } - public char [] getAltFont(int chpFtc ) + public String getAltFont(int chpFtc ) { if(chpFtc >= _stringCount) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java index d084d1474..0954e2291 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java @@ -68,8 +68,9 @@ import org.apache.poi.util.LittleEndian; public class PAPBinTable { ArrayList _paragraphs = new ArrayList(); + byte[] _dataStream; - public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset, + public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset, int size, int fcMin) { PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); @@ -83,7 +84,7 @@ public class PAPBinTable int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, - pageOffset, fcMin); + dataStream, pageOffset, fcMin); int fkpSize = pfkp.size(); @@ -92,11 +93,12 @@ public class PAPBinTable _paragraphs.add(pfkp.getPAPX(y)); } } + _dataStream = dataStream; } public void insert(int listIndex, int cpStart, SprmBuffer buf) { - PAPX forInsert = new PAPX(cpStart, cpStart, buf); + PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream); if (listIndex == _paragraphs.size()) { _paragraphs.add(forInsert); @@ -116,7 +118,7 @@ public class PAPBinTable 
exc.printStackTrace(); } currentPap.setEnd(cpStart); - PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf); + PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream); _paragraphs.add(++listIndex, forInsert); _paragraphs.add(++listIndex, splitPap); } @@ -220,7 +222,7 @@ public class PAPBinTable PropertyNode startingProp = (PropertyNode)overflow.get(0); int start = startingProp.getStart() + fcMin; - PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(); + PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream); pfkp.fill(overflow); byte[] bufFkp = pfkp.toByteArray(fcMin); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java index 33415c37c..a778648f6 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java @@ -84,11 +84,12 @@ public class PAPFormattedDiskPage extends FormattedDiskPage private ArrayList _papxList = new ArrayList(); private ArrayList _overFlow; + private byte[] _dataStream; - public PAPFormattedDiskPage() + public PAPFormattedDiskPage(byte[] dataStream) { - + _dataStream = dataStream; } /** @@ -96,15 +97,16 @@ public class PAPFormattedDiskPage extends FormattedDiskPage * * @param fkp a 512 byte array. 
*/ - public PAPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) + public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin) { super(documentStream, offset); for (int x = 0; x < _crun; x++) { - _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x))); + _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream)); } _fkp = null; + _dataStream = dataStream; } public void fill(List filler) @@ -164,6 +166,12 @@ public class PAPFormattedDiskPage extends FormattedDiskPage byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl(); int grpprlLength = grpprl.length; + // is grpprl huge? + if(grpprlLength > 488) + { + grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl + } + // check to see if we have enough room for an FC, a BX, and the grpprl // and the 1 byte size of the grpprl. int addition = 0; @@ -219,6 +227,40 @@ public class PAPFormattedDiskPage extends FormattedDiskPage byte[] phe = papx.getParagraphHeight().toByteArray(); byte[] grpprl = papx.getGrpprl(); + // is grpprl huge? + if(grpprl.length > 488) + { + // if so do we have storage at getHugeGrpprlOffset() + int hugeGrpprlOffset = papx.getHugeGrpprlOffset(); + if(hugeGrpprlOffset == -1) // then we have no storage... + { + throw new UnsupportedOperationException( + "This Paragraph has no dataStream storage."); + } + else // we have some storage... 
+ { + // get the size of the existing storage + int maxHugeGrpprlSize = LittleEndian.getUShort(_dataStream, + hugeGrpprlOffset); + + if (maxHugeGrpprlSize < grpprl.length) + throw new UnsupportedOperationException( + "This Paragraph's dataStream storage is too small."); + } + + // store grpprl at hugeGrpprlOffset + System.arraycopy(grpprl, 2, _dataStream, hugeGrpprlOffset + 2, + grpprl.length - 2); + LittleEndian.putUShort(_dataStream, hugeGrpprlOffset, grpprl.length); + + // grpprl = grpprl containing only a sprmPHugePapx2 + int istd = LittleEndian.getUShort(grpprl, 0); + grpprl = new byte[8]; + LittleEndian.putUShort(grpprl, 0, istd); + LittleEndian.putUShort(grpprl, 2, 0x6646); // sprmPHugePapx2 + LittleEndian.putInt(grpprl, 4, hugeGrpprlOffset); + } + boolean same = Arrays.equals(lastGrpprl, grpprl); if (!same) { @@ -252,7 +294,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage fcOffset += FC_SIZE; } - // put the last papx's end in + LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin); return buf; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java index b3fd4414d..6683b17e1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java @@ -61,6 +61,7 @@ import org.apache.poi.util.LittleEndian; import org.apache.poi.hwpf.usermodel.ParagraphProperties; import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor; import org.apache.poi.hwpf.sprm.SprmBuffer; +import org.apache.poi.hwpf.sprm.SprmOperation; /** * Comment me @@ -72,17 +73,55 @@ public class PAPX extends CachedPropertyNode { private ParagraphHeight _phe; + private int _hugeGrpprlOffset = -1; - public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe) + public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream) { super(fcStart, fcEnd, new SprmBuffer(papx)); _phe = phe; + SprmBuffer buf = findHuge(new 
SprmBuffer(papx), dataStream); + if(buf != null) + _buf = buf; } - public PAPX(int fcStart, int fcEnd, SprmBuffer buf) + public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream) { super(fcStart, fcEnd, buf); _phe = new ParagraphHeight(); + buf = findHuge(buf, dataStream); + if(buf != null) + _buf = buf; + } + + private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream) + { + byte[] grpprl = buf.toByteArray(); + if(grpprl.length==8 && datastream!=null) // then check for sprmPHugePapx + { + SprmOperation sprm = new SprmOperation(grpprl, 2); + if ((sprm.getOperation()==0x45 || sprm.getOperation()==0x46) + && sprm.getSizeCode() == 3) + { + int hugeGrpprlOffset = sprm.getOperand(); + if(hugeGrpprlOffset+1 < datastream.length) + { + int grpprlSize = LittleEndian.getShort(datastream, hugeGrpprlOffset); + if( hugeGrpprlOffset+grpprlSize < datastream.length) + { + byte[] hugeGrpprl = new byte[grpprlSize]; + // copy original istd into huge Grpprl + hugeGrpprl[0] = grpprl[0]; hugeGrpprl[1] = grpprl[1]; + // copy Grpprl from dataStream + System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2, + grpprlSize-2); + // save a pointer to where we got the huge Grpprl from + _hugeGrpprlOffset = hugeGrpprlOffset; + return new SprmBuffer(hugeGrpprl); + } + } + } + } + return null; } @@ -96,6 +135,11 @@ public class PAPX extends CachedPropertyNode return ((SprmBuffer)_buf).toByteArray(); } + public int getHugeGrpprlOffset() + { + return _hugeGrpprlOffset; + } + public short getIstd() { byte[] buf = getGrpprl(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/StyleDescription.java b/src/scratchpad/src/org/apache/poi/hwpf/model/StyleDescription.java index 56f5cc42f..0ea86edff 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/StyleDescription.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/StyleDescription.java @@ -220,6 +220,11 @@ public class StyleDescription implements HDFType _chp = chp; } + public String getName() + { + return 
_name; + } + public byte[] toByteArray() { // size equals _baseLength bytes for known variables plus 2 bytes for name diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmBuffer.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmBuffer.java index 576b43ac8..8b8e36ed4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmBuffer.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmBuffer.java @@ -80,6 +80,55 @@ public class SprmBuffer _buf = new byte[4]; _offset = 0; } + + private int findSprm(short opcode) + { + int operation = SprmOperation.getOperationFromOpcode(opcode); + int type = SprmOperation.getTypeFromOpcode(opcode); + + SprmIterator si = new SprmIterator(_buf, 2); + while(si.hasNext()) + { + SprmOperation i = si.next(); + if(i.getOperation() == operation && i.getType() == type) + return i.getGrpprlOffset(); + } + return -1; + } + + public void updateSprm(short opcode, byte operand) + { + int grpprlOffset = findSprm(opcode); + if(grpprlOffset != -1) + { + _buf[grpprlOffset] = operand; + return; + } + else addSprm(opcode, operand); + } + + public void updateSprm(short opcode, short operand) + { + int grpprlOffset = findSprm(opcode); + if(grpprlOffset != -1) + { + LittleEndian.putShort(_buf, grpprlOffset, operand); + return; + } + else addSprm(opcode, operand); + } + + public void updateSprm(short opcode, int operand) + { + int grpprlOffset = findSprm(opcode); + if(grpprlOffset != -1) + { + LittleEndian.putInt(_buf, grpprlOffset, operand); + return; + } + else addSprm(opcode, operand); + } + public void addSprm(short opcode, byte operand) { int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java index 1b15304e8..13ca1253b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/SprmOperation.java @@ -97,6 +97,16 @@ 
public class SprmOperation _size = initSize(sprmStart); } + public static int getOperationFromOpcode(short opcode) + { + return OP_BITFIELD.getValue(opcode); + } + + public static int getTypeFromOpcode(short opcode) + { + return TYPE_BITFIELD.getValue(opcode); + } + public int getType() { return _type; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java index ca08bf578..8446a24fe 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java @@ -447,6 +447,12 @@ public class CharacterRun _chpx.addSprm(SPRM_HIGHLIGHT, color); } + public int getFontNameIndex() + { + return _props.getFtcAscii(); + } + + /** * Get the ico24 field for the CHP record. */ diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ListEntry.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ListEntry.java index 7cc3a0dcd..772c50454 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ListEntry.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ListEntry.java @@ -4,6 +4,7 @@ import org.apache.poi.hwpf.model.ListFormatOverride; import org.apache.poi.hwpf.model.ListFormatOverrideLevel; import org.apache.poi.hwpf.model.ListLevel; import org.apache.poi.hwpf.model.ListTables; +import org.apache.poi.hwpf.model.PAPX; import org.apache.poi.hwpf.sprm.SprmBuffer; @@ -13,13 +14,12 @@ public class ListEntry ListLevel _level; ListFormatOverrideLevel _overrideLevel; - ListEntry(int start, int end, ListTables tables, - ParagraphProperties pap, SprmBuffer sprmBuf, Range parent) + ListEntry(PAPX papx, Range parent, ListTables tables) { - super(start, end, pap, sprmBuf, parent); - ListFormatOverride override = tables.getOverride(pap.getIlfo()); - _overrideLevel = override.getOverrideLevel(pap.getIlvl()); - _level = tables.getLevel(override.getLsid(), pap.getIlvl()); + super(papx, parent); + 
ListFormatOverride override = tables.getOverride(_props.getIlfo()); + _overrideLevel = override.getOverrideLevel(_props.getIlvl()); + _level = tables.getLevel(override.getLsid(), _props.getIlvl()); } public int type() diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java index 32fccf420..cb0134e32 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java @@ -125,8 +125,8 @@ public class Paragraph public final static short SPRM_FADJUSTRIGHT = 0x2448; - private short _istd; - private ParagraphProperties _props; + protected short _istd; + protected ParagraphProperties _props; protected SprmBuffer _papx; protected Paragraph(int startIdx, int endIdx, Table parent) @@ -135,13 +135,20 @@ public class Paragraph PAPX papx = (PAPX)_paragraphs.get(_parEnd - 1); _props = papx.getParagraphProperties(_doc.getStyleSheet()); _papx = papx.getSprmBuf(); + _istd = papx.getIstd(); } - public Paragraph(int start, int end, ParagraphProperties pap, SprmBuffer papx, Range parent) + protected Paragraph(PAPX papx, Range parent) { - super(start, end, parent); - _props = pap; - _papx = papx; + super(papx.getStart(), papx.getEnd(), parent); + _props = papx.getParagraphProperties(_doc.getStyleSheet()); + _papx = papx.getSprmBuf(); + _istd = papx.getIstd(); + } + + public short getStyleIndex() + { + return _istd; } public int type() diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ParagraphProperties.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ParagraphProperties.java index af47271aa..d75db1238 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ParagraphProperties.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/ParagraphProperties.java @@ -63,66 +63,6 @@ public class ParagraphProperties extends PAPAbstractType implements Cloneable { - public final static short SPRM_JC = 
0x2403; - public final static short SPRM_FSIDEBYSIDE = 0x2404; - public final static short SPRM_FKEEP = 0x2405; - public final static short SPRM_FKEEPFOLLOW = 0x2406; - public final static short SPRM_FPAGEBREAKBEFORE = 0x2407; - public final static short SPRM_BRCL = 0x2408; - public final static short SPRM_BRCP = 0x2409; - public final static short SPRM_ILVL = 0x260A; - public final static short SPRM_ILFO = 0x460B; - public final static short SPRM_FNOLINENUMB = 0x240C; - public final static short SPRM_CHGTABSPAPX = (short)0xC60D; - public final static short SPRM_DXARIGHT = (short)0x840E; - public final static short SPRM_DXALEFT = (short)0x840F; - public final static short SPRM_DXALEFT1 = (short)0x8411; - public final static short SPRM_DYALINE = 0x6412; - public final static short SPRM_DYABEFORE = (short)0xA413; - public final static short SPRM_DYAAFTER = (short)0xA414; - public final static short SPRM_CHGTABS = (short)0xC615; - public final static short SPRM_FINTABLE = 0x2416; - public final static short SPRM_FTTP = 0x2417; - public final static short SPRM_DXAABS = (short)0x8418; - public final static short SPRM_DYAABS = (short)0x8419; - public final static short SPRM_DXAWIDTH = (short)0x841A; - public final static short SPRM_PC = 0x261B; - public final static short SPRM_WR = 0x2423; - public final static short SPRM_BRCTOP = 0x6424; - public final static short SPRM_BRCLEFT = 0x6425; - public final static short SPRM_BRCBOTTOM = 0x6426; - public final static short SPRM_BRCRIGHT = 0x6427; - public final static short SPRM_BRCBAR = 0x6629; - public final static short SPRM_FNOAUTOHYPH = 0x242A; - public final static short SPRM_WHEIGHTABS = 0x442B; - public final static short SPRM_DCS = 0x442C; - public final static short SPRM_SHD = 0x442D; - public final static short SPRM_DYAFROMTEXT = (short)0x842E; - public final static short SPRM_DXAFROMTEXT = (short)0x842F; - public final static short SPRM_FLOCKED = 0x2430; - public final static short SPRM_FWIDOWCONTROL = 0x2431; - 
public final static short SPRM_RULER = (short)0xC632; - public final static short SPRM_FKINSOKU = 0x2433; - public final static short SPRM_FWORDWRAP = 0x2434; - public final static short SPRM_FOVERFLOWPUNCT = 0x2435; - public final static short SPRM_FTOPLINEPUNCT = 0x2436; - public final static short SPRM_AUTOSPACEDE = 0x2437; - public final static short SPRM_AUTOSPACEDN = 0x2438; - public final static short SPRM_WALIGNFONT = 0x4439; - public final static short SPRM_FRAMETEXTFLOW = 0x443A; - public final static short SPRM_ANLD = (short)0xC63E; - public final static short SPRM_PROPRMARK = (short)0xC63F; - public final static short SPRM_OUTLVL = 0x2640; - public final static short SPRM_FBIDI = 0x2441; - public final static short SPRM_FNUMRMLNS = 0x2443; - public final static short SPRM_CRLF = 0x2444; - public final static short SPRM_NUMRM = (short)0xC645; - public final static short SPRM_USEPGSUSETTINGS = 0x2447; - public final static short SPRM_FADJUSTRIGHT = 0x2448; - - - private StyleDescription _baseStyle; - private SprmBuffer _papx; public ParagraphProperties() { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index 9df7ac4be..77bd8f357 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -426,13 +426,11 @@ public class Range Paragraph pap = null; if (props.getIlfo() > 0) { - pap = new ListEntry(papx.getStart(), papx.getEnd(), _doc.getListTables(), - props, papx.getSprmBuf(), this); + pap = new ListEntry(papx, this, _doc.getListTables()); } else { - pap = new Paragraph(papx.getStart(), papx.getEnd(), props, - papx.getSprmBuf(), this); + pap = new Paragraph(papx, this); } return pap; @@ -443,6 +441,45 @@ public class Range return TYPE_UNDEFINED; } + public Table getTable(Paragraph paragraph) + { + if (!paragraph.isInTable()) + { + throw new IllegalArgumentException("This paragraph doesn't 
belong to a table"); + } + + Range r = (Range)paragraph; + if (r._parent.get() != this) + { + throw new IllegalArgumentException("This paragraph is not a child of this range"); + } + + r.initAll(); + int tableEnd = r._parEnd; + + if (r._parStart != 0 && new Paragraph((PAPX)r._paragraphs.get(r._parStart - 1), this).isInTable()) /* _paragraphs stores PAPX records, not Paragraph objects, so wrap the entry instead of casting it */ + { + throw new IllegalArgumentException("This paragraph is not the first one in the table"); + } + + int limit = r._paragraphs.size(); + for (; tableEnd < limit; tableEnd++) + { + if (!new Paragraph((PAPX)r._paragraphs.get(tableEnd), this).isInTable()) /* same PAPX wrapping as above; stop at the first paragraph outside the table */ + { + break; + } + } + + initAll(); + if (tableEnd > _parEnd) + { + throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range"); + } + + return new Table(r._parStart, tableEnd, r._doc.getRange(), 1); + } + private void initAll() { initText(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java index 8371809ac..bc7f0e677 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java @@ -50,7 +50,7 @@ public class TableRow public void setRowJustification(int jc) { _tprops.setJc(jc); - _papx.addSprm(SPRM_TJC, (short)jc); + _papx.updateSprm(SPRM_TJC, (short)jc); } public int getGapHalf() @@ -61,7 +61,7 @@ public class TableRow public void setGapHalf(int dxaGapHalf) { _tprops.setDxaGapHalf(dxaGapHalf); - _papx.addSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf); + _papx.updateSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf); } public int getRowHeight() @@ -72,7 +72,7 @@ public class TableRow public void setRowHeight(int dyaRowHeight) { _tprops.setDyaRowHeight(dyaRowHeight); - _papx.addSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight); + _papx.updateSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight); } public boolean cantSplit() @@ -83,7 +83,7 @@ public class TableRow public void setCantSplit(boolean cantSplit) { _tprops.setFCantSplit(cantSplit); - 
_papx.addSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0)); + _papx.updateSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0)); } public boolean isTableHeader() @@ -94,7 +94,7 @@ public class TableRow public void setTableHeader(boolean tableHeader) { _tprops.setFTableHeader(tableHeader); - _papx.addSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0)); + _papx.updateSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0)); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java index 7e2e89dc3..22f977732 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/model/TestPAPBinTable.java @@ -80,7 +80,7 @@ public class TestPAPBinTable byte[] tableStream = _hWPFDocFixture._tableStream; int fcMin = fib.getFcMin(); - _pAPBinTable = new PAPBinTable(mainStream, tableStream, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin); + _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin); HWPFFileSystem fileSys = new HWPFFileSystem(); @@ -91,7 +91,7 @@ public class TestPAPBinTable byte[] newTableStream = tableOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray(); - PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0); + PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0); ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs();