patch from piers and other minor changes

git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Said Ryan Ackley 2004-03-10 04:18:57 +00:00
parent 74b4e4792e
commit dd97f4aeb6
19 changed files with 613 additions and 194 deletions

View File

@ -0,0 +1,209 @@
package org.apache.poi.hwpf;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.*;
import org.apache.poi.hwpf.model.*;
import java.io.*;
/**
 * Converts a Word document, read through HWPF, into an XML document that
 * declares the Apache "Documentation V1.1" DTD.
 *
 * The mapping is driven by paragraph style and font:
 * paragraphs whose style name starts with "Heading" open a (possibly nested)
 * section and supply its title, consecutive paragraphs whose first character
 * run uses a font whose name starts with "Courier" are collected into one
 * source element, and everything else becomes an ordinary paragraph.
 * Paragraphs that contain only whitespace are skipped.
 *
 * The whole conversion runs inside the constructor.
 */
public class Word2Forrest
{
  /** Destination of the generated XML; always encodes as UTF-8. */
  Writer _out;
  /** The document being converted. */
  HWPFDocument _doc;

  /**
   * Converts <code>doc</code> and writes the result to <code>stream</code>.
   * The stream is flushed but not closed.
   *
   * @param doc the Word document to convert.
   * @param stream receives the XML, encoded as UTF-8.
   * @throws IOException if writing to the stream fails.
   * @throws UnsupportedEncodingException if UTF-8 is not available.
   */
  public Word2Forrest(HWPFDocument doc, OutputStream stream)
    throws IOException, UnsupportedEncodingException
  {
    _out = new OutputStreamWriter(stream, "UTF-8");
    _doc = doc;

    init();
    openDocument();
    openBody();

    Range r = doc.getRange();
    StyleSheet styleSheet = doc.getStyleSheet();
    int lenParagraph = r.numParagraphs();

    // Heading levels of the sections that are currently open, innermost
    // last. Tracking the real levels (instead of only the last one) keeps
    // the section tags balanced when a document skips heading levels.
    int[] openLevels = new int[lenParagraph];
    int openCount = 0;
    boolean inCode = false;

    for (int x = 0; x < lenParagraph; x++)
    {
      Paragraph p = r.getParagraph(x);
      String text = p.text();
      if (text.trim().length() == 0)
      {
        continue;
      }

      StyleDescription paragraphStyle =
        styleSheet.getStyleDescription(p.getStyleIndex());
      String styleName = paragraphStyle == null ? null : paragraphStyle.getName();
      int headerLevel = headingLevel(styleName);

      if (headerLevel > 0)
      {
        if (inCode)
        {
          closeSource();
          inCode = false;
        }
        // a new heading ends every open section at the same or a deeper level
        while (openCount > 0 && openLevels[openCount - 1] >= headerLevel)
        {
          closeSection();
          openCount--;
        }
        openSection();
        openLevels[openCount++] = headerLevel;

        openTitle();
        writeEscapedText(text);
        closeTitle();
      }
      else if (isCodeParagraph(p))
      {
        if (!inCode)
        {
          openSource();
          inCode = true;
        }
        writeCDataText(text);
      }
      else
      {
        if (inCode)
        {
          closeSource();
          inCode = false;
        }
        openParagraph();
        writeEscapedText(text);
        closeParagraph();
      }
    }

    // a document may end in the middle of a code run
    if (inCode)
    {
      closeSource();
    }
    while (openCount > 0)
    {
      closeSection();
      openCount--;
    }
    closeBody();
    closeDocument();
    _out.flush();
  }

  /**
   * Extracts the heading level from a style name such as "Heading 1".
   *
   * @param styleName the paragraph's style name, may be null.
   * @return the level, or -1 if the name is not "Heading" followed by a
   *         number.
   */
  private static int headingLevel(String styleName)
  {
    final String prefix = "Heading";
    if (styleName == null || !styleName.startsWith(prefix))
    {
      return -1;
    }
    try
    {
      return Integer.parseInt(styleName.substring(prefix.length()).trim());
    }
    catch (NumberFormatException notNumbered)
    {
      // e.g. a style that merely starts with "Heading"; treat as body text
      return -1;
    }
  }

  /**
   * Decides whether a paragraph is source code by looking at the font of its
   * first character run.
   *
   * @param p the paragraph to inspect.
   * @return true if the first run's main font name starts with "Courier".
   */
  private boolean isCodeParagraph(Paragraph p)
  {
    if (p.numCharacterRuns() == 0)
    {
      return false;
    }
    CharacterRun run = p.getCharacterRun(0);
    String fontName = _doc.getFontTable().getMainFont(run.getFontNameIndex());
    return fontName != null && fontName.startsWith("Courier");
  }

  /**
   * Writes the XML declaration and the DOCTYPE.
   *
   * @throws IOException if writing fails.
   */
  public void init()
    throws IOException
  {
    _out.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\r\n");
    _out.write("<!DOCTYPE document PUBLIC \"-//APACHE//DTD Documentation V1.1//EN\" \"./dtd/document-v11.dtd\">\r\n");
  }

  /**
   * Writes the opening document tag.
   *
   * @throws IOException if writing fails.
   */
  public void openDocument()
    throws IOException
  {
    _out.write("<document>\r\n");
  }

  /**
   * Writes the closing document tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeDocument()
    throws IOException
  {
    _out.write("</document>\r\n");
  }

  /**
   * Writes the opening body tag.
   *
   * @throws IOException if writing fails.
   */
  public void openBody()
    throws IOException
  {
    _out.write("<body>\r\n");
  }

  /**
   * Writes the closing body tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeBody()
    throws IOException
  {
    _out.write("</body>\r\n");
  }

  /**
   * Writes the opening section tag.
   *
   * @throws IOException if writing fails.
   */
  public void openSection()
    throws IOException
  {
    _out.write("<section>");
  }

  /**
   * Writes the closing section tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeSection()
    throws IOException
  {
    _out.write("</section>");
  }

  /**
   * Writes the opening title tag.
   *
   * @throws IOException if writing fails.
   */
  public void openTitle()
    throws IOException
  {
    _out.write("<title>");
  }

  /**
   * Writes the closing title tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeTitle()
    throws IOException
  {
    _out.write("</title>");
  }

  /**
   * Writes <code>text</code> exactly as given, without any XML escaping.
   *
   * @param text the characters to write.
   * @throws IOException if writing fails.
   */
  public void writePlainText(String text)
    throws IOException
  {
    _out.write(text);
  }

  /**
   * Writes <code>text</code> as XML character data, replacing the markup
   * characters <code>&amp;</code>, <code>&lt;</code> and <code>&gt;</code>
   * with their entity references.
   *
   * @param text the characters to write.
   * @throws IOException if writing fails.
   */
  private void writeEscapedText(String text)
    throws IOException
  {
    StringBuffer escaped = new StringBuffer(text.length() + 16);
    for (int i = 0; i < text.length(); i++)
    {
      char c = text.charAt(i);
      switch (c)
      {
        case '&':
          escaped.append("&amp;");
          break;
        case '<':
          escaped.append("&lt;");
          break;
        case '>':
          escaped.append("&gt;");
          break;
        default:
          escaped.append(c);
          break;
      }
    }
    _out.write(escaped.toString());
  }

  /**
   * Writes <code>text</code> into the CDATA section opened by
   * {@link #openSource()}. The only sequence that cannot appear inside CDATA
   * is its terminator, so each occurrence is split across two adjacent CDATA
   * sections.
   *
   * @param text the characters to write.
   * @throws IOException if writing fails.
   */
  private void writeCDataText(String text)
    throws IOException
  {
    int from = 0;
    int hit = text.indexOf("]]>", from);
    while (hit != -1)
    {
      // emit up to and including "]]", then restart CDATA before the ">"
      _out.write(text.substring(from, hit + 2));
      _out.write("]]><![CDATA[");
      from = hit + 2;
      hit = text.indexOf("]]>", from);
    }
    _out.write(text.substring(from));
  }

  /**
   * Writes the opening paragraph tag.
   *
   * @throws IOException if writing fails.
   */
  public void openParagraph()
    throws IOException
  {
    _out.write("<p>");
  }

  /**
   * Writes the closing paragraph tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeParagraph()
    throws IOException
  {
    _out.write("</p>");
  }

  /**
   * Writes the opening source tag and starts a CDATA section.
   *
   * @throws IOException if writing fails.
   */
  public void openSource()
    throws IOException
  {
    _out.write("<source><![CDATA[");
  }

  /**
   * Ends the CDATA section and writes the closing source tag.
   *
   * @throws IOException if writing fails.
   */
  public void closeSource()
    throws IOException
  {
    _out.write("]]></source>");
  }

  /**
   * Command line entry point.
   *
   * @param args <code>args[0]</code> is the Word file to read; the optional
   *        <code>args[1]</code> is the XML file to write and defaults to
   *        <code>c:\test.xml</code>.
   */
  public static void main(String[] args)
  {
    if (args.length < 1)
    {
      System.err.println("Usage: Word2Forrest <input.doc> [output.xml]");
      return;
    }
    String outputPath = args.length > 1 ? args[1] : "c:\\test.xml";

    InputStream in = null;
    OutputStream out = null;
    try
    {
      // open the input first so a bad input path does not truncate the output
      in = new FileInputStream(args[0]);
      out = new FileOutputStream(outputPath);
      new Word2Forrest(new HWPFDocument(in), out);
    }
    catch (Throwable t)
    {
      t.printStackTrace();
    }
    finally
    {
      if (out != null)
      {
        try
        {
          out.close();
        }
        catch (IOException e)
        {
          e.printStackTrace();
        }
      }
      if (in != null)
      {
        try
        {
          in.close();
        }
        catch (IOException e)
        {
          e.printStackTrace();
        }
      }
    }
  }
}

View File

@ -96,6 +96,9 @@ public class HWPFDocument
/** table stream buffer*/ /** table stream buffer*/
private byte[] _tableStream; private byte[] _tableStream;
/** data stream buffer*/
private byte[] _dataStream;
/** Document wide Properties*/ /** Document wide Properties*/
private DocumentProperties _dop; private DocumentProperties _dop;
@ -157,6 +160,19 @@ public class HWPFDocument
_fib.fillVariableFields(_mainStream, _tableStream); _fib.fillVariableFields(_mainStream, _tableStream);
// read in the data stream.
try
{
DocumentEntry dataProps =
(DocumentEntry) _filesystem.getRoot().getEntry("Data");
_dataStream = new byte[dataProps.getSize()];
_filesystem.createDocumentInputStream("Data").read(_dataStream);
}
catch(java.io.FileNotFoundException e)
{
_dataStream = new byte[0];
}
// get the start of text in the main stream // get the start of text in the main stream
int fcMin = _fib.getFcMin(); int fcMin = _fib.getFcMin();
@ -165,7 +181,7 @@ public class HWPFDocument
_cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin); _cft = new ComplexFileTable(_mainStream, _tableStream, _fib.getFcClx(), fcMin);
_tpt = _cft.getTextPieceTable(); _tpt = _cft.getTextPieceTable();
_cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin); _cbt = new CHPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbteChpx(), _fib.getLcbPlcfbteChpx(), fcMin);
_pbt = new PAPBinTable(_mainStream, _tableStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin); _pbt = new PAPBinTable(_mainStream, _tableStream, _dataStream, _fib.getFcPlcfbtePapx(), _fib.getLcbPlcfbtePapx(), fcMin);
// Word XP puts in a zero filled buffer in front of the text and it screws // Word XP puts in a zero filled buffer in front of the text and it screws
// up my system for offsets. This is an adjustment. // up my system for offsets. This is an adjustment.
@ -187,8 +203,13 @@ public class HWPFDocument
_lt = new ListTables(_tableStream, _fib.getFcPlcfLst(), _fib.getFcPlfLfo()); _lt = new ListTables(_tableStream, _fib.getFcPlcfLst(), _fib.getFcPlfLfo());
} }
int x = 0; PlexOfCps plc = new PlexOfCps(_tableStream, _fib.getFcPlcffldMom(), _fib.getLcbPlcffldMom(), 2);
for (int x = 0; x < plc.length(); x++)
{
GenericPropertyNode node = plc.getProperty(x);
byte[] fld = node.getBytes();
int breakpoint = 0;
}
} }
public StyleSheet getStyleSheet() public StyleSheet getStyleSheet()
@ -198,7 +219,11 @@ public class HWPFDocument
public Range getRange() public Range getRange()
{ {
return new Range(0, _fib.getFcMac() - _fib.getFcMin(), this); // hack to get the ending cp of the document, Have to revisit this.
java.util.List paragraphs = _pbt.getParagraphs();
PAPX p = (PAPX)paragraphs.get(paragraphs.size() - 1);
return new Range(0, p.getEnd(), this);
} }
public ListTables getListTables() public ListTables getListTables()
@ -219,6 +244,7 @@ public class HWPFDocument
HWPFFileSystem docSys = new HWPFFileSystem(); HWPFFileSystem docSys = new HWPFFileSystem();
HWPFOutputStream mainStream = docSys.getStream("WordDocument"); HWPFOutputStream mainStream = docSys.getStream("WordDocument");
HWPFOutputStream tableStream = docSys.getStream("1Table"); HWPFOutputStream tableStream = docSys.getStream("1Table");
HWPFOutputStream dataStream = docSys.getStream("Data");
int tableOffset = 0; int tableOffset = 0;
// FileInformationBlock fib = (FileInformationBlock)_fib.clone(); // FileInformationBlock fib = (FileInformationBlock)_fib.clone();
@ -302,7 +328,7 @@ public class HWPFDocument
_fib.setFcMac(fcMac); _fib.setFcMac(fcMac);
_fib.setCbMac(mainStream.getOffset()); _fib.setCbMac(mainStream.getOffset());
// make sure that the table and doc stream use big blocks. // make sure that the table, doc and data streams use big blocks.
byte[] mainBuf = mainStream.toByteArray(); byte[] mainBuf = mainStream.toByteArray();
if (mainBuf.length < 4096) if (mainBuf.length < 4096)
{ {
@ -323,11 +349,20 @@ public class HWPFDocument
tableBuf = tempBuf; tableBuf = tempBuf;
} }
byte[] dataBuf = _dataStream;
if (dataBuf.length < 4096)
{
byte[] tempBuf = new byte[4096];
System.arraycopy(dataBuf, 0, tempBuf, 0, dataBuf.length);
dataBuf = tempBuf;
}
// spit out the Word document. // spit out the Word document.
POIFSFileSystem pfs = new POIFSFileSystem(); POIFSFileSystem pfs = new POIFSFileSystem();
pfs.createDocument(new ByteArrayInputStream(mainBuf), "WordDocument"); pfs.createDocument(new ByteArrayInputStream(mainBuf), "WordDocument");
pfs.createDocument(new ByteArrayInputStream(tableBuf), "1Table"); pfs.createDocument(new ByteArrayInputStream(tableBuf), "1Table");
pfs.createDocument(new ByteArrayInputStream(dataBuf), "Data");
pfs.writeFilesystem(out); pfs.writeFilesystem(out);
} }
@ -352,6 +387,11 @@ public class HWPFDocument
return _cft.getTextPieceTable(); return _cft.getTextPieceTable();
} }
/**
 * Gives access to the buffer holding the document's data stream.
 *
 * @return the internal data stream array itself, not a copy.
 */
public byte[] getDataStream()
{
  final byte[] dataStream = this._dataStream;
  return dataStream;
}
public int registerList(List list) public int registerList(List list)
{ {
if (_lt == null) if (_lt == null)
@ -361,6 +401,11 @@ public class HWPFDocument
return _lt.addList(list.getListData(), list.getOverride()); return _lt.addList(list.getListData(), list.getOverride());
} }
/**
 * Gives access to this document's font table.
 *
 * @return the font table held by this document.
 */
public FontTable getFontTable()
{
  final FontTable fontTable = this._ft;
  return fontTable;
}
/** /**
* Takes two arguments, 1) name of the Word file to read in 2) location to * Takes two arguments, 1) name of the Word file to read in 2) location to
* write it out at. * write it out at.
@ -373,15 +418,8 @@ public class HWPFDocument
{ {
HWPFDocument doc = new HWPFDocument(new FileInputStream(args[0])); HWPFDocument doc = new HWPFDocument(new FileInputStream(args[0]));
Range r = doc.getRange(); Range r = doc.getRange();
TableIterator ti = new TableIterator(r); String str = r.text();
while (ti.hasNext())
{
Table t = ti.next();
int x = 0; int x = 0;
}
// CharacterRun run = new CharacterRun(); // CharacterRun run = new CharacterRun();
// run.setBold(true); // run.setBold(true);
// run.setItalic(true); // run.setItalic(true);

View File

@ -67,121 +67,119 @@ import java.util.Arrays;
*/ */
public class Ffn public class Ffn
{ {
private int field_1_cbFfnM1;//total length of FFN - 1. private int _cbFfnM1;//total length of FFN - 1.
private byte field_2; private byte _info;
private static BitField _prq = new BitField(0x0003);// pitch request private static BitField _prq = new BitField(0x0003);// pitch request
private static BitField _fTrueType = new BitField(0x0004);// when 1, font is a TrueType font private static BitField _fTrueType = new BitField(0x0004);// when 1, font is a TrueType font
private static BitField _ff = new BitField(0x0070); private static BitField _ff = new BitField(0x0070);
private short field_3_wWeight;// base weight of font private short _wWeight;// base weight of font
private byte field_4_chs;// character set identifier private byte _chs;// character set identifier
private byte field_5_ixchSzAlt; // index into ffn.szFfn to the name of private byte _ixchSzAlt; // index into ffn.szFfn to the name of
// the alternate font // the alternate font
private byte [] field_6_panose = new byte[10];//???? private byte [] _panose = new byte[10];//????
private byte [] field_7_fontSig = new byte[24];//???? private byte [] _fontSig = new byte[24];//????
// zero terminated string that records name of font, currently not // zero terminated string that records name of font, currently not
// supporting Extended chars // supporting Extended chars
private char [] field_8_xszFfn; private char [] _xszFfn;
// extra facilitator members // extra facilitator members
private int xszFfnLength; private int _xszFfnLength;
public Ffn(byte[] buf, int offset) public Ffn(byte[] buf, int offset)
{ {
int offsetTmp = offset; int offsetTmp = offset;
field_1_cbFfnM1 = LittleEndian.getUnsignedByte(buf,offset); _cbFfnM1 = LittleEndian.getUnsignedByte(buf,offset);
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
field_2 = buf[offset]; _info = buf[offset];
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
field_3_wWeight = LittleEndian.getShort(buf, offset); _wWeight = LittleEndian.getShort(buf, offset);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
field_4_chs = buf[offset]; _chs = buf[offset];
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
field_5_ixchSzAlt = buf[offset]; _ixchSzAlt = buf[offset];
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
// read panose and fs so we can write them back out. // read panose and fs so we can write them back out.
System.arraycopy(buf, offset, field_6_panose, 0, field_6_panose.length); System.arraycopy(buf, offset, _panose, 0, _panose.length);
offset += field_6_panose.length; offset += _panose.length;
System.arraycopy(buf, offset, field_7_fontSig, 0, field_7_fontSig.length); System.arraycopy(buf, offset, _fontSig, 0, _fontSig.length);
offset += field_7_fontSig.length; offset += _fontSig.length;
offsetTmp = offset - offsetTmp; offsetTmp = offset - offsetTmp;
xszFfnLength = this.getSize() - offsetTmp; _xszFfnLength = this.getSize() - offsetTmp;
field_8_xszFfn = new char[xszFfnLength]; _xszFfn = new char[_xszFfnLength];
for(int i = 0; i < xszFfnLength; i++) for(int i = 0; i < _xszFfnLength; i++)
{ {
field_8_xszFfn[i] = (char)LittleEndian.getUnsignedByte(buf, offset); _xszFfn[i] = (char)LittleEndian.getShort(buf, offset);
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.SHORT_SIZE;
} }
} }
public int getField_1_cbFfnM1() public int get_cbFfnM1()
{ {
return field_1_cbFfnM1; return _cbFfnM1;
} }
public byte getField_2() public short getWeight()
{ {
return field_2; return _wWeight;
} }
public short getField_3_wWeight() public byte getChs()
{ {
return field_3_wWeight; return _chs;
} }
public byte getField_4_chs() public byte [] getPanose()
{ {
return field_4_chs; return _panose;
} }
public byte getField_5_ixchSzAlt() public byte [] getFontSig()
{ {
return field_5_ixchSzAlt; return _fontSig;
}
public byte [] getField_6_panose()
{
return field_6_panose;
}
public byte [] getField_7_fontSig()
{
return field_7_fontSig;
}
public char [] getField_8_xszFfn()
{
return field_8_xszFfn;
} }
public int getSize() public int getSize()
{ {
return (field_1_cbFfnM1 + 1); return (_cbFfnM1 + 1);
} }
public char [] getMainFontName() public String getMainFontName()
{ {
char [] temp = new char[field_5_ixchSzAlt]; int index = 0;
System.arraycopy(field_8_xszFfn,0,temp,0,temp.length); for (;index < _xszFfnLength; index++)
return temp; {
if (_xszFfn[index] == '\0')
{
break;
}
}
return new String(_xszFfn, 0, index);
} }
public char [] getAltFontName() public String getAltFontName()
{ {
char [] temp = new char[xszFfnLength - field_5_ixchSzAlt]; int index = _ixchSzAlt;
System.arraycopy(field_8_xszFfn, field_5_ixchSzAlt, temp, 0, temp.length); for (;index < _xszFfnLength; index++)
return temp; {
if (_xszFfn[index] == '\0')
{
break;
}
}
return new String(_xszFfn, _ixchSzAlt, index);
} }
public void setField_1_cbFfnM1(int field_1_cbFfnM1) public void set_cbFfnM1(int _cbFfnM1)
{ {
this.field_1_cbFfnM1 = field_1_cbFfnM1; this._cbFfnM1 = _cbFfnM1;
} }
// changed protected to public // changed protected to public
@ -190,25 +188,25 @@ public class Ffn
int offset = 0; int offset = 0;
byte[] buf = new byte[this.getSize()]; byte[] buf = new byte[this.getSize()];
buf[offset] = (byte)field_1_cbFfnM1; buf[offset] = (byte)_cbFfnM1;
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
buf[offset] = field_2; buf[offset] = _info;
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
LittleEndian.putShort(buf, offset, field_3_wWeight); LittleEndian.putShort(buf, offset, _wWeight);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
buf[offset] = field_4_chs; buf[offset] = _chs;
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
buf[offset] = field_5_ixchSzAlt; buf[offset] = _ixchSzAlt;
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
System.arraycopy(field_6_panose,0,buf, offset,field_6_panose.length); System.arraycopy(_panose,0,buf, offset,_panose.length);
offset += field_6_panose.length; offset += _panose.length;
System.arraycopy(field_7_fontSig,0,buf, offset, field_7_fontSig.length); System.arraycopy(_fontSig,0,buf, offset, _fontSig.length);
offset += field_7_fontSig.length; offset += _fontSig.length;
for(int i = 0; i < field_8_xszFfn.length; i++) for(int i = 0; i < _xszFfn.length; i++)
{ {
buf[offset] = (byte)field_8_xszFfn[i]; buf[offset] = (byte)_xszFfn[i];
offset += LittleEndian.BYTE_SIZE; offset += LittleEndian.BYTE_SIZE;
} }
@ -220,21 +218,21 @@ public class Ffn
{ {
boolean retVal = true; boolean retVal = true;
if (((Ffn)o).getField_1_cbFfnM1() == field_1_cbFfnM1) if (((Ffn)o).get_cbFfnM1() == _cbFfnM1)
{ {
if(((Ffn)o).getField_2() == field_2) if(((Ffn)o)._info == _info)
{ {
if(((Ffn)o).getField_3_wWeight() == field_3_wWeight) if(((Ffn)o)._wWeight == _wWeight)
{ {
if(((Ffn)o).getField_4_chs() == field_4_chs) if(((Ffn)o)._chs == _chs)
{ {
if(((Ffn)o).getField_5_ixchSzAlt() == field_5_ixchSzAlt) if(((Ffn)o)._ixchSzAlt == _ixchSzAlt)
{ {
if(Arrays.equals(((Ffn)o).getField_6_panose(),field_6_panose)) if(Arrays.equals(((Ffn)o)._panose,_panose))
{ {
if(Arrays.equals(((Ffn)o).getField_7_fontSig(),field_7_fontSig)) if(Arrays.equals(((Ffn)o)._fontSig,_fontSig))
{ {
if(!(Arrays.equals(((Ffn)o).getField_8_xszFfn(),field_8_xszFfn))) if(!(Arrays.equals(((Ffn)o)._xszFfn,_xszFfn)))
retVal = false; retVal = false;
} }
else else

View File

@ -0,0 +1,21 @@
package org.apache.poi.hwpf.model;
import org.apache.poi.util.BitField;
/**
 * Placeholder model for a field descriptor. It currently only declares its
 * two byte-sized members and the bit masks for the flags; nothing reads or
 * writes them yet.
 */
public class FieldDescriptor
{
  /** Kind of field boundary this descriptor marks. */
  byte _fieldBoundaryType;
  /** Packed flag byte; presumably decoded with the masks below - TODO confirm. */
  byte _info;

  // Masks for the individual flags.
  private final static BitField fZombieEmbed = new BitField(0x02);
  private final static BitField fResultDirty = new BitField(0x04);
  private final static BitField fResultEdited = new BitField(0x08);
  private final static BitField fLocked = new BitField(0x10);
  private final static BitField fPrivateResult = new BitField(0x20);
  private final static BitField fNested = new BitField(0x40);
  private final static BitField fHasSep = new BitField(0x80);

  /** Creates a descriptor with both members left at zero. */
  public FieldDescriptor()
  {
  }
}

View File

@ -96,6 +96,7 @@ public class FileInformationBlock extends FIBAbstractType
fieldSet.add(new Integer(FIBFieldHandler.PLCFSED)); fieldSet.add(new Integer(FIBFieldHandler.PLCFSED));
fieldSet.add(new Integer(FIBFieldHandler.PLCFLST)); fieldSet.add(new Integer(FIBFieldHandler.PLCFLST));
fieldSet.add(new Integer(FIBFieldHandler.PLFLFO)); fieldSet.add(new Integer(FIBFieldHandler.PLFLFO));
fieldSet.add(new Integer(FIBFieldHandler.PLCFFLDMOM));
fieldSet.add(new Integer(FIBFieldHandler.STTBFFFN)); fieldSet.add(new Integer(FIBFieldHandler.STTBFFFN));
fieldSet.add(new Integer(FIBFieldHandler.MODIFIED)); fieldSet.add(new Integer(FIBFieldHandler.MODIFIED));
@ -317,6 +318,16 @@ public class FileInformationBlock extends FIBAbstractType
_fieldHandler.clearFields(); _fieldHandler.clearFields();
} }
/**
 * Looks up the offset recorded for the PLCFFLDMOM field.
 *
 * @return the field offset reported by the field handler.
 */
public int getFcPlcffldMom()
{
  final int fieldOffset = _fieldHandler.getFieldOffset(FIBFieldHandler.PLCFFLDMOM);
  return fieldOffset;
}
/**
 * Looks up the size recorded for the PLCFFLDMOM field.
 *
 * @return the field size reported by the field handler.
 */
public int getLcbPlcffldMom()
{
  final int fieldSize = _fieldHandler.getFieldSize(FIBFieldHandler.PLCFFLDMOM);
  return fieldSize;
}
public void writeTo (byte[] mainStream, HWPFOutputStream tableStream) public void writeTo (byte[] mainStream, HWPFOutputStream tableStream)
throws IOException throws IOException
{ {

View File

@ -119,7 +119,7 @@ public class FontTable
return lcbSttbfffn; return lcbSttbfffn;
} }
public char [] getMainFont(int chpFtc ) public String getMainFont(int chpFtc )
{ {
if(chpFtc >= _stringCount) if(chpFtc >= _stringCount)
{ {
@ -130,7 +130,7 @@ public class FontTable
return _fontNames[chpFtc].getMainFontName(); return _fontNames[chpFtc].getMainFontName();
} }
public char [] getAltFont(int chpFtc ) public String getAltFont(int chpFtc )
{ {
if(chpFtc >= _stringCount) if(chpFtc >= _stringCount)
{ {

View File

@ -68,8 +68,9 @@ import org.apache.poi.util.LittleEndian;
public class PAPBinTable public class PAPBinTable
{ {
ArrayList _paragraphs = new ArrayList(); ArrayList _paragraphs = new ArrayList();
byte[] _dataStream;
public PAPBinTable(byte[] documentStream, byte[] tableStream, int offset, public PAPBinTable(byte[] documentStream, byte[] tableStream, byte[] dataStream, int offset,
int size, int fcMin) int size, int fcMin)
{ {
PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4); PlexOfCps binTable = new PlexOfCps(tableStream, offset, size, 4);
@ -83,7 +84,7 @@ public class PAPBinTable
int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum; int pageOffset = POIFSConstants.BIG_BLOCK_SIZE * pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream, PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(documentStream,
pageOffset, fcMin); dataStream, pageOffset, fcMin);
int fkpSize = pfkp.size(); int fkpSize = pfkp.size();
@ -92,11 +93,12 @@ public class PAPBinTable
_paragraphs.add(pfkp.getPAPX(y)); _paragraphs.add(pfkp.getPAPX(y));
} }
} }
_dataStream = dataStream;
} }
public void insert(int listIndex, int cpStart, SprmBuffer buf) public void insert(int listIndex, int cpStart, SprmBuffer buf)
{ {
PAPX forInsert = new PAPX(cpStart, cpStart, buf); PAPX forInsert = new PAPX(cpStart, cpStart, buf, _dataStream);
if (listIndex == _paragraphs.size()) if (listIndex == _paragraphs.size())
{ {
_paragraphs.add(forInsert); _paragraphs.add(forInsert);
@ -116,7 +118,7 @@ public class PAPBinTable
exc.printStackTrace(); exc.printStackTrace();
} }
currentPap.setEnd(cpStart); currentPap.setEnd(cpStart);
PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf); PAPX splitPap = new PAPX(cpStart, currentPap.getEnd(), clonedBuf, _dataStream);
_paragraphs.add(++listIndex, forInsert); _paragraphs.add(++listIndex, forInsert);
_paragraphs.add(++listIndex, splitPap); _paragraphs.add(++listIndex, splitPap);
} }
@ -220,7 +222,7 @@ public class PAPBinTable
PropertyNode startingProp = (PropertyNode)overflow.get(0); PropertyNode startingProp = (PropertyNode)overflow.get(0);
int start = startingProp.getStart() + fcMin; int start = startingProp.getStart() + fcMin;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(); PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
pfkp.fill(overflow); pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(fcMin); byte[] bufFkp = pfkp.toByteArray(fcMin);

View File

@ -84,11 +84,12 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
private ArrayList _papxList = new ArrayList(); private ArrayList _papxList = new ArrayList();
private ArrayList _overFlow; private ArrayList _overFlow;
private byte[] _dataStream;
public PAPFormattedDiskPage() public PAPFormattedDiskPage(byte[] dataStream)
{ {
_dataStream = dataStream;
} }
/** /**
@ -96,15 +97,16 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
* *
* @param fkp a 512 byte array. * @param fkp a 512 byte array.
*/ */
public PAPFormattedDiskPage(byte[] documentStream, int offset, int fcMin) public PAPFormattedDiskPage(byte[] documentStream, byte[] dataStream, int offset, int fcMin)
{ {
super(documentStream, offset); super(documentStream, offset);
for (int x = 0; x < _crun; x++) for (int x = 0; x < _crun; x++)
{ {
_papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x))); _papxList.add(new PAPX(getStart(x) - fcMin, getEnd(x) - fcMin, getGrpprl(x), getParagraphHeight(x), dataStream));
} }
_fkp = null; _fkp = null;
_dataStream = dataStream;
} }
public void fill(List filler) public void fill(List filler)
@ -164,6 +166,12 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl(); byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl();
int grpprlLength = grpprl.length; int grpprlLength = grpprl.length;
// is grpprl huge?
if(grpprlLength > 488)
{
grpprlLength = 8; // set equal to size of sprmPHugePapx grpprl
}
// check to see if we have enough room for an FC, a BX, and the grpprl // check to see if we have enough room for an FC, a BX, and the grpprl
// and the 1 byte size of the grpprl. // and the 1 byte size of the grpprl.
int addition = 0; int addition = 0;
@ -219,6 +227,40 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
byte[] phe = papx.getParagraphHeight().toByteArray(); byte[] phe = papx.getParagraphHeight().toByteArray();
byte[] grpprl = papx.getGrpprl(); byte[] grpprl = papx.getGrpprl();
// is grpprl huge?
if(grpprl.length > 488)
{
// if so do we have storage at getHugeGrpprlOffset()
int hugeGrpprlOffset = papx.getHugeGrpprlOffset();
if(hugeGrpprlOffset == -1) // then we have no storage...
{
throw new UnsupportedOperationException(
"This Paragraph has no dataStream storage.");
}
else // we have some storage...
{
// get the size of the existing storage
int maxHugeGrpprlSize = LittleEndian.getUShort(_dataStream,
hugeGrpprlOffset);
if (maxHugeGrpprlSize < grpprl.length)
throw new UnsupportedOperationException(
"This Paragraph's dataStream storage is too small.");
}
// store grpprl at hugeGrpprlOffset
System.arraycopy(grpprl, 2, _dataStream, hugeGrpprlOffset + 2,
grpprl.length - 2);
LittleEndian.putUShort(_dataStream, hugeGrpprlOffset, grpprl.length);
// grpprl = grpprl containing only a sprmPHugePapx2
int istd = LittleEndian.getUShort(grpprl, 0);
grpprl = new byte[8];
LittleEndian.putUShort(grpprl, 0, istd);
LittleEndian.putUShort(grpprl, 2, 0x6646); // sprmPHugePapx2
LittleEndian.putInt(grpprl, 4, hugeGrpprlOffset);
}
boolean same = Arrays.equals(lastGrpprl, grpprl); boolean same = Arrays.equals(lastGrpprl, grpprl);
if (!same) if (!same)
{ {
@ -252,7 +294,7 @@ public class PAPFormattedDiskPage extends FormattedDiskPage
fcOffset += FC_SIZE; fcOffset += FC_SIZE;
} }
// put the last papx's end in
LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin); LittleEndian.putInt(buf, fcOffset, papx.getEnd() + fcMin);
return buf; return buf;
} }

View File

@ -61,6 +61,7 @@ import org.apache.poi.util.LittleEndian;
import org.apache.poi.hwpf.usermodel.ParagraphProperties; import org.apache.poi.hwpf.usermodel.ParagraphProperties;
import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor; import org.apache.poi.hwpf.sprm.ParagraphSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.SprmOperation;
/** /**
* Comment me * Comment me
@ -72,17 +73,55 @@ public class PAPX extends CachedPropertyNode
{ {
private ParagraphHeight _phe; private ParagraphHeight _phe;
private int _hugeGrpprlOffset = -1;
public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe) public PAPX(int fcStart, int fcEnd, byte[] papx, ParagraphHeight phe, byte[] dataStream)
{ {
super(fcStart, fcEnd, new SprmBuffer(papx)); super(fcStart, fcEnd, new SprmBuffer(papx));
_phe = phe; _phe = phe;
SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
if(buf != null)
_buf = buf;
} }
public PAPX(int fcStart, int fcEnd, SprmBuffer buf) public PAPX(int fcStart, int fcEnd, SprmBuffer buf, byte[] dataStream)
{ {
super(fcStart, fcEnd, buf); super(fcStart, fcEnd, buf);
_phe = new ParagraphHeight(); _phe = new ParagraphHeight();
buf = findHuge(buf, dataStream);
if(buf != null)
_buf = buf;
}
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream)
{
byte[] grpprl = buf.toByteArray();
if(grpprl.length==8 && datastream!=null) // then check for sprmPHugePapx
{
SprmOperation sprm = new SprmOperation(grpprl, 2);
if ((sprm.getOperation()==0x45 || sprm.getOperation()==0x46)
&& sprm.getSizeCode() == 3)
{
int hugeGrpprlOffset = sprm.getOperand();
if(hugeGrpprlOffset+1 < datastream.length)
{
int grpprlSize = LittleEndian.getShort(datastream, hugeGrpprlOffset);
if( hugeGrpprlOffset+grpprlSize < datastream.length)
{
byte[] hugeGrpprl = new byte[grpprlSize];
// copy original istd into huge Grpprl
hugeGrpprl[0] = grpprl[0]; hugeGrpprl[1] = grpprl[1];
// copy Grpprl from dataStream
System.arraycopy(datastream, hugeGrpprlOffset + 2, hugeGrpprl, 2,
grpprlSize-2);
// save a pointer to where we got the huge Grpprl from
_hugeGrpprlOffset = hugeGrpprlOffset;
return new SprmBuffer(hugeGrpprl);
}
}
}
}
return null;
} }
@ -96,6 +135,11 @@ public class PAPX extends CachedPropertyNode
return ((SprmBuffer)_buf).toByteArray(); return ((SprmBuffer)_buf).toByteArray();
} }
/**
 * Reports where in the data stream this paragraph's huge grpprl was found.
 *
 * @return the data stream offset, or -1 if no huge grpprl was resolved.
 */
public int getHugeGrpprlOffset()
{
  final int offset = this._hugeGrpprlOffset;
  return offset;
}
public short getIstd() public short getIstd()
{ {
byte[] buf = getGrpprl(); byte[] buf = getGrpprl();

View File

@ -220,6 +220,11 @@ public class StyleDescription implements HDFType
_chp = chp; _chp = chp;
} }
/**
 * Gives the name of this style.
 *
 * @return the style's name.
 */
public String getName()
{
  final String name = this._name;
  return name;
}
public byte[] toByteArray() public byte[] toByteArray()
{ {
// size equals _baseLength bytes for known variables plus 2 bytes for name // size equals _baseLength bytes for known variables plus 2 bytes for name

View File

@ -80,6 +80,55 @@ public class SprmBuffer
_buf = new byte[4]; _buf = new byte[4];
_offset = 0; _offset = 0;
} }
/**
 * Searches this buffer, starting at byte 2, for the first sprm whose
 * operation and type both match those encoded in <code>opcode</code>.
 *
 * @param opcode the sprm opcode to look for.
 * @return the grpprl offset reported by the matching sprm, or -1 if there
 *         is no match.
 */
private int findSprm(short opcode)
{
  final int wantedOperation = SprmOperation.getOperationFromOpcode(opcode);
  final int wantedType = SprmOperation.getTypeFromOpcode(opcode);

  for (SprmIterator sprms = new SprmIterator(_buf, 2); sprms.hasNext(); )
  {
    SprmOperation candidate = sprms.next();
    if (candidate.getOperation() != wantedOperation)
    {
      continue;
    }
    if (candidate.getType() != wantedType)
    {
      continue;
    }
    return candidate.getGrpprlOffset();
  }
  return -1;
}
/**
 * Sets a one byte operand: overwrites it in place if a sprm with this
 * opcode is already in the buffer, otherwise appends a new sprm.
 *
 * @param opcode the sprm opcode.
 * @param operand the new operand value.
 */
public void updateSprm(short opcode, byte operand)
{
  final int existingOffset = findSprm(opcode);
  if (existingOffset == -1)
  {
    addSprm(opcode, operand);
    return;
  }
  _buf[existingOffset] = operand;
}
/**
 * Sets a two byte operand: overwrites it in place if a sprm with this
 * opcode is already in the buffer, otherwise appends a new sprm.
 *
 * @param opcode the sprm opcode.
 * @param operand the new operand value.
 */
public void updateSprm(short opcode, short operand)
{
  final int existingOffset = findSprm(opcode);
  if (existingOffset == -1)
  {
    addSprm(opcode, operand);
    return;
  }
  LittleEndian.putShort(_buf, existingOffset, operand);
}
/**
 * Sets a four byte operand: overwrites it in place if a sprm with this
 * opcode is already in the buffer, otherwise appends a new sprm.
 *
 * @param opcode the sprm opcode.
 * @param operand the new operand value.
 */
public void updateSprm(short opcode, int operand)
{
  final int existingOffset = findSprm(opcode);
  if (existingOffset == -1)
  {
    addSprm(opcode, operand);
    return;
  }
  LittleEndian.putInt(_buf, existingOffset, operand);
}
public void addSprm(short opcode, byte operand) public void addSprm(short opcode, byte operand)
{ {
int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE; int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE;

View File

@ -97,6 +97,16 @@ public class SprmOperation
_size = initSize(sprmStart); _size = initSize(sprmStart);
} }
/**
 * Extracts the operation bits from a sprm opcode.
 *
 * @param opcode the full sprm opcode.
 * @return the value selected by the operation bit field.
 */
public static int getOperationFromOpcode(short opcode)
{
  final int operation = OP_BITFIELD.getValue(opcode);
  return operation;
}
/**
 * Extracts the type bits from a sprm opcode.
 *
 * @param opcode the full sprm opcode.
 * @return the value selected by the type bit field.
 */
public static int getTypeFromOpcode(short opcode)
{
  final int type = TYPE_BITFIELD.getValue(opcode);
  return type;
}
public int getType() public int getType()
{ {
return _type; return _type;

View File

@ -447,6 +447,12 @@ public class CharacterRun
_chpx.addSprm(SPRM_HIGHLIGHT, color); _chpx.addSprm(SPRM_HIGHLIGHT, color);
} }
/**
 * Gives the font index of this run, taken from the ftcAscii property.
 *
 * @return the ftcAscii value of this run's properties.
 */
public int getFontNameIndex()
{
  final int ftcAscii = _props.getFtcAscii();
  return ftcAscii;
}
/** /**
* Get the ico24 field for the CHP record. * Get the ico24 field for the CHP record.
*/ */

View File

@ -4,6 +4,7 @@ import org.apache.poi.hwpf.model.ListFormatOverride;
import org.apache.poi.hwpf.model.ListFormatOverrideLevel; import org.apache.poi.hwpf.model.ListFormatOverrideLevel;
import org.apache.poi.hwpf.model.ListLevel; import org.apache.poi.hwpf.model.ListLevel;
import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.model.PAPX;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
@ -13,13 +14,12 @@ public class ListEntry
ListLevel _level; ListLevel _level;
ListFormatOverrideLevel _overrideLevel; ListFormatOverrideLevel _overrideLevel;
ListEntry(int start, int end, ListTables tables, ListEntry(PAPX papx, Range parent, ListTables tables)
ParagraphProperties pap, SprmBuffer sprmBuf, Range parent)
{ {
super(start, end, pap, sprmBuf, parent); super(papx, parent);
ListFormatOverride override = tables.getOverride(pap.getIlfo()); ListFormatOverride override = tables.getOverride(_props.getIlfo());
_overrideLevel = override.getOverrideLevel(pap.getIlvl()); _overrideLevel = override.getOverrideLevel(_props.getIlvl());
_level = tables.getLevel(override.getLsid(), pap.getIlvl()); _level = tables.getLevel(override.getLsid(), _props.getIlvl());
} }
public int type() public int type()

View File

@ -125,8 +125,8 @@ public class Paragraph
public final static short SPRM_FADJUSTRIGHT = 0x2448; public final static short SPRM_FADJUSTRIGHT = 0x2448;
private short _istd; protected short _istd;
private ParagraphProperties _props; protected ParagraphProperties _props;
protected SprmBuffer _papx; protected SprmBuffer _papx;
protected Paragraph(int startIdx, int endIdx, Table parent) protected Paragraph(int startIdx, int endIdx, Table parent)
@ -135,13 +135,20 @@ public class Paragraph
PAPX papx = (PAPX)_paragraphs.get(_parEnd - 1); PAPX papx = (PAPX)_paragraphs.get(_parEnd - 1);
_props = papx.getParagraphProperties(_doc.getStyleSheet()); _props = papx.getParagraphProperties(_doc.getStyleSheet());
_papx = papx.getSprmBuf(); _papx = papx.getSprmBuf();
_istd = papx.getIstd();
} }
public Paragraph(int start, int end, ParagraphProperties pap, SprmBuffer papx, Range parent) protected Paragraph(PAPX papx, Range parent)
{ {
super(start, end, parent); super(papx.getStart(), papx.getEnd(), parent);
_props = pap; _props = papx.getParagraphProperties(_doc.getStyleSheet());
_papx = papx; _papx = papx.getSprmBuf();
_istd = papx.getIstd();
}
/**
 * Gets the style index of this paragraph.
 *
 * @return the value of <code>_istd</code>, which the constructors of this
 *         class copy from <code>PAPX.getIstd()</code>; it can be handed to
 *         the document's style sheet to obtain the style description.
 */
public short getStyleIndex()
{
return _istd;
} }
public int type() public int type()

View File

@ -63,66 +63,6 @@ public class ParagraphProperties
extends PAPAbstractType extends PAPAbstractType
implements Cloneable implements Cloneable
{ {
public final static short SPRM_JC = 0x2403;
public final static short SPRM_FSIDEBYSIDE = 0x2404;
public final static short SPRM_FKEEP = 0x2405;
public final static short SPRM_FKEEPFOLLOW = 0x2406;
public final static short SPRM_FPAGEBREAKBEFORE = 0x2407;
public final static short SPRM_BRCL = 0x2408;
public final static short SPRM_BRCP = 0x2409;
public final static short SPRM_ILVL = 0x260A;
public final static short SPRM_ILFO = 0x460B;
public final static short SPRM_FNOLINENUMB = 0x240C;
public final static short SPRM_CHGTABSPAPX = (short)0xC60D;
public final static short SPRM_DXARIGHT = (short)0x840E;
public final static short SPRM_DXALEFT = (short)0x840F;
public final static short SPRM_DXALEFT1 = (short)0x8411;
public final static short SPRM_DYALINE = 0x6412;
public final static short SPRM_DYABEFORE = (short)0xA413;
public final static short SPRM_DYAAFTER = (short)0xA414;
public final static short SPRM_CHGTABS = (short)0xC615;
public final static short SPRM_FINTABLE = 0x2416;
public final static short SPRM_FTTP = 0x2417;
public final static short SPRM_DXAABS = (short)0x8418;
public final static short SPRM_DYAABS = (short)0x8419;
public final static short SPRM_DXAWIDTH = (short)0x841A;
public final static short SPRM_PC = 0x261B;
public final static short SPRM_WR = 0x2423;
public final static short SPRM_BRCTOP = 0x6424;
public final static short SPRM_BRCLEFT = 0x6425;
public final static short SPRM_BRCBOTTOM = 0x6426;
public final static short SPRM_BRCRIGHT = 0x6427;
public final static short SPRM_BRCBAR = 0x6629;
public final static short SPRM_FNOAUTOHYPH = 0x242A;
public final static short SPRM_WHEIGHTABS = 0x442B;
public final static short SPRM_DCS = 0x442C;
public final static short SPRM_SHD = 0x442D;
public final static short SPRM_DYAFROMTEXT = (short)0x842E;
public final static short SPRM_DXAFROMTEXT = (short)0x842F;
public final static short SPRM_FLOCKED = 0x2430;
public final static short SPRM_FWIDOWCONTROL = 0x2431;
public final static short SPRM_RULER = (short)0xC632;
public final static short SPRM_FKINSOKU = 0x2433;
public final static short SPRM_FWORDWRAP = 0x2434;
public final static short SPRM_FOVERFLOWPUNCT = 0x2435;
public final static short SPRM_FTOPLINEPUNCT = 0x2436;
public final static short SPRM_AUTOSPACEDE = 0x2437;
public final static short SPRM_AUTOSPACEDN = 0x2438;
public final static short SPRM_WALIGNFONT = 0x4439;
public final static short SPRM_FRAMETEXTFLOW = 0x443A;
public final static short SPRM_ANLD = (short)0xC63E;
public final static short SPRM_PROPRMARK = (short)0xC63F;
public final static short SPRM_OUTLVL = 0x2640;
public final static short SPRM_FBIDI = 0x2441;
public final static short SPRM_FNUMRMLNS = 0x2443;
public final static short SPRM_CRLF = 0x2444;
public final static short SPRM_NUMRM = (short)0xC645;
public final static short SPRM_USEPGSUSETTINGS = 0x2447;
public final static short SPRM_FADJUSTRIGHT = 0x2448;
private StyleDescription _baseStyle;
private SprmBuffer _papx;
public ParagraphProperties() public ParagraphProperties()
{ {

View File

@ -426,13 +426,11 @@ public class Range
Paragraph pap = null; Paragraph pap = null;
if (props.getIlfo() > 0) if (props.getIlfo() > 0)
{ {
pap = new ListEntry(papx.getStart(), papx.getEnd(), _doc.getListTables(), pap = new ListEntry(papx, this, _doc.getListTables());
props, papx.getSprmBuf(), this);
} }
else else
{ {
pap = new Paragraph(papx.getStart(), papx.getEnd(), props, pap = new Paragraph(papx, this);
papx.getSprmBuf(), this);
} }
return pap; return pap;
@ -443,6 +441,45 @@ public class Range
return TYPE_UNDEFINED; return TYPE_UNDEFINED;
} }
/**
 * Builds the {@link Table} that starts at the given paragraph.
 *
 * @param paragraph the first paragraph of the table; it must be a direct
 *        child of this range and must report <code>isInTable()</code>.
 * @return a Table spanning from the paragraph's first paragraph index up to
 *         (but not including) the first following paragraph that is not in
 *         a table.
 * @throws IllegalArgumentException if the paragraph is not in a table, is
 *         not a child of this range, or is preceded by another table
 *         paragraph (i.e. it is not the first one in the table).
 * @throws ArrayIndexOutOfBoundsException if the table extends past the end
 *         of this range.
 */
public Table getTable(Paragraph paragraph)
{
  if (!paragraph.isInTable())
  {
    throw new IllegalArgumentException("This paragraph doesn't belong to a table");
  }

  // Work through a Range-typed reference so the Range-level fields are
  // reachable regardless of their visibility to subclasses.
  Range r = (Range)paragraph;
  if (r._parent.get() != this)
  {
    throw new IllegalArgumentException("This paragraph is not a child of this range");
  }

  r.initAll();
  int tableEnd = r._parEnd;

  if (r._parStart != 0 && isTableParagraph(r, r._parStart - 1))
  {
    throw new IllegalArgumentException("This paragraph is not the first one in the table");
  }

  // Advance until the first paragraph that is no longer part of a table.
  int limit = r._paragraphs.size();
  while (tableEnd < limit && isTableParagraph(r, tableEnd))
  {
    tableEnd++;
  }

  initAll();
  if (tableEnd > _parEnd)
  {
    throw new ArrayIndexOutOfBoundsException("The table's bounds fall outside of this Range");
  }

  return new Table(r._parStart, tableEnd, r._doc.getRange(), 1);
}

/**
 * Tells whether the paragraph stored at an absolute index of the owner's
 * paragraph list is part of a table.
 *
 * The list holds PAPX entries (Paragraph's own constructor reads it that
 * way), not Paragraph objects, so casting an element straight to Paragraph
 * fails with a ClassCastException. A Paragraph view is built from the PAPX
 * first and then queried.
 */
private static boolean isTableParagraph(Range owner, int index)
{
  org.apache.poi.hwpf.model.PAPX papx =
    (org.apache.poi.hwpf.model.PAPX)owner._paragraphs.get(index);
  return new Paragraph(papx, owner).isInTable();
}
private void initAll() private void initAll()
{ {
initText(); initText();

View File

@ -50,7 +50,7 @@ public class TableRow
public void setRowJustification(int jc) public void setRowJustification(int jc)
{ {
_tprops.setJc(jc); _tprops.setJc(jc);
_papx.addSprm(SPRM_TJC, (short)jc); _papx.updateSprm(SPRM_TJC, (short)jc);
} }
public int getGapHalf() public int getGapHalf()
@ -61,7 +61,7 @@ public class TableRow
public void setGapHalf(int dxaGapHalf) public void setGapHalf(int dxaGapHalf)
{ {
_tprops.setDxaGapHalf(dxaGapHalf); _tprops.setDxaGapHalf(dxaGapHalf);
_papx.addSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf); _papx.updateSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf);
} }
public int getRowHeight() public int getRowHeight()
@ -72,7 +72,7 @@ public class TableRow
public void setRowHeight(int dyaRowHeight) public void setRowHeight(int dyaRowHeight)
{ {
_tprops.setDyaRowHeight(dyaRowHeight); _tprops.setDyaRowHeight(dyaRowHeight);
_papx.addSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight); _papx.updateSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight);
} }
public boolean cantSplit() public boolean cantSplit()
@ -83,7 +83,7 @@ public class TableRow
public void setCantSplit(boolean cantSplit) public void setCantSplit(boolean cantSplit)
{ {
_tprops.setFCantSplit(cantSplit); _tprops.setFCantSplit(cantSplit);
_papx.addSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0)); _papx.updateSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0));
} }
public boolean isTableHeader() public boolean isTableHeader()
@ -94,7 +94,7 @@ public class TableRow
public void setTableHeader(boolean tableHeader) public void setTableHeader(boolean tableHeader)
{ {
_tprops.setFTableHeader(tableHeader); _tprops.setFTableHeader(tableHeader);
_papx.addSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0)); _papx.updateSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0));
} }
} }

View File

@ -80,7 +80,7 @@ public class TestPAPBinTable
byte[] tableStream = _hWPFDocFixture._tableStream; byte[] tableStream = _hWPFDocFixture._tableStream;
int fcMin = fib.getFcMin(); int fcMin = fib.getFcMin();
_pAPBinTable = new PAPBinTable(mainStream, tableStream, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin); _pAPBinTable = new PAPBinTable(mainStream, tableStream, null, fib.getFcPlcfbtePapx(), fib.getLcbPlcfbtePapx(), fcMin);
HWPFFileSystem fileSys = new HWPFFileSystem(); HWPFFileSystem fileSys = new HWPFFileSystem();
@ -91,7 +91,7 @@ public class TestPAPBinTable
byte[] newTableStream = tableOut.toByteArray(); byte[] newTableStream = tableOut.toByteArray();
byte[] newMainStream = mainOut.toByteArray(); byte[] newMainStream = mainOut.toByteArray();
PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, 0, newTableStream.length, 0); PAPBinTable newBinTable = new PAPBinTable(newMainStream, newTableStream, null,0, newTableStream.length, 0);
ArrayList oldTextRuns = _pAPBinTable.getParagraphs(); ArrayList oldTextRuns = _pAPBinTable.getParagraphs();
ArrayList newTextRuns = newBinTable.getParagraphs(); ArrayList newTextRuns = newBinTable.getParagraphs();