From 51655ccda2533509965cf8fe876b9b0283ca9af4 Mon Sep 17 00:00:00 2001 From: Glen Stampoultzis Date: Tue, 24 Aug 2004 12:54:01 +0000 Subject: [PATCH] Pier's other HWPF patch. git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353588 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hssf/usermodel/HSSFWorkbook.java | 37 ++++---- .../org/apache/poi/hwpf/model/ListLevel.java | 10 ++- .../org/apache/poi/hwpf/model/ListTables.java | 5 ++ .../apache/poi/hwpf/model/SectionTable.java | 90 ++++++++++--------- .../org/apache/poi/hwpf/model/TextPiece.java | 16 ++-- .../apache/poi/hwpf/model/TextPieceTable.java | 13 +-- .../hwpf/sprm/ParagraphSprmUncompressor.java | 4 +- .../poi/hwpf/sprm/TableSprmUncompressor.java | 20 ++++- .../poi/hwpf/usermodel/CharacterRun.java | 4 - .../apache/poi/hwpf/usermodel/Paragraph.java | 18 ++-- .../org/apache/poi/hwpf/usermodel/Range.java | 11 ++- .../org/apache/poi/hwpf/usermodel/Table.java | 4 +- .../apache/poi/hwpf/usermodel/TableRow.java | 3 +- 13 files changed, 134 insertions(+), 101 deletions(-) diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index f4f8002d9..6afe17f6f 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -22,15 +22,6 @@ */ package org.apache.poi.hssf.usermodel; -import java.io.ByteArrayInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; -import java.util.Stack; - import org.apache.poi.hssf.eventmodel.EventRecordFactory; import org.apache.poi.hssf.model.Sheet; import org.apache.poi.hssf.model.Workbook; @@ -39,14 +30,19 @@ import org.apache.poi.hssf.record.formula.Area3DPtg; import org.apache.poi.hssf.record.formula.MemFuncPtg; import org.apache.poi.hssf.record.formula.UnionPtg; import org.apache.poi.hssf.util.CellReference; -import org.apache.poi.poifs.filesystem.DirectoryEntry; -import org.apache.poi.poifs.filesystem.DocumentEntry; -import org.apache.poi.poifs.filesystem.DocumentInputStream; -import org.apache.poi.poifs.filesystem.Entry; -import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.poifs.filesystem.*; import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Stack; + /** * High level representation of a workbook. This is the first object most users * will construct whether they are reading or writing a workbook. It is also the @@ -84,7 +80,7 @@ public class HSSFWorkbook * this holds the HSSFSheet objects attached to this workbook */ - private ArrayList sheets; + protected ArrayList sheets; /** * this holds the HSSFName objects attached to this workbook @@ -121,9 +117,14 @@ public class HSSFWorkbook public HSSFWorkbook() { - workbook = Workbook.createWorkbook(); - sheets = new ArrayList(INITIAL_CAPACITY); - names = new ArrayList(INITIAL_CAPACITY); + this(Workbook.createWorkbook()); + } + + protected HSSFWorkbook( Workbook book ) + { + workbook = book; + sheets = new ArrayList( INITIAL_CAPACITY ); + names = new ArrayList( INITIAL_CAPACITY ); } public HSSFWorkbook(POIFSFileSystem fs) throws IOException { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java index f0a652833..5aa1761e2 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListLevel.java @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - + package org.apache.poi.hwpf.model; @@ -106,10 +106,10 @@ public class ListLevel _grpprlPapx = new byte[_cbGrpprlPapx]; _grpprlChpx = new byte[_cbGrpprlChpx]; - System.arraycopy(buf, offset, _grpprlChpx, 0, _cbGrpprlChpx); - offset += _cbGrpprlChpx; System.arraycopy(buf, offset, _grpprlPapx, 0, _cbGrpprlPapx); offset += _cbGrpprlPapx; + System.arraycopy(buf, offset, _grpprlChpx, 0, _cbGrpprlChpx); + offset += _cbGrpprlChpx; int numberTextLength = LittleEndian.getShort(buf, offset); _numberText = new char[numberTextLength]; @@ -168,6 +168,10 @@ public class ListLevel _grpprlPapx = grpprl; } + public byte[] getLevelProperties() + { + return _grpprlPapx; + } public boolean equals(Object obj) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java index 705912916..13e155df4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/ListTables.java @@ -192,6 +192,11 @@ public class ListTables return lvl; } + public ListData getListData(int listID) + { + return (ListData) _listMap.get(new Integer(listID)); + } + public boolean equals(Object obj) { if (obj == null) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java index 01f698cb1..748ab68b7 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java @@ -45,6 +45,7 @@ public class SectionTable { PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE); _text = tpt; + int length = sedPlex.length(); for (int x = 0; x < length; x++) @@ -85,50 +86,51 @@ public class SectionTable } } - private int CPtoFC(int cp) + // goss version of CPtoFC - this takes into account non-contiguous textpieces + // that we have come across in real world documents. Tests against the example + // code in HWPFDocument show no variation to Ryan's version of the code in + // normal use, but this version works with our non-contiguous test case. + // So far unable to get this test case to be written out as well due to + // other issues. - piers + private int CPtoFC(int CP) { - int size = _text.size(); - int x = 0; - int end = 0; - int fc = 0; - for (; x < size; x++) - { - TextPiece piece = (TextPiece)_text.get(x); - int currentStart = end; - end += ((piece.getEnd()- piece.getStart())/(piece.usesUnicode() ? 2 : 1)); - if (cp <= end) - { - fc += ((cp - currentStart) * (piece.usesUnicode() ? 2 : 1)); - break; - } - else - { - fc += (piece.getEnd() - piece.getStart()); - } - } - return fc; - } + TextPiece TP = null; - private int FCtoCP(int fc) - { - int size = _text.size(); - int cp = 0; - for (int x = 0; x < size; x++) - { - TextPiece piece = (TextPiece)_text.get(x); + for(int i=_text.size()-1; i>-1; i--) + { + TP = (TextPiece)_text.get(i); - if (fc <= piece.getEnd()) - { - cp += ((fc - piece.getStart())/ (piece.usesUnicode() ? 2 : 1)); - break; - } - else - { - cp += ((piece.getEnd() - piece.getStart())/ (piece.usesUnicode() ? 2 : 1)); + if(CP >= TP.getCP()) break; } + int FC = TP.getPieceDescriptor().getFilePosition(); + int offset = CP - TP.getCP(); + if(TP.usesUnicode()) offset*=2; + FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition(); + return FC; } - return cp; - } + + // Ryans code + private int FCtoCP(int fc) + { + int size = _text.size(); + int cp = 0; + for (int x = 0; x < size; x++) + { + TextPiece piece = (TextPiece)_text.get(x); + + if (fc <= piece.getEnd()) + { + cp += ((fc - piece.getStart())/ (piece.usesUnicode() ? 2 : 1)); + break; + } + else + { + cp += ((piece.getEnd() - piece.getStart())/ (piece.usesUnicode() ? 2 : 1)); + } + } + return cp; + } + public ArrayList getSections() { @@ -163,12 +165,20 @@ public class SectionTable sed.setFc(offset); // add the section descriptor bytes to the PlexOfCps. + + + // original line - + //GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray()); + + // Line using Ryan's FCtoCP() conversion method - + // unable to observe any effect on our testcases when using this code - piers GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()), sed.toByteArray()); + + plex.addProperty(property); offset = docStream.getOffset(); } tableStream.write(plex.toByteArray()); } - } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java index a9a3f1fb0..d833072a1 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPiece.java @@ -31,13 +31,15 @@ public class TextPiece extends PropertyNode implements Comparable private PieceDescriptor _pd; + private int _cpStart; + /** * @param start Offset in main document stream. * @param length The total length of the text in bytes. Note: 1 character * does not necessarily refer to 1 byte. * @param unicode true if this text is unicode. */ - public TextPiece(int start, int end, byte[] text, PieceDescriptor pd) + public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart) throws UnsupportedEncodingException { /** start - end is length on file. This is double the expected when its @@ -45,6 +47,7 @@ public class TextPiece extends PropertyNode implements Comparable super(start, end, new StringBuffer(new String(text, pd.isUnicode() ? "UTF-16LE" : "Cp1252"))); _usesUnicode = pd.isUnicode(); _pd = pd; + _cpStart = cpStart; } /** * @return If this text piece uses unicode @@ -64,11 +67,6 @@ public class TextPiece extends PropertyNode implements Comparable return (StringBuffer)_buf; } - public void setStringBuffer(StringBuffer buf) - { - _buf = buf; - } - public byte[] getRawBytes() { try @@ -113,4 +111,10 @@ public class TextPiece extends PropertyNode implements Comparable return false; } + + public int getCP() + { + return _cpStart; + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java index db9f27511..cb1364546 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java @@ -65,10 +65,8 @@ public class TextPieceTable // } } - _cpMin = pieces[0].getFilePosition() - fcMin; - // if a piece is unicode the actual offset may be bumped because of the - // doubling of the needed size. - int bump = 0; + int firstPieceFilePosition = pieces[0].getFilePosition(); + _cpMin = firstPieceFilePosition - fcMin; // using the PieceDescriptors, build our list of TextPieces. for (int x = 0; x < pieces.length; x++) @@ -92,12 +90,9 @@ public class TextPieceTable byte[] buf = new byte[textSize]; System.arraycopy(documentStream, start, buf, 0, textSize); - _textPieces.add(new TextPiece(nodeStart + bump, nodeEnd + bump, buf, pieces[x])); - if (unicode) - { - bump += (node.getEnd() - nodeStart); - } + int startFilePosition = start - firstPieceFilePosition; + _textPieces.add(new TextPiece(startFilePosition, startFilePosition+textSize, buf, pieces[x], node.getStart())); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/ParagraphSprmUncompressor.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/ParagraphSprmUncompressor.java index e5f696439..2e893d9cf 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/ParagraphSprmUncompressor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/ParagraphSprmUncompressor.java @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - + package org.apache.poi.hwpf.sprm; @@ -407,7 +407,7 @@ public class ParagraphSprmUncompressor for (int x = 0; x < delSize; x++) { - tabMap.remove(new Integer(LittleEndian.getInt(grpprl, offset))); + tabMap.remove(new Integer(LittleEndian.getShort(grpprl, offset))); offset += LittleEndian.SHORT_SIZE; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/sprm/TableSprmUncompressor.java b/src/scratchpad/src/org/apache/poi/hwpf/sprm/TableSprmUncompressor.java index c9c569f40..2c37eeb9a 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/sprm/TableSprmUncompressor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/sprm/TableSprmUncompressor.java @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - + package org.apache.poi.hwpf.sprm; @@ -133,12 +133,26 @@ public class TableSprmUncompressor newTAP.setRgdxaCenter (rgdxaCenter); newTAP.setRgtc (rgtc); + // get the rgdxaCenters for (int x = 0; x < itcMac; x++) { rgdxaCenter[x] = LittleEndian.getShort (grpprl, offset + (1 + (x * 2))); - rgtc[x] = TableCellDescriptor.convertBytesToTC (grpprl, - offset + (1 + ((itcMac + 1) * 2) + (x * 20))); } + + // only try to get the TC entries if they exist... + int endOfSprm = offset+sprm.size()-6; // -2 bytes for sprm - 2 for size short - 2 to correct offsets being 0 based + int startOfTCs = offset + (1 + (itcMac + 1) * 2); + + boolean hasTCs = startOfTCs < endOfSprm; + + for (int x = 0; x < itcMac; x++) + { + if(hasTCs) rgtc[x] = TableCellDescriptor.convertBytesToTC(grpprl, + offset + (1 + ( (itcMac + 1) * 2) + (x * 20))); + else + rgtc[x] = new TableCellDescriptor(); + } + rgdxaCenter[itcMac] = LittleEndian.getShort (grpprl, offset + (1 + (itcMac * 2))); break; } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java index 4eb9dc46d..db9826347 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/CharacterRun.java @@ -18,13 +18,9 @@ package org.apache.poi.hwpf.usermodel; -import org.apache.poi.hwpf.model.types.CHPAbstractType; -import org.apache.poi.hwpf.model.StyleDescription; import org.apache.poi.hwpf.model.CHPX; import org.apache.poi.hwpf.model.StyleSheet; - import org.apache.poi.hwpf.sprm.SprmBuffer; -import org.apache.poi.hwpf.sprm.CharacterSprmCompressor; /** * This class represents a run of text that share common properties. diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java index 8a3f258a3..51130fb72 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Paragraph.java @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - + package org.apache.poi.hwpf.usermodel; @@ -431,16 +431,14 @@ public class Paragraph } public int getIlfo() - { - return _props.getIlfo(); - } - - public int getIlvl() - { - return _props.getIlvl(); - } - + { + return _props.getIlfo(); + } + public int getIlvl() + { + return _props.getIlvl(); + } void setTableRowEnd(TableProperties props) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java index a2906c298..6570e9032 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Range.java @@ -252,7 +252,13 @@ public class Range TextPiece piece = (TextPiece)_text.get(x); int start = _start > piece.getStart() ? _start - piece.getStart() : 0; int end = _end <= piece.getEnd() ? _end - piece.getStart() : piece.getEnd() - piece.getStart(); - sb.append(piece.substring(start, end)); + + if(piece.usesUnicode()) // convert the byte pointers to char pointers + { + start/=2; + end/=2; + } + sb.append(piece.getStringBuffer().substring(start, end)); } return sb.toString(); } @@ -693,7 +699,8 @@ public class Range r.initAll(); int tableEnd = r._parEnd; - if (r._parStart != 0 && getParagraph(r._parStart - 1).isInTable()) + if (r._parStart != 0 && getParagraph(r._parStart - 1).isInTable() + && getParagraph(r._parStart - 1)._sectionEnd >= r._sectionStart) { throw new IllegalArgumentException("This paragraph is not the first one in the table"); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java index 535590286..f766fa167 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Table.java @@ -36,12 +36,12 @@ public class Table while (rowEnd < numParagraphs) { Paragraph p = getParagraph(rowEnd); + rowEnd++; if (p.isTableRowEnd() && p.getTableLevel() == levelNum) { - _rows.add(new TableRow(rowStart, rowEnd + 1, this, levelNum)); + _rows.add(new TableRow(rowStart, rowEnd, this, levelNum)); rowStart = rowEnd; } - rowEnd++; } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java index 410feed15..f0dddd4a0 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/TableRow.java @@ -14,11 +14,10 @@ See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ - + package org.apache.poi.hwpf.usermodel; import org.apache.poi.hwpf.sprm.TableSprmUncompressor; -import org.apache.poi.hwpf.sprm.SprmBuffer; public class TableRow extends Paragraph