Pier's other HWPF patch.
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353588 13f79535-47bb-0310-9956-ffa450edef68
parent c140d28e76
commit 51655ccda2
@@ -22,15 +22,6 @@
*/

package org.apache.poi.hssf.usermodel;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;

import org.apache.poi.hssf.eventmodel.EventRecordFactory;
import org.apache.poi.hssf.model.Sheet;
import org.apache.poi.hssf.model.Workbook;
@@ -39,14 +30,19 @@ import org.apache.poi.hssf.record.formula.Area3DPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.UnionPtg;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.filesystem.*;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;

/**
* High level representation of a workbook. This is the first object most users
* will construct whether they are reading or writing a workbook. It is also the
@@ -84,7 +80,7 @@ public class HSSFWorkbook
* this holds the HSSFSheet objects attached to this workbook
*/

private ArrayList sheets;
protected ArrayList sheets;

/**
* this holds the HSSFName objects attached to this workbook
@@ -121,9 +117,14 @@ public class HSSFWorkbook

public HSSFWorkbook()
{
workbook = Workbook.createWorkbook();
sheets = new ArrayList(INITIAL_CAPACITY);
names = new ArrayList(INITIAL_CAPACITY);
this(Workbook.createWorkbook());
}

protected HSSFWorkbook( Workbook book )
{
workbook = book;
sheets = new ArrayList( INITIAL_CAPACITY );
names = new ArrayList( INITIAL_CAPACITY );
}

public HSSFWorkbook(POIFSFileSystem fs) throws IOException {
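The constructor hunk above removes the duplicated field setup from the no-argument constructor and delegates to a new protected constructor that accepts the low-level Workbook. A minimal sketch of that this(...) delegation pattern, with illustrative class and field names rather than the real HSSFWorkbook source:

import java.util.ArrayList;

// Illustrative only: the this(...) delegation used by the patch, not the real HSSFWorkbook.
class WorkbookSketch {
    private static final int INITIAL_CAPACITY = 3;   // hypothetical value

    private final Object book;       // stands in for the low-level Workbook model object
    protected ArrayList sheets;      // raw types, matching the era of the code
    protected ArrayList names;

    public WorkbookSketch() {
        // delegate so the field initialisation lives in exactly one place
        this(new Object());
    }

    protected WorkbookSketch(Object book) {
        this.book = book;
        sheets = new ArrayList(INITIAL_CAPACITY);
        names = new ArrayList(INITIAL_CAPACITY);
    }
}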
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.hwpf.model;

@@ -106,10 +106,10 @@ public class ListLevel

_grpprlPapx = new byte[_cbGrpprlPapx];
_grpprlChpx = new byte[_cbGrpprlChpx];
System.arraycopy(buf, offset, _grpprlChpx, 0, _cbGrpprlChpx);
offset += _cbGrpprlChpx;
System.arraycopy(buf, offset, _grpprlPapx, 0, _cbGrpprlPapx);
offset += _cbGrpprlPapx;
System.arraycopy(buf, offset, _grpprlChpx, 0, _cbGrpprlChpx);
offset += _cbGrpprlChpx;

int numberTextLength = LittleEndian.getShort(buf, offset);
_numberText = new char[numberTextLength];
@@ -168,6 +168,10 @@ public class ListLevel
_grpprlPapx = grpprl;
}

public byte[] getLevelProperties()
{
return _grpprlPapx;
}

public boolean equals(Object obj)
{
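The ListLevel hunk reorders the two System.arraycopy calls so the grpprl blocks are copied out of the buffer in the order they are actually serialised, with a single offset cursor advancing past each block; it also exposes the paragraph grpprl through getLevelProperties(). A small, order-agnostic sketch of that cursor pattern, using hypothetical block sizes rather than the real LVL layout:

// Illustrative only: copy consecutive, length-known blocks out of a buffer with a
// single advancing cursor. Whichever block is serialised first must be copied first,
// which is what the reordered arraycopy calls above are about.
class BlockCursorSketch {
    static byte[][] readBlocks(byte[] buf, int offset, int... sizes) {
        byte[][] blocks = new byte[sizes.length][];
        for (int i = 0; i < sizes.length; i++) {
            blocks[i] = new byte[sizes[i]];
            System.arraycopy(buf, offset, blocks[i], 0, sizes[i]);
            offset += sizes[i];   // advance past the block just read
        }
        return blocks;
    }

    public static void main(String[] args) {
        byte[] buf = { 1, 2, 3, 4, 5, 6, 7 };
        byte[][] blocks = readBlocks(buf, 0, 3, 4);   // e.g. one grpprl of 3 bytes, then one of 4
        System.out.println(blocks[0].length + " + " + blocks[1].length); // 3 + 4
    }
}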
@@ -192,6 +192,11 @@ public class ListTables
return lvl;
}

public ListData getListData(int listID)
{
return (ListData) _listMap.get(new Integer(listID));
}

public boolean equals(Object obj)
{
if (obj == null)
@@ -45,6 +45,7 @@ public class SectionTable
{
PlexOfCps sedPlex = new PlexOfCps(tableStream, offset, size, SED_SIZE);
_text = tpt;

int length = sedPlex.length();

for (int x = 0; x < length; x++)
@@ -85,50 +86,51 @@ public class SectionTable
}
}

private int CPtoFC(int cp)
{
int size = _text.size();
int x = 0;
int end = 0;
int fc = 0;
for (; x < size; x++)
{
TextPiece piece = (TextPiece)_text.get(x);
int currentStart = end;
end += ((piece.getEnd()- piece.getStart())/(piece.usesUnicode() ? 2 : 1));
if (cp <= end)
{
fc += ((cp - currentStart) * (piece.usesUnicode() ? 2 : 1));
break;
}
else
{
fc += (piece.getEnd() - piece.getStart());
}
}
return fc;
}

private int FCtoCP(int fc)
{
int size = _text.size();
int cp = 0;
for (int x = 0; x < size; x++)
{
TextPiece piece = (TextPiece)_text.get(x);

if (fc <= piece.getEnd())
{
cp += ((fc - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
break;
}
else
{
cp += ((piece.getEnd() - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
}
}
return cp;
}

// goss version of CPtoFC - this takes into account non-contiguous textpieces
// that we have come across in real world documents. Tests against the example
// code in HWPFDocument show no variation to Ryan's version of the code in
// normal use, but this version works with our non-contiguous test case.
// So far unable to get this test case to be written out as well due to
// other issues. - piers
private int CPtoFC(int CP)
{
TextPiece TP = null;

for(int i=_text.size()-1; i>-1; i--)
{
TP = (TextPiece)_text.get(i);

if(CP >= TP.getCP()) break;
}
int FC = TP.getPieceDescriptor().getFilePosition();
int offset = CP - TP.getCP();
if(TP.usesUnicode()) offset*=2;
FC = FC+offset-((TextPiece)_text.get(0)).getPieceDescriptor().getFilePosition();
return FC;
}

// Ryans code
private int FCtoCP(int fc)
{
int size = _text.size();
int cp = 0;
for (int x = 0; x < size; x++)
{
TextPiece piece = (TextPiece)_text.get(x);

if (fc <= piece.getEnd())
{
cp += ((fc - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
break;
}
else
{
cp += ((piece.getEnd() - piece.getStart())/ (piece.usesUnicode() ? 2 : 1));
}
}
return cp;
}


public ArrayList getSections()
{
@@ -163,12 +165,20 @@ public class SectionTable
sed.setFc(offset);

// add the section descriptor bytes to the PlexOfCps.

// original line -
//GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray());

// Line using Ryan's FCtoCP() conversion method -
// unable to observe any effect on our testcases when using this code - piers
GenericPropertyNode property = new GenericPropertyNode(FCtoCP(sepx.getStart()), FCtoCP(sepx.getEnd()), sed.toByteArray());

plex.addProperty(property);

offset = docStream.getOffset();
}
tableStream.write(plex.toByteArray());
}

}
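The CPtoFC/FCtoCP hunk is the heart of the patch: character positions (CPs) count characters, while file coordinates (FCs) count bytes in the document stream, and a Unicode text piece stores two bytes per character, so any conversion has to walk the text pieces and scale each span by 1 or 2. The goss CPtoFC additionally anchors each piece at the file position recorded in its piece descriptor, which is what makes non-contiguous pieces work. The sketch below shows only the scaling walk over a hypothetical piece list; the Piece class is illustrative and is not POI's TextPiece:

import java.util.ArrayList;
import java.util.List;

// Illustrative sketch of the CP-to-byte-offset walk described above.
// Piece stands in for a text piece: a run of characters stored either as
// 1 byte per char (Cp1252) or 2 bytes per char (UTF-16LE).
class CpFcSketch {
    static class Piece {
        final int charCount;      // number of characters in this piece
        final boolean unicode;    // true -> 2 bytes per character on disk
        Piece(int charCount, boolean unicode) { this.charCount = charCount; this.unicode = unicode; }
        int byteCount() { return charCount * (unicode ? 2 : 1); }
    }

    // Convert a character position into a byte offset by walking the pieces.
    static int cpToByteOffset(List<Piece> pieces, int cp) {
        int byteOffset = 0;
        int charsSeen = 0;
        for (Piece p : pieces) {
            if (cp <= charsSeen + p.charCount) {
                return byteOffset + (cp - charsSeen) * (p.unicode ? 2 : 1);
            }
            charsSeen += p.charCount;
            byteOffset += p.byteCount();
        }
        return byteOffset; // cp past the end: clamp to the total byte length
    }

    public static void main(String[] args) {
        List<Piece> pieces = new ArrayList<>();
        pieces.add(new Piece(10, false));  // 10 chars, 10 bytes
        pieces.add(new Piece(5, true));    //  5 chars, 10 bytes
        // CP 12 = 10 chars of the first piece + 2 chars into the unicode piece
        // -> 10 bytes + 2*2 bytes = 14
        System.out.println(cpToByteOffset(pieces, 12)); // prints 14
    }
}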
@@ -31,13 +31,15 @@ public class TextPiece extends PropertyNode implements Comparable

private PieceDescriptor _pd;

private int _cpStart;

/**
* @param start Offset in main document stream.
* @param length The total length of the text in bytes. Note: 1 character
* does not necessarily refer to 1 byte.
* @param unicode true if this text is unicode.
*/
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd)
public TextPiece(int start, int end, byte[] text, PieceDescriptor pd, int cpStart)
throws UnsupportedEncodingException
{
/** start - end is length on file. This is double the expected when its
@@ -45,6 +47,7 @@ public class TextPiece extends PropertyNode implements Comparable
super(start, end, new StringBuffer(new String(text, pd.isUnicode() ? "UTF-16LE" : "Cp1252")));
_usesUnicode = pd.isUnicode();
_pd = pd;
_cpStart = cpStart;
}
/**
* @return If this text piece uses unicode
@@ -64,11 +67,6 @@ public class TextPiece extends PropertyNode implements Comparable
return (StringBuffer)_buf;
}

public void setStringBuffer(StringBuffer buf)
{
_buf = buf;
}

public byte[] getRawBytes()
{
try
@@ -113,4 +111,10 @@ public class TextPiece extends PropertyNode implements Comparable
return false;
}


public int getCP()
{
return _cpStart;
}

}
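TextPiece now carries the CP at which the piece starts (cpStart, exposed through getCP()) and, as before, decodes its raw bytes as UTF-16LE when the piece descriptor marks it as Unicode and as Cp1252 otherwise, which is why the byte length and the character length can differ by a factor of two. A minimal standalone decoding sketch, not the actual constructor:

import java.io.UnsupportedEncodingException;

// Illustrative: decode a text piece's raw bytes with the charset implied by its
// unicode flag, mirroring the UTF-16LE / Cp1252 choice in the constructor above.
class TextPieceDecode {
    static String decode(byte[] raw, boolean unicode) throws UnsupportedEncodingException {
        return new String(raw, unicode ? "UTF-16LE" : "Cp1252");
    }

    public static void main(String[] args) throws UnsupportedEncodingException {
        byte[] ascii = { 'H', 'i' };                        // 2 bytes -> 2 chars
        byte[] utf16 = { 'H', 0, 'i', 0 };                  // 4 bytes -> 2 chars
        System.out.println(decode(ascii, false).length());  // 2
        System.out.println(decode(utf16, true).length());   // 2
    }
}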
@@ -65,10 +65,8 @@ public class TextPieceTable
// }
}

_cpMin = pieces[0].getFilePosition() - fcMin;
// if a piece is unicode the actual offset may be bumped because of the
// doubling of the needed size.
int bump = 0;
int firstPieceFilePosition = pieces[0].getFilePosition();
_cpMin = firstPieceFilePosition - fcMin;

// using the PieceDescriptors, build our list of TextPieces.
for (int x = 0; x < pieces.length; x++)
@@ -92,12 +90,9 @@ public class TextPieceTable

byte[] buf = new byte[textSize];
System.arraycopy(documentStream, start, buf, 0, textSize);
_textPieces.add(new TextPiece(nodeStart + bump, nodeEnd + bump, buf, pieces[x]));

if (unicode)
{
bump += (node.getEnd() - nodeStart);
}
int startFilePosition = start - firstPieceFilePosition;
_textPieces.add(new TextPiece(startFilePosition, startFilePosition+textSize, buf, pieces[x], node.getStart()));
}
}
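TextPieceTable now expresses every piece's start and end relative to the file position of the first piece instead of accumulating a per-piece Unicode "bump", and it passes the piece's own CP (node.getStart()) into the new TextPiece constructor. The offset arithmetic reduces to the illustrative helper below (hypothetical numbers, not the real loop):

// Illustrative arithmetic only: a piece's in-memory start/end are expressed
// relative to the file position of the first piece, as in the hunk above.
class PieceOffsetSketch {
    static int[] pieceBounds(int pieceFilePosition, int firstPieceFilePosition, int textSizeInBytes) {
        int start = pieceFilePosition - firstPieceFilePosition;
        return new int[] { start, start + textSizeInBytes };
    }

    public static void main(String[] args) {
        // first piece begins at file offset 1024; this piece begins at 1536 and holds 200 bytes
        int[] bounds = pieceBounds(1536, 1024, 200);
        System.out.println(bounds[0] + ".." + bounds[1]); // 512..712
    }
}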
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.hwpf.sprm;

@@ -407,7 +407,7 @@ public class ParagraphSprmUncompressor

for (int x = 0; x < delSize; x++)
{
tabMap.remove(new Integer(LittleEndian.getInt(grpprl, offset)));
tabMap.remove(new Integer(LittleEndian.getShort(grpprl, offset)));
offset += LittleEndian.SHORT_SIZE;
}
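The one-line ParagraphSprmUncompressor fix reads each deleted tab position as a 2-byte little-endian value, matching the SHORT_SIZE stride of the loop; reading four bytes with getInt could fold part of the next entry into the key, so the tabMap lookup would miss. A standalone sketch of walking consecutive little-endian shorts in a buffer, in plain Java rather than POI's LittleEndian utility:

// Illustrative: walk a buffer of consecutive 2-byte little-endian values,
// the layout assumed by the fixed loop above.
class LittleEndianShorts {
    static int getShort(byte[] buf, int offset) {
        return (buf[offset] & 0xff) | ((buf[offset + 1] & 0xff) << 8);
    }

    public static void main(String[] args) {
        byte[] grpprl = { 0x10, 0x00, 0x20, 0x00, 0x30, 0x00 }; // 0x0010, 0x0020, 0x0030
        final int SHORT_SIZE = 2;
        for (int offset = 0; offset < grpprl.length; offset += SHORT_SIZE) {
            System.out.println(getShort(grpprl, offset)); // 16, 32, 48
        }
    }
}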
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.hwpf.sprm;

@@ -133,12 +133,26 @@ public class TableSprmUncompressor
newTAP.setRgdxaCenter (rgdxaCenter);
newTAP.setRgtc (rgtc);

// get the rgdxaCenters
for (int x = 0; x < itcMac; x++)
{
rgdxaCenter[x] = LittleEndian.getShort (grpprl, offset + (1 + (x * 2)));
rgtc[x] = TableCellDescriptor.convertBytesToTC (grpprl,
offset + (1 + ((itcMac + 1) * 2) + (x * 20)));
}

// only try to get the TC entries if they exist...
int endOfSprm = offset+sprm.size()-6; // -2 bytes for sprm - 2 for size short - 2 to correct offsets being 0 based
int startOfTCs = offset + (1 + (itcMac + 1) * 2);

boolean hasTCs = startOfTCs < endOfSprm;

for (int x = 0; x < itcMac; x++)
{
if(hasTCs) rgtc[x] = TableCellDescriptor.convertBytesToTC(grpprl,
offset + (1 + ( (itcMac + 1) * 2) + (x * 20)));
else
rgtc[x] = new TableCellDescriptor();
}

rgdxaCenter[itcMac] = LittleEndian.getShort (grpprl, offset + (1 + (itcMac * 2)));
break;
}
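The TableSprmUncompressor change guards the per-cell TC reads: the operand only carries TC structures when its payload is long enough, so the code derives the payload end from the sprm size, works out where the TC array would begin, and falls back to empty descriptors when the array is absent. A sketch of that bounds check with hypothetical sizes; the real operand is the table-definition sprm:

// Illustrative bounds check: decide whether a variable-length operand actually
// contains the optional trailing TC array before trying to read it.
class TcGuardSketch {
    static final int TC_SIZE = 20;  // bytes per table-cell descriptor in this sketch

    static boolean hasTcEntries(int offset, int sprmSize, int itcMac) {
        // -2 for the sprm code, -2 for the size short, -2 for 0-based offsets (per the hunk's comment)
        int endOfSprm = offset + sprmSize - 6;
        // flag byte + (itcMac + 1) column boundaries stored as shorts
        int startOfTCs = offset + (1 + (itcMac + 1) * 2);
        return startOfTCs < endOfSprm;
    }

    public static void main(String[] args) {
        // 3 cells, operand long enough to hold the boundaries plus 3 TCs
        System.out.println(hasTcEntries(0, 6 + 1 + 4 * 2 + 3 * TC_SIZE, 3)); // true
        // 3 cells, operand holds only the column boundaries
        System.out.println(hasTcEntries(0, 6 + 1 + 4 * 2, 3));               // false
    }
}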
@@ -18,13 +18,9 @@

package org.apache.poi.hwpf.usermodel;

import org.apache.poi.hwpf.model.types.CHPAbstractType;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.model.CHPX;
import org.apache.poi.hwpf.model.StyleSheet;

import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.sprm.CharacterSprmCompressor;

/**
* This class represents a run of text that share common properties.
@@ -14,7 +14,7 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.hwpf.usermodel;

@@ -431,16 +431,14 @@ public class Paragraph
}

public int getIlfo()
{
return _props.getIlfo();
}

public int getIlvl()
{
return _props.getIlvl();
}

{
return _props.getIlfo();
}

public int getIlvl()
{
return _props.getIlvl();
}

void setTableRowEnd(TableProperties props)
{
@@ -252,7 +252,13 @@ public class Range
TextPiece piece = (TextPiece)_text.get(x);
int start = _start > piece.getStart() ? _start - piece.getStart() : 0;
int end = _end <= piece.getEnd() ? _end - piece.getStart() : piece.getEnd() - piece.getStart();
sb.append(piece.substring(start, end));

if(piece.usesUnicode()) // convert the byte pointers to char pointers
{
start/=2;
end/=2;
}
sb.append(piece.getStringBuffer().substring(start, end));
}
return sb.toString();
}
@@ -693,7 +699,8 @@ public class Range
r.initAll();
int tableEnd = r._parEnd;

if (r._parStart != 0 && getParagraph(r._parStart - 1).isInTable())
if (r._parStart != 0 && getParagraph(r._parStart - 1).isInTable()
&& getParagraph(r._parStart - 1)._sectionEnd >= r._sectionStart)
{
throw new IllegalArgumentException("This paragraph is not the first one in the table");
}
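Range.text() now rescales its slice bounds before taking the substring: start and end are computed in bytes relative to the piece, but the piece's StringBuffer is indexed in characters, so both bounds are halved for a Unicode piece. A compact sketch of that conversion, using an illustrative helper rather than the Range class:

// Illustrative: convert byte bounds within a piece into character bounds
// before slicing its decoded text, as Range.text() now does.
class ByteToCharBounds {
    static String slice(String decodedPiece, int startByte, int endByte, boolean unicode) {
        int divisor = unicode ? 2 : 1;
        return decodedPiece.substring(startByte / divisor, endByte / divisor);
    }

    public static void main(String[] args) {
        String piece = "Hello world";
        // bytes 12..22 of a UTF-16LE piece correspond to chars 6..11
        System.out.println(slice(piece, 12, 22, true));  // "world"
        System.out.println(slice(piece, 6, 11, false));  // "world"
    }
}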
@@ -36,12 +36,12 @@ public class Table
while (rowEnd < numParagraphs)
{
Paragraph p = getParagraph(rowEnd);
rowEnd++;
if (p.isTableRowEnd() && p.getTableLevel() == levelNum)
{
_rows.add(new TableRow(rowStart, rowEnd + 1, this, levelNum));
_rows.add(new TableRow(rowStart, rowEnd, this, levelNum));
rowStart = rowEnd;
}
rowEnd++;
}
}
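The Table fix advances rowEnd past the current paragraph before testing for a row-end marker, so when a row is recorded the next row's rowStart begins on the paragraph after the marker instead of on the marker itself, and the old rowEnd + 1 upper bound becomes plain rowEnd. A small sketch of that scan, with boolean markers standing in for isTableRowEnd() and table nesting levels ignored:

import java.util.ArrayList;
import java.util.List;

// Illustrative: slice a run of paragraphs into rows at the row-end markers.
// A row covers [rowStart, rowEnd) where rowEnd is one past its end marker.
class RowScanSketch {
    static List<int[]> rows(boolean[] isRowEnd) {
        List<int[]> rows = new ArrayList<>();
        int rowStart = 0;
        int rowEnd = 0;
        while (rowEnd < isRowEnd.length) {
            boolean endMarker = isRowEnd[rowEnd];
            rowEnd++;                         // advance first, as in the fixed loop
            if (endMarker) {
                rows.add(new int[] { rowStart, rowEnd });
                rowStart = rowEnd;            // next row starts after the marker
            }
        }
        return rows;
    }

    public static void main(String[] args) {
        // paragraphs 0-2 form row one (marker at 2), 3-4 form row two (marker at 4)
        boolean[] marks = { false, false, true, false, true };
        for (int[] r : rows(marks)) {
            System.out.println(r[0] + ".." + r[1]); // 0..3 then 3..5
        }
    }
}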
@@ -14,11 +14,10 @@
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */


package org.apache.poi.hwpf.usermodel;

import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer;

public class TableRow
extends Paragraph