Fix test case for bug 45473: calculate PAPX boundaries based on char positions, not on previously read byte positions (which are outdated); fix boundary checks (again)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143753 13f79535-47bb-0310-9956-ffa450edef68
2011-07-07 10:39:27 +00:00 · 2011-07-07 10:39:27 +00:00 · bbbea8860d
commit bbbea8860d
parent d1eb54b129
7 changed files with 103 additions and 24 deletions
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/CharIndexTranslator.java
@ -18,6 +18,15 @@
 package org.apache.poi.hwpf.model;

 public interface CharIndexTranslator {
+    /**
+     * Calculates the byte index of the given char index. This is the
+     * opposite direction of {@link #getCharIndex(int, int)}.
+     *
+     * @param charPos
+     *            The char position
+     * @return The byte index
+     */
+    int getByteIndex( int charPos );
+
    /**
     * Calculates the char index of the given byte index.
     * Look forward if index is not in table
@ -36,7 +45,7 @@ public interface CharIndexTranslator {
     * @return the char index
     */
    int getCharIndex(int bytePos, int startCP);
-
+    
    /**
     * Check if index is in table
     *
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/OldSectionTable.java
@ -74,6 +74,11 @@ public final class OldSectionTable extends SectionTable
        this.tpt = tpt;
     }

+        /**
+         * Returns the given char position unchanged, just as the
+         * neighbouring getCharIndex returns the given byte position
+         * unchanged.
+         */
+        public int getByteIndex( int charPos )
+        {
+            return charPos;
+        }
+
     public int getCharIndex(int bytePos, int startCP) {
        return bytePos;
     }
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPBinTable.java
@ -17,13 +17,13 @@

 package org.apache.poi.hwpf.model;

-import java.util.ArrayList;
 import java.io.IOException;
 import java.io.OutputStream;
+import java.util.ArrayList;

-import org.apache.poi.hwpf.model.io.*;
+import org.apache.poi.hwpf.model.io.HWPFFileSystem;
+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
 import org.apache.poi.hwpf.sprm.SprmBuffer;
-
 import org.apache.poi.poifs.common.POIFSConstants;
 import org.apache.poi.util.LittleEndian;

@ -223,7 +223,7 @@ public class PAPBinTable
      PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
      pfkp.fill(overflow);

-      byte[] bufFkp = pfkp.toByteArray(fcMin);
+      byte[] bufFkp = pfkp.toByteArray(tpt, fcMin);
      docStream.write(bufFkp);
      overflow = pfkp.getOverflow();

--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPFormattedDiskPage.java
@ -17,11 +17,11 @@

 package org.apache.poi.hwpf.model;

-import org.apache.poi.util.LittleEndian;
-
 import java.util.ArrayList;
-import java.util.List;
 import java.util.Arrays;
+import java.util.List;
+
+import org.apache.poi.util.LittleEndian;

 /**
 * Represents a PAP FKP. The style properties for paragraph and character runs
@ -137,7 +137,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
     * @param fcMin The file offset in the main stream where text begins.
     * @return A byte array representing this data structure.
     */
-    protected byte[] toByteArray(int fcMin)
+    protected byte[] toByteArray(CharIndexTranslator translator, int fcMin)
    {
      byte[] buf = new byte[512];
      int size = _papxList.size();
@ -152,7 +152,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
      int index = 0;
      for (; index < size; index++)
      {
-        byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl();
+        byte[] grpprl = _papxList.get(index).getGrpprl();
        int grpprlLength = grpprl.length;

        // is grpprl huge?
@ -255,7 +255,10 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
          grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
          grpprlOffset -= (grpprlOffset % 2);
        }
-        LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin);
+            // LittleEndian.putInt( buf, fcOffset,
+            // papx.getStartBytes() );
+            LittleEndian.putInt( buf, fcOffset,
+                    translator.getByteIndex( papx.getStart() ) );
        buf[bxOffset] = (byte)(grpprlOffset/2);
        System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);

@ -283,7 +286,9 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {

      }

-      LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
+        // LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
+        LittleEndian.putInt( buf, fcOffset,
+                translator.getByteIndex( papx.getEnd() ) );
      return buf;
    }

--- a/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/PAPX.java
@ -123,6 +123,31 @@ public final class PAPX extends BytePropertyNode<PAPX> {
    return (SprmBuffer)_buf;
  }

+    /**
+     * Delegates to the superclass implementation; overridden here only to
+     * mark the method as deprecated for this class.
+     *
+     * @deprecated Although byte offsets are what is actually stored in the
+     *             file, it is advised to use char positions for all
+     *             operations, including save operations, because only char
+     *             positions are preserved.
+     */
+    @Deprecated
+    @Override
+    public int getEndBytes()
+    {
+        return super.getEndBytes();
+    }
+
+    /**
+     * Delegates to the superclass implementation; overridden here only to
+     * mark the method as deprecated for this class.
+     *
+     * @deprecated Although byte offsets are what is actually stored in the
+     *             file, it is advised to use char positions for all
+     *             operations, including save operations, because only char
+     *             positions are preserved.
+     */
+    @Deprecated
+    @Override
+    public int getStartBytes()
+    {
+        return super.getStartBytes();
+    }
+
  public ParagraphProperties getParagraphProperties(StyleSheet ss)
  {
    if(ss == null) {
--- a/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/SectionTable.java
@ -179,14 +179,22 @@ public class SectionTable

      // add the section descriptor bytes to the PlexOfCps.

-
-      // original line -
-      //GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray());
-
-      // Line using Ryan's FCtoCP() conversion method -
-      // unable to observe any effect on our testcases when using this code - piers
-      GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
-
+            /* original line */
+            // GenericPropertyNode property = new
+            // GenericPropertyNode(sepx.getStart(), sepx.getEnd(),
+            // sed.toByteArray());
+            /*
+             * Line using Ryan's FCtoCP() conversion method - unable to observe
+             * any effect on our testcases when using this code - piers
+             */
+            /*
+             * there is an effect on Bug45473.doc actually: write-out/read-back
+             * changes the byte offsets of chars (but preserves string
+             * offsets) - sergey
+             */
+            GenericPropertyNode property = new GenericPropertyNode(
+                    tpt.getCharIndex( sepx.getStartBytes() ),
+                    tpt.getCharIndex( sepx.getEndBytes() ), sed.toByteArray() );

      plex.addProperty(property);

--- a/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
+++ b/src/scratchpad/src/org/apache/poi/hwpf/model/TextPieceTable.java
@ -17,15 +17,15 @@

 package org.apache.poi.hwpf.model;

-import org.apache.poi.hwpf.model.io.HWPFOutputStream;
-import org.apache.poi.poifs.common.POIFSConstants;
-
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.List;

+import org.apache.poi.hwpf.model.io.HWPFOutputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+
 /**
 * The piece table for matching up character positions to bits of text. This
 * mostly works in bytes, but the TextPieces themselves work in characters. This
@ -197,6 +197,33 @@ public class TextPieceTable implements CharIndexTranslator {
 		return false;
 	}

+    /**
+     * Translates a char position into a byte position by walking the text
+     * pieces in order.
+     * <p>
+     * For the first piece whose end lies beyond <code>charPos</code>, the
+     * result is that piece's file position plus the offset of
+     * <code>charPos</code> from the piece start, counted at two bytes per
+     * char for unicode pieces and one byte per char otherwise. If
+     * <code>charPos</code> is exactly the end of a piece, the byte position
+     * of that piece's end is returned. If <code>charPos</code> lies beyond
+     * every piece, the byte position of the end of the last piece visited is
+     * returned; with no pieces at all the result is <code>0</code>.
+     *
+     * @param charPos
+     *            The char position
+     * @return The byte index
+     */
+    public int getByteIndex( int charPos )
+    {
+        int result = 0;
+        for ( TextPiece piece : _textPieces )
+        {
+            final int pieceEnd = piece.getEnd();
+            final int pieceFilePos = piece.getPieceDescriptor()
+                    .getFilePosition();
+            final int bytesPerChar = piece.isUnicode() ? 2 : 1;
+
+            if ( charPos < pieceEnd )
+            {
+                // position falls inside this piece: offset from its start
+                result = pieceFilePos + ( charPos - piece.getStart() )
+                        * bytesPerChar;
+                break;
+            }
+
+            // position is at or past the end of this piece: remember the
+            // byte position of the piece end and stop only on an exact match
+            result = pieceFilePos + ( pieceEnd - piece.getStart() )
+                    * bytesPerChar;
+            if ( charPos == pieceEnd )
+            {
+                break;
+            }
+        }
+        return result;
+    }
+
    public int getCharIndex(int bytePos) {
        return getCharIndex(bytePos, 0);
    }
@ -297,7 +324,7 @@ public class TextPieceTable implements CharIndexTranslator {
        for(TextPiece tp : _textPiecesFCOrder) {
            int pieceStart = tp.getPieceDescriptor().getFilePosition();

-            if (startBytePos > pieceStart + tp.bytesLength()) {
+            if (startBytePos >= pieceStart + tp.bytesLength()) {
                continue;
            }