fix test case for 45473: calculate PAPX boundaries based on char positions, not on previously read byte positions (they are outdated); fix boundary checks (again)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143753 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-07 10:39:27 +00:00
parent d1eb54b129
commit bbbea8860d
7 changed files with 103 additions and 24 deletions

View File

@ -18,6 +18,15 @@
package org.apache.poi.hwpf.model;
public interface CharIndexTranslator {
/**
* Calculates the byte index of the given char index.
*
* @param charPos
* The char position
* @return The byte index
*/
int getByteIndex( int charPos );
/**
* Calculates the char index of the given byte index.
* Look forward if index is not in table
@ -36,7 +45,7 @@ public interface CharIndexTranslator {
* @return the char index
*/
int getCharIndex(int bytePos, int startCP);
/**
* Check if index is in table
*

View File

@ -74,6 +74,11 @@ public final class OldSectionTable extends SectionTable
this.tpt = tpt;
}
/**
 * Calculates the byte index of the given char index.
 * This implementation is an identity mapping: the char position is returned
 * unchanged, i.e. char positions and byte indexes are treated as equal here.
 *
 * @param charPos
 *            The char position
 * @return The byte index, always equal to {@code charPos}
 */
public int getByteIndex( int charPos )
{
return charPos;
}
/**
 * Calculates the char index of the given byte index.
 * This implementation is an identity mapping: {@code bytePos} is returned
 * unchanged and {@code startCP} is not consulted.
 *
 * @param bytePos
 *            The byte position
 * @param startCP
 *            The char position to start from; ignored by this implementation
 * @return The char index, always equal to {@code bytePos}
 */
public int getCharIndex(int bytePos, int startCP) {
return bytePos;
}

View File

@ -17,13 +17,13 @@
package org.apache.poi.hwpf.model;
import java.util.ArrayList;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import org.apache.poi.hwpf.model.io.*;
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.util.LittleEndian;
@ -223,7 +223,7 @@ public class PAPBinTable
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(_dataStream);
pfkp.fill(overflow);
byte[] bufFkp = pfkp.toByteArray(fcMin);
byte[] bufFkp = pfkp.toByteArray(tpt, fcMin);
docStream.write(bufFkp);
overflow = pfkp.getOverflow();

View File

@ -17,11 +17,11 @@
package org.apache.poi.hwpf.model;
import org.apache.poi.util.LittleEndian;
import java.util.ArrayList;
import java.util.List;
import java.util.Arrays;
import java.util.List;
import org.apache.poi.util.LittleEndian;
/**
* Represents a PAP FKP. The style properties for paragraph and character runs
@ -137,7 +137,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
* @param fcMin The file offset in the main stream where text begins.
* @return A byte array representing this data structure.
*/
protected byte[] toByteArray(int fcMin)
protected byte[] toByteArray(CharIndexTranslator translator, int fcMin)
{
byte[] buf = new byte[512];
int size = _papxList.size();
@ -152,7 +152,7 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
int index = 0;
for (; index < size; index++)
{
byte[] grpprl = ((PAPX)_papxList.get(index)).getGrpprl();
byte[] grpprl = _papxList.get(index).getGrpprl();
int grpprlLength = grpprl.length;
// is grpprl huge?
@ -255,7 +255,10 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
grpprlOffset -= (grpprl.length + (2 - grpprl.length % 2));
grpprlOffset -= (grpprlOffset % 2);
}
LittleEndian.putInt(buf, fcOffset, papx.getStartBytes() + fcMin);
// LittleEndian.putInt( buf, fcOffset,
// papx.getStartBytes() );
LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getStart() ) );
buf[bxOffset] = (byte)(grpprlOffset/2);
System.arraycopy(phe, 0, buf, bxOffset + 1, phe.length);
@ -283,7 +286,9 @@ public final class PAPFormattedDiskPage extends FormattedDiskPage {
}
LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
// LittleEndian.putInt(buf, fcOffset, papx.getEndBytes() + fcMin);
LittleEndian.putInt( buf, fcOffset,
translator.getByteIndex( papx.getEnd() ) );
return buf;
}

View File

@ -123,6 +123,31 @@ public final class PAPX extends BytePropertyNode<PAPX> {
return (SprmBuffer)_buf;
}
/**
 * Returns the value of {@code super.getEndBytes()}; overridden only to mark
 * the byte-based accessor as deprecated on this class.
 *
 * @deprecated Although byte offsets are what is actually stored in the file,
 *             it is advised to use char positions for all operations,
 *             including save operations, because only char positions are
 *             preserved.
 */
@Deprecated
@Override
public int getEndBytes()
{
return super.getEndBytes();
}
/**
 * Returns the value of {@code super.getStartBytes()}; overridden only to mark
 * the byte-based accessor as deprecated on this class.
 *
 * @deprecated Although byte offsets are what is actually stored in the file,
 *             it is advised to use char positions for all operations,
 *             including save operations, because only char positions are
 *             preserved.
 */
@Deprecated
@Override
public int getStartBytes()
{
return super.getStartBytes();
}
public ParagraphProperties getParagraphProperties(StyleSheet ss)
{
if(ss == null) {

View File

@ -179,14 +179,22 @@ public class SectionTable
// add the section descriptor bytes to the PlexOfCps.
// original line -
//GenericPropertyNode property = new GenericPropertyNode(sepx.getStart(), sepx.getEnd(), sed.toByteArray());
// Line using Ryan's FCtoCP() conversion method -
// unable to observe any effect on our testcases when using this code - piers
GenericPropertyNode property = new GenericPropertyNode(tpt.getCharIndex(sepx.getStartBytes()), tpt.getCharIndex(sepx.getEndBytes()), sed.toByteArray());
/* original line */
// GenericPropertyNode property = new
// GenericPropertyNode(sepx.getStart(), sepx.getEnd(),
// sed.toByteArray());
/*
* Line using Ryan's FCtoCP() conversion method - unable to observe
* any effect on our testcases when using this code - piers
*/
/*
* there is an effect on Bug45473.doc actually. writeoutreadback
* changes byte offsets of chars (but preserves string offsets) -
* sergey
*/
GenericPropertyNode property = new GenericPropertyNode(
tpt.getCharIndex( sepx.getStartBytes() ),
tpt.getCharIndex( sepx.getEndBytes() ), sed.toByteArray() );
plex.addProperty(property);

View File

@ -17,15 +17,15 @@
package org.apache.poi.hwpf.model;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.poifs.common.POIFSConstants;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
import org.apache.poi.poifs.common.POIFSConstants;
/**
* The piece table for matching up character positions to bits of text. This
* mostly works in bytes, but the TextPieces themselves work in characters. This
@ -197,6 +197,33 @@ public class TextPieceTable implements CharIndexTranslator {
return false;
}
/**
 * Calculates the byte index of the given char index by walking the text
 * pieces in the iteration order of {@code _textPieces}.
 * <p>
 * For the first piece whose end lies beyond {@code charPos}, the result is
 * the piece's file position plus the offset of {@code charPos} inside the
 * piece, counted as 2 bytes per char for unicode pieces and 1 byte otherwise.
 * If {@code charPos} equals a piece's end, the byte position just past that
 * piece is returned. If {@code charPos} lies beyond every piece, the byte
 * position just past the last visited piece is returned; with no pieces at
 * all the result is 0.
 *
 * @param charPos
 *            The char position
 * @return The byte index
 */
public int getByteIndex( int charPos )
{
    int result = 0;
    for ( TextPiece piece : _textPieces )
    {
        final int pieceEnd = piece.getEnd();
        final int fileStart = piece.getPieceDescriptor().getFilePosition();
        final int bytesPerChar = piece.isUnicode() ? 2 : 1;

        if ( charPos < pieceEnd )
        {
            // Position falls inside this piece: offset from the piece start.
            result = fileStart + ( charPos - piece.getStart() ) * bytesPerChar;
            break;
        }

        // Position is at or after the end of this piece: remember the byte
        // position just past the piece and stop only on an exact match.
        result = fileStart + ( pieceEnd - piece.getStart() ) * bytesPerChar;
        if ( charPos == pieceEnd )
        {
            break;
        }
    }
    return result;
}
/**
 * Calculates the char index of the given byte index by delegating to
 * {@code getCharIndex(bytePos, 0)}, i.e. with a start char position of 0.
 *
 * @param bytePos
 *            The byte position
 * @return The char index
 */
public int getCharIndex(int bytePos) {
return getCharIndex(bytePos, 0);
}
@ -297,7 +324,7 @@ public class TextPieceTable implements CharIndexTranslator {
for(TextPiece tp : _textPiecesFCOrder) {
int pieceStart = tp.getPieceDescriptor().getFilePosition();
if (startBytePos > pieceStart + tp.bytesLength()) {
if (startBytePos >= pieceStart + tp.bytesLength()) {
continue;
}