SEPX uses character positions as its boundary coordinates; don't do double conversions

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145276 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-11 18:36:37 +00:00
parent 2ee84d720a
commit a67574d250
4 changed files with 93 additions and 87 deletions

View File

@ -20,8 +20,6 @@ package org.apache.poi.hwpf.model;
import java.util.Collections;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* This class holds all of the section formatting
@ -33,15 +31,20 @@ import org.apache.poi.util.POILogger;
*/
public final class OldSectionTable extends SectionTable
{
private static final POILogger logger = POILogFactory
.getLogger( OldSectionTable.class );
public OldSectionTable(byte[] documentStream, int offset,
int size, int fcMin,
TextPieceTable tpt)
{
PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12);
CharIsBytes charConv = new CharIsBytes(tpt);
/**
* Legacy constructor kept so that existing callers keep compiling. It simply
* delegates to {@link #OldSectionTable(byte[],int,int)}; the {@code fcMin}
* and {@code tpt} arguments are accepted but never used.
*
* @param documentStream passed through to the three-argument constructor
* @param offset passed through to the three-argument constructor
* @param size passed through to the three-argument constructor
* @param fcMin ignored
* @param tpt ignored
* @deprecated Use {@link #OldSectionTable(byte[],int,int)} instead
*/
@Deprecated
@SuppressWarnings( "unused" )
public OldSectionTable( byte[] documentStream, int offset, int size,
int fcMin, TextPieceTable tpt )
{
// fcMin and tpt are intentionally dropped here.
this( documentStream, offset, size );
}
public OldSectionTable( byte[] documentStream, int offset, int size )
{
PlexOfCps sedPlex = new PlexOfCps( documentStream, offset, size, 12 );
int length = sedPlex.length();
@ -58,7 +61,7 @@ public final class OldSectionTable extends SectionTable
// check for the optimization
if (fileOffset == 0xffffffff)
{
sepx = new SEPX(sed, startAt, endAt, charConv, new byte[0]);
sepx = new SEPX(sed, startAt, endAt, new byte[0]);
}
else
{
@ -71,45 +74,11 @@ public final class OldSectionTable extends SectionTable
byte[] buf = new byte[sepxSize+2];
fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
sepx = new SEPX(sed, startAt, endAt, charConv, buf);
sepx = new SEPX(sed, startAt, endAt, buf);
}
/*
* Section descriptors in old Word files seem to refer to character
* indexes, not byte positions. See Word6.doc for an example. -
* sergey
*/
_sections.add( sepx );
}
Collections.sort( _sections, PropertyNode.StartComparator.instance );
}
/**
* {@link CharIndexTranslator} whose char/byte conversions are the identity:
* a character position and a byte position are treated as the same number.
* The table-lookup methods are forwarded to the wrapped
* {@link TextPieceTable}.
*/
private static class CharIsBytes implements CharIndexTranslator {
// Delegate used only for the table-lookup methods below.
private TextPieceTable tpt;
/**
* @param tpt text piece table that answers the lookup queries
*/
private CharIsBytes(TextPieceTable tpt) {
this.tpt = tpt;
}
/** Returns {@code charPos} unchanged. */
public int getByteIndex( int charPos )
{
return charPos;
}
/** Returns {@code bytePos} unchanged; {@code startCP} is not used. */
public int getCharIndex(int bytePos, int startCP) {
return bytePos;
}
/** Returns {@code bytePos} unchanged. */
public int getCharIndex(int bytePos) {
return bytePos;
}
/** Forwards to {@code tpt.isIndexInTable(bytePos)}. */
public boolean isIndexInTable(int bytePos) {
return tpt.isIndexInTable(bytePos);
}
/** Forwards to {@code tpt.lookIndexBackward(bytePos)}. */
public int lookIndexBackward(int bytePos) {
return tpt.lookIndexBackward(bytePos);
}
/** Forwards to {@code tpt.lookIndexForward(bytePos)}. */
public int lookIndexForward(int bytePos) {
return tpt.lookIndexForward(bytePos);
}
}
}

View File

@ -22,17 +22,16 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.usermodel.SectionProperties;
public final class SEPX extends BytePropertyNode<SEPX>
public final class SEPX extends PropertyNode<SEPX>
{
SectionProperties sectionProperties;
SectionDescriptor _sed;
public SEPX( SectionDescriptor sed, int start, int end,
CharIndexTranslator translator, byte[] grpprl )
public SEPX( SectionDescriptor sed, int start, int end, byte[] grpprl )
{
super( start, end, translator, new SprmBuffer( grpprl ) );
super( start, end, new SprmBuffer( grpprl ) );
_sed = sed;
}
@ -75,7 +74,6 @@ public final class SEPX extends BytePropertyNode<SEPX>
public String toString()
{
return "SEPX from " + getStart() + " to " + getEnd() + " (in bytes "
+ getStartBytes() + " to " + getEndBytes() + ")";
return "SEPX from " + getStart() + " to " + getEnd();
}
}

View File

@ -19,13 +19,36 @@ package org.apache.poi.hwpf.model;
import org.apache.poi.util.LittleEndian;
/**
* Section Descriptor (SED)
*
* @see page 186 for details
*/
public final class SectionDescriptor
{
private short fn;
private int fc;
private short fnMpr;
private int fcMpr;
/**
* "Used internally by Word"
*/
private short fn;
/**
* "File offset in main stream to beginning of SEPX stored for section. If
* sed.fcSepx==0xFFFFFFFF, the section properties for the section are equal
* to the standard SEP (see SEP definition)."
*/
private int fcSepx;
/**
* "Used internally by Word"
*/
private short fnMpr;
/**
* "Points to offset in FC space of main stream where the Macintosh Print
* Record for a document created on a Macintosh will be stored"
*/
private int fcMpr;
public SectionDescriptor()
{
@ -35,7 +58,7 @@ public final class SectionDescriptor
{
fn = LittleEndian.getShort(buf, offset);
offset += LittleEndian.SHORT_SIZE;
fc = LittleEndian.getInt(buf, offset);
fcSepx = LittleEndian.getInt(buf, offset);
offset += LittleEndian.INT_SIZE;
fnMpr = LittleEndian.getShort(buf, offset);
offset += LittleEndian.SHORT_SIZE;
@ -44,12 +67,12 @@ public final class SectionDescriptor
public int getFc()
{
return fc;
return fcSepx;
}
public void setFc(int fc)
{
this.fc = fc;
this.fcSepx = fc;
}
public boolean equals(Object o)
@ -65,7 +88,7 @@ public final class SectionDescriptor
LittleEndian.putShort(buf, offset, fn);
offset += LittleEndian.SHORT_SIZE;
LittleEndian.putInt(buf, offset, fc);
LittleEndian.putInt(buf, offset, fcSepx);
offset += LittleEndian.INT_SIZE;
LittleEndian.putShort(buf, offset, fnMpr);
offset += LittleEndian.SHORT_SIZE;
@ -73,4 +96,11 @@ public final class SectionDescriptor
return buf;
}
/**
* Builds a one-line, human-readable summary of this descriptor listing the
* {@code fn}, {@code fcSepx}, {@code fnMpr} and {@code fcMpr} field values.
*
* @return text of the form
* {@code [SED] (fn: ...; fcSepx: ...; fnMpr: ...; fcMpr: ...)}
*/
@Override
public String toString()
{
return "[SED] (fn: " + fn + "; fcSepx: " + fcSepx + "; fnMpr: " + fnMpr
+ "; fcMpr: " + fcMpr + ")";
}
}

View File

@ -62,14 +62,16 @@ public class SectionTable
GenericPropertyNode node = sedPlex.getProperty(x);
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
int fileOffset = sed.getFc();
int startAt = CPtoFC(node.getStart());
int endAt = CPtoFC(node.getEnd());
int fileOffset = sed.getFc();
// int startAt = CPtoFC(node.getStart());
// int endAt = CPtoFC(node.getEnd());
int startAt = node.getStart();
int endAt = node.getEnd();
// check for the optimization
if (fileOffset == 0xffffffff)
{
_sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
_sections.add(new SEPX(sed, startAt, endAt, new byte[0]));
}
else
{
@ -78,7 +80,7 @@ public class SectionTable
byte[] buf = new byte[sepxSize];
fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
_sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
_sections.add(new SEPX(sed, startAt, endAt, buf));
}
}
@ -92,7 +94,7 @@ public class SectionTable
SEPX s = _sections.get(i);
if(s.getEnd() == mainEndsAt) {
matchAt = true;
} else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) {
} else if(s.getEnd() == mainEndsAt || s.getEnd() == mainEndsAt-1) {
matchHalf = true;
}
}
@ -102,8 +104,12 @@ public class SectionTable
SEPX s = _sections.get(i);
GenericPropertyNode node = sedPlex.getProperty(i);
s.setStart( CPtoFC(node.getStart()) );
s.setEnd( CPtoFC(node.getEnd()) );
// s.setStart( CPtoFC(node.getStart()) );
// s.setEnd( CPtoFC(node.getEnd()) );
int startAt = node.getStart();
int endAt = node.getEnd();
s.setStart( startAt );
s.setEnd( endAt );
}
}
@ -130,24 +136,27 @@ public class SectionTable
// normal use, but this version works with our non-contiguous test case.
// So far unable to get this test case to be written out as well due to
// other issues. - piers
private int CPtoFC(int CP)
{
TextPiece TP = null;
for(int i=_text.size()-1; i>-1; i--)
{
TP = _text.get(i);
if(CP >= TP.getCP()) break;
}
int FC = TP.getPieceDescriptor().getFilePosition();
int offset = CP - TP.getCP();
if (TP.isUnicode()) {
offset = offset*2;
}
FC = FC+offset;
return FC;
}
//
// I'm commenting this out because it doesn't work with non-contiguous
// text pieces :( The usual call to TextPieceTable (as used for PAPX and CHPX) does.
// private int CPtoFC(int CP)
// {
// TextPiece TP = null;
//
// for(int i=_text.size()-1; i>-1; i--)
// {
// TP = _text.get(i);
//
// if(CP >= TP.getCP()) break;
// }
// int FC = TP.getPieceDescriptor().getFilePosition();
// int offset = CP - TP.getCP();
// if (TP.isUnicode()) {
// offset = offset*2;
// }
// FC = FC+offset;
// return FC;
// }
public ArrayList<SEPX> getSections()
{