SEPX uses character positions as boundary coordinates; don't do double conversions

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145276 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-11 18:36:37 +00:00
parent 2ee84d720a
commit a67574d250
4 changed files with 93 additions and 87 deletions

View File

@ -20,8 +20,6 @@ package org.apache.poi.hwpf.model;
import java.util.Collections; import java.util.Collections;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/** /**
* This class holds all of the section formatting * This class holds all of the section formatting
@ -33,15 +31,20 @@ import org.apache.poi.util.POILogger;
*/ */
public final class OldSectionTable extends SectionTable public final class OldSectionTable extends SectionTable
{ {
private static final POILogger logger = POILogFactory /**
.getLogger( OldSectionTable.class ); * @deprecated Use {@link #OldSectionTable(byte[],int,int)} instead
*/
public OldSectionTable(byte[] documentStream, int offset, @Deprecated
int size, int fcMin, @SuppressWarnings( "unused" )
TextPieceTable tpt) public OldSectionTable( byte[] documentStream, int offset, int size,
{ int fcMin, TextPieceTable tpt )
PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12); {
CharIsBytes charConv = new CharIsBytes(tpt); this( documentStream, offset, size );
}
public OldSectionTable( byte[] documentStream, int offset, int size )
{
PlexOfCps sedPlex = new PlexOfCps( documentStream, offset, size, 12 );
int length = sedPlex.length(); int length = sedPlex.length();
@ -58,7 +61,7 @@ public final class OldSectionTable extends SectionTable
// check for the optimization // check for the optimization
if (fileOffset == 0xffffffff) if (fileOffset == 0xffffffff)
{ {
sepx = new SEPX(sed, startAt, endAt, charConv, new byte[0]); sepx = new SEPX(sed, startAt, endAt, new byte[0]);
} }
else else
{ {
@ -71,45 +74,11 @@ public final class OldSectionTable extends SectionTable
byte[] buf = new byte[sepxSize+2]; byte[] buf = new byte[sepxSize+2];
fileOffset += LittleEndian.SHORT_SIZE; fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
sepx = new SEPX(sed, startAt, endAt, charConv, buf); sepx = new SEPX(sed, startAt, endAt, buf);
} }
/*
* section descriptor in old Word files seems to refer to char
* indexes, not bytes positions. Check Word6.doc for example. -
* sergey
*/
_sections.add( sepx ); _sections.add( sepx );
} }
Collections.sort( _sections, PropertyNode.StartComparator.instance ); Collections.sort( _sections, PropertyNode.StartComparator.instance );
} }
/**
 * {@link CharIndexTranslator} implementation that treats character indexes
 * and byte indexes as the same value: the conversion methods return their
 * argument unchanged, while the table look-up methods are forwarded to the
 * wrapped {@link TextPieceTable}.
 */
private static class CharIsBytes implements CharIndexTranslator {
// Text piece table that answers the look-up queries below.
private TextPieceTable tpt;
/**
 * @param tpt text piece table the look-up methods delegate to
 */
private CharIsBytes(TextPieceTable tpt) {
this.tpt = tpt;
}
/**
 * @return {@code charPos} unchanged (no char-to-byte conversion is done)
 */
public int getByteIndex( int charPos )
{
return charPos;
}
/**
 * @return {@code bytePos} unchanged; {@code startCP} is ignored
 */
public int getCharIndex(int bytePos, int startCP) {
return bytePos;
}
/**
 * @return {@code bytePos} unchanged (no byte-to-char conversion is done)
 */
public int getCharIndex(int bytePos) {
return bytePos;
}
/**
 * Delegates to {@link TextPieceTable#isIndexInTable(int)}.
 */
public boolean isIndexInTable(int bytePos) {
return tpt.isIndexInTable(bytePos);
}
/**
 * Delegates to {@link TextPieceTable#lookIndexBackward(int)}.
 */
public int lookIndexBackward(int bytePos) {
return tpt.lookIndexBackward(bytePos);
}
/**
 * Delegates to {@link TextPieceTable#lookIndexForward(int)}.
 */
public int lookIndexForward(int bytePos) {
return tpt.lookIndexForward(bytePos);
}
}
} }

View File

@ -22,17 +22,16 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
import org.apache.poi.hwpf.sprm.SprmBuffer; import org.apache.poi.hwpf.sprm.SprmBuffer;
import org.apache.poi.hwpf.usermodel.SectionProperties; import org.apache.poi.hwpf.usermodel.SectionProperties;
public final class SEPX extends BytePropertyNode<SEPX> public final class SEPX extends PropertyNode<SEPX>
{ {
SectionProperties sectionProperties; SectionProperties sectionProperties;
SectionDescriptor _sed; SectionDescriptor _sed;
public SEPX( SectionDescriptor sed, int start, int end, public SEPX( SectionDescriptor sed, int start, int end, byte[] grpprl )
CharIndexTranslator translator, byte[] grpprl )
{ {
super( start, end, translator, new SprmBuffer( grpprl ) ); super( start, end, new SprmBuffer( grpprl ) );
_sed = sed; _sed = sed;
} }
@ -75,7 +74,6 @@ public final class SEPX extends BytePropertyNode<SEPX>
public String toString() public String toString()
{ {
return "SEPX from " + getStart() + " to " + getEnd() + " (in bytes " return "SEPX from " + getStart() + " to " + getEnd();
+ getStartBytes() + " to " + getEndBytes() + ")";
} }
} }

View File

@ -19,13 +19,36 @@ package org.apache.poi.hwpf.model;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
/**
* Section Descriptor (SED)
*
* @see page 186 for details
*/
public final class SectionDescriptor public final class SectionDescriptor
{ {
private short fn; /**
private int fc; * "Used internally by Word"
private short fnMpr; */
private int fcMpr; private short fn;
/**
* "File offset in main stream to beginning of SEPX stored for section. If
* sed.fcSepx==0xFFFFFFFF, the section properties for the section are equal
* to the standard SEP (see SEP definition)."
*/
private int fcSepx;
/**
* "Used internally by Word"
*/
private short fnMpr;
/**
* "Points to offset in FC space of main stream where the Macintosh Print
* Record for a document created on a Macintosh will be stored"
*/
private int fcMpr;
public SectionDescriptor() public SectionDescriptor()
{ {
@ -35,7 +58,7 @@ public final class SectionDescriptor
{ {
fn = LittleEndian.getShort(buf, offset); fn = LittleEndian.getShort(buf, offset);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
fc = LittleEndian.getInt(buf, offset); fcSepx = LittleEndian.getInt(buf, offset);
offset += LittleEndian.INT_SIZE; offset += LittleEndian.INT_SIZE;
fnMpr = LittleEndian.getShort(buf, offset); fnMpr = LittleEndian.getShort(buf, offset);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
@ -44,12 +67,12 @@ public final class SectionDescriptor
public int getFc() public int getFc()
{ {
return fc; return fcSepx;
} }
public void setFc(int fc) public void setFc(int fc)
{ {
this.fc = fc; this.fcSepx = fc;
} }
public boolean equals(Object o) public boolean equals(Object o)
@ -65,7 +88,7 @@ public final class SectionDescriptor
LittleEndian.putShort(buf, offset, fn); LittleEndian.putShort(buf, offset, fn);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
LittleEndian.putInt(buf, offset, fc); LittleEndian.putInt(buf, offset, fcSepx);
offset += LittleEndian.INT_SIZE; offset += LittleEndian.INT_SIZE;
LittleEndian.putShort(buf, offset, fnMpr); LittleEndian.putShort(buf, offset, fnMpr);
offset += LittleEndian.SHORT_SIZE; offset += LittleEndian.SHORT_SIZE;
@ -73,4 +96,11 @@ public final class SectionDescriptor
return buf; return buf;
} }
/**
 * Builds a one-line, human-readable dump of this descriptor listing the
 * {@code fn}, {@code fcSepx}, {@code fnMpr} and {@code fcMpr} fields in
 * that order.
 *
 * @return text of the form
 *         {@code [SED] (fn: ..; fcSepx: ..; fnMpr: ..; fcMpr: ..)}
 */
@Override
public String toString()
{
final StringBuilder text = new StringBuilder( "[SED] (fn: " );
text.append( fn );
text.append( "; fcSepx: " ).append( fcSepx );
text.append( "; fnMpr: " ).append( fnMpr );
text.append( "; fcMpr: " ).append( fcMpr );
text.append( ")" );
return text.toString();
}
} }

View File

@ -62,14 +62,16 @@ public class SectionTable
GenericPropertyNode node = sedPlex.getProperty(x); GenericPropertyNode node = sedPlex.getProperty(x);
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0); SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
int fileOffset = sed.getFc(); int fileOffset = sed.getFc();
int startAt = CPtoFC(node.getStart()); // int startAt = CPtoFC(node.getStart());
int endAt = CPtoFC(node.getEnd()); // int endAt = CPtoFC(node.getEnd());
int startAt = node.getStart();
int endAt = node.getEnd();
// check for the optimization // check for the optimization
if (fileOffset == 0xffffffff) if (fileOffset == 0xffffffff)
{ {
_sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0])); _sections.add(new SEPX(sed, startAt, endAt, new byte[0]));
} }
else else
{ {
@ -78,7 +80,7 @@ public class SectionTable
byte[] buf = new byte[sepxSize]; byte[] buf = new byte[sepxSize];
fileOffset += LittleEndian.SHORT_SIZE; fileOffset += LittleEndian.SHORT_SIZE;
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length); System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
_sections.add(new SEPX(sed, startAt, endAt, tpt, buf)); _sections.add(new SEPX(sed, startAt, endAt, buf));
} }
} }
@ -92,7 +94,7 @@ public class SectionTable
SEPX s = _sections.get(i); SEPX s = _sections.get(i);
if(s.getEnd() == mainEndsAt) { if(s.getEnd() == mainEndsAt) {
matchAt = true; matchAt = true;
} else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) { } else if(s.getEnd() == mainEndsAt || s.getEnd() == mainEndsAt-1) {
matchHalf = true; matchHalf = true;
} }
} }
@ -102,8 +104,12 @@ public class SectionTable
SEPX s = _sections.get(i); SEPX s = _sections.get(i);
GenericPropertyNode node = sedPlex.getProperty(i); GenericPropertyNode node = sedPlex.getProperty(i);
s.setStart( CPtoFC(node.getStart()) ); // s.setStart( CPtoFC(node.getStart()) );
s.setEnd( CPtoFC(node.getEnd()) ); // s.setEnd( CPtoFC(node.getEnd()) );
int startAt = node.getStart();
int endAt = node.getEnd();
s.setStart( startAt );
s.setEnd( endAt );
} }
} }
@ -130,24 +136,27 @@ public class SectionTable
// normal use, but this version works with our non-contiguous test case. // normal use, but this version works with our non-contiguous test case.
// So far unable to get this test case to be written out as well due to // So far unable to get this test case to be written out as well due to
// other issues. - piers // other issues. - piers
private int CPtoFC(int CP) //
{ // I'm commenting this out because it doesn't work with non-contiguous
TextPiece TP = null; // text pieces :( The usual call to TextPieceTable (as used for PAPX and CHPX) does.
// private int CPtoFC(int CP)
for(int i=_text.size()-1; i>-1; i--) // {
{ // TextPiece TP = null;
TP = _text.get(i); //
// for(int i=_text.size()-1; i>-1; i--)
if(CP >= TP.getCP()) break; // {
} // TP = _text.get(i);
int FC = TP.getPieceDescriptor().getFilePosition(); //
int offset = CP - TP.getCP(); // if(CP >= TP.getCP()) break;
if (TP.isUnicode()) { // }
offset = offset*2; // int FC = TP.getPieceDescriptor().getFilePosition();
} // int offset = CP - TP.getCP();
FC = FC+offset; // if (TP.isUnicode()) {
return FC; // offset = offset*2;
} // }
// FC = FC+offset;
// return FC;
// }
public ArrayList<SEPX> getSections() public ArrayList<SEPX> getSections()
{ {