SEPX uses character positions as boundary coordinates; don't do double conversions
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145276 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2ee84d720a
commit
a67574d250
@ -20,8 +20,6 @@ package org.apache.poi.hwpf.model;
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.POILogFactory;
|
|
||||||
import org.apache.poi.util.POILogger;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class holds all of the section formatting
|
* This class holds all of the section formatting
|
||||||
@ -33,15 +31,20 @@ import org.apache.poi.util.POILogger;
|
|||||||
*/
|
*/
|
||||||
public final class OldSectionTable extends SectionTable
|
public final class OldSectionTable extends SectionTable
|
||||||
{
|
{
|
||||||
private static final POILogger logger = POILogFactory
|
/**
|
||||||
.getLogger( OldSectionTable.class );
|
* @deprecated Use {@link #OldSectionTable(byte[],int,int)} instead
|
||||||
|
*/
|
||||||
public OldSectionTable(byte[] documentStream, int offset,
|
@Deprecated
|
||||||
int size, int fcMin,
|
@SuppressWarnings( "unused" )
|
||||||
TextPieceTable tpt)
|
public OldSectionTable( byte[] documentStream, int offset, int size,
|
||||||
{
|
int fcMin, TextPieceTable tpt )
|
||||||
PlexOfCps sedPlex = new PlexOfCps(documentStream, offset, size, 12);
|
{
|
||||||
CharIsBytes charConv = new CharIsBytes(tpt);
|
this( documentStream, offset, size );
|
||||||
|
}
|
||||||
|
|
||||||
|
public OldSectionTable( byte[] documentStream, int offset, int size )
|
||||||
|
{
|
||||||
|
PlexOfCps sedPlex = new PlexOfCps( documentStream, offset, size, 12 );
|
||||||
|
|
||||||
int length = sedPlex.length();
|
int length = sedPlex.length();
|
||||||
|
|
||||||
@ -58,7 +61,7 @@ public final class OldSectionTable extends SectionTable
|
|||||||
// check for the optimization
|
// check for the optimization
|
||||||
if (fileOffset == 0xffffffff)
|
if (fileOffset == 0xffffffff)
|
||||||
{
|
{
|
||||||
sepx = new SEPX(sed, startAt, endAt, charConv, new byte[0]);
|
sepx = new SEPX(sed, startAt, endAt, new byte[0]);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -71,45 +74,11 @@ public final class OldSectionTable extends SectionTable
|
|||||||
byte[] buf = new byte[sepxSize+2];
|
byte[] buf = new byte[sepxSize+2];
|
||||||
fileOffset += LittleEndian.SHORT_SIZE;
|
fileOffset += LittleEndian.SHORT_SIZE;
|
||||||
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
||||||
sepx = new SEPX(sed, startAt, endAt, charConv, buf);
|
sepx = new SEPX(sed, startAt, endAt, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* section descriptor in old Word files seems to refer to char
|
|
||||||
* indexes, not bytes positions. Check Word6.doc for example. -
|
|
||||||
* sergey
|
|
||||||
*/
|
|
||||||
_sections.add( sepx );
|
_sections.add( sepx );
|
||||||
}
|
}
|
||||||
Collections.sort( _sections, PropertyNode.StartComparator.instance );
|
Collections.sort( _sections, PropertyNode.StartComparator.instance );
|
||||||
}
|
}
|
||||||
|
|
||||||
private static class CharIsBytes implements CharIndexTranslator {
|
|
||||||
private TextPieceTable tpt;
|
|
||||||
private CharIsBytes(TextPieceTable tpt) {
|
|
||||||
this.tpt = tpt;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getByteIndex( int charPos )
|
|
||||||
{
|
|
||||||
return charPos;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getCharIndex(int bytePos, int startCP) {
|
|
||||||
return bytePos;
|
|
||||||
}
|
|
||||||
public int getCharIndex(int bytePos) {
|
|
||||||
return bytePos;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean isIndexInTable(int bytePos) {
|
|
||||||
return tpt.isIndexInTable(bytePos);
|
|
||||||
}
|
|
||||||
public int lookIndexBackward(int bytePos) {
|
|
||||||
return tpt.lookIndexBackward(bytePos);
|
|
||||||
}
|
|
||||||
public int lookIndexForward(int bytePos) {
|
|
||||||
return tpt.lookIndexForward(bytePos);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@ -22,17 +22,16 @@ import org.apache.poi.hwpf.sprm.SectionSprmUncompressor;
|
|||||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||||
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
||||||
|
|
||||||
public final class SEPX extends BytePropertyNode<SEPX>
|
public final class SEPX extends PropertyNode<SEPX>
|
||||||
{
|
{
|
||||||
|
|
||||||
SectionProperties sectionProperties;
|
SectionProperties sectionProperties;
|
||||||
|
|
||||||
SectionDescriptor _sed;
|
SectionDescriptor _sed;
|
||||||
|
|
||||||
public SEPX( SectionDescriptor sed, int start, int end,
|
public SEPX( SectionDescriptor sed, int start, int end, byte[] grpprl )
|
||||||
CharIndexTranslator translator, byte[] grpprl )
|
|
||||||
{
|
{
|
||||||
super( start, end, translator, new SprmBuffer( grpprl ) );
|
super( start, end, new SprmBuffer( grpprl ) );
|
||||||
_sed = sed;
|
_sed = sed;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,7 +74,6 @@ public final class SEPX extends BytePropertyNode<SEPX>
|
|||||||
|
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
return "SEPX from " + getStart() + " to " + getEnd() + " (in bytes "
|
return "SEPX from " + getStart() + " to " + getEnd();
|
||||||
+ getStartBytes() + " to " + getEndBytes() + ")";
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -19,13 +19,36 @@ package org.apache.poi.hwpf.model;
|
|||||||
|
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Section Descriptor (SED)
|
||||||
|
*
|
||||||
|
* @see page 186 for details
|
||||||
|
*/
|
||||||
public final class SectionDescriptor
|
public final class SectionDescriptor
|
||||||
{
|
{
|
||||||
|
|
||||||
private short fn;
|
/**
|
||||||
private int fc;
|
* "Used internally by Word"
|
||||||
private short fnMpr;
|
*/
|
||||||
private int fcMpr;
|
private short fn;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* "File offset in main stream to beginning of SEPX stored for section. If
|
||||||
|
* sed.fcSepx==0xFFFFFFFF, the section properties for the section are equal
|
||||||
|
* to the standard SEP (see SEP definition)."
|
||||||
|
*/
|
||||||
|
private int fcSepx;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* "Used internally by Word"
|
||||||
|
*/
|
||||||
|
private short fnMpr;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* "Points to offset in FC space of main stream where the Macintosh Print
|
||||||
|
* Record for a document created on a Macintosh will be stored"
|
||||||
|
*/
|
||||||
|
private int fcMpr;
|
||||||
|
|
||||||
public SectionDescriptor()
|
public SectionDescriptor()
|
||||||
{
|
{
|
||||||
@ -35,7 +58,7 @@ public final class SectionDescriptor
|
|||||||
{
|
{
|
||||||
fn = LittleEndian.getShort(buf, offset);
|
fn = LittleEndian.getShort(buf, offset);
|
||||||
offset += LittleEndian.SHORT_SIZE;
|
offset += LittleEndian.SHORT_SIZE;
|
||||||
fc = LittleEndian.getInt(buf, offset);
|
fcSepx = LittleEndian.getInt(buf, offset);
|
||||||
offset += LittleEndian.INT_SIZE;
|
offset += LittleEndian.INT_SIZE;
|
||||||
fnMpr = LittleEndian.getShort(buf, offset);
|
fnMpr = LittleEndian.getShort(buf, offset);
|
||||||
offset += LittleEndian.SHORT_SIZE;
|
offset += LittleEndian.SHORT_SIZE;
|
||||||
@ -44,12 +67,12 @@ public final class SectionDescriptor
|
|||||||
|
|
||||||
public int getFc()
|
public int getFc()
|
||||||
{
|
{
|
||||||
return fc;
|
return fcSepx;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setFc(int fc)
|
public void setFc(int fc)
|
||||||
{
|
{
|
||||||
this.fc = fc;
|
this.fcSepx = fc;
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean equals(Object o)
|
public boolean equals(Object o)
|
||||||
@ -65,7 +88,7 @@ public final class SectionDescriptor
|
|||||||
|
|
||||||
LittleEndian.putShort(buf, offset, fn);
|
LittleEndian.putShort(buf, offset, fn);
|
||||||
offset += LittleEndian.SHORT_SIZE;
|
offset += LittleEndian.SHORT_SIZE;
|
||||||
LittleEndian.putInt(buf, offset, fc);
|
LittleEndian.putInt(buf, offset, fcSepx);
|
||||||
offset += LittleEndian.INT_SIZE;
|
offset += LittleEndian.INT_SIZE;
|
||||||
LittleEndian.putShort(buf, offset, fnMpr);
|
LittleEndian.putShort(buf, offset, fnMpr);
|
||||||
offset += LittleEndian.SHORT_SIZE;
|
offset += LittleEndian.SHORT_SIZE;
|
||||||
@ -73,4 +96,11 @@ public final class SectionDescriptor
|
|||||||
|
|
||||||
return buf;
|
return buf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString()
|
||||||
|
{
|
||||||
|
return "[SED] (fn: " + fn + "; fcSepx: " + fcSepx + "; fnMpr: " + fnMpr
|
||||||
|
+ "; fcMpr: " + fcMpr + ")";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -62,14 +62,16 @@ public class SectionTable
|
|||||||
GenericPropertyNode node = sedPlex.getProperty(x);
|
GenericPropertyNode node = sedPlex.getProperty(x);
|
||||||
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
|
SectionDescriptor sed = new SectionDescriptor(node.getBytes(), 0);
|
||||||
|
|
||||||
int fileOffset = sed.getFc();
|
int fileOffset = sed.getFc();
|
||||||
int startAt = CPtoFC(node.getStart());
|
// int startAt = CPtoFC(node.getStart());
|
||||||
int endAt = CPtoFC(node.getEnd());
|
// int endAt = CPtoFC(node.getEnd());
|
||||||
|
int startAt = node.getStart();
|
||||||
|
int endAt = node.getEnd();
|
||||||
|
|
||||||
// check for the optimization
|
// check for the optimization
|
||||||
if (fileOffset == 0xffffffff)
|
if (fileOffset == 0xffffffff)
|
||||||
{
|
{
|
||||||
_sections.add(new SEPX(sed, startAt, endAt, tpt, new byte[0]));
|
_sections.add(new SEPX(sed, startAt, endAt, new byte[0]));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -78,7 +80,7 @@ public class SectionTable
|
|||||||
byte[] buf = new byte[sepxSize];
|
byte[] buf = new byte[sepxSize];
|
||||||
fileOffset += LittleEndian.SHORT_SIZE;
|
fileOffset += LittleEndian.SHORT_SIZE;
|
||||||
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
System.arraycopy(documentStream, fileOffset, buf, 0, buf.length);
|
||||||
_sections.add(new SEPX(sed, startAt, endAt, tpt, buf));
|
_sections.add(new SEPX(sed, startAt, endAt, buf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -92,7 +94,7 @@ public class SectionTable
|
|||||||
SEPX s = _sections.get(i);
|
SEPX s = _sections.get(i);
|
||||||
if(s.getEnd() == mainEndsAt) {
|
if(s.getEnd() == mainEndsAt) {
|
||||||
matchAt = true;
|
matchAt = true;
|
||||||
} else if(s.getEndBytes() == mainEndsAt || s.getEndBytes() == mainEndsAt-1) {
|
} else if(s.getEnd() == mainEndsAt || s.getEnd() == mainEndsAt-1) {
|
||||||
matchHalf = true;
|
matchHalf = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -102,8 +104,12 @@ public class SectionTable
|
|||||||
SEPX s = _sections.get(i);
|
SEPX s = _sections.get(i);
|
||||||
GenericPropertyNode node = sedPlex.getProperty(i);
|
GenericPropertyNode node = sedPlex.getProperty(i);
|
||||||
|
|
||||||
s.setStart( CPtoFC(node.getStart()) );
|
// s.setStart( CPtoFC(node.getStart()) );
|
||||||
s.setEnd( CPtoFC(node.getEnd()) );
|
// s.setEnd( CPtoFC(node.getEnd()) );
|
||||||
|
int startAt = node.getStart();
|
||||||
|
int endAt = node.getEnd();
|
||||||
|
s.setStart( startAt );
|
||||||
|
s.setEnd( endAt );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -130,24 +136,27 @@ public class SectionTable
|
|||||||
// normal use, but this version works with our non-contiguous test case.
|
// normal use, but this version works with our non-contiguous test case.
|
||||||
// So far unable to get this test case to be written out as well due to
|
// So far unable to get this test case to be written out as well due to
|
||||||
// other issues. - piers
|
// other issues. - piers
|
||||||
private int CPtoFC(int CP)
|
//
|
||||||
{
|
// I'm commenting this out because it just doesn't work with non-contiguous
|
||||||
TextPiece TP = null;
|
// text pieces :( The usual call to TextPieceTable (as for PAPX and CHPX) does.
|
||||||
|
// private int CPtoFC(int CP)
|
||||||
for(int i=_text.size()-1; i>-1; i--)
|
// {
|
||||||
{
|
// TextPiece TP = null;
|
||||||
TP = _text.get(i);
|
//
|
||||||
|
// for(int i=_text.size()-1; i>-1; i--)
|
||||||
if(CP >= TP.getCP()) break;
|
// {
|
||||||
}
|
// TP = _text.get(i);
|
||||||
int FC = TP.getPieceDescriptor().getFilePosition();
|
//
|
||||||
int offset = CP - TP.getCP();
|
// if(CP >= TP.getCP()) break;
|
||||||
if (TP.isUnicode()) {
|
// }
|
||||||
offset = offset*2;
|
// int FC = TP.getPieceDescriptor().getFilePosition();
|
||||||
}
|
// int offset = CP - TP.getCP();
|
||||||
FC = FC+offset;
|
// if (TP.isUnicode()) {
|
||||||
return FC;
|
// offset = offset*2;
|
||||||
}
|
// }
|
||||||
|
// FC = FC+offset;
|
||||||
|
// return FC;
|
||||||
|
// }
|
||||||
|
|
||||||
public ArrayList<SEPX> getSections()
|
public ArrayList<SEPX> getSections()
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user