rewrite PAPX / CHPX loading, allowing to read complex files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145342 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4a0d32fa98
commit
098cc9fc4f
@ -470,7 +470,6 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
{
|
||||
tableBody.appendChild( tableRowElement );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final Element tableElement = htmlDocumentFacade.createTable();
|
||||
@ -485,11 +484,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log(
|
||||
POILogger.WARN,
|
||||
"Table without body starting on offset "
|
||||
+ table.getStartOffset() + " -- "
|
||||
+ table.getEndOffset() );
|
||||
logger.log( POILogger.WARN, "Table without body starting at [",
|
||||
Integer.valueOf( table.getStartOffset() ), "; ",
|
||||
Integer.valueOf( table.getEndOffset() ), ")" );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,8 @@ import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Arrays;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
@ -152,13 +154,16 @@ public final class HWPFLister
|
||||
if ( outputTextRuns )
|
||||
{
|
||||
System.out.println( "== Text runs ==" );
|
||||
lister.dumpTextRuns( outputTextRunsSprms );
|
||||
lister.dumpChpx( outputTextRunsSprms );
|
||||
}
|
||||
|
||||
if ( outputParagraphs )
|
||||
{
|
||||
System.out.println( "== Paragraphs ==" );
|
||||
lister.dumpParagraphs( outputParagraphsSprms, outputPapx,
|
||||
System.out.println( "== Text paragraphs ==" );
|
||||
lister.dumpParagraphs( true );
|
||||
|
||||
System.out.println( "== DOM paragraphs ==" );
|
||||
lister.dumpParagraphsDom( outputParagraphsSprms, outputPapx,
|
||||
outputParagraphsText );
|
||||
}
|
||||
|
||||
@ -188,63 +193,64 @@ public final class HWPFLister
|
||||
|
||||
private final HWPFDocumentCore _doc;
|
||||
|
||||
private LinkedHashMap<Integer, String> paragraphs;
|
||||
|
||||
private String text;
|
||||
|
||||
public HWPFLister( HWPFDocumentCore doc )
|
||||
{
|
||||
_doc = doc;
|
||||
|
||||
buildText();
|
||||
buildParagraphs();
|
||||
}
|
||||
|
||||
public void dumpFIB()
|
||||
private void buildParagraphs()
|
||||
{
|
||||
FileInformationBlock fib = _doc.getFileInformationBlock();
|
||||
System.out.println( fib );
|
||||
}
|
||||
paragraphs = new LinkedHashMap<Integer, String>();
|
||||
|
||||
public void dumpPapx( boolean withProperties )
|
||||
{
|
||||
for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
|
||||
StringBuilder part = new StringBuilder();
|
||||
for ( int charIndex = 0; charIndex < text.length(); charIndex++ )
|
||||
{
|
||||
System.out.println( papx );
|
||||
|
||||
if ( withProperties )
|
||||
System.out.println( papx.getParagraphProperties( _doc
|
||||
.getStyleSheet() ) );
|
||||
char c = text.charAt( charIndex );
|
||||
part.append( c );
|
||||
if ( c == 13 || c == 7 || c == 12 )
|
||||
{
|
||||
paragraphs.put( Integer.valueOf( charIndex ), part.toString() );
|
||||
part.setLength( 0 );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void dumpParagraphs( boolean withSprms, boolean withPapx,
|
||||
boolean withText )
|
||||
private void buildText()
|
||||
{
|
||||
Range range = _doc.getOverallRange();
|
||||
for ( int p = 0; p < range.numParagraphs(); p++ )
|
||||
StringBuilder builder = new StringBuilder();
|
||||
for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )
|
||||
{
|
||||
Paragraph paragraph = range.getParagraph( p );
|
||||
System.out.println( p + ":\t" + paragraph.toString( withPapx ) );
|
||||
String toAppend = textPiece.getStringBuffer().toString();
|
||||
|
||||
if ( withSprms )
|
||||
if ( toAppend.length() != ( textPiece.getEnd() - textPiece
|
||||
.getStart() ) )
|
||||
{
|
||||
PAPX papx = _doc.getParagraphTable().getParagraphs().get( p );
|
||||
|
||||
SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
|
||||
while ( sprmIt.hasNext() )
|
||||
{
|
||||
SprmOperation sprm = sprmIt.next();
|
||||
System.out.println( "\t" + sprm.toString() );
|
||||
}
|
||||
throw new AssertionError();
|
||||
}
|
||||
|
||||
if ( withText )
|
||||
System.out.println( paragraph.text() );
|
||||
builder.replace( textPiece.getStart(), textPiece.getEnd(), toAppend );
|
||||
}
|
||||
this.text = builder.toString();
|
||||
}
|
||||
|
||||
public void dumpTextRuns( boolean withSprms )
|
||||
public void dumpChpx( boolean withSprms )
|
||||
{
|
||||
for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() )
|
||||
for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() )
|
||||
{
|
||||
System.out.println( chpx );
|
||||
|
||||
System.out.println( chpx.getCharacterProperties(
|
||||
_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) );
|
||||
if ( false )
|
||||
{
|
||||
System.out.println( chpx.getCharacterProperties(
|
||||
_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) );
|
||||
}
|
||||
|
||||
if ( withSprms )
|
||||
{
|
||||
@ -264,12 +270,92 @@ public final class HWPFLister
|
||||
public String toString()
|
||||
{
|
||||
return "CHPX range (" + super.toString() + ")";
|
||||
};
|
||||
}
|
||||
}.text() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void dumpFIB()
|
||||
{
|
||||
FileInformationBlock fib = _doc.getFileInformationBlock();
|
||||
System.out.println( fib );
|
||||
}
|
||||
|
||||
public void dumpPapx( boolean withProperties )
|
||||
{
|
||||
for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
|
||||
{
|
||||
System.out.println( papx );
|
||||
|
||||
if ( withProperties )
|
||||
System.out.println( papx.getParagraphProperties( _doc
|
||||
.getStyleSheet() ) );
|
||||
|
||||
if ( true )
|
||||
{
|
||||
SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
|
||||
while ( sprmIt.hasNext() )
|
||||
{
|
||||
SprmOperation sprm = sprmIt.next();
|
||||
System.out.println( "\t" + sprm.toString() );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void dumpParagraphs( boolean dumpAssotiatedPapx )
|
||||
{
|
||||
for ( Map.Entry<Integer, String> entry : paragraphs.entrySet() )
|
||||
{
|
||||
Integer endOfParagraphCharOffset = entry.getKey();
|
||||
System.out.println( "[...; " + ( endOfParagraphCharOffset + 1 )
|
||||
+ "): " + entry.getValue() );
|
||||
|
||||
if ( dumpAssotiatedPapx )
|
||||
{
|
||||
boolean hasAssotiatedPapx = false;
|
||||
for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
|
||||
{
|
||||
if ( papx.getStart() <= endOfParagraphCharOffset.intValue()
|
||||
&& endOfParagraphCharOffset.intValue() < papx
|
||||
.getEnd() )
|
||||
{
|
||||
hasAssotiatedPapx = true;
|
||||
System.out.println( "* " + papx );
|
||||
|
||||
SprmIterator sprmIt = new SprmIterator(
|
||||
papx.getGrpprl(), 2 );
|
||||
while ( sprmIt.hasNext() )
|
||||
{
|
||||
SprmOperation sprm = sprmIt.next();
|
||||
System.out.println( "** " + sprm.toString() );
|
||||
}
|
||||
}
|
||||
}
|
||||
if ( !hasAssotiatedPapx )
|
||||
{
|
||||
System.out.println( "* "
|
||||
+ "NO PAPX ASSOTIATED WITH PARAGRAPH!" );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void dumpParagraphsDom( boolean withSprms, boolean withPapx,
|
||||
boolean withText )
|
||||
{
|
||||
Range range = _doc.getOverallRange();
|
||||
for ( int p = 0; p < range.numParagraphs(); p++ )
|
||||
{
|
||||
Paragraph paragraph = range.getParagraph( p );
|
||||
System.out.println( p + ":\t" + paragraph.toString() );
|
||||
|
||||
if ( withText )
|
||||
System.out.println( paragraph.text() );
|
||||
}
|
||||
}
|
||||
|
||||
public void dumpTextPieces( boolean withText )
|
||||
{
|
||||
for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )
|
||||
|
@ -23,7 +23,10 @@ package org.apache.poi.hwpf.model;
|
||||
* still work despite that.
|
||||
* It handles the conversion as required between bytes
|
||||
* and characters.
|
||||
*
|
||||
* @deprecated byte positions shall not be saved in memory
|
||||
*/
|
||||
@Deprecated
|
||||
public abstract class BytePropertyNode<T extends BytePropertyNode<T>> extends
|
||||
PropertyNode<T>
|
||||
{
|
||||
|
@ -21,13 +21,18 @@ import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* This class holds all of the character formatting properties.
|
||||
@ -36,8 +41,10 @@ import org.apache.poi.util.LittleEndian;
|
||||
*/
|
||||
public class CHPBinTable
|
||||
{
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( CHPBinTable.class );
|
||||
|
||||
/** List of character properties.*/
|
||||
/** List of character properties.*/
|
||||
protected ArrayList<CHPX> _textRuns = new ArrayList<CHPX>();
|
||||
|
||||
/** So we can know if things are unicode or not */
|
||||
@ -97,7 +104,98 @@ public class CHPBinTable
|
||||
_textRuns.add(chpx);
|
||||
}
|
||||
}
|
||||
Collections.sort( _textRuns, PropertyNode.StartComparator.instance );
|
||||
|
||||
// rebuild document paragraphs structure
|
||||
StringBuilder docText = new StringBuilder();
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
{
|
||||
String toAppend = textPiece.getStringBuffer().toString();
|
||||
int toAppendLength = toAppend.length();
|
||||
|
||||
if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
|
||||
{
|
||||
logger.log(
|
||||
POILogger.WARN,
|
||||
"Text piece has boundaries [",
|
||||
Integer.valueOf( textPiece.getStart() ),
|
||||
"; ",
|
||||
Integer.valueOf( textPiece.getEnd() ),
|
||||
") but length ",
|
||||
Integer.valueOf( textPiece.getEnd()
|
||||
- textPiece.getStart() ) );
|
||||
}
|
||||
|
||||
docText.replace( textPiece.getStart(), textPiece.getStart()
|
||||
+ toAppendLength, toAppend );
|
||||
}
|
||||
|
||||
Set<Integer> textRunsBoundariesSet = new HashSet<Integer>();
|
||||
for ( CHPX chpx : _textRuns )
|
||||
{
|
||||
textRunsBoundariesSet.add( Integer.valueOf( chpx.getStart() ) );
|
||||
textRunsBoundariesSet.add( Integer.valueOf( chpx.getEnd() ) );
|
||||
}
|
||||
textRunsBoundariesSet.remove( Integer.valueOf( 0 ) );
|
||||
List<Integer> textRunsBoundariesList = new ArrayList<Integer>(
|
||||
textRunsBoundariesSet );
|
||||
Collections.sort( textRunsBoundariesList );
|
||||
|
||||
List<CHPX> newChpxs = new LinkedList<CHPX>();
|
||||
int lastTextRunStart = 0;
|
||||
for ( Integer boundary : textRunsBoundariesList )
|
||||
{
|
||||
final int startInclusive = lastTextRunStart;
|
||||
final int endExclusive = boundary.intValue();
|
||||
lastTextRunStart = endExclusive;
|
||||
|
||||
List<CHPX> chpxs = new LinkedList<CHPX>();
|
||||
for ( CHPX chpx : _textRuns )
|
||||
{
|
||||
int left = Math.max( startInclusive, chpx.getStart() );
|
||||
int right = Math.min( endExclusive, chpx.getEnd() );
|
||||
|
||||
if ( left < right )
|
||||
{
|
||||
chpxs.add( chpx );
|
||||
}
|
||||
}
|
||||
|
||||
if ( chpxs.size() == 0 )
|
||||
{
|
||||
logger.log( POILogger.WARN, "Text piece [",
|
||||
Integer.valueOf( startInclusive ), "; ",
|
||||
Integer.valueOf( endExclusive ),
|
||||
") has no CHPX. Creating new one." );
|
||||
// create it manually
|
||||
CHPX chpx = new CHPX( startInclusive, endExclusive,
|
||||
new SprmBuffer( 0 ) );
|
||||
newChpxs.add( chpx );
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( chpxs.size() == 1 )
|
||||
{
|
||||
// can we reuse existing?
|
||||
CHPX existing = chpxs.get( 0 );
|
||||
if ( existing.getStart() == startInclusive
|
||||
&& existing.getEnd() == endExclusive )
|
||||
{
|
||||
newChpxs.add( existing );
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
SprmBuffer sprmBuffer = new SprmBuffer( 0 );
|
||||
for ( CHPX chpx : chpxs )
|
||||
{
|
||||
sprmBuffer.append( chpx.getGrpprl(), 0 );
|
||||
}
|
||||
CHPX newChpx = new CHPX( startInclusive, endExclusive, sprmBuffer );
|
||||
newChpxs.add( newChpx );
|
||||
|
||||
continue;
|
||||
}
|
||||
this._textRuns = new ArrayList<CHPX>( newChpxs );
|
||||
}
|
||||
|
||||
public void adjustForDelete(int listIndex, int offset, int length)
|
||||
|
@ -30,20 +30,26 @@ import org.apache.poi.hwpf.usermodel.CharacterProperties;
|
||||
*
|
||||
* @author Ryan Ackley
|
||||
*/
|
||||
|
||||
@SuppressWarnings( "deprecation" )
|
||||
public final class CHPX extends BytePropertyNode<CHPX>
|
||||
{
|
||||
|
||||
@Deprecated
|
||||
public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] grpprl)
|
||||
{
|
||||
super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl));
|
||||
super(fcStart, translator.lookIndexBackward(fcEnd), translator, new SprmBuffer(grpprl, 0));
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public CHPX(int fcStart, int fcEnd, CharIndexTranslator translator, SprmBuffer buf)
|
||||
{
|
||||
super(fcStart, translator.lookIndexBackward(fcEnd), translator ,buf);
|
||||
}
|
||||
|
||||
CHPX( int charStart, int charEnd, SprmBuffer buf )
|
||||
{
|
||||
super( charStart, charEnd, buf );
|
||||
}
|
||||
|
||||
public byte[] getGrpprl()
|
||||
{
|
||||
|
@ -20,12 +20,16 @@ package org.apache.poi.hwpf.model;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.HWPFFileSystem;
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.hwpf.sprm.SprmBuffer;
|
||||
import org.apache.poi.poifs.common.POIFSConstants;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
/**
|
||||
* This class represents the bin table of Word document but it also serves as a
|
||||
@ -36,6 +40,9 @@ import org.apache.poi.util.LittleEndian;
|
||||
*/
|
||||
public class PAPBinTable
|
||||
{
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( PAPBinTable.class );
|
||||
|
||||
protected ArrayList<PAPX> _paragraphs = new ArrayList<PAPX>();
|
||||
byte[] _dataStream;
|
||||
|
||||
@ -87,8 +94,106 @@ public class PAPBinTable
|
||||
}
|
||||
}
|
||||
|
||||
_dataStream = dataStream;
|
||||
}
|
||||
// rebuild document paragraphs structure
|
||||
StringBuilder docText = new StringBuilder();
|
||||
for ( TextPiece textPiece : tpt.getTextPieces() )
|
||||
{
|
||||
String toAppend = textPiece.getStringBuffer().toString();
|
||||
int toAppendLength = toAppend.length();
|
||||
|
||||
if ( toAppendLength != textPiece.getEnd() - textPiece.getStart() )
|
||||
{
|
||||
logger.log(
|
||||
POILogger.WARN,
|
||||
"Text piece has boundaries [",
|
||||
Integer.valueOf( textPiece.getStart() ),
|
||||
"; ",
|
||||
Integer.valueOf( textPiece.getEnd() ),
|
||||
") but length ",
|
||||
Integer.valueOf( textPiece.getEnd()
|
||||
- textPiece.getStart() ) );
|
||||
}
|
||||
|
||||
docText.replace( textPiece.getStart(), textPiece.getStart()
|
||||
+ toAppendLength, toAppend );
|
||||
}
|
||||
|
||||
List<PAPX> newPapxs = new LinkedList<PAPX>();
|
||||
int lastParStart = 0;
|
||||
for ( int charIndex = 0; charIndex < docText.length(); charIndex++ )
|
||||
{
|
||||
final char c = docText.charAt( charIndex );
|
||||
if ( c != 13 && c != 7 && c != 12 )
|
||||
continue;
|
||||
|
||||
final int startInclusive = lastParStart;
|
||||
final int endExclusive = charIndex + 1;
|
||||
|
||||
List<PAPX> papxs = new LinkedList<PAPX>();
|
||||
for ( PAPX papx : _paragraphs )
|
||||
{
|
||||
// TODO: Tests, check, etc
|
||||
for ( int f = papx.getEnd() - 1; f <= charIndex; f++ )
|
||||
{
|
||||
if ( f == charIndex )
|
||||
{
|
||||
papxs.add( papx );
|
||||
break;
|
||||
}
|
||||
final char fChar = docText.charAt( charIndex );
|
||||
if ( fChar == 13 || fChar == 7 || fChar == 12 )
|
||||
break;
|
||||
}
|
||||
// if ( papx.getStart() <= charIndex && charIndex <
|
||||
// papx.getEnd() )
|
||||
// {
|
||||
// papxs.add( papx );
|
||||
// }
|
||||
}
|
||||
|
||||
if ( papxs.size() == 0 )
|
||||
{
|
||||
logger.log( POILogger.WARN, "Paragraph [",
|
||||
Integer.valueOf( startInclusive ), "; ",
|
||||
Integer.valueOf( endExclusive ),
|
||||
") has no PAPX. Creating new one." );
|
||||
// create it manually
|
||||
PAPX papx = new PAPX( startInclusive, endExclusive,
|
||||
new SprmBuffer( 2 ), dataStream );
|
||||
newPapxs.add( papx );
|
||||
|
||||
lastParStart = endExclusive;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( papxs.size() == 1 )
|
||||
{
|
||||
// can we reuse existing?
|
||||
PAPX existing = papxs.get( 0 );
|
||||
if ( existing.getStart() == startInclusive && existing.getEnd() == endExclusive )
|
||||
{
|
||||
newPapxs.add( existing );
|
||||
lastParStart = endExclusive;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
SprmBuffer sprmBuffer = new SprmBuffer( 2 );
|
||||
for ( PAPX papx : papxs )
|
||||
{
|
||||
sprmBuffer.append( papx.getGrpprl(), 2 );
|
||||
}
|
||||
PAPX newPapx = new PAPX( startInclusive, endExclusive, sprmBuffer,
|
||||
dataStream );
|
||||
newPapxs.add( newPapx );
|
||||
|
||||
lastParStart = endExclusive;
|
||||
continue;
|
||||
}
|
||||
this._paragraphs = new ArrayList<PAPX>( newPapxs );
|
||||
|
||||
_dataStream = dataStream;
|
||||
}
|
||||
|
||||
public void insert(int listIndex, int cpStart, SprmBuffer buf)
|
||||
{
|
||||
|
@ -33,7 +33,7 @@ import org.apache.poi.util.LittleEndian;
|
||||
*
|
||||
* @author Ryan Ackley
|
||||
*/
|
||||
|
||||
@SuppressWarnings( "deprecation" )
|
||||
public final class PAPX extends BytePropertyNode<PAPX> {
|
||||
|
||||
private ParagraphHeight _phe;
|
||||
@ -41,9 +41,9 @@ public final class PAPX extends BytePropertyNode<PAPX> {
|
||||
|
||||
public PAPX(int fcStart, int fcEnd, CharIndexTranslator translator, byte[] papx, ParagraphHeight phe, byte[] dataStream)
|
||||
{
|
||||
super(fcStart, fcEnd, translator, new SprmBuffer(papx));
|
||||
super(fcStart, fcEnd, translator, new SprmBuffer(papx, 0));
|
||||
_phe = phe;
|
||||
SprmBuffer buf = findHuge(new SprmBuffer(papx), dataStream);
|
||||
SprmBuffer buf = findHuge(new SprmBuffer(papx, 2), dataStream);
|
||||
if(buf != null)
|
||||
_buf = buf;
|
||||
}
|
||||
@ -57,6 +57,15 @@ public final class PAPX extends BytePropertyNode<PAPX> {
|
||||
_buf = buf;
|
||||
}
|
||||
|
||||
public PAPX( int charStart, int charEnd, SprmBuffer buf, byte[] dataStream )
|
||||
{
|
||||
super( charStart, charEnd, buf );
|
||||
_phe = new ParagraphHeight();
|
||||
buf = findHuge( buf, dataStream );
|
||||
if ( buf != null )
|
||||
_buf = buf;
|
||||
}
|
||||
|
||||
private SprmBuffer findHuge(SprmBuffer buf, byte[] datastream)
|
||||
{
|
||||
byte[] grpprl = buf.toByteArray();
|
||||
@ -80,7 +89,7 @@ public final class PAPX extends BytePropertyNode<PAPX> {
|
||||
grpprlSize);
|
||||
// save a pointer to where we got the huge Grpprl from
|
||||
_hugeGrpprlOffset = hugeGrpprlOffset;
|
||||
return new SprmBuffer(hugeGrpprl);
|
||||
return new SprmBuffer(hugeGrpprl, 2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -31,7 +31,7 @@ public final class SEPX extends PropertyNode<SEPX>
|
||||
|
||||
public SEPX( SectionDescriptor sed, int start, int end, byte[] grpprl )
|
||||
{
|
||||
super( start, end, new SprmBuffer( grpprl ) );
|
||||
super( start, end, new SprmBuffer( grpprl, 0 ) );
|
||||
_sed = sed;
|
||||
}
|
||||
|
||||
@ -41,7 +41,7 @@ public final class SEPX extends PropertyNode<SEPX>
|
||||
{
|
||||
byte[] grpprl = SectionSprmCompressor
|
||||
.compressSectionProperty( sectionProperties );
|
||||
_buf = new SprmBuffer( grpprl );
|
||||
_buf = new SprmBuffer( grpprl, 0 );
|
||||
}
|
||||
|
||||
return ( (SprmBuffer) _buf ).toByteArray();
|
||||
|
@ -21,74 +21,183 @@ import java.util.Arrays;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
public final class SprmBuffer
|
||||
implements Cloneable
|
||||
public final class SprmBuffer implements Cloneable
|
||||
{
|
||||
byte[] _buf;
|
||||
int _offset;
|
||||
boolean _istd;
|
||||
byte[] _buf;
|
||||
boolean _istd;
|
||||
int _offset;
|
||||
|
||||
public SprmBuffer(byte[] buf, boolean istd)
|
||||
{
|
||||
_offset = buf.length;
|
||||
_buf = buf;
|
||||
_istd = istd;
|
||||
}
|
||||
public SprmBuffer(byte[] buf)
|
||||
{
|
||||
this(buf, false);
|
||||
}
|
||||
public SprmBuffer()
|
||||
{
|
||||
_buf = new byte[4];
|
||||
_offset = 0;
|
||||
}
|
||||
private final int _sprmsStartOffset;
|
||||
|
||||
public SprmOperation findSprm( short opcode )
|
||||
/**
|
||||
* @deprecated Use {@link #SprmBuffer(int)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public SprmBuffer()
|
||||
{
|
||||
int operation = SprmOperation.getOperationFromOpcode( opcode );
|
||||
int type = SprmOperation.getTypeFromOpcode( opcode );
|
||||
this( 0 );
|
||||
}
|
||||
|
||||
SprmIterator si = new SprmIterator( _buf, 2 );
|
||||
while ( si.hasNext() )
|
||||
/**
|
||||
* @deprecated Use {@link #SprmBuffer(byte[],int)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public SprmBuffer( byte[] buf )
|
||||
{
|
||||
this( buf, 0 );
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated Use {@link #SprmBuffer(byte[],boolean,int)} instead
|
||||
*/
|
||||
@Deprecated
|
||||
public SprmBuffer( byte[] buf, boolean istd )
|
||||
{
|
||||
this( buf, istd, 0 );
|
||||
}
|
||||
|
||||
public SprmBuffer( byte[] buf, boolean istd, int sprmsStartOffset )
|
||||
{
|
||||
_offset = buf.length;
|
||||
_buf = buf;
|
||||
_istd = istd;
|
||||
_sprmsStartOffset = sprmsStartOffset;
|
||||
}
|
||||
|
||||
public SprmBuffer( byte[] buf, int _sprmsStartOffset )
|
||||
{
|
||||
this( buf, false, _sprmsStartOffset );
|
||||
}
|
||||
|
||||
public SprmBuffer( int sprmsStartOffset )
|
||||
{
|
||||
_buf = new byte[sprmsStartOffset + 4];
|
||||
_offset = sprmsStartOffset;
|
||||
_sprmsStartOffset = sprmsStartOffset;
|
||||
}
|
||||
|
||||
public void addSprm(short opcode, byte operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
_buf[_offset++] = operand;
|
||||
}
|
||||
|
||||
public void addSprm(short opcode, byte[] operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE + operand.length;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
_buf[_offset++] = (byte)operand.length;
|
||||
System.arraycopy(operand, 0, _buf, _offset, operand.length);
|
||||
}
|
||||
|
||||
public void addSprm(short opcode, int operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.INT_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
LittleEndian.putInt(_buf, _offset, operand);
|
||||
_offset += LittleEndian.INT_SIZE;
|
||||
}
|
||||
|
||||
public void addSprm(short opcode, short operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.SHORT_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
LittleEndian.putShort(_buf, _offset, operand);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
}
|
||||
|
||||
public void append( byte[] grpprl )
|
||||
{
|
||||
append( grpprl, 0 );
|
||||
}
|
||||
|
||||
public void append( byte[] grpprl, int offset )
|
||||
{
|
||||
ensureCapacity( grpprl.length - offset );
|
||||
System.arraycopy( grpprl, offset, _buf, _offset, grpprl.length - offset );
|
||||
_offset += grpprl.length - offset;
|
||||
}
|
||||
public Object clone()
|
||||
throws CloneNotSupportedException
|
||||
{
|
||||
SprmBuffer retVal = (SprmBuffer)super.clone();
|
||||
retVal._buf = new byte[_buf.length];
|
||||
System.arraycopy(_buf, 0, retVal._buf, 0, _buf.length);
|
||||
return retVal;
|
||||
}
|
||||
private void ensureCapacity( int addition )
|
||||
{
|
||||
if ( _offset + addition >= _buf.length )
|
||||
{
|
||||
// add 6 more than they need for use the next iteration
|
||||
//
|
||||
// commented - buffer shall not contain any additional bytes --
|
||||
// sergey
|
||||
// byte[] newBuf = new byte[_offset + addition + 6];
|
||||
byte[] newBuf = new byte[_offset + addition];
|
||||
System.arraycopy( _buf, 0, newBuf, 0, _buf.length );
|
||||
_buf = newBuf;
|
||||
}
|
||||
}
|
||||
public boolean equals(Object obj)
|
||||
{
|
||||
SprmBuffer sprmBuf = (SprmBuffer)obj;
|
||||
return (Arrays.equals(_buf, sprmBuf._buf));
|
||||
}
|
||||
|
||||
public SprmOperation findSprm( short opcode )
|
||||
{
|
||||
int operation = SprmOperation.getOperationFromOpcode( opcode );
|
||||
int type = SprmOperation.getTypeFromOpcode( opcode );
|
||||
|
||||
SprmIterator si = new SprmIterator( _buf, 2 );
|
||||
while ( si.hasNext() )
|
||||
{
|
||||
SprmOperation i = si.next();
|
||||
if ( i.getOperation() == operation && i.getType() == type )
|
||||
return i;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private int findSprmOffset( short opcode )
|
||||
{
|
||||
SprmOperation sprmOperation = findSprm( opcode );
|
||||
if ( sprmOperation == null )
|
||||
return -1;
|
||||
|
||||
return sprmOperation.getGrpprlOffset();
|
||||
}
|
||||
|
||||
public byte[] toByteArray()
|
||||
{
|
||||
return _buf;
|
||||
}
|
||||
|
||||
public SprmIterator iterator()
|
||||
{
|
||||
return new SprmIterator( _buf, _sprmsStartOffset );
|
||||
}
|
||||
|
||||
public void updateSprm(short opcode, byte operand)
|
||||
{
|
||||
int grpprlOffset = findSprmOffset(opcode);
|
||||
if(grpprlOffset != -1)
|
||||
{
|
||||
SprmOperation i = si.next();
|
||||
if ( i.getOperation() == operation && i.getType() == type )
|
||||
return i;
|
||||
_buf[grpprlOffset] = operand;
|
||||
return;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
private int findSprmOffset( short opcode )
|
||||
{
|
||||
SprmOperation sprmOperation = findSprm( opcode );
|
||||
if ( sprmOperation == null )
|
||||
return -1;
|
||||
|
||||
return sprmOperation.getGrpprlOffset();
|
||||
}
|
||||
|
||||
public void updateSprm(short opcode, byte operand)
|
||||
{
|
||||
int grpprlOffset = findSprmOffset(opcode);
|
||||
if(grpprlOffset != -1)
|
||||
{
|
||||
_buf[grpprlOffset] = operand;
|
||||
return;
|
||||
}
|
||||
addSprm(opcode, operand);
|
||||
}
|
||||
|
||||
public void updateSprm(short opcode, short operand)
|
||||
{
|
||||
int grpprlOffset = findSprmOffset(opcode);
|
||||
if(grpprlOffset != -1)
|
||||
{
|
||||
LittleEndian.putShort(_buf, grpprlOffset, operand);
|
||||
return;
|
||||
}
|
||||
addSprm(opcode, operand);
|
||||
}
|
||||
addSprm(opcode, operand);
|
||||
}
|
||||
|
||||
public void updateSprm(short opcode, int operand)
|
||||
{
|
||||
@ -101,86 +210,14 @@ public final class SprmBuffer
|
||||
addSprm(opcode, operand);
|
||||
}
|
||||
|
||||
public void addSprm(short opcode, byte operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
_buf[_offset++] = operand;
|
||||
}
|
||||
public void addSprm(short opcode, short operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.SHORT_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
LittleEndian.putShort(_buf, _offset, operand);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
}
|
||||
public void addSprm(short opcode, int operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.INT_SIZE;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
LittleEndian.putInt(_buf, _offset, operand);
|
||||
_offset += LittleEndian.INT_SIZE;
|
||||
}
|
||||
public void addSprm(short opcode, byte[] operand)
|
||||
{
|
||||
int addition = LittleEndian.SHORT_SIZE + LittleEndian.BYTE_SIZE + operand.length;
|
||||
ensureCapacity(addition);
|
||||
LittleEndian.putShort(_buf, _offset, opcode);
|
||||
_offset += LittleEndian.SHORT_SIZE;
|
||||
_buf[_offset++] = (byte)operand.length;
|
||||
System.arraycopy(operand, 0, _buf, _offset, operand.length);
|
||||
}
|
||||
|
||||
public byte[] toByteArray()
|
||||
{
|
||||
return _buf;
|
||||
}
|
||||
|
||||
public boolean equals(Object obj)
|
||||
{
|
||||
SprmBuffer sprmBuf = (SprmBuffer)obj;
|
||||
return (Arrays.equals(_buf, sprmBuf._buf));
|
||||
}
|
||||
|
||||
public void append( byte[] grpprl )
|
||||
{
|
||||
append( grpprl, 0 );
|
||||
}
|
||||
|
||||
public void append( byte[] grpprl, int offset )
|
||||
{
|
||||
ensureCapacity( grpprl.length - offset );
|
||||
System.arraycopy( grpprl, offset, _buf, _offset, grpprl.length - offset );
|
||||
_offset += grpprl.length - offset;
|
||||
}
|
||||
|
||||
public Object clone()
|
||||
throws CloneNotSupportedException
|
||||
{
|
||||
SprmBuffer retVal = (SprmBuffer)super.clone();
|
||||
retVal._buf = new byte[_buf.length];
|
||||
System.arraycopy(_buf, 0, retVal._buf, 0, _buf.length);
|
||||
return retVal;
|
||||
}
|
||||
|
||||
private void ensureCapacity( int addition )
|
||||
{
|
||||
if ( _offset + addition >= _buf.length )
|
||||
public void updateSprm(short opcode, short operand)
|
||||
{
|
||||
int grpprlOffset = findSprmOffset(opcode);
|
||||
if(grpprlOffset != -1)
|
||||
{
|
||||
// add 6 more than they need for use the next iteration
|
||||
//
|
||||
// commented - buffer shall not contain any additional bytes --
|
||||
// sergey
|
||||
// byte[] newBuf = new byte[_offset + addition + 6];
|
||||
byte[] newBuf = new byte[_offset + addition];
|
||||
System.arraycopy( _buf, 0, newBuf, 0, _buf.length );
|
||||
_buf = newBuf;
|
||||
LittleEndian.putShort(_buf, grpprlOffset, operand);
|
||||
return;
|
||||
}
|
||||
}
|
||||
addSprm(opcode, operand);
|
||||
}
|
||||
}
|
||||
|
@ -33,6 +33,7 @@ public final class TableSprmUncompressor
|
||||
{
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public static TableProperties uncompressTAP(byte[] grpprl,
|
||||
int offset)
|
||||
{
|
||||
@ -51,12 +52,8 @@ public final class TableSprmUncompressor
|
||||
try {
|
||||
unCompressTAPOperation(newProperties, sprm);
|
||||
} catch (ArrayIndexOutOfBoundsException ex) {
|
||||
logger.log(
|
||||
POILogger.ERROR,
|
||||
"Unable to apply SPRM operation '"
|
||||
+ sprm.getOperation() + "': ",
|
||||
ex
|
||||
);
|
||||
logger.log( POILogger.ERROR, "Unable to apply ", sprm,
|
||||
": ", ex, ex );
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -64,6 +61,49 @@ public final class TableSprmUncompressor
|
||||
return newProperties;
|
||||
}
|
||||
|
||||
public static TableProperties uncompressTAP( SprmBuffer sprmBuffer )
|
||||
{
|
||||
TableProperties tableProperties;
|
||||
|
||||
SprmOperation sprmOperation = sprmBuffer.findSprm( (short) 0xd608 );
|
||||
if ( sprmOperation != null )
|
||||
{
|
||||
byte[] grpprl = sprmOperation.getGrpprl();
|
||||
int offset = sprmOperation.getGrpprlOffset();
|
||||
short itcMac = grpprl[offset];
|
||||
tableProperties = new TableProperties( itcMac );
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log( POILogger.WARN,
|
||||
"Some table rows didn't specify number of columns in SPRMs" );
|
||||
tableProperties = new TableProperties( (short) 1 );
|
||||
}
|
||||
|
||||
for ( SprmIterator iterator = sprmBuffer.iterator(); iterator.hasNext(); )
|
||||
{
|
||||
SprmOperation sprm = iterator.next();
|
||||
|
||||
/*
|
||||
* TAPXs are actually PAPXs so we have to make sure we are only
|
||||
* trying to uncompress the right type of sprm.
|
||||
*/
|
||||
if ( sprm.getType() == SprmOperation.TYPE_TAP )
|
||||
{
|
||||
try
|
||||
{
|
||||
unCompressTAPOperation( tableProperties, sprm );
|
||||
}
|
||||
catch ( ArrayIndexOutOfBoundsException ex )
|
||||
{
|
||||
logger.log( POILogger.ERROR, "Unable to apply ", sprm,
|
||||
": ", ex, ex );
|
||||
}
|
||||
}
|
||||
}
|
||||
return tableProperties;
|
||||
}
|
||||
|
||||
/**
|
||||
* Used to uncompress a table property. Performs an operation defined
|
||||
* by a sprm stored in a tapx.
|
||||
|
@ -503,7 +503,7 @@ public class Paragraph extends Range implements Cloneable {
|
||||
Paragraph p = (Paragraph)super.clone();
|
||||
p._props = (ParagraphProperties)_props.clone();
|
||||
//p._baseStyle = _baseStyle;
|
||||
p._papx = new SprmBuffer();
|
||||
p._papx = new SprmBuffer(0);
|
||||
return p;
|
||||
}
|
||||
|
||||
@ -528,17 +528,6 @@ public class Paragraph extends Range implements Cloneable {
|
||||
@Override
|
||||
public String toString()
|
||||
{
|
||||
return toString( true );
|
||||
}
|
||||
|
||||
public String toString( boolean withPapx )
|
||||
{
|
||||
return "Paragraph ("
|
||||
+ getStartOffset()
|
||||
+ "--"
|
||||
+ getEndOffset()
|
||||
+ ")"
|
||||
+ ( withPapx ? "\n"
|
||||
+ _props.toString().replaceAll( "\n", "\n\t" ) : "" );
|
||||
return "Paragraph [" + getStartOffset() + "; " + getEndOffset() + ")";
|
||||
}
|
||||
}
|
||||
|
@ -459,7 +459,7 @@ public class Range { // TODO -instantiable superclass
|
||||
StyleSheet ss = _doc.getStyleSheet();
|
||||
CharacterProperties baseStyle = ss.getCharacterStyle(istd);
|
||||
byte[] grpprl = CharacterSprmCompressor.compressCharacterProperty(props, baseStyle);
|
||||
SprmBuffer buf = new SprmBuffer(grpprl);
|
||||
SprmBuffer buf = new SprmBuffer(grpprl, 0);
|
||||
_doc.getCharacterTable().insert(_charStart, _start, buf);
|
||||
|
||||
return insertBefore(text);
|
||||
@ -486,7 +486,7 @@ public class Range { // TODO -instantiable superclass
|
||||
StyleSheet ss = _doc.getStyleSheet();
|
||||
CharacterProperties baseStyle = ss.getCharacterStyle(istd);
|
||||
byte[] grpprl = CharacterSprmCompressor.compressCharacterProperty(props, baseStyle);
|
||||
SprmBuffer buf = new SprmBuffer(grpprl);
|
||||
SprmBuffer buf = new SprmBuffer(grpprl, 0);
|
||||
_doc.getCharacterTable().insert(_charEnd, _end, buf);
|
||||
_charEnd++;
|
||||
return insertAfter(text);
|
||||
@ -534,7 +534,7 @@ public class Range { // TODO -instantiable superclass
|
||||
byte[] withIndex = new byte[grpprl.length + LittleEndian.SHORT_SIZE];
|
||||
LittleEndian.putShort(withIndex, (short) styleIndex);
|
||||
System.arraycopy(grpprl, 0, withIndex, LittleEndian.SHORT_SIZE, grpprl.length);
|
||||
SprmBuffer buf = new SprmBuffer(withIndex);
|
||||
SprmBuffer buf = new SprmBuffer(withIndex, 0);
|
||||
|
||||
_doc.getParagraphTable().insert(_parStart, _start, buf);
|
||||
insertBefore(text, baseChp);
|
||||
@ -584,7 +584,7 @@ public class Range { // TODO -instantiable superclass
|
||||
byte[] withIndex = new byte[grpprl.length + LittleEndian.SHORT_SIZE];
|
||||
LittleEndian.putShort(withIndex, (short) styleIndex);
|
||||
System.arraycopy(grpprl, 0, withIndex, LittleEndian.SHORT_SIZE, grpprl.length);
|
||||
SprmBuffer buf = new SprmBuffer(withIndex);
|
||||
SprmBuffer buf = new SprmBuffer(withIndex, 0);
|
||||
|
||||
_doc.getParagraphTable().insert(_parEnd, _end, buf);
|
||||
_parEnd++;
|
||||
@ -781,12 +781,13 @@ public class Range { // TODO -instantiable superclass
|
||||
public CharacterRun getCharacterRun( int index )
|
||||
{
|
||||
initCharacterRuns();
|
||||
CHPX chpx = _characters.get( index + _charStart );
|
||||
return getCharacterRun( chpx );
|
||||
}
|
||||
|
||||
private CharacterRun getCharacterRun( CHPX chpx )
|
||||
{
|
||||
if ( index + _charStart >= _charEnd )
|
||||
throw new IndexOutOfBoundsException( "CHPX #" + index + " ("
|
||||
+ ( index + _charStart ) + ") not in range [" + _charStart
|
||||
+ "; " + _charEnd + ")" );
|
||||
|
||||
CHPX chpx = _characters.get( index + _charStart );
|
||||
if ( chpx == null )
|
||||
{
|
||||
return null;
|
||||
@ -884,9 +885,9 @@ public class Range { // TODO -instantiable superclass
|
||||
throw new IllegalArgumentException("This paragraph is not a child of this range");
|
||||
}
|
||||
|
||||
r.initAll();
|
||||
int tableLevel = paragraph.getTableLevel();
|
||||
int tableEndInclusive = r._parEnd ;
|
||||
r.initAll();
|
||||
int tableLevel = paragraph.getTableLevel();
|
||||
int tableEndInclusive = r._parStart;
|
||||
|
||||
if ( r._parStart != 0 )
|
||||
{
|
||||
@ -912,7 +913,7 @@ public class Range { // TODO -instantiable superclass
|
||||
}
|
||||
|
||||
initAll();
|
||||
if ( tableEndInclusive + 1 > _parEnd )
|
||||
if ( tableEndInclusive >= this._parEnd )
|
||||
{
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"The table's bounds fall outside of this Range" );
|
||||
|
@ -48,7 +48,7 @@ public final class TableRow extends Paragraph
|
||||
{
|
||||
super( startIdxInclusive, endIdxExclusive, parent );
|
||||
|
||||
_tprops = TableSprmUncompressor.uncompressTAP( _papx.toByteArray(), 2 );
|
||||
_tprops = TableSprmUncompressor.uncompressTAP( _papx );
|
||||
_levelNum = levelNum;
|
||||
initCells();
|
||||
}
|
||||
|
@ -127,9 +127,7 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
|
||||
public void testBug48075() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug48075.doc" );
|
||||
|
||||
assertTrue( result.contains( "<table>" ) );
|
||||
getHtmlText( "Bug48075.doc" );
|
||||
}
|
||||
|
||||
public void testO_kurs_doc() throws Exception
|
||||
|
Loading…
Reference in New Issue
Block a user