rewrite table bounds detection for Word 97, including inner table support
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143070 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fa46737e44
commit
8ac3172759
@ -241,6 +241,14 @@ public abstract class AbstractWordConverter
|
||||
if ( paragraph.isInTable()
|
||||
&& paragraph.getTableLevel() != currentTableLevel )
|
||||
{
|
||||
if ( paragraph.getTableLevel() < currentTableLevel )
|
||||
throw new IllegalStateException(
|
||||
"Trying to process table cell with higher level ("
|
||||
+ paragraph.getTableLevel()
|
||||
+ ") than current table level ("
|
||||
+ currentTableLevel
|
||||
+ ") as inner table part" );
|
||||
|
||||
Table table = range.getTable( paragraph );
|
||||
processTable( wordDocument, flow, table );
|
||||
|
||||
|
@ -84,9 +84,9 @@ public class Paragraph extends Range implements Cloneable {
|
||||
protected ParagraphProperties _props;
|
||||
protected SprmBuffer _papx;
|
||||
|
||||
protected Paragraph(int startIdx, int endIdx, Table parent)
|
||||
protected Paragraph(int startIdxInclusive, int endIdxExclusive, Table parent)
|
||||
{
|
||||
super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent);
|
||||
super(startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent);
|
||||
PAPX papx = _paragraphs.get(_parEnd - 1);
|
||||
_props = papx.getParagraphProperties(_doc.getStyleSheet());
|
||||
_papx = papx.getSprmBuf();
|
||||
|
@ -90,10 +90,10 @@ public class Range { // TODO -instantiable superclass
|
||||
/** All paragraphs that belong to the document this Range belongs to. */
|
||||
protected List<PAPX> _paragraphs;
|
||||
|
||||
/** The start index in the paragraphs list for this Range */
|
||||
/** The start index in the paragraphs list for this Range, inclusive */
|
||||
protected int _parStart;
|
||||
|
||||
/** The end index in the paragraphs list for this Range. */
|
||||
/** The end index in the paragraphs list for this Range, exclusive */
|
||||
protected int _parEnd;
|
||||
|
||||
/** Have we loaded the characterRun indexes yet. */
|
||||
@ -178,9 +178,9 @@ public class Range { // TODO -instantiable superclass
|
||||
* lists.
|
||||
*
|
||||
* @param startIdx
|
||||
* The starting index in the list.
|
||||
* The starting index in the list, inclusive
|
||||
* @param endIdx
|
||||
* The ending index in the list.
|
||||
* The ending index in the list, exclusive
|
||||
* @param idxType
|
||||
* The list type.
|
||||
* @param parent
|
||||
@ -199,27 +199,27 @@ public class Range { // TODO -instantiable superclass
|
||||
_parStart = parent._parStart + startIdx;
|
||||
_parEnd = parent._parStart + endIdx;
|
||||
_start = _paragraphs.get(_parStart).getStart();
|
||||
_end = _paragraphs.get(_parEnd).getEnd();
|
||||
_end = _paragraphs.get(_parEnd - 1).getEnd();
|
||||
_parRangeFound = true;
|
||||
break;
|
||||
case TYPE_CHARACTER:
|
||||
_charStart = parent._charStart + startIdx;
|
||||
_charEnd = parent._charStart + endIdx;
|
||||
_start = _characters.get(_charStart).getStart();
|
||||
_start = _characters.get(_charStart - 1).getStart();
|
||||
_end = _characters.get(_charEnd).getEnd();
|
||||
_charRangeFound = true;
|
||||
break;
|
||||
case TYPE_SECTION:
|
||||
_sectionStart = parent._sectionStart + startIdx;
|
||||
_sectionEnd = parent._sectionStart + endIdx;
|
||||
_start = _sections.get(_sectionStart).getStart();
|
||||
_start = _sections.get(_sectionStart - 1).getStart();
|
||||
_end = _sections.get(_sectionEnd).getEnd();
|
||||
_sectionRangeFound = true;
|
||||
break;
|
||||
case TYPE_TEXT:
|
||||
_textStart = parent._textStart + startIdx;
|
||||
_textEnd = parent._textStart + endIdx;
|
||||
_start = _text.get(_textStart).getStart();
|
||||
_start = _text.get(_textStart - 1).getStart();
|
||||
_end = _text.get(_textEnd).getEnd();
|
||||
_textRangeFound = true;
|
||||
break;
|
||||
@ -833,6 +833,12 @@ public class Range { // TODO -instantiable superclass
|
||||
|
||||
public Paragraph getParagraph(int index) {
|
||||
initParagraphs();
|
||||
|
||||
if ( index + _parStart >= _parEnd )
|
||||
throw new IndexOutOfBoundsException( "Paragraph #" + index + " ("
|
||||
+ (index + _parStart) + ") not in range [" + _parStart
|
||||
+ "; " + _parEnd + ")" );
|
||||
|
||||
PAPX papx = _paragraphs.get(index + _parStart);
|
||||
|
||||
ParagraphProperties props = papx.getParagraphProperties(_doc.getStyleSheet());
|
||||
@ -880,7 +886,7 @@ public class Range { // TODO -instantiable superclass
|
||||
|
||||
r.initAll();
|
||||
int tableLevel = paragraph.getTableLevel();
|
||||
int tableEnd = r._parEnd;
|
||||
int tableEndInclusive = r._parEnd ;
|
||||
|
||||
if ( r._parStart != 0 )
|
||||
{
|
||||
@ -895,24 +901,30 @@ public class Range { // TODO -instantiable superclass
|
||||
}
|
||||
}
|
||||
|
||||
final Range overallrange = getDocument() instanceof HWPFDocument ? ((HWPFDocument) getDocument())
|
||||
.getOverallRange() : getDocument().getRange();
|
||||
int limit = _paragraphs.size();
|
||||
for ( ; tableEnd < limit; tableEnd++ )
|
||||
for ( ; tableEndInclusive < limit - 1; tableEndInclusive++ )
|
||||
{
|
||||
Paragraph next = new Paragraph( _paragraphs.get( tableEnd ), this );
|
||||
Paragraph next = new Paragraph( _paragraphs.get( tableEndInclusive + 1 ),
|
||||
overallrange );
|
||||
if ( !next.isInTable() || next.getTableLevel() < tableLevel )
|
||||
break;
|
||||
}
|
||||
|
||||
initAll();
|
||||
if (tableEnd > _parEnd) {
|
||||
if ( tableEndInclusive + 1 > _parEnd )
|
||||
{
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"The table's bounds fall outside of this Range");
|
||||
"The table's bounds fall outside of this Range" );
|
||||
}
|
||||
if (tableEnd < 0) {
|
||||
if ( tableEndInclusive < 0 )
|
||||
{
|
||||
throw new ArrayIndexOutOfBoundsException(
|
||||
"The table's end is negative, which isn't allowed!");
|
||||
"The table's end is negative, which isn't allowed!" );
|
||||
}
|
||||
return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
|
||||
return new Table( r._parStart, tableEndInclusive + 1, r._doc.getRange(),
|
||||
paragraph.getTableLevel() );
|
||||
}
|
||||
|
||||
/**
|
||||
@ -989,7 +1001,11 @@ public class Range { // TODO -instantiable superclass
|
||||
*/
|
||||
private int[] findRange(List<? extends PropertyNode> rpl, int min, int start, int end) {
|
||||
int x = min;
|
||||
PropertyNode node = rpl.get(x);
|
||||
|
||||
if ( rpl.size() == min )
|
||||
return new int[] { min, min };
|
||||
|
||||
PropertyNode node = rpl.get( x );
|
||||
|
||||
while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) {
|
||||
x++;
|
||||
|
@ -25,9 +25,9 @@ public final class Table extends Range
|
||||
|
||||
private int _tableLevel;
|
||||
|
||||
Table( int startIdx, int endIdx, Range parent, int levelNum )
|
||||
Table( int startIdxInclusive, int endIdxExclusive, Range parent, int levelNum )
|
||||
{
|
||||
super( startIdx, endIdx, Range.TYPE_PARAGRAPH, parent );
|
||||
super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent );
|
||||
_rows = new ArrayList<TableRow>();
|
||||
_tableLevel = levelNum;
|
||||
|
||||
@ -41,8 +41,8 @@ public final class Table extends Range
|
||||
rowEnd++;
|
||||
if ( p.isTableRowEnd() && p.getTableLevel() == levelNum )
|
||||
{
|
||||
_rows.add( new TableRow( rowStart, rowEnd, this, levelNum ) );
|
||||
rowStart = rowEnd;
|
||||
_rows.add( new TableRow( rowStart, rowEnd + 1, this, levelNum ) );
|
||||
rowStart = rowEnd + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -25,9 +25,11 @@ public final class TableCell
|
||||
private int _leftEdge;
|
||||
private int _width;
|
||||
|
||||
public TableCell(int startIdx, int endIdx, TableRow parent, int levelNum, TableCellDescriptor tcd, int leftEdge, int width)
|
||||
public TableCell( int startIdxInclusive, int endIdxExclusive,
|
||||
TableRow parent, int levelNum, TableCellDescriptor tcd,
|
||||
int leftEdge, int width )
|
||||
{
|
||||
super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent);
|
||||
super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent );
|
||||
_tcd = tcd;
|
||||
_leftEdge = leftEdge;
|
||||
_width = width;
|
||||
|
@ -17,62 +17,98 @@
|
||||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public final class TableRow
|
||||
extends Paragraph
|
||||
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
public final class TableRow extends Paragraph
|
||||
{
|
||||
private final static char TABLE_CELL_MARK = '\u0007';
|
||||
|
||||
private final static short SPRM_TJC = 0x5400;
|
||||
private final static short SPRM_DXAGAPHALF = (short)0x9602;
|
||||
private final static short SPRM_DXAGAPHALF = (short) 0x9602;
|
||||
private final static short SPRM_FCANTSPLIT = 0x3403;
|
||||
private final static short SPRM_FTABLEHEADER = 0x3404;
|
||||
private final static short SPRM_DYAROWHEIGHT = (short)0x9407;
|
||||
private final static short SPRM_DYAROWHEIGHT = (short) 0x9407;
|
||||
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( TableRow.class );
|
||||
|
||||
int _levelNum;
|
||||
private TableProperties _tprops;
|
||||
private TableCell[] _cells;
|
||||
|
||||
public TableRow(int startIdx, int endIdx, Table parent, int levelNum)
|
||||
public TableRow( int startIdxInclusive, int endIdxExclusive, Table parent,
|
||||
int levelNum )
|
||||
{
|
||||
super(startIdx, endIdx, parent);
|
||||
super( startIdxInclusive, endIdxExclusive, parent );
|
||||
|
||||
_tprops = TableSprmUncompressor.uncompressTAP(_papx.toByteArray(), 2);
|
||||
_tprops = TableSprmUncompressor.uncompressTAP( _papx.toByteArray(), 2 );
|
||||
_levelNum = levelNum;
|
||||
_cells = new TableCell[_tprops.getItcMac()];
|
||||
final short expectedCellsCount = _tprops.getItcMac();
|
||||
|
||||
int start = 0;
|
||||
int end = 0;
|
||||
|
||||
for (int cellIndex = 0; cellIndex < _cells.length; cellIndex++)
|
||||
int lastCellStart = 0;
|
||||
List<TableCell> cells = new ArrayList<TableCell>( expectedCellsCount );
|
||||
for ( int p = 0; p < (endIdxExclusive - startIdxInclusive); p++ )
|
||||
{
|
||||
Paragraph p = getParagraph(start);
|
||||
String s = p.text();
|
||||
Paragraph paragraph = getParagraph( p );
|
||||
String s = paragraph.text();
|
||||
|
||||
while (! ( s.length() > 0 && (s.charAt(s.length() - 1) == TABLE_CELL_MARK) ||
|
||||
p.isEmbeddedCellMark() && p.getTableLevel() == levelNum) && end < endIdx)
|
||||
if ( s.length() > 0
|
||||
&& s.charAt( s.length() - 1 ) == TABLE_CELL_MARK
|
||||
&& paragraph.getTableLevel() == levelNum )
|
||||
{
|
||||
end++;
|
||||
p = getParagraph(end);
|
||||
s = p.text();
|
||||
TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null
|
||||
&& _tprops.getRgtc().length > cells.size() ? _tprops
|
||||
.getRgtc()[cells.size()] : new TableCellDescriptor();
|
||||
final short leftEdge = _tprops.getRgdxaCenter() != null
|
||||
&& _tprops.getRgdxaCenter().length > cells.size() ? _tprops
|
||||
.getRgdxaCenter()[cells.size()] : 0;
|
||||
final short rightEdge = _tprops.getRgdxaCenter() != null
|
||||
&& _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops
|
||||
.getRgdxaCenter()[cells.size() + 1] : 0;
|
||||
|
||||
TableCell tableCell = new TableCell( lastCellStart, p + 1,
|
||||
this, levelNum, tableCellDescriptor, leftEdge,
|
||||
rightEdge - leftEdge );
|
||||
cells.add( tableCell );
|
||||
lastCellStart = p + 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Create it for the correct paragraph range
|
||||
_cells[cellIndex] = new TableCell(start, end, this, levelNum,
|
||||
_tprops.getRgtc()[cellIndex],
|
||||
_tprops.getRgdxaCenter()[cellIndex],
|
||||
_tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]);
|
||||
// Now we've decided where everything is, tweak the
|
||||
// record of the paragraph end so that the
|
||||
// paragraph level counts work
|
||||
// This is a bit hacky, we really need a better fix...
|
||||
_cells[cellIndex]._parEnd++;
|
||||
if ( lastCellStart < (endIdxExclusive - startIdxInclusive - 1) )
|
||||
{
|
||||
TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null
|
||||
&& _tprops.getRgtc().length > cells.size() ? _tprops
|
||||
.getRgtc()[cells.size()] : new TableCellDescriptor();
|
||||
final short leftEdge = _tprops.getRgdxaCenter() != null
|
||||
&& _tprops.getRgdxaCenter().length > cells.size() ? _tprops
|
||||
.getRgdxaCenter()[cells.size()] : 0;
|
||||
final short rightEdge = _tprops.getRgdxaCenter() != null
|
||||
&& _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops
|
||||
.getRgdxaCenter()[cells.size() + 1] : 0;
|
||||
|
||||
// Next!
|
||||
end++;
|
||||
start = end;
|
||||
TableCell tableCell = new TableCell( lastCellStart,
|
||||
(endIdxExclusive - startIdxInclusive - 1), this, levelNum,
|
||||
tableCellDescriptor, leftEdge, rightEdge - leftEdge );
|
||||
cells.add( tableCell );
|
||||
}
|
||||
|
||||
if ( cells.size() != expectedCellsCount )
|
||||
{
|
||||
logger.log( POILogger.WARN,
|
||||
"Number of found table cells (" + cells.size()
|
||||
+ ") for table row [" + getStartOffset() + "c; "
|
||||
+ getEndOffset()
|
||||
+ "c] not equals to stored property value "
|
||||
+ expectedCellsCount );
|
||||
_tprops.setItcMac( (short) cells.size() );
|
||||
}
|
||||
|
||||
_cells = cells.toArray( new TableCell[cells.size()] );
|
||||
}
|
||||
|
||||
public int getRowJustification()
|
||||
@ -80,10 +116,10 @@ public final class TableRow
|
||||
return _tprops.getJc();
|
||||
}
|
||||
|
||||
public void setRowJustification(int jc)
|
||||
public void setRowJustification( int jc )
|
||||
{
|
||||
_tprops.setJc((short) jc);
|
||||
_papx.updateSprm(SPRM_TJC, (short)jc);
|
||||
_tprops.setJc( (short) jc );
|
||||
_papx.updateSprm( SPRM_TJC, (short) jc );
|
||||
}
|
||||
|
||||
public int getGapHalf()
|
||||
@ -91,10 +127,10 @@ public final class TableRow
|
||||
return _tprops.getDxaGapHalf();
|
||||
}
|
||||
|
||||
public void setGapHalf(int dxaGapHalf)
|
||||
public void setGapHalf( int dxaGapHalf )
|
||||
{
|
||||
_tprops.setDxaGapHalf(dxaGapHalf);
|
||||
_papx.updateSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf);
|
||||
_tprops.setDxaGapHalf( dxaGapHalf );
|
||||
_papx.updateSprm( SPRM_DXAGAPHALF, (short) dxaGapHalf );
|
||||
}
|
||||
|
||||
public int getRowHeight()
|
||||
@ -102,10 +138,10 @@ public final class TableRow
|
||||
return _tprops.getDyaRowHeight();
|
||||
}
|
||||
|
||||
public void setRowHeight(int dyaRowHeight)
|
||||
public void setRowHeight( int dyaRowHeight )
|
||||
{
|
||||
_tprops.setDyaRowHeight(dyaRowHeight);
|
||||
_papx.updateSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight);
|
||||
_tprops.setDyaRowHeight( dyaRowHeight );
|
||||
_papx.updateSprm( SPRM_DYAROWHEIGHT, (short) dyaRowHeight );
|
||||
}
|
||||
|
||||
public boolean cantSplit()
|
||||
@ -113,10 +149,10 @@ public final class TableRow
|
||||
return _tprops.getFCantSplit();
|
||||
}
|
||||
|
||||
public void setCantSplit(boolean cantSplit)
|
||||
public void setCantSplit( boolean cantSplit )
|
||||
{
|
||||
_tprops.setFCantSplit(cantSplit);
|
||||
_papx.updateSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0));
|
||||
_tprops.setFCantSplit( cantSplit );
|
||||
_papx.updateSprm( SPRM_FCANTSPLIT, (byte) (cantSplit ? 1 : 0) );
|
||||
}
|
||||
|
||||
public boolean isTableHeader()
|
||||
@ -124,10 +160,10 @@ public final class TableRow
|
||||
return _tprops.getFTableHeader();
|
||||
}
|
||||
|
||||
public void setTableHeader(boolean tableHeader)
|
||||
public void setTableHeader( boolean tableHeader )
|
||||
{
|
||||
_tprops.setFTableHeader(tableHeader);
|
||||
_papx.updateSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0));
|
||||
_tprops.setFTableHeader( tableHeader );
|
||||
_papx.updateSprm( SPRM_FTABLEHEADER, (byte) (tableHeader ? 1 : 0) );
|
||||
}
|
||||
|
||||
public int numCells()
|
||||
@ -135,37 +171,44 @@ public final class TableRow
|
||||
return _cells.length;
|
||||
}
|
||||
|
||||
public TableCell getCell(int index)
|
||||
public TableCell getCell( int index )
|
||||
{
|
||||
return _cells[index];
|
||||
}
|
||||
|
||||
public BorderCode getTopBorder() {
|
||||
public BorderCode getTopBorder()
|
||||
{
|
||||
return _tprops.getBrcBottom();
|
||||
}
|
||||
|
||||
public BorderCode getBottomBorder() {
|
||||
public BorderCode getBottomBorder()
|
||||
{
|
||||
return _tprops.getBrcBottom();
|
||||
}
|
||||
|
||||
public BorderCode getLeftBorder() {
|
||||
public BorderCode getLeftBorder()
|
||||
{
|
||||
return _tprops.getBrcLeft();
|
||||
}
|
||||
|
||||
public BorderCode getRightBorder() {
|
||||
public BorderCode getRightBorder()
|
||||
{
|
||||
return _tprops.getBrcRight();
|
||||
}
|
||||
|
||||
public BorderCode getHorizontalBorder() {
|
||||
public BorderCode getHorizontalBorder()
|
||||
{
|
||||
return _tprops.getBrcHorizontal();
|
||||
}
|
||||
|
||||
public BorderCode getVerticalBorder() {
|
||||
public BorderCode getVerticalBorder()
|
||||
{
|
||||
return _tprops.getBrcVertical();
|
||||
}
|
||||
|
||||
public BorderCode getBarBorder() {
|
||||
throw new UnsupportedOperationException("not applicable for TableRow");
|
||||
public BorderCode getBarBorder()
|
||||
{
|
||||
throw new UnsupportedOperationException( "not applicable for TableRow" );
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -40,7 +40,8 @@ public class TestWordToConverterSuite
|
||||
/**
|
||||
* YK: a quick hack to exclude failing documents from the suite.
|
||||
*/
|
||||
private static List<String> failingFiles = Arrays.asList();
|
||||
private static List<String> failingFiles = Arrays
|
||||
.asList( "ProblemExtracting.doc" );
|
||||
|
||||
public static Test suite()
|
||||
{
|
||||
|
@ -68,11 +68,6 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
|
||||
}
|
||||
|
||||
public void testBug33519() throws Exception
|
||||
{
|
||||
getHtmlText( "Bug33519.doc" );
|
||||
}
|
||||
|
||||
public void testBug46610_2() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug46610_2.doc" );
|
||||
|
@ -79,14 +79,14 @@ public final class TestRange extends TestCase
|
||||
assertEquals( range.getStartOffset(), 0 );
|
||||
assertEquals( range.getEndOffset(), 766 );
|
||||
|
||||
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() );
|
||||
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() - 1);
|
||||
assertTrue( lastInMainRange.getEndOffset() <= 766 );
|
||||
|
||||
Section section = range.getSection( 0 );
|
||||
assertTrue( section.getEndOffset() <= 766 );
|
||||
|
||||
Paragraph lastInMainSection = section.getParagraph( section
|
||||
.numParagraphs() );
|
||||
.numParagraphs() - 1);
|
||||
assertTrue( lastInMainSection.getEndOffset() <= 766 );
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user