rewrite table bounds detection for Word 97, including inner table support

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143070 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-05 14:13:27 +00:00
parent fa46737e44
commit 8ac3172759
9 changed files with 240 additions and 175 deletions

View File

@ -241,6 +241,14 @@ public abstract class AbstractWordConverter
if ( paragraph.isInTable()
&& paragraph.getTableLevel() != currentTableLevel )
{
// An inner table can only be nested deeper than the table currently being
// processed; a paragraph with a lower table level here means the table
// bounds were detected incorrectly.
if ( paragraph.getTableLevel() < currentTableLevel )
{
    throw new IllegalStateException(
            "Trying to process table cell with lower level ("
                    + paragraph.getTableLevel()
                    + ") than current table level ("
                    + currentTableLevel
                    + ") as inner table part" );
}
Table table = range.getTable( paragraph );
processTable( wordDocument, flow, table );

View File

@ -84,9 +84,9 @@ public class Paragraph extends Range implements Cloneable {
protected ParagraphProperties _props;
protected SprmBuffer _papx;
protected Paragraph(int startIdx, int endIdx, Table parent)
protected Paragraph(int startIdxInclusive, int endIdxExclusive, Table parent)
{
super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent);
super(startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent);
PAPX papx = _paragraphs.get(_parEnd - 1);
_props = papx.getParagraphProperties(_doc.getStyleSheet());
_papx = papx.getSprmBuf();

View File

@ -90,10 +90,10 @@ public class Range { // TODO -instantiable superclass
/** All paragraphs that belong to the document this Range belongs to. */
protected List<PAPX> _paragraphs;
/** The start index in the paragraphs list for this Range */
/** The start index in the paragraphs list for this Range, inclusive */
protected int _parStart;
/** The end index in the paragraphs list for this Range. */
/** The end index in the paragraphs list for this Range, exclusive */
protected int _parEnd;
/** Have we loaded the characterRun indexes yet. */
@ -178,9 +178,9 @@ public class Range { // TODO -instantiable superclass
* lists.
*
* @param startIdx
* The starting index in the list.
* The starting index in the list, inclusive
* @param endIdx
* The ending index in the list.
* The ending index in the list, exclusive
* @param idxType
* The list type.
* @param parent
@ -199,27 +199,27 @@ public class Range { // TODO -instantiable superclass
// Every index type follows the same convention: startIdx is inclusive and
// endIdx is exclusive, so the first element is read at the start index and
// the last element at (end index - 1).
_parStart = parent._parStart + startIdx;
_parEnd = parent._parStart + endIdx;
_start = _paragraphs.get(_parStart).getStart();
_end = _paragraphs.get(_parEnd - 1).getEnd();
_parRangeFound = true;
break;
case TYPE_CHARACTER:
_charStart = parent._charStart + startIdx;
_charEnd = parent._charStart + endIdx;
_start = _characters.get(_charStart).getStart();
_end = _characters.get(_charEnd - 1).getEnd();
_charRangeFound = true;
break;
case TYPE_SECTION:
_sectionStart = parent._sectionStart + startIdx;
_sectionEnd = parent._sectionStart + endIdx;
_start = _sections.get(_sectionStart).getStart();
_end = _sections.get(_sectionEnd - 1).getEnd();
_sectionRangeFound = true;
break;
case TYPE_TEXT:
_textStart = parent._textStart + startIdx;
_textEnd = parent._textStart + endIdx;
_start = _text.get(_textStart).getStart();
_end = _text.get(_textEnd - 1).getEnd();
_textRangeFound = true;
break;
@ -833,6 +833,12 @@ public class Range { // TODO -instantiable superclass
public Paragraph getParagraph(int index) {
initParagraphs();
if ( index + _parStart >= _parEnd )
throw new IndexOutOfBoundsException( "Paragraph #" + index + " ("
+ (index + _parStart) + ") not in range [" + _parStart
+ "; " + _parEnd + ")" );
PAPX papx = _paragraphs.get(index + _parStart);
ParagraphProperties props = papx.getParagraphProperties(_doc.getStyleSheet());
@ -880,7 +886,7 @@ public class Range { // TODO -instantiable superclass
r.initAll();
int tableLevel = paragraph.getTableLevel();
int tableEnd = r._parEnd;
int tableEndInclusive = r._parEnd ;
if ( r._parStart != 0 )
{
@ -895,24 +901,30 @@ public class Range { // TODO -instantiable superclass
}
}
final Range overallrange = getDocument() instanceof HWPFDocument ? ((HWPFDocument) getDocument())
.getOverallRange() : getDocument().getRange();
int limit = _paragraphs.size();
for ( ; tableEnd < limit; tableEnd++ )
for ( ; tableEndInclusive < limit - 1; tableEndInclusive++ )
{
Paragraph next = new Paragraph( _paragraphs.get( tableEnd ), this );
Paragraph next = new Paragraph( _paragraphs.get( tableEndInclusive + 1 ),
overallrange );
if ( !next.isInTable() || next.getTableLevel() < tableLevel )
break;
}
initAll();
if (tableEnd > _parEnd) {
if ( tableEndInclusive + 1 > _parEnd )
{
throw new ArrayIndexOutOfBoundsException(
"The table's bounds fall outside of this Range");
"The table's bounds fall outside of this Range" );
}
if (tableEnd < 0) {
if ( tableEndInclusive < 0 )
{
throw new ArrayIndexOutOfBoundsException(
"The table's end is negative, which isn't allowed!");
"The table's end is negative, which isn't allowed!" );
}
return new Table(r._parStart, tableEnd, r._doc.getRange(), paragraph.getTableLevel());
return new Table( r._parStart, tableEndInclusive + 1, r._doc.getRange(),
paragraph.getTableLevel() );
}
/**
@ -989,7 +1001,11 @@ public class Range { // TODO -instantiable superclass
*/
private int[] findRange(List<? extends PropertyNode> rpl, int min, int start, int end) {
int x = min;
PropertyNode node = rpl.get(x);
if ( rpl.size() == min )
return new int[] { min, min };
PropertyNode node = rpl.get( x );
while (node==null || (node.getEnd() <= start && x < rpl.size() - 1)) {
x++;

View File

@ -25,9 +25,9 @@ public final class Table extends Range
private int _tableLevel;
Table( int startIdx, int endIdx, Range parent, int levelNum )
Table( int startIdxInclusive, int endIdxExclusive, Range parent, int levelNum )
{
super( startIdx, endIdx, Range.TYPE_PARAGRAPH, parent );
super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent );
_rows = new ArrayList<TableRow>();
_tableLevel = levelNum;
@ -41,8 +41,8 @@ public final class Table extends Range
rowEnd++;
if ( p.isTableRowEnd() && p.getTableLevel() == levelNum )
{
_rows.add( new TableRow( rowStart, rowEnd, this, levelNum ) );
rowStart = rowEnd;
_rows.add( new TableRow( rowStart, rowEnd + 1, this, levelNum ) );
rowStart = rowEnd + 1;
}
}
}

View File

@ -25,9 +25,11 @@ public final class TableCell
private int _leftEdge;
private int _width;
public TableCell(int startIdx, int endIdx, TableRow parent, int levelNum, TableCellDescriptor tcd, int leftEdge, int width)
public TableCell( int startIdxInclusive, int endIdxExclusive,
TableRow parent, int levelNum, TableCellDescriptor tcd,
int leftEdge, int width )
{
super(startIdx, endIdx, Range.TYPE_PARAGRAPH, parent);
super( startIdxInclusive, endIdxExclusive, Range.TYPE_PARAGRAPH, parent );
_tcd = tcd;
_leftEdge = leftEdge;
_width = width;

View File

@ -17,62 +17,98 @@
package org.apache.poi.hwpf.usermodel;
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
import java.util.ArrayList;
import java.util.List;
public final class TableRow
extends Paragraph
import org.apache.poi.hwpf.sprm.TableSprmUncompressor;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
public final class TableRow extends Paragraph
{
private final static char TABLE_CELL_MARK = '\u0007';
private final static short SPRM_TJC = 0x5400;
private final static short SPRM_DXAGAPHALF = (short)0x9602;
private final static short SPRM_DXAGAPHALF = (short) 0x9602;
private final static short SPRM_FCANTSPLIT = 0x3403;
private final static short SPRM_FTABLEHEADER = 0x3404;
private final static short SPRM_DYAROWHEIGHT = (short)0x9407;
private final static short SPRM_DYAROWHEIGHT = (short) 0x9407;
private static final POILogger logger = POILogFactory
.getLogger( TableRow.class );
int _levelNum;
private TableProperties _tprops;
private TableCell[] _cells;
public TableRow(int startIdx, int endIdx, Table parent, int levelNum)
public TableRow( int startIdxInclusive, int endIdxExclusive, Table parent,
int levelNum )
{
super(startIdx, endIdx, parent);
super( startIdxInclusive, endIdxExclusive, parent );
_tprops = TableSprmUncompressor.uncompressTAP(_papx.toByteArray(), 2);
_tprops = TableSprmUncompressor.uncompressTAP( _papx.toByteArray(), 2 );
_levelNum = levelNum;
_cells = new TableCell[_tprops.getItcMac()];
final short expectedCellsCount = _tprops.getItcMac();
int start = 0;
int end = 0;
for (int cellIndex = 0; cellIndex < _cells.length; cellIndex++)
int lastCellStart = 0;
List<TableCell> cells = new ArrayList<TableCell>( expectedCellsCount );
for ( int p = 0; p < (endIdxExclusive - startIdxInclusive); p++ )
{
Paragraph p = getParagraph(start);
String s = p.text();
Paragraph paragraph = getParagraph( p );
String s = paragraph.text();
while (! ( s.length() > 0 && (s.charAt(s.length() - 1) == TABLE_CELL_MARK) ||
p.isEmbeddedCellMark() && p.getTableLevel() == levelNum) && end < endIdx)
if ( s.length() > 0
&& s.charAt( s.length() - 1 ) == TABLE_CELL_MARK
&& paragraph.getTableLevel() == levelNum )
{
end++;
p = getParagraph(end);
s = p.text();
TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null
&& _tprops.getRgtc().length > cells.size() ? _tprops
.getRgtc()[cells.size()] : new TableCellDescriptor();
final short leftEdge = _tprops.getRgdxaCenter() != null
&& _tprops.getRgdxaCenter().length > cells.size() ? _tprops
.getRgdxaCenter()[cells.size()] : 0;
final short rightEdge = _tprops.getRgdxaCenter() != null
&& _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops
.getRgdxaCenter()[cells.size() + 1] : 0;
TableCell tableCell = new TableCell( lastCellStart, p + 1,
this, levelNum, tableCellDescriptor, leftEdge,
rightEdge - leftEdge );
cells.add( tableCell );
lastCellStart = p + 1;
}
}
// Create it for the correct paragraph range
_cells[cellIndex] = new TableCell(start, end, this, levelNum,
_tprops.getRgtc()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex],
_tprops.getRgdxaCenter()[cellIndex+1]-_tprops.getRgdxaCenter()[cellIndex]);
// Now we've decided where everything is, tweak the
// record of the paragraph end so that the
// paragraph level counts work
// This is a bit hacky, we really need a better fix...
_cells[cellIndex]._parEnd++;
if ( lastCellStart < (endIdxExclusive - startIdxInclusive - 1) )
{
TableCellDescriptor tableCellDescriptor = _tprops.getRgtc() != null
&& _tprops.getRgtc().length > cells.size() ? _tprops
.getRgtc()[cells.size()] : new TableCellDescriptor();
final short leftEdge = _tprops.getRgdxaCenter() != null
&& _tprops.getRgdxaCenter().length > cells.size() ? _tprops
.getRgdxaCenter()[cells.size()] : 0;
final short rightEdge = _tprops.getRgdxaCenter() != null
&& _tprops.getRgdxaCenter().length > cells.size() + 1 ? _tprops
.getRgdxaCenter()[cells.size() + 1] : 0;
// Next!
end++;
start = end;
TableCell tableCell = new TableCell( lastCellStart,
(endIdxExclusive - startIdxInclusive - 1), this, levelNum,
tableCellDescriptor, leftEdge, rightEdge - leftEdge );
cells.add( tableCell );
}
if ( cells.size() != expectedCellsCount )
{
logger.log( POILogger.WARN,
"Number of found table cells (" + cells.size()
+ ") for table row [" + getStartOffset() + "c; "
+ getEndOffset()
+ "c] not equals to stored property value "
+ expectedCellsCount );
_tprops.setItcMac( (short) cells.size() );
}
_cells = cells.toArray( new TableCell[cells.size()] );
}
public int getRowJustification()
@ -80,10 +116,10 @@ public final class TableRow
return _tprops.getJc();
}
public void setRowJustification(int jc)
public void setRowJustification( int jc )
{
_tprops.setJc((short) jc);
_papx.updateSprm(SPRM_TJC, (short)jc);
_tprops.setJc( (short) jc );
_papx.updateSprm( SPRM_TJC, (short) jc );
}
public int getGapHalf()
@ -91,10 +127,10 @@ public final class TableRow
return _tprops.getDxaGapHalf();
}
public void setGapHalf(int dxaGapHalf)
public void setGapHalf( int dxaGapHalf )
{
_tprops.setDxaGapHalf(dxaGapHalf);
_papx.updateSprm(SPRM_DXAGAPHALF, (short)dxaGapHalf);
_tprops.setDxaGapHalf( dxaGapHalf );
_papx.updateSprm( SPRM_DXAGAPHALF, (short) dxaGapHalf );
}
public int getRowHeight()
@ -102,10 +138,10 @@ public final class TableRow
return _tprops.getDyaRowHeight();
}
public void setRowHeight(int dyaRowHeight)
public void setRowHeight( int dyaRowHeight )
{
_tprops.setDyaRowHeight(dyaRowHeight);
_papx.updateSprm(SPRM_DYAROWHEIGHT, (short)dyaRowHeight);
_tprops.setDyaRowHeight( dyaRowHeight );
_papx.updateSprm( SPRM_DYAROWHEIGHT, (short) dyaRowHeight );
}
public boolean cantSplit()
@ -113,10 +149,10 @@ public final class TableRow
return _tprops.getFCantSplit();
}
public void setCantSplit(boolean cantSplit)
public void setCantSplit( boolean cantSplit )
{
_tprops.setFCantSplit(cantSplit);
_papx.updateSprm(SPRM_FCANTSPLIT, (byte)(cantSplit ? 1 : 0));
_tprops.setFCantSplit( cantSplit );
_papx.updateSprm( SPRM_FCANTSPLIT, (byte) (cantSplit ? 1 : 0) );
}
public boolean isTableHeader()
@ -124,10 +160,10 @@ public final class TableRow
return _tprops.getFTableHeader();
}
public void setTableHeader(boolean tableHeader)
public void setTableHeader( boolean tableHeader )
{
_tprops.setFTableHeader(tableHeader);
_papx.updateSprm(SPRM_FTABLEHEADER, (byte)(tableHeader ? 1 : 0));
_tprops.setFTableHeader( tableHeader );
_papx.updateSprm( SPRM_FTABLEHEADER, (byte) (tableHeader ? 1 : 0) );
}
public int numCells()
@ -135,37 +171,44 @@ public final class TableRow
return _cells.length;
}
public TableCell getCell(int index)
public TableCell getCell( int index )
{
return _cells[index];
}
public BorderCode getTopBorder() {
public BorderCode getTopBorder()
{
return _tprops.getBrcBottom();
}
public BorderCode getBottomBorder() {
public BorderCode getBottomBorder()
{
return _tprops.getBrcBottom();
}
public BorderCode getLeftBorder() {
public BorderCode getLeftBorder()
{
return _tprops.getBrcLeft();
}
public BorderCode getRightBorder() {
public BorderCode getRightBorder()
{
return _tprops.getBrcRight();
}
public BorderCode getHorizontalBorder() {
public BorderCode getHorizontalBorder()
{
return _tprops.getBrcHorizontal();
}
public BorderCode getVerticalBorder() {
public BorderCode getVerticalBorder()
{
return _tprops.getBrcVertical();
}
public BorderCode getBarBorder() {
throw new UnsupportedOperationException("not applicable for TableRow");
public BorderCode getBarBorder()
{
throw new UnsupportedOperationException( "not applicable for TableRow" );
}
}

View File

@ -40,7 +40,8 @@ public class TestWordToConverterSuite
/**
* YK: a quick hack to exclude failing documents from the suite.
*/
private static List<String> failingFiles = Arrays.asList();
private static List<String> failingFiles = Arrays
.asList( "ProblemExtracting.doc" );
public static Test suite()
{

View File

@ -68,11 +68,6 @@ public class TestWordToHtmlConverter extends TestCase
assertTrue( result.substring( 0, 2000 ).contains( "<table>" ) );
}
public void testBug33519() throws Exception
{
getHtmlText( "Bug33519.doc" );
}
public void testBug46610_2() throws Exception
{
String result = getHtmlText( "Bug46610_2.doc" );

View File

@ -79,14 +79,14 @@ public final class TestRange extends TestCase
assertEquals( range.getStartOffset(), 0 );
assertEquals( range.getEndOffset(), 766 );
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() );
Paragraph lastInMainRange = range.getParagraph( range.numParagraphs() - 1);
assertTrue( lastInMainRange.getEndOffset() <= 766 );
Section section = range.getSection( 0 );
assertTrue( section.getEndOffset() <= 766 );
Paragraph lastInMainSection = section.getParagraph( section
.numParagraphs() );
.numParagraphs() - 1);
assertTrue( lastInMainSection.getEndOffset() <= 766 );
}
}