Better processing of Word tables in cases where different rows have different cell widths

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-22 09:42:32 +00:00
parent 4bb6a792f8
commit e956fa6fbf
7 changed files with 149 additions and 50 deletions

View File

@ -20,6 +20,8 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;
@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel;
import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.BorderCode;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.w3c.dom.Attr; import org.w3c.dom.Attr;
@ -44,6 +49,42 @@ public class AbstractWordUtils
public static final float TWIPS_PER_INCH = 1440.0f; public static final float TWIPS_PER_INCH = 1440.0f;
public static final int TWIPS_PER_PT = 20; public static final int TWIPS_PER_PT = 20;
/**
 * Collects every distinct cell boundary found in the given table. In HTML
 * (and FO) cells that belong to the same column of different rows should
 * share one width; when they do not, spanning has to be used instead, and
 * this array provides the edges such spans can be measured against.
 *
 * @param table
 *            table whose cells supply the edges
 * @return distinct cell edges (including the leftmost one) in twips, sorted
 *         in ascending order
 */
static int[] buildTableCellEdgesArray( Table table )
{
    // TreeSet drops duplicate edges and keeps the rest in ascending order
    Set<Integer> uniqueEdges = new TreeSet<Integer>();
    for ( int rowIndex = 0; rowIndex < table.numRows(); rowIndex++ )
    {
        TableRow row = table.getRow( rowIndex );
        for ( int cellIndex = 0; cellIndex < row.numCells(); cellIndex++ )
        {
            TableCell cell = row.getCell( cellIndex );
            int leftEdge = cell.getLeftEdge();
            uniqueEdges.add( Integer.valueOf( leftEdge ) );
            uniqueEdges.add( Integer.valueOf( leftEdge + cell.getWidth() ) );
        }
    }

    // unbox straight from the sorted set into the primitive result array
    int[] result = new int[uniqueEdges.size()];
    int next = 0;
    for ( Integer edge : uniqueEdges )
    {
        result[next++] = edge.intValue();
    }
    return result;
}
static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
{ {
if ( node1.getNodeType() != Node.ELEMENT_NODE if ( node1.getNodeType() != Node.ELEMENT_NODE

View File

@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element tableHeader = foDocumentFacade.createTableHeader(); Element tableHeader = foDocumentFacade.createTableHeader();
Element tableBody = foDocumentFacade.createTableBody(); Element tableBody = foDocumentFacade.createTableBody();
final int[] tableCellEdges = WordToHtmlUtils
.buildTableCellEdgesArray( table );
final int tableRows = table.numRows(); final int tableRows = table.numRows();
int maxColumns = Integer.MIN_VALUE; int maxColumns = Integer.MIN_VALUE;
@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element tableRowElement = foDocumentFacade.createTableRow(); Element tableRowElement = foDocumentFacade.createTableRow();
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement ); WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
// index of current element in tableCellEdges[]
int currentEdgeIndex = 0;
final int rowCells = tableRow.numCells(); final int rowCells = tableRow.numCells();
for ( int c = 0; c < rowCells; c++ ) for ( int c = 0; c < rowCells; c++ )
{ {
@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter
tableCellElement, r == 0, r == tableRows - 1, c == 0, tableCellElement, r == 0, r == tableRows - 1, c == 0,
c == rowCells - 1 ); c == rowCells - 1 );
if ( tableCell.isFirstMerged() ) int colSpan = 0;
int cellRightEdge = tableCell.getLeftEdge()
+ tableCell.getWidth();
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
{ {
int count = 0; colSpan++;
for ( int c1 = c; c1 < rowCells; c1++ ) currentEdgeIndex++;
{
TableCell nextCell = tableRow.getCell( c1 );
if ( nextCell.isMerged() )
count++;
if ( !nextCell.isMerged() )
break;
}
tableCellElement.setAttribute( "number-columns-spanned", ""
+ count );
}
else
{
if ( c == rowCells - 1 && c != maxColumns - 1 )
{
tableCellElement.setAttribute(
"number-columns-spanned", ""
+ ( maxColumns - c ) );
}
} }
if ( colSpan == 0 )
continue;
if ( colSpan != 1 )
tableCellElement.setAttribute( "number-columns-spanned",
String.valueOf( colSpan ) );
if ( tableCell.isFirstVerticallyMerged() ) if ( tableCell.isFirstVerticallyMerged() )
{ {
int count = 0; int count = 0;
@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter
if ( !nextCell.isVerticallyMerged() ) if ( !nextCell.isVerticallyMerged() )
break; break;
} }
tableCellElement.setAttribute( "number-rows-spanned", "" if ( count > 1 )
+ count ); tableCellElement.setAttribute( "number-rows-spanned",
String.valueOf( count ) );
} }
processParagraphes( wordDocument, tableCellElement, tableCell, processParagraphes( wordDocument, tableCellElement, tableCell,

View File

@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
Element tableHeader = htmlDocumentFacade.createTableHeader(); Element tableHeader = htmlDocumentFacade.createTableHeader();
Element tableBody = htmlDocumentFacade.createTableBody(); Element tableBody = htmlDocumentFacade.createTableBody();
final int[] tableCellEdges = WordToHtmlUtils
.buildTableCellEdgesArray( table );
final int tableRows = table.numRows(); final int tableRows = table.numRows();
int maxColumns = Integer.MIN_VALUE; int maxColumns = Integer.MIN_VALUE;
@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
StringBuilder tableRowStyle = new StringBuilder(); StringBuilder tableRowStyle = new StringBuilder();
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle ); WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
// index of current element in tableCellEdges[]
int currentEdgeIndex = 0;
final int rowCells = tableRow.numCells(); final int rowCells = tableRow.numCells();
for ( int c = 0; c < rowCells; c++ ) for ( int c = 0; c < rowCells; c++ )
{ {
TableCell tableCell = tableRow.getCell( c ); TableCell tableCell = tableRow.getCell( c );
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
continue;
if ( tableCell.isVerticallyMerged() if ( tableCell.isVerticallyMerged()
&& !tableCell.isFirstVerticallyMerged() ) && !tableCell.isFirstVerticallyMerged() )
continue; continue;
@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1, r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
tableCellStyle ); tableCellStyle );
if ( tableCell.isFirstMerged() ) int colSpan = 0;
int cellRightEdge = tableCell.getLeftEdge()
+ tableCell.getWidth();
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
{ {
int count = 0; colSpan++;
for ( int c1 = c; c1 < rowCells; c1++ ) currentEdgeIndex++;
{
TableCell nextCell = tableRow.getCell( c1 );
if ( nextCell.isMerged() )
count++;
if ( !nextCell.isMerged() )
break;
} }
tableCellElement.setAttribute( "colspan", "" + count );
} if ( colSpan == 0 )
else continue;
if ( colSpan != 1 )
{ {
if ( c == rowCells - 1 && c != maxColumns - 1 ) tableCellElement.setAttribute( "colspan",
{ String.valueOf( colSpan ) );
tableCellElement.setAttribute( "colspan", ""
+ ( maxColumns - c ) );
}
} }
if ( tableCell.isFirstVerticallyMerged() ) if ( tableCell.isFirstVerticallyMerged() )
{ {
int count = 0; int count = 1;
for ( int r1 = r; r1 < tableRows; r1++ ) for ( int r1 = r + 1; r1 < tableRows; r1++ )
{ {
TableRow nextRow = table.getRow( r1 ); TableRow nextRow = table.getRow( r1 );
if ( nextRow.numCells() < c ) if ( nextRow.numCells() < c )
break; break;
TableCell nextCell = nextRow.getCell( c ); TableCell nextCell = nextRow.getCell( c );
if ( nextCell.isVerticallyMerged() ) if ( !nextCell.isVerticallyMerged()
count++; || nextCell.isFirstVerticallyMerged() )
if ( !nextCell.isVerticallyMerged() )
break; break;
count++;
} }
tableCellElement.setAttribute( "rowspan", "" + count ); if ( count > 1 )
tableCellElement.setAttribute( "rowspan",
String.valueOf( count ) );
} }
processParagraphes( hwpfDocument, tableCellElement, tableCell, processParagraphes( hwpfDocument, tableCellElement, tableCell,

View File

@ -19,7 +19,7 @@ package org.apache.poi.hwpf;
import junit.framework.Test; import junit.framework.Test;
import junit.framework.TestSuite; import junit.framework.TestSuite;
import org.apache.poi.hwpf.converter.AbstractWordUtilsTest;
import org.apache.poi.hwpf.converter.TestWordToFoConverter; import org.apache.poi.hwpf.converter.TestWordToFoConverter;
import org.apache.poi.hwpf.converter.TestWordToHtmlConverter; import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
import org.apache.poi.hwpf.extractor.TestDifferentRoutes; import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
@ -72,6 +72,7 @@ public final class AllHWPFTests
// org.apache.poi.hwpf.converter // org.apache.poi.hwpf.converter
// suite.addTestSuite( TestWordToConverterSuite.class ); // suite.addTestSuite( TestWordToConverterSuite.class );
suite.addTestSuite( AbstractWordUtilsTest.class );
suite.addTestSuite( TestWordToFoConverter.class ); suite.addTestSuite( TestWordToFoConverter.class );
suite.addTestSuite( TestWordToHtmlConverter.class ); suite.addTestSuite( TestWordToHtmlConverter.class );

View File

@ -0,0 +1,53 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.converter;
import org.apache.poi.hwpf.usermodel.Range;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.usermodel.Table;
/**
 * Unit tests for {@link AbstractWordUtils}.
 *
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
 */
public class AbstractWordUtilsTest extends TestCase
{
    /**
     * Checks {@link AbstractWordUtils#buildTableCellEdgesArray(Table)} against
     * the table located at paragraph 0 of the {@code table-merges.doc}
     * sample file.
     */
    public void testBuildTableCellEdgesArray()
    {
        // cell edges expected for the sample table, in ascending order
        final int[] expectedEdges = { 0, 1062, 5738, 6872, 8148, 9302 };

        HWPFDocument sample = HWPFTestDataSamples
                .openSampleFile( "table-merges.doc" );
        final Range documentRange = sample.getRange();
        Table sampleTable = documentRange.getTable( documentRange
                .getParagraph( 0 ) );

        int[] actualEdges = AbstractWordUtils
                .buildTableCellEdgesArray( sampleTable );

        // check the length first so the element loop cannot run out of bounds
        assertEquals( expectedEdges.length, actualEdges.length );
        for ( int i = 0; i < expectedEdges.length; i++ )
        {
            assertEquals( expectedEdges[i], actualEdges[i] );
        }
    }
}

View File

@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase
getHtmlText( "innertable.doc" ); getHtmlText( "innertable.doc" );
} }
/**
 * Obtains the HTML text for the table-merges.doc sample and checks that it
 * contains one cell opening tag with colspan 3 and one with colspan 2.
 */
public void testTableMerges() throws Exception
{
    final String html = getHtmlText( "table-merges.doc" );

    assertContains( html, "<td class=\"td1\" colspan=\"3\">" );
    assertContains( html, "<td class=\"td2\" colspan=\"2\">" );
}
public void testO_kurs_doc() throws Exception public void testO_kurs_doc() throws Exception
{ {
getHtmlText( "o_kurs.doc" ); getHtmlText( "o_kurs.doc" );

Binary file not shown.