Better processing of Word tables in cases where different rows have different cell widths

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-22 09:42:32 +00:00
parent 4bb6a792f8
commit e956fa6fbf
7 changed files with 149 additions and 50 deletions

View File

@ -20,6 +20,8 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Set;
import java.util.TreeSet;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel;
import org.apache.poi.hwpf.model.ListTables;
import org.apache.poi.hwpf.usermodel.BorderCode;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils;
import org.w3c.dom.Attr;
@ -44,6 +49,42 @@ public class AbstractWordUtils
public static final float TWIPS_PER_INCH = 1440.0f;
public static final int TWIPS_PER_PT = 20;
/**
* Creates a sorted array of all distinct cell edges in the table. In HTML (and
* FO), cells in the same column of different rows must have the same width;
* otherwise column spanning has to be used.
*
* @param table
* table to build cell edges array from
* @return array of cell edges (including the leftmost one) in twips
*/
static int[] buildTableCellEdgesArray( Table table )
{
    // A TreeSet drops duplicate edges and iterates in ascending order,
    // so no explicit sorting step is needed afterwards.
    final Set<Integer> uniqueEdges = new TreeSet<Integer>();

    for ( int rowIndex = 0; rowIndex < table.numRows(); rowIndex++ )
    {
        final TableRow row = table.getRow( rowIndex );
        for ( int cellIndex = 0; cellIndex < row.numCells(); cellIndex++ )
        {
            final TableCell cell = row.getCell( cellIndex );
            final int leftEdge = cell.getLeftEdge();

            // every cell contributes both its left and its right border
            uniqueEdges.add( Integer.valueOf( leftEdge ) );
            uniqueEdges.add( Integer.valueOf( leftEdge + cell.getWidth() ) );
        }
    }

    // unbox the ordered set into a primitive array
    final int[] edgeArray = new int[uniqueEdges.size()];
    int position = 0;
    for ( Integer edge : uniqueEdges )
    {
        edgeArray[position++] = edge.intValue();
    }
    return edgeArray;
}
static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
{
if ( node1.getNodeType() != Node.ELEMENT_NODE

View File

@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element tableHeader = foDocumentFacade.createTableHeader();
Element tableBody = foDocumentFacade.createTableBody();
final int[] tableCellEdges = WordToHtmlUtils
.buildTableCellEdgesArray( table );
final int tableRows = table.numRows();
int maxColumns = Integer.MIN_VALUE;
@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element tableRowElement = foDocumentFacade.createTableRow();
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
// index of current element in tableCellEdges[]
int currentEdgeIndex = 0;
final int rowCells = tableRow.numCells();
for ( int c = 0; c < rowCells; c++ )
{
@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter
tableCellElement, r == 0, r == tableRows - 1, c == 0,
c == rowCells - 1 );
if ( tableCell.isFirstMerged() )
int colSpan = 0;
int cellRightEdge = tableCell.getLeftEdge()
+ tableCell.getWidth();
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
{
int count = 0;
for ( int c1 = c; c1 < rowCells; c1++ )
{
TableCell nextCell = tableRow.getCell( c1 );
if ( nextCell.isMerged() )
count++;
if ( !nextCell.isMerged() )
break;
}
tableCellElement.setAttribute( "number-columns-spanned", ""
+ count );
}
else
{
if ( c == rowCells - 1 && c != maxColumns - 1 )
{
tableCellElement.setAttribute(
"number-columns-spanned", ""
+ ( maxColumns - c ) );
}
colSpan++;
currentEdgeIndex++;
}
if ( colSpan == 0 )
continue;
if ( colSpan != 1 )
tableCellElement.setAttribute( "number-columns-spanned",
String.valueOf( colSpan ) );
if ( tableCell.isFirstVerticallyMerged() )
{
int count = 0;
@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter
if ( !nextCell.isVerticallyMerged() )
break;
}
tableCellElement.setAttribute( "number-rows-spanned", ""
+ count );
if ( count > 1 )
tableCellElement.setAttribute( "number-rows-spanned",
String.valueOf( count ) );
}
processParagraphes( wordDocument, tableCellElement, tableCell,

View File

@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
Element tableHeader = htmlDocumentFacade.createTableHeader();
Element tableBody = htmlDocumentFacade.createTableBody();
final int[] tableCellEdges = WordToHtmlUtils
.buildTableCellEdgesArray( table );
final int tableRows = table.numRows();
int maxColumns = Integer.MIN_VALUE;
@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
StringBuilder tableRowStyle = new StringBuilder();
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
// index of current element in tableCellEdges[]
int currentEdgeIndex = 0;
final int rowCells = tableRow.numCells();
for ( int c = 0; c < rowCells; c++ )
{
TableCell tableCell = tableRow.getCell( c );
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
continue;
if ( tableCell.isVerticallyMerged()
&& !tableCell.isFirstVerticallyMerged() )
continue;
@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
tableCellStyle );
if ( tableCell.isFirstMerged() )
int colSpan = 0;
int cellRightEdge = tableCell.getLeftEdge()
+ tableCell.getWidth();
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
{
int count = 0;
for ( int c1 = c; c1 < rowCells; c1++ )
{
TableCell nextCell = tableRow.getCell( c1 );
if ( nextCell.isMerged() )
count++;
if ( !nextCell.isMerged() )
break;
}
tableCellElement.setAttribute( "colspan", "" + count );
colSpan++;
currentEdgeIndex++;
}
else
if ( colSpan == 0 )
continue;
if ( colSpan != 1 )
{
if ( c == rowCells - 1 && c != maxColumns - 1 )
{
tableCellElement.setAttribute( "colspan", ""
+ ( maxColumns - c ) );
}
tableCellElement.setAttribute( "colspan",
String.valueOf( colSpan ) );
}
if ( tableCell.isFirstVerticallyMerged() )
{
int count = 0;
for ( int r1 = r; r1 < tableRows; r1++ )
int count = 1;
for ( int r1 = r + 1; r1 < tableRows; r1++ )
{
TableRow nextRow = table.getRow( r1 );
if ( nextRow.numCells() < c )
break;
TableCell nextCell = nextRow.getCell( c );
if ( nextCell.isVerticallyMerged() )
count++;
if ( !nextCell.isVerticallyMerged() )
if ( !nextCell.isVerticallyMerged()
|| nextCell.isFirstVerticallyMerged() )
break;
count++;
}
tableCellElement.setAttribute( "rowspan", "" + count );
if ( count > 1 )
tableCellElement.setAttribute( "rowspan",
String.valueOf( count ) );
}
processParagraphes( hwpfDocument, tableCellElement, tableCell,

View File

@ -19,7 +19,7 @@ package org.apache.poi.hwpf;
import junit.framework.Test;
import junit.framework.TestSuite;
import org.apache.poi.hwpf.converter.AbstractWordUtilsTest;
import org.apache.poi.hwpf.converter.TestWordToFoConverter;
import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
@ -72,6 +72,7 @@ public final class AllHWPFTests
// org.apache.poi.hwpf.converter
// suite.addTestSuite( TestWordToConverterSuite.class );
suite.addTestSuite( AbstractWordUtilsTest.class );
suite.addTestSuite( TestWordToFoConverter.class );
suite.addTestSuite( TestWordToHtmlConverter.class );

View File

@ -0,0 +1,53 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hwpf.converter;
import org.apache.poi.hwpf.usermodel.Range;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.usermodel.Table;
/**
* Test cases for {@link AbstractWordUtils}
*
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/
public class AbstractWordUtilsTest extends TestCase
{
    /**
     * Checks the edges reported by
     * {@link AbstractWordUtils#buildTableCellEdgesArray(Table)} for the table
     * that starts at the first paragraph of the <tt>table-merges.doc</tt>
     * sample.
     */
    public void testBuildTableCellEdgesArray()
    {
        // expected cell edges, in ascending order
        final int[] expectedEdges = { 0, 1062, 5738, 6872, 8148, 9302 };

        HWPFDocument sample = HWPFTestDataSamples
                .openSampleFile( "table-merges.doc" );
        final Range wholeDocument = sample.getRange();
        Table sampleTable = wholeDocument.getTable( wholeDocument
                .getParagraph( 0 ) );

        int[] actualEdges = AbstractWordUtils
                .buildTableCellEdgesArray( sampleTable );

        // compare the size first so a mismatch is reported before any
        // element is accessed
        assertEquals( expectedEdges.length, actualEdges.length );
        for ( int i = 0; i < expectedEdges.length; i++ )
        {
            assertEquals( expectedEdges[i], actualEdges[i] );
        }
    }
}

View File

@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase
getHtmlText( "innertable.doc" );
}
public void testTableMerges() throws Exception
{
    // cell openings expected in the HTML generated for the sample document
    final String[] expectedCellTags = {
            "<td class=\"td1\" colspan=\"3\">",
            "<td class=\"td2\" colspan=\"2\">" };

    final String html = getHtmlText( "table-merges.doc" );
    for ( String expectedCellTag : expectedCellTags )
    {
        assertContains( html, expectedCellTag );
    }
}
public void testO_kurs_doc() throws Exception
{
getHtmlText( "o_kurs.doc" );

Binary file not shown.