Better processing of Word tables in cases where different rows have different cell widths
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4bb6a792f8
commit
e956fa6fbf
@ -20,6 +20,8 @@ import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Table;
|
||||
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.w3c.dom.Attr;
|
||||
@ -44,6 +49,42 @@ public class AbstractWordUtils
|
||||
public static final float TWIPS_PER_INCH = 1440.0f;
|
||||
public static final int TWIPS_PER_PT = 20;
|
||||
|
||||
/**
|
||||
* Creates array of all possible cell edges. In HTML (and FO) cells from
|
||||
* different rows and same column should have same width, otherwise spanning
|
||||
* shall be used.
|
||||
*
|
||||
* @param table
|
||||
* table to build cell edges array from
|
||||
* @return array of cell edges (including leftest one) in twips
|
||||
*/
|
||||
static int[] buildTableCellEdgesArray( Table table )
|
||||
{
|
||||
Set<Integer> edges = new TreeSet<Integer>();
|
||||
|
||||
for ( int r = 0; r < table.numRows(); r++ )
|
||||
{
|
||||
TableRow tableRow = table.getRow( r );
|
||||
for ( int c = 0; c < tableRow.numCells(); c++ )
|
||||
{
|
||||
TableCell tableCell = tableRow.getCell( c );
|
||||
|
||||
edges.add( Integer.valueOf( tableCell.getLeftEdge() ) );
|
||||
edges.add( Integer.valueOf( tableCell.getLeftEdge()
|
||||
+ tableCell.getWidth() ) );
|
||||
}
|
||||
}
|
||||
|
||||
Integer[] sorted = edges.toArray( new Integer[edges.size()] );
|
||||
int[] result = new int[sorted.length];
|
||||
for ( int i = 0; i < sorted.length; i++ )
|
||||
{
|
||||
result[i] = sorted[i].intValue();
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
|
||||
{
|
||||
if ( node1.getNodeType() != Node.ELEMENT_NODE
|
||||
|
@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
Element tableHeader = foDocumentFacade.createTableHeader();
|
||||
Element tableBody = foDocumentFacade.createTableBody();
|
||||
|
||||
final int[] tableCellEdges = WordToHtmlUtils
|
||||
.buildTableCellEdgesArray( table );
|
||||
final int tableRows = table.numRows();
|
||||
|
||||
int maxColumns = Integer.MIN_VALUE;
|
||||
@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
Element tableRowElement = foDocumentFacade.createTableRow();
|
||||
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
|
||||
|
||||
// index of current element in tableCellEdges[]
|
||||
int currentEdgeIndex = 0;
|
||||
final int rowCells = tableRow.numCells();
|
||||
for ( int c = 0; c < rowCells; c++ )
|
||||
{
|
||||
@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
tableCellElement, r == 0, r == tableRows - 1, c == 0,
|
||||
c == rowCells - 1 );
|
||||
|
||||
if ( tableCell.isFirstMerged() )
|
||||
int colSpan = 0;
|
||||
int cellRightEdge = tableCell.getLeftEdge()
|
||||
+ tableCell.getWidth();
|
||||
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
|
||||
{
|
||||
int count = 0;
|
||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
||||
{
|
||||
TableCell nextCell = tableRow.getCell( c1 );
|
||||
if ( nextCell.isMerged() )
|
||||
count++;
|
||||
if ( !nextCell.isMerged() )
|
||||
break;
|
||||
}
|
||||
tableCellElement.setAttribute( "number-columns-spanned", ""
|
||||
+ count );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
||||
{
|
||||
tableCellElement.setAttribute(
|
||||
"number-columns-spanned", ""
|
||||
+ ( maxColumns - c ) );
|
||||
}
|
||||
colSpan++;
|
||||
currentEdgeIndex++;
|
||||
}
|
||||
|
||||
if ( colSpan == 0 )
|
||||
continue;
|
||||
|
||||
if ( colSpan != 1 )
|
||||
tableCellElement.setAttribute( "number-columns-spanned",
|
||||
String.valueOf( colSpan ) );
|
||||
|
||||
if ( tableCell.isFirstVerticallyMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
if ( !nextCell.isVerticallyMerged() )
|
||||
break;
|
||||
}
|
||||
tableCellElement.setAttribute( "number-rows-spanned", ""
|
||||
+ count );
|
||||
if ( count > 1 )
|
||||
tableCellElement.setAttribute( "number-rows-spanned",
|
||||
String.valueOf( count ) );
|
||||
}
|
||||
|
||||
processParagraphes( wordDocument, tableCellElement, tableCell,
|
||||
|
@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
Element tableHeader = htmlDocumentFacade.createTableHeader();
|
||||
Element tableBody = htmlDocumentFacade.createTableBody();
|
||||
|
||||
final int[] tableCellEdges = WordToHtmlUtils
|
||||
.buildTableCellEdgesArray( table );
|
||||
final int tableRows = table.numRows();
|
||||
|
||||
int maxColumns = Integer.MIN_VALUE;
|
||||
@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
StringBuilder tableRowStyle = new StringBuilder();
|
||||
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
|
||||
|
||||
// index of current element in tableCellEdges[]
|
||||
int currentEdgeIndex = 0;
|
||||
final int rowCells = tableRow.numCells();
|
||||
for ( int c = 0; c < rowCells; c++ )
|
||||
{
|
||||
TableCell tableCell = tableRow.getCell( c );
|
||||
|
||||
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
|
||||
continue;
|
||||
|
||||
if ( tableCell.isVerticallyMerged()
|
||||
&& !tableCell.isFirstVerticallyMerged() )
|
||||
continue;
|
||||
@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
|
||||
tableCellStyle );
|
||||
|
||||
if ( tableCell.isFirstMerged() )
|
||||
int colSpan = 0;
|
||||
int cellRightEdge = tableCell.getLeftEdge()
|
||||
+ tableCell.getWidth();
|
||||
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
|
||||
{
|
||||
int count = 0;
|
||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
||||
{
|
||||
TableCell nextCell = tableRow.getCell( c1 );
|
||||
if ( nextCell.isMerged() )
|
||||
count++;
|
||||
if ( !nextCell.isMerged() )
|
||||
break;
|
||||
colSpan++;
|
||||
currentEdgeIndex++;
|
||||
}
|
||||
tableCellElement.setAttribute( "colspan", "" + count );
|
||||
}
|
||||
else
|
||||
|
||||
if ( colSpan == 0 )
|
||||
continue;
|
||||
|
||||
if ( colSpan != 1 )
|
||||
{
|
||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
||||
{
|
||||
tableCellElement.setAttribute( "colspan", ""
|
||||
+ ( maxColumns - c ) );
|
||||
}
|
||||
tableCellElement.setAttribute( "colspan",
|
||||
String.valueOf( colSpan ) );
|
||||
}
|
||||
|
||||
if ( tableCell.isFirstVerticallyMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
for ( int r1 = r; r1 < tableRows; r1++ )
|
||||
int count = 1;
|
||||
for ( int r1 = r + 1; r1 < tableRows; r1++ )
|
||||
{
|
||||
TableRow nextRow = table.getRow( r1 );
|
||||
if ( nextRow.numCells() < c )
|
||||
break;
|
||||
TableCell nextCell = nextRow.getCell( c );
|
||||
if ( nextCell.isVerticallyMerged() )
|
||||
count++;
|
||||
if ( !nextCell.isVerticallyMerged() )
|
||||
if ( !nextCell.isVerticallyMerged()
|
||||
|| nextCell.isFirstVerticallyMerged() )
|
||||
break;
|
||||
count++;
|
||||
}
|
||||
tableCellElement.setAttribute( "rowspan", "" + count );
|
||||
if ( count > 1 )
|
||||
tableCellElement.setAttribute( "rowspan",
|
||||
String.valueOf( count ) );
|
||||
}
|
||||
|
||||
processParagraphes( hwpfDocument, tableCellElement, tableCell,
|
||||
|
@ -19,7 +19,7 @@ package org.apache.poi.hwpf;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestSuite;
|
||||
|
||||
import org.apache.poi.hwpf.converter.AbstractWordUtilsTest;
|
||||
import org.apache.poi.hwpf.converter.TestWordToFoConverter;
|
||||
import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
|
||||
import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
|
||||
@ -72,6 +72,7 @@ public final class AllHWPFTests
|
||||
|
||||
// org.apache.poi.hwpf.converter
|
||||
// suite.addTestSuite( TestWordToConverterSuite.class );
|
||||
suite.addTestSuite( AbstractWordUtilsTest.class );
|
||||
suite.addTestSuite( TestWordToFoConverter.class );
|
||||
suite.addTestSuite( TestWordToHtmlConverter.class );
|
||||
|
||||
|
@ -0,0 +1,53 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.converter;
|
||||
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
import org.apache.poi.hwpf.usermodel.Table;
|
||||
|
||||
/**
|
||||
* Test cases for {@link AbstractWordUtils}
|
||||
*
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class AbstractWordUtilsTest extends TestCase
|
||||
{
|
||||
/**
|
||||
* Test case for {@link AbstractWordUtils#buildTableCellEdgesArray(Table)}
|
||||
*/
|
||||
public void testBuildTableCellEdgesArray()
|
||||
{
|
||||
HWPFDocument document = HWPFTestDataSamples
|
||||
.openSampleFile( "table-merges.doc" );
|
||||
final Range range = document.getRange();
|
||||
Table table = range.getTable( range.getParagraph( 0 ) );
|
||||
|
||||
int[] result = AbstractWordUtils.buildTableCellEdgesArray( table );
|
||||
assertEquals( 6, result.length );
|
||||
|
||||
assertEquals( 0000, result[0] );
|
||||
assertEquals( 1062, result[1] );
|
||||
assertEquals( 5738, result[2] );
|
||||
assertEquals( 6872, result[3] );
|
||||
assertEquals( 8148, result[4] );
|
||||
assertEquals( 9302, result[5] );
|
||||
}
|
||||
}
|
@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase
|
||||
getHtmlText( "innertable.doc" );
|
||||
}
|
||||
|
||||
public void testTableMerges() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "table-merges.doc" );
|
||||
|
||||
assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
|
||||
assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
|
||||
}
|
||||
|
||||
public void testO_kurs_doc() throws Exception
|
||||
{
|
||||
getHtmlText( "o_kurs.doc" );
|
||||
|
BIN
test-data/document/table-merges.doc
Normal file
BIN
test-data/document/table-merges.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user