Better processing of Word tables in cases where different rows have different cell widths
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4bb6a792f8
commit
e956fa6fbf
@ -20,6 +20,8 @@ import java.io.File;
|
|||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||||
@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel;
|
|||||||
import org.apache.poi.hwpf.model.ListTables;
|
import org.apache.poi.hwpf.model.ListTables;
|
||||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Table;
|
||||||
|
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||||
|
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
import org.w3c.dom.Attr;
|
import org.w3c.dom.Attr;
|
||||||
@ -44,6 +49,42 @@ public class AbstractWordUtils
|
|||||||
public static final float TWIPS_PER_INCH = 1440.0f;
|
public static final float TWIPS_PER_INCH = 1440.0f;
|
||||||
public static final int TWIPS_PER_PT = 20;
|
public static final int TWIPS_PER_PT = 20;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates array of all possible cell edges. In HTML (and FO) cells from
|
||||||
|
* different rows and same column should have same width, otherwise spanning
|
||||||
|
* shall be used.
|
||||||
|
*
|
||||||
|
* @param table
|
||||||
|
* table to build cell edges array from
|
||||||
|
* @return array of cell edges (including leftest one) in twips
|
||||||
|
*/
|
||||||
|
static int[] buildTableCellEdgesArray( Table table )
|
||||||
|
{
|
||||||
|
Set<Integer> edges = new TreeSet<Integer>();
|
||||||
|
|
||||||
|
for ( int r = 0; r < table.numRows(); r++ )
|
||||||
|
{
|
||||||
|
TableRow tableRow = table.getRow( r );
|
||||||
|
for ( int c = 0; c < tableRow.numCells(); c++ )
|
||||||
|
{
|
||||||
|
TableCell tableCell = tableRow.getCell( c );
|
||||||
|
|
||||||
|
edges.add( Integer.valueOf( tableCell.getLeftEdge() ) );
|
||||||
|
edges.add( Integer.valueOf( tableCell.getLeftEdge()
|
||||||
|
+ tableCell.getWidth() ) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Integer[] sorted = edges.toArray( new Integer[edges.size()] );
|
||||||
|
int[] result = new int[sorted.length];
|
||||||
|
for ( int i = 0; i < sorted.length; i++ )
|
||||||
|
{
|
||||||
|
result[i] = sorted[i].intValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
|
static boolean canBeMerged( Node node1, Node node2, String requiredTagName )
|
||||||
{
|
{
|
||||||
if ( node1.getNodeType() != Node.ELEMENT_NODE
|
if ( node1.getNodeType() != Node.ELEMENT_NODE
|
||||||
|
@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
Element tableHeader = foDocumentFacade.createTableHeader();
|
Element tableHeader = foDocumentFacade.createTableHeader();
|
||||||
Element tableBody = foDocumentFacade.createTableBody();
|
Element tableBody = foDocumentFacade.createTableBody();
|
||||||
|
|
||||||
|
final int[] tableCellEdges = WordToHtmlUtils
|
||||||
|
.buildTableCellEdgesArray( table );
|
||||||
final int tableRows = table.numRows();
|
final int tableRows = table.numRows();
|
||||||
|
|
||||||
int maxColumns = Integer.MIN_VALUE;
|
int maxColumns = Integer.MIN_VALUE;
|
||||||
@ -504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
Element tableRowElement = foDocumentFacade.createTableRow();
|
Element tableRowElement = foDocumentFacade.createTableRow();
|
||||||
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
|
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
|
||||||
|
|
||||||
|
// index of current element in tableCellEdges[]
|
||||||
|
int currentEdgeIndex = 0;
|
||||||
final int rowCells = tableRow.numCells();
|
final int rowCells = tableRow.numCells();
|
||||||
for ( int c = 0; c < rowCells; c++ )
|
for ( int c = 0; c < rowCells; c++ )
|
||||||
{
|
{
|
||||||
@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
tableCellElement, r == 0, r == tableRows - 1, c == 0,
|
tableCellElement, r == 0, r == tableRows - 1, c == 0,
|
||||||
c == rowCells - 1 );
|
c == rowCells - 1 );
|
||||||
|
|
||||||
if ( tableCell.isFirstMerged() )
|
int colSpan = 0;
|
||||||
|
int cellRightEdge = tableCell.getLeftEdge()
|
||||||
|
+ tableCell.getWidth();
|
||||||
|
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
|
||||||
{
|
{
|
||||||
int count = 0;
|
colSpan++;
|
||||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
currentEdgeIndex++;
|
||||||
{
|
|
||||||
TableCell nextCell = tableRow.getCell( c1 );
|
|
||||||
if ( nextCell.isMerged() )
|
|
||||||
count++;
|
|
||||||
if ( !nextCell.isMerged() )
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
tableCellElement.setAttribute( "number-columns-spanned", ""
|
|
||||||
+ count );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
|
||||||
{
|
|
||||||
tableCellElement.setAttribute(
|
|
||||||
"number-columns-spanned", ""
|
|
||||||
+ ( maxColumns - c ) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ( colSpan == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if ( colSpan != 1 )
|
||||||
|
tableCellElement.setAttribute( "number-columns-spanned",
|
||||||
|
String.valueOf( colSpan ) );
|
||||||
|
|
||||||
if ( tableCell.isFirstVerticallyMerged() )
|
if ( tableCell.isFirstVerticallyMerged() )
|
||||||
{
|
{
|
||||||
int count = 0;
|
int count = 0;
|
||||||
@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
if ( !nextCell.isVerticallyMerged() )
|
if ( !nextCell.isVerticallyMerged() )
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
tableCellElement.setAttribute( "number-rows-spanned", ""
|
if ( count > 1 )
|
||||||
+ count );
|
tableCellElement.setAttribute( "number-rows-spanned",
|
||||||
|
String.valueOf( count ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
processParagraphes( wordDocument, tableCellElement, tableCell,
|
processParagraphes( wordDocument, tableCellElement, tableCell,
|
||||||
|
@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
Element tableHeader = htmlDocumentFacade.createTableHeader();
|
Element tableHeader = htmlDocumentFacade.createTableHeader();
|
||||||
Element tableBody = htmlDocumentFacade.createTableBody();
|
Element tableBody = htmlDocumentFacade.createTableBody();
|
||||||
|
|
||||||
|
final int[] tableCellEdges = WordToHtmlUtils
|
||||||
|
.buildTableCellEdgesArray( table );
|
||||||
final int tableRows = table.numRows();
|
final int tableRows = table.numRows();
|
||||||
|
|
||||||
int maxColumns = Integer.MIN_VALUE;
|
int maxColumns = Integer.MIN_VALUE;
|
||||||
@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
StringBuilder tableRowStyle = new StringBuilder();
|
StringBuilder tableRowStyle = new StringBuilder();
|
||||||
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
|
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
|
||||||
|
|
||||||
|
// index of current element in tableCellEdges[]
|
||||||
|
int currentEdgeIndex = 0;
|
||||||
final int rowCells = tableRow.numCells();
|
final int rowCells = tableRow.numCells();
|
||||||
for ( int c = 0; c < rowCells; c++ )
|
for ( int c = 0; c < rowCells; c++ )
|
||||||
{
|
{
|
||||||
TableCell tableCell = tableRow.getCell( c );
|
TableCell tableCell = tableRow.getCell( c );
|
||||||
|
|
||||||
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if ( tableCell.isVerticallyMerged()
|
if ( tableCell.isVerticallyMerged()
|
||||||
&& !tableCell.isFirstVerticallyMerged() )
|
&& !tableCell.isFirstVerticallyMerged() )
|
||||||
continue;
|
continue;
|
||||||
@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
|
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
|
||||||
tableCellStyle );
|
tableCellStyle );
|
||||||
|
|
||||||
if ( tableCell.isFirstMerged() )
|
int colSpan = 0;
|
||||||
|
int cellRightEdge = tableCell.getLeftEdge()
|
||||||
|
+ tableCell.getWidth();
|
||||||
|
while ( tableCellEdges[currentEdgeIndex] < cellRightEdge )
|
||||||
{
|
{
|
||||||
int count = 0;
|
colSpan++;
|
||||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
currentEdgeIndex++;
|
||||||
{
|
|
||||||
TableCell nextCell = tableRow.getCell( c1 );
|
|
||||||
if ( nextCell.isMerged() )
|
|
||||||
count++;
|
|
||||||
if ( !nextCell.isMerged() )
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
tableCellElement.setAttribute( "colspan", "" + count );
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
|
if ( colSpan == 0 )
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if ( colSpan != 1 )
|
||||||
{
|
{
|
||||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
tableCellElement.setAttribute( "colspan",
|
||||||
{
|
String.valueOf( colSpan ) );
|
||||||
tableCellElement.setAttribute( "colspan", ""
|
|
||||||
+ ( maxColumns - c ) );
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tableCell.isFirstVerticallyMerged() )
|
if ( tableCell.isFirstVerticallyMerged() )
|
||||||
{
|
{
|
||||||
int count = 0;
|
int count = 1;
|
||||||
for ( int r1 = r; r1 < tableRows; r1++ )
|
for ( int r1 = r + 1; r1 < tableRows; r1++ )
|
||||||
{
|
{
|
||||||
TableRow nextRow = table.getRow( r1 );
|
TableRow nextRow = table.getRow( r1 );
|
||||||
if ( nextRow.numCells() < c )
|
if ( nextRow.numCells() < c )
|
||||||
break;
|
break;
|
||||||
TableCell nextCell = nextRow.getCell( c );
|
TableCell nextCell = nextRow.getCell( c );
|
||||||
if ( nextCell.isVerticallyMerged() )
|
if ( !nextCell.isVerticallyMerged()
|
||||||
count++;
|
|| nextCell.isFirstVerticallyMerged() )
|
||||||
if ( !nextCell.isVerticallyMerged() )
|
|
||||||
break;
|
break;
|
||||||
|
count++;
|
||||||
}
|
}
|
||||||
tableCellElement.setAttribute( "rowspan", "" + count );
|
if ( count > 1 )
|
||||||
|
tableCellElement.setAttribute( "rowspan",
|
||||||
|
String.valueOf( count ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
processParagraphes( hwpfDocument, tableCellElement, tableCell,
|
processParagraphes( hwpfDocument, tableCellElement, tableCell,
|
||||||
|
@ -19,7 +19,7 @@ package org.apache.poi.hwpf;
|
|||||||
|
|
||||||
import junit.framework.Test;
|
import junit.framework.Test;
|
||||||
import junit.framework.TestSuite;
|
import junit.framework.TestSuite;
|
||||||
|
import org.apache.poi.hwpf.converter.AbstractWordUtilsTest;
|
||||||
import org.apache.poi.hwpf.converter.TestWordToFoConverter;
|
import org.apache.poi.hwpf.converter.TestWordToFoConverter;
|
||||||
import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
|
import org.apache.poi.hwpf.converter.TestWordToHtmlConverter;
|
||||||
import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
|
import org.apache.poi.hwpf.extractor.TestDifferentRoutes;
|
||||||
@ -72,6 +72,7 @@ public final class AllHWPFTests
|
|||||||
|
|
||||||
// org.apache.poi.hwpf.converter
|
// org.apache.poi.hwpf.converter
|
||||||
// suite.addTestSuite( TestWordToConverterSuite.class );
|
// suite.addTestSuite( TestWordToConverterSuite.class );
|
||||||
|
suite.addTestSuite( AbstractWordUtilsTest.class );
|
||||||
suite.addTestSuite( TestWordToFoConverter.class );
|
suite.addTestSuite( TestWordToFoConverter.class );
|
||||||
suite.addTestSuite( TestWordToHtmlConverter.class );
|
suite.addTestSuite( TestWordToHtmlConverter.class );
|
||||||
|
|
||||||
|
@ -0,0 +1,53 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.hwpf.converter;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Table;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases for {@link AbstractWordUtils}
|
||||||
|
*
|
||||||
|
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||||
|
*/
|
||||||
|
public class AbstractWordUtilsTest extends TestCase
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Test case for {@link AbstractWordUtils#buildTableCellEdgesArray(Table)}
|
||||||
|
*/
|
||||||
|
public void testBuildTableCellEdgesArray()
|
||||||
|
{
|
||||||
|
HWPFDocument document = HWPFTestDataSamples
|
||||||
|
.openSampleFile( "table-merges.doc" );
|
||||||
|
final Range range = document.getRange();
|
||||||
|
Table table = range.getTable( range.getParagraph( 0 ) );
|
||||||
|
|
||||||
|
int[] result = AbstractWordUtils.buildTableCellEdgesArray( table );
|
||||||
|
assertEquals( 6, result.length );
|
||||||
|
|
||||||
|
assertEquals( 0000, result[0] );
|
||||||
|
assertEquals( 1062, result[1] );
|
||||||
|
assertEquals( 5738, result[2] );
|
||||||
|
assertEquals( 6872, result[3] );
|
||||||
|
assertEquals( 8148, result[4] );
|
||||||
|
assertEquals( 9302, result[5] );
|
||||||
|
}
|
||||||
|
}
|
@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
getHtmlText( "innertable.doc" );
|
getHtmlText( "innertable.doc" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testTableMerges() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "table-merges.doc" );
|
||||||
|
|
||||||
|
assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
|
||||||
|
assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
|
||||||
|
}
|
||||||
|
|
||||||
public void testO_kurs_doc() throws Exception
|
public void testO_kurs_doc() throws Exception
|
||||||
{
|
{
|
||||||
getHtmlText( "o_kurs.doc" );
|
getHtmlText( "o_kurs.doc" );
|
||||||
|
BIN
test-data/document/table-merges.doc
Normal file
BIN
test-data/document/table-merges.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user