From e956fa6fbfb5f463258766805af4b84c66b2b272 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Fri, 22 Jul 2011 09:42:32 +0000 Subject: [PATCH] better processing of word tables in cases different rows have different cell widths git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1149528 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hwpf/converter/AbstractWordUtils.java | 41 ++++++++++++++ .../poi/hwpf/converter/WordToFoConverter.java | 43 +++++++------- .../hwpf/converter/WordToHtmlConverter.java | 51 +++++++++-------- .../org/apache/poi/hwpf/AllHWPFTests.java | 3 +- .../hwpf/converter/AbstractWordUtilsTest.java | 53 ++++++++++++++++++ .../converter/TestWordToHtmlConverter.java | 8 +++ test-data/document/table-merges.doc | Bin 0 -> 11264 bytes 7 files changed, 149 insertions(+), 50 deletions(-) create mode 100644 src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java create mode 100644 test-data/document/table-merges.doc diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java index d464afc26..6e974ea6d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java @@ -20,6 +20,8 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.util.Set; +import java.util.TreeSet; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; @@ -29,6 +31,9 @@ import org.apache.poi.hwpf.model.ListLevel; import org.apache.poi.hwpf.model.ListTables; import org.apache.poi.hwpf.usermodel.BorderCode; import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Table; +import org.apache.poi.hwpf.usermodel.TableCell; +import org.apache.poi.hwpf.usermodel.TableRow; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import 
org.apache.poi.util.IOUtils; import org.w3c.dom.Attr; @@ -44,6 +49,42 @@ public class AbstractWordUtils public static final float TWIPS_PER_INCH = 1440.0f; public static final int TWIPS_PER_PT = 20; + /** + * Creates array of all possible cell edges. In HTML (and FO) cells from + * different rows and same column should have same width, otherwise spanning + * shall be used. + * + * @param table + * table to build cell edges array from + * @return ascending array of distinct cell edges (including the leftmost one) in twips + */ + static int[] buildTableCellEdgesArray( Table table ) + { + Set<Integer> edges = new TreeSet<Integer>(); + + for ( int r = 0; r < table.numRows(); r++ ) + { + TableRow tableRow = table.getRow( r ); + for ( int c = 0; c < tableRow.numCells(); c++ ) + { + TableCell tableCell = tableRow.getCell( c ); + + edges.add( Integer.valueOf( tableCell.getLeftEdge() ) ); + edges.add( Integer.valueOf( tableCell.getLeftEdge() + + tableCell.getWidth() ) ); + } + } + + Integer[] sorted = edges.toArray( new Integer[edges.size()] ); + int[] result = new int[sorted.length]; + for ( int i = 0; i < sorted.length; i++ ) + { + result[i] = sorted[i].intValue(); + } + + return result; + } + static boolean canBeMerged( Node node1, Node node2, String requiredTagName ) { if ( node1.getNodeType() != Node.ELEMENT_NODE diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index ba757248a..ca5b4a89e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -489,6 +489,8 @@ public class WordToFoConverter extends AbstractWordConverter Element tableHeader = foDocumentFacade.createTableHeader(); Element tableBody = foDocumentFacade.createTableBody(); + final int[] tableCellEdges = WordToHtmlUtils + .buildTableCellEdgesArray( table ); final int tableRows = table.numRows(); int maxColumns = Integer.MIN_VALUE; @@ 
-504,6 +506,8 @@ public class WordToFoConverter extends AbstractWordConverter Element tableRowElement = foDocumentFacade.createTableRow(); WordToFoUtils.setTableRowProperties( tableRow, tableRowElement ); + // index of current element in tableCellEdges[] + int currentEdgeIndex = 0; final int rowCells = tableRow.numCells(); for ( int c = 0; c < rowCells; c++ ) { @@ -521,30 +525,22 @@ public class WordToFoConverter extends AbstractWordConverter tableCellElement, r == 0, r == tableRows - 1, c == 0, c == rowCells - 1 ); - if ( tableCell.isFirstMerged() ) + int colSpan = 0; + int cellRightEdge = tableCell.getLeftEdge() + + tableCell.getWidth(); + while ( tableCellEdges[currentEdgeIndex] < cellRightEdge ) { - int count = 0; - for ( int c1 = c; c1 < rowCells; c1++ ) - { - TableCell nextCell = tableRow.getCell( c1 ); - if ( nextCell.isMerged() ) - count++; - if ( !nextCell.isMerged() ) - break; - } - tableCellElement.setAttribute( "number-columns-spanned", "" - + count ); - } - else - { - if ( c == rowCells - 1 && c != maxColumns - 1 ) - { - tableCellElement.setAttribute( - "number-columns-spanned", "" - + ( maxColumns - c ) ); - } + colSpan++; + currentEdgeIndex++; } + if ( colSpan == 0 ) + continue; + + if ( colSpan != 1 ) + tableCellElement.setAttribute( "number-columns-spanned", + String.valueOf( colSpan ) ); + if ( tableCell.isFirstVerticallyMerged() ) { int count = 0; @@ -559,8 +555,9 @@ public class WordToFoConverter extends AbstractWordConverter if ( !nextCell.isVerticallyMerged() ) break; } - tableCellElement.setAttribute( "number-rows-spanned", "" - + count ); + if ( count > 1 ) + tableCellElement.setAttribute( "number-rows-spanned", + String.valueOf( count ) ); } processParagraphes( wordDocument, tableCellElement, tableCell, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index aa25963ae..72410abc7 100644 --- 
a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -557,6 +557,8 @@ public class WordToHtmlConverter extends AbstractWordConverter Element tableHeader = htmlDocumentFacade.createTableHeader(); Element tableBody = htmlDocumentFacade.createTableBody(); + final int[] tableCellEdges = WordToHtmlUtils + .buildTableCellEdgesArray( table ); final int tableRows = table.numRows(); int maxColumns = Integer.MIN_VALUE; @@ -573,14 +575,13 @@ public class WordToHtmlConverter extends AbstractWordConverter StringBuilder tableRowStyle = new StringBuilder(); WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle ); + // index of current element in tableCellEdges[] + int currentEdgeIndex = 0; final int rowCells = tableRow.numCells(); for ( int c = 0; c < rowCells; c++ ) { TableCell tableCell = tableRow.getCell( c ); - if ( tableCell.isMerged() && !tableCell.isFirstMerged() ) - continue; - if ( tableCell.isVerticallyMerged() && !tableCell.isFirstVerticallyMerged() ) continue; @@ -600,43 +601,41 @@ public class WordToHtmlConverter extends AbstractWordConverter r == 0, r == tableRows - 1, c == 0, c == rowCells - 1, tableCellStyle ); - if ( tableCell.isFirstMerged() ) + int colSpan = 0; + int cellRightEdge = tableCell.getLeftEdge() + + tableCell.getWidth(); + while ( tableCellEdges[currentEdgeIndex] < cellRightEdge ) { - int count = 0; - for ( int c1 = c; c1 < rowCells; c1++ ) - { - TableCell nextCell = tableRow.getCell( c1 ); - if ( nextCell.isMerged() ) - count++; - if ( !nextCell.isMerged() ) - break; - } - tableCellElement.setAttribute( "colspan", "" + count ); + colSpan++; + currentEdgeIndex++; } - else + + if ( colSpan == 0 ) + continue; + + if ( colSpan != 1 ) { - if ( c == rowCells - 1 && c != maxColumns - 1 ) - { - tableCellElement.setAttribute( "colspan", "" - + ( maxColumns - c ) ); - } + tableCellElement.setAttribute( "colspan", + String.valueOf( colSpan ) ); } 
if ( tableCell.isFirstVerticallyMerged() ) { - int count = 0; - for ( int r1 = r; r1 < tableRows; r1++ ) + int count = 1; + for ( int r1 = r + 1; r1 < tableRows; r1++ ) { TableRow nextRow = table.getRow( r1 ); if ( nextRow.numCells() < c ) break; TableCell nextCell = nextRow.getCell( c ); - if ( nextCell.isVerticallyMerged() ) - count++; - if ( !nextCell.isVerticallyMerged() ) + if ( !nextCell.isVerticallyMerged() + || nextCell.isFirstVerticallyMerged() ) break; + count++; } - tableCellElement.setAttribute( "rowspan", "" + count ); + if ( count > 1 ) + tableCellElement.setAttribute( "rowspan", + String.valueOf( count ) ); } processParagraphes( hwpfDocument, tableCellElement, tableCell, diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java b/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java index a488fe339..c2215a0c0 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/AllHWPFTests.java @@ -19,7 +19,7 @@ package org.apache.poi.hwpf; import junit.framework.Test; import junit.framework.TestSuite; - +import org.apache.poi.hwpf.converter.AbstractWordUtilsTest; import org.apache.poi.hwpf.converter.TestWordToFoConverter; import org.apache.poi.hwpf.converter.TestWordToHtmlConverter; import org.apache.poi.hwpf.extractor.TestDifferentRoutes; @@ -72,6 +72,7 @@ public final class AllHWPFTests // org.apache.poi.hwpf.converter // suite.addTestSuite( TestWordToConverterSuite.class ); + suite.addTestSuite( AbstractWordUtilsTest.class ); suite.addTestSuite( TestWordToFoConverter.class ); suite.addTestSuite( TestWordToHtmlConverter.class ); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java new file mode 100644 index 000000000..4cb37b8de --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/AbstractWordUtilsTest.java @@ -0,0 
+1,53 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.converter; + +import org.apache.poi.hwpf.usermodel.Range; + +import junit.framework.TestCase; +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.usermodel.Table; + +/** + * Test cases for {@link AbstractWordUtils} + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public class AbstractWordUtilsTest extends TestCase +{ + /** + * Test case for {@link AbstractWordUtils#buildTableCellEdgesArray(Table)} + */ + public void testBuildTableCellEdgesArray() + { + HWPFDocument document = HWPFTestDataSamples + .openSampleFile( "table-merges.doc" ); + final Range range = document.getRange(); + Table table = range.getTable( range.getParagraph( 0 ) ); + + int[] result = AbstractWordUtils.buildTableCellEdgesArray( table ); + assertEquals( 6, result.length ); + + assertEquals( 0000, result[0] ); + assertEquals( 1062, result[1] ); + assertEquals( 5738, result[2] ); + assertEquals( 6872, result[3] ); + assertEquals( 8148, result[4] ); + assertEquals( 9302, 
result[5] ); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java index 7dbe0d06a..b6b70fcc0 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/converter/TestWordToHtmlConverter.java @@ -190,6 +190,14 @@ public class TestWordToHtmlConverter extends TestCase getHtmlText( "innertable.doc" ); } + public void testTableMerges() throws Exception + { + String result = getHtmlText( "table-merges.doc" ); + + assertContains( result, "" ); + assertContains( result, "" ); + } + public void testO_kurs_doc() throws Exception { getHtmlText( "o_kurs.doc" ); diff --git a/test-data/document/table-merges.doc b/test-data/document/table-merges.doc new file mode 100644 index 0000000000000000000000000000000000000000..77822acd1633822446804e6a5bbb589b57200dd3 GIT binary patch literal 11264 zcmeHNTWpj?6h41-Wv>Uw)Q$ZxEESXYnaepd zXYTXwxu1$Jp8TThH;JRQVv~tTzSu4C2ErvKUnG))Fykg7k%$?MV3zJ93*3m+m3^qt ziMas#JP&XJ`9J|M3z!Ye0SbY+0EU1R11w9-i-lt%v)+ARfn5^9Z&X~eMTX#qMM?I!Cs5-SxHDQQzC#!YKO1I&| zfU6h(N@)+;WGApGxKKs{ixjafV91W3lA(Zd91P}42hyGnG^tSl65`1(zUC{Dlj29h zBJoM5j7SI8)8n#AcEj($>e>xA40lAjqz|Ef>D4J=xK{GZM8%cJ(-@Y*ZR@4Zy4D%AokN!B{;Gq!9=RKJO-ms2sFpb!HFfHaTJ11cnEmpU!>G|R{P5Y{<){yP z-Ge$cqSf0WB^T=xOGkCC)A@H9>yY1xnr%=G$l0t`L+3hhjk;YPRg2PI)q=4@3G=lC zkrtHRX>=MbC|x{-b!n4Z%g(~8XmcAKZi98OOQXX$;Naq`S49ZtBC2O0{j>>7wHM&& z4JMQ_pe7b)p60-6;wnta;qi^ch#k6E)M+gp(-qT+37kC`z=P6*;p~#f;lBW%$B98b z*hLy4$DEQ0#9(?HLzzyL96+}Es?@am^aSeAWmw}AiB!bWlGV7~o|>F}Xzh^flq0&O zPrwc?X@lE^I)xApAiWN0WJ4*GT1dm|>_*^PjzI9$|^-0#QjVd)c)78U|Nn<$P z#D4G{I<(KAISbER{gggQN9cu>i>skSN?sWwXCfL)jUAflYSM=H24taRRnG3Ap#{loPOm7A_ z{sI8cx!nNoy2Ahm>WcvHysrbi_r43zIG1ij&#+vwclU&!={e@wJJ8YHKiD4*J&!n^ zZ<&w!ngRL!jc?B)vksF7HPg9NyT3JX*3e<{8Bn73=bZoX{0B9Qi{5!3?SJIj=d2IU zbZk3O1pM)dVYIpOX$I%la6rtacLMGxg|ew5G|(~RjmGLE)5%w*Qm6N=XpFhDnCbC; 
zBr;d;P}I#ZcP3jb9G{mq7x{{2wD?hOYL|I=14%|dBVPkD*`b=f>x6Huc{}*hnBHWv z5~bI$X&F!tHt|l2{xy)pTUC~3rUf`oI1cYM<;aD5tum`GYk{l<{#O?8;WXq2@?;aR z8Q21}0Q|KA0bnc6KS3R{O}jh|WobrO0B1Y}IC$0roIokS7lfriB~S&d1o)!R0PtLR z{rW$XG$ZWrfHXo41xl3U;rU4i=J2)1?EeCUga}XyBG$JjXIEHYl#Mw8RI~VtwDNm%SqZo&hv6$s=5BMu1m~0Ze_w& z=CGCNv@-jx%v}s8ZB?56ii>VZN zcr^mI_4`zD!>MOYV|>`Trh5Rc>#G5-?bG-B##p_zx8BNR5T+&9#9O|MFB7-VpQzn1 zxt!yCp@~^+*vh2Q>iF^V;P$`rNX6-wYh0(l_bod;q$+{sV?)Xf)M0h29qThZ!|dC7 zaMg8E=pmD(8DjxVDgB-~V;YogNLm)SoB#jef*$`Lf~hKlbBZ)`83!4-ck+|ru(ab7 z;4MdqXoe)N6DKZ?VIHISABH5^CzDN02J{Z#b6uuX0PWPvHl?bc^)+9rYh@2UB6en6 zhr*bC>L#Vq4%YPt?17|Qe11Iw`}3nM|NFHSA0KW+^&smQVZ|Lb4GWiLkn literal 0 HcmV?d00001