bug 51351: more progress with WordToFoExtractor: support for hyperlinks, common fields and code cleanup

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1137673 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yegor Kozlov 2011-06-20 15:56:28 +00:00
parent 02c3df2e5f
commit 6e90e3bc06
6 changed files with 867 additions and 493 deletions

View File

@ -0,0 +1,206 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hwpf.extractor;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
/**
 * Base class for extractors that build an XSL-FO tree inside a DOM
 * {@link Document}. The constructor creates the <tt>fo:root</tt> element and
 * its <tt>fo:layout-master-set</tt> child; the remaining methods are small
 * factories for individual formatting objects. Methods named <tt>add...</tt>
 * attach the new element to a parent, methods named <tt>create...</tt> return
 * a detached element.
 */
public abstract class AbstractToFoExtractor
{
    private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";

    protected final Document document;
    protected final Element layoutMasterSet;
    protected final Element root;

    /**
     * Creates <tt>fo:root</tt> as a child of the given document and
     * <tt>fo:layout-master-set</tt> as a child of that root.
     *
     * @param document
     *            DOM document that receives the generated tree
     */
    public AbstractToFoExtractor( Document document )
    {
        this.document = document;

        this.root = newFoElement( "fo:root" );
        document.appendChild( this.root );

        this.layoutMasterSet = newFoElement( "fo:layout-master-set" );
        this.root.appendChild( this.layoutMasterSet );
    }

    /**
     * Creates a detached element in the XSL-FO namespace.
     *
     * @param qualifiedName
     *            prefixed element name, e.g. <tt>fo:block</tt>
     */
    private Element newFoElement( String qualifiedName )
    {
        return document.createElementNS( NS_XSLFO, qualifiedName );
    }

    /**
     * Appends a new <tt>fo:flow</tt> with the given <tt>flow-name</tt> to the
     * page sequence and returns it.
     */
    protected Element addFlowToPageSequence( final Element pageSequence,
            String flowName )
    {
        final Element flowElement = newFoElement( "fo:flow" );
        flowElement.setAttribute( "flow-name", flowName );
        pageSequence.appendChild( flowElement );
        return flowElement;
    }

    /**
     * Appends the element produced by {@link #createListItem()} to the list
     * block and returns it.
     */
    protected Element addListItem( Element listBlock )
    {
        final Element item = createListItem();
        listBlock.appendChild( item );
        return item;
    }

    /**
     * Appends the element produced by {@link #createListItemBody()} to the
     * list item and returns it.
     */
    protected Element addListItemBody( Element listItem )
    {
        final Element body = createListItemBody();
        listItem.appendChild( body );
        return body;
    }

    /**
     * Appends the element produced by {@link #createListItemLabel(String)} to
     * the list item and returns it.
     */
    protected Element addListItemLabel( Element listItem, String text )
    {
        final Element label = createListItemLabel( text );
        listItem.appendChild( label );
        return label;
    }

    /**
     * Appends a new <tt>fo:page-sequence</tt> to the root, with its
     * <tt>master-reference</tt> attribute set to the given value.
     */
    protected Element addPageSequence( String pageMaster )
    {
        final Element sequence = newFoElement( "fo:page-sequence" );
        sequence.setAttribute( "master-reference", pageMaster );
        root.appendChild( sequence );
        return sequence;
    }

    /**
     * Appends a new <tt>fo:region-body</tt> to the given page master and
     * returns it.
     */
    protected Element addRegionBody( Element pageMaster )
    {
        final Element body = newFoElement( "fo:region-body" );
        pageMaster.appendChild( body );
        return body;
    }

    /**
     * Appends a new <tt>fo:simple-page-master</tt> to the layout master set,
     * with its <tt>master-name</tt> attribute set to the given value.
     */
    protected Element addSimplePageMaster( String masterName )
    {
        final Element master = newFoElement( "fo:simple-page-master" );
        master.setAttribute( "master-name", masterName );
        layoutMasterSet.appendChild( master );
        return master;
    }

    /**
     * Appends a new <tt>fo:table</tt> to the given element and returns it.
     */
    protected Element addTable( Element flow )
    {
        final Element tableElement = newFoElement( "fo:table" );
        flow.appendChild( tableElement );
        return tableElement;
    }

    /**
     * Returns a detached <tt>fo:basic-link</tt> whose
     * <tt>external-destination</tt> attribute is the given value.
     */
    protected Element createBasicLinkExternal( String externalDestination )
    {
        final Element link = newFoElement( "fo:basic-link" );
        link.setAttribute( "external-destination", externalDestination );
        return link;
    }

    /**
     * Returns a detached <tt>fo:basic-link</tt> whose
     * <tt>internal-destination</tt> attribute is the given value.
     */
    protected Element createBasicLinkInternal( String internalDestination )
    {
        final Element link = newFoElement( "fo:basic-link" );
        link.setAttribute( "internal-destination", internalDestination );
        return link;
    }

    /** Returns a detached <tt>fo:block</tt>. */
    protected Element createBlock()
    {
        return newFoElement( "fo:block" );
    }

    /**
     * Returns a detached <tt>fo:external-graphic</tt> whose <tt>src</tt>
     * attribute is the given source wrapped as <tt>url('...')</tt>.
     */
    protected Element createExternalGraphic( String source )
    {
        final Element graphic = newFoElement( "fo:external-graphic" );
        final String srcValue = "url('" + source + "')";
        graphic.setAttribute( "src", srcValue );
        return graphic;
    }

    /** Returns a detached <tt>fo:inline</tt>. */
    protected Element createInline()
    {
        return newFoElement( "fo:inline" );
    }

    /** Returns a detached <tt>fo:leader</tt>. */
    protected Element createLeader()
    {
        return newFoElement( "fo:leader" );
    }

    /** Returns a detached <tt>fo:list-block</tt>. */
    protected Element createListBlock()
    {
        return newFoElement( "fo:list-block" );
    }

    /** Returns a detached <tt>fo:list-item</tt>. */
    protected Element createListItem()
    {
        return newFoElement( "fo:list-item" );
    }

    /** Returns a detached <tt>fo:list-item-body</tt>. */
    protected Element createListItemBody()
    {
        return newFoElement( "fo:list-item-body" );
    }

    /**
     * Returns a detached <tt>fo:list-item-label</tt> containing a single
     * block (from {@link #createBlock()}) that holds the given text.
     */
    protected Element createListItemLabel( String text )
    {
        final Element label = newFoElement( "fo:list-item-label" );
        final Element labelBlock = createBlock();
        labelBlock.appendChild( document.createTextNode( text ) );
        label.appendChild( labelBlock );
        return label;
    }

    /** Returns a detached <tt>fo:table-body</tt>. */
    protected Element createTableBody()
    {
        return newFoElement( "fo:table-body" );
    }

    /** Returns a detached <tt>fo:table-cell</tt>. */
    protected Element createTableCell()
    {
        return newFoElement( "fo:table-cell" );
    }

    /** Returns a detached <tt>fo:table-header</tt>. */
    protected Element createTableHeader()
    {
        return newFoElement( "fo:table-header" );
    }

    /** Returns a detached <tt>fo:table-row</tt>. */
    protected Element createTableRow()
    {
        return newFoElement( "fo:table-row" );
    }

    /** Returns a detached text node holding the given data. */
    protected Text createText( String data )
    {
        return document.createTextNode( data );
    }

    /** Returns the document passed to the constructor. */
    public Document getDocument()
    {
        return document;
    }
}

View File

@ -0,0 +1,95 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ====================================================================
*/
package org.apache.poi.hwpf.extractor;
import java.io.InputStream;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
/**
 * Test cases for {@link WordToFoExtractor}
 *
 * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
 */
public class TestWordToFoExtractor extends TestCase
{
    /**
     * Fails with a message containing the whole generated text when the
     * expected fragment is missing, so a failure can be diagnosed without
     * printing every result to standard output.
     *
     * @param foText
     *            serialized XSL-FO produced by {@link #getFoText(String)}
     * @param expected
     *            fragment that must occur in <tt>foText</tt>
     */
    private static void assertContains( String foText, String expected )
    {
        assertTrue( "Expected to find <" + expected
                + "> in generated XSL-FO:\n" + foText,
                foText.contains( expected ) );
    }

    /**
     * Runs {@link WordToFoExtractor} over a sample Word document and
     * serializes the resulting DOM to an indented XML string.
     *
     * @param sampleFileName
     *            name of the sample document resolved through
     *            {@link POIDataSamples#getDocumentInstance()}
     * @return the generated XSL-FO as text
     * @throws Exception
     *             if the sample cannot be read, converted or serialized
     */
    private static String getFoText( final String sampleFileName )
            throws Exception
    {
        InputStream sampleStream = POIDataSamples.getDocumentInstance()
                .openResourceAsStream( sampleFileName );
        try
        {
            HWPFDocument hwpfDocument = new HWPFDocument( sampleStream );

            WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder()
                            .newDocument() );
            wordToFoExtractor.processDocument( hwpfDocument );

            StringWriter stringWriter = new StringWriter();
            Transformer transformer = TransformerFactory.newInstance()
                    .newTransformer();
            transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
            transformer.transform(
                    new DOMSource( wordToFoExtractor.getDocument() ),
                    new StreamResult( stringWriter ) );

            return stringWriter.toString();
        }
        finally
        {
            // Closed only after the whole conversion so the fix does not
            // depend on when HWPFDocument finishes reading the stream.
            sampleStream.close();
        }
    }

    public void testHyperlink() throws Exception
    {
        final String sampleFileName = "hyperlink.doc";
        String result = getFoText( sampleFileName );

        assertContains( result,
                "<fo:basic-link external-destination=\"http://testuri.org/\">" );
        assertContains( result, "Hyperlink text" );
    }

    public void testEquation() throws Exception
    {
        final String sampleFileName = "equation.doc";
        String result = getFoText( sampleFileName );

        assertContains( result, "<!--Image link to '0.emf' can be here-->" );
    }

    public void testPageref() throws Exception
    {
        final String sampleFileName = "pageref.doc";
        String result = getFoText( sampleFileName );

        assertContains( result,
                "<fo:basic-link internal-destination=\"userref\">" );
        assertContains( result, "1" );
    }
}

Binary file not shown.

Binary file not shown.

Binary file not shown.