output document properties to html and pdf

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143314 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-06 09:37:32 +00:00
parent fc3c8fcf10
commit 67ff6e3513
8 changed files with 271 additions and 3 deletions

View File

@ -30,6 +30,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
@ -347,6 +348,23 @@ public class ExcelToHtmlConverter
return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0;
}
/**
 * Copies the workbook's summary properties into the HTML document.
 * <p>
 * Each property is forwarded only when {@code ExcelToHtmlUtils.isNotEmpty}
 * accepts it: the title becomes the document title, while author, keywords
 * and comments are added as author, keywords and description entries.
 *
 * @param summaryInformation
 *            summary properties to export; dereferenced unconditionally, so
 *            it must not be {@code null}
 */
protected void processDocumentInformation(
        SummaryInformation summaryInformation )
{
    final String title = summaryInformation.getTitle();
    if ( ExcelToHtmlUtils.isNotEmpty( title ) )
    {
        htmlDocumentFacade.setTitle( title );
    }

    final String author = summaryInformation.getAuthor();
    if ( ExcelToHtmlUtils.isNotEmpty( author ) )
    {
        htmlDocumentFacade.addAuthor( author );
    }

    final String keywords = summaryInformation.getKeywords();
    if ( ExcelToHtmlUtils.isNotEmpty( keywords ) )
    {
        htmlDocumentFacade.addKeywords( keywords );
    }

    final String comments = summaryInformation.getComments();
    if ( ExcelToHtmlUtils.isNotEmpty( comments ) )
    {
        htmlDocumentFacade.addDescription( comments );
    }
}
protected boolean processRow( HSSFRow row, Element tableRowElement )
{
boolean emptyRow = true;
@ -451,6 +469,13 @@ public class ExcelToHtmlConverter
public void processWorkbook( HSSFWorkbook workbook )
{
final SummaryInformation summaryInformation = workbook
.getSummaryInformation();
if ( summaryInformation != null )
{
processDocumentInformation( summaryInformation );
}
for ( short i = 0; i < workbook.getNumCellStyles(); i++ )
{
HSSFCellStyle cellStyle = workbook.getCellStyleAt( i );

View File

@ -20,6 +20,7 @@ import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.model.ListFormatOverride;
@ -127,6 +128,13 @@ public abstract class AbstractWordConverter
public void processDocument( HWPFDocumentCore wordDocument )
{
final SummaryInformation summaryInformation = wordDocument
.getSummaryInformation();
if ( summaryInformation != null )
{
processDocumentInformation( summaryInformation );
}
final Range range = wordDocument.getRange();
for ( int s = 0; s < range.numSections(); s++ )
{
@ -134,6 +142,9 @@ public abstract class AbstractWordConverter
}
}
/**
 * Hook for exporting document-level summary properties into the output
 * document. {@code processDocument} invokes it before any section is
 * processed, and only when the source document returned a non-null
 * summary information object.
 *
 * @param summaryInformation
 *            summary properties of the document being converted; never
 *            {@code null} when called from {@code processDocument}
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
protected void processField( HWPFDocumentCore wordDocument,
Element currentBlock, Paragraph paragraph, int currentTableLevel,
List<CharacterRun> characterRuns, int beginMark, int separatorMark,

View File

@ -18,14 +18,20 @@ package org.apache.poi.hwpf.converter;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
public class FoDocumentFacade
{
private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
protected final Element declarations;
protected final Document document;
protected final Element layoutMasterSet;
protected Element propertiesRoot;
protected final Element root;
public FoDocumentFacade( Document document )
@ -38,6 +44,9 @@ public class FoDocumentFacade
layoutMasterSet = document.createElementNS( NS_XSLFO,
"fo:layout-master-set" );
root.appendChild( layoutMasterSet );
declarations = document.createElementNS( NS_XSLFO, "fo:declarations" );
root.appendChild( declarations );
}
public Element addFlowToPageSequence( final Element pageSequence,
@ -198,4 +207,116 @@ public class FoDocumentFacade
return document;
}
/**
 * Returns the {@code rdf:Description} element that holds the document
 * metadata properties, building it on first use.
 * <p>
 * On the first call the chain {@code x:xmpmeta} / {@code rdf:RDF} /
 * {@code rdf:Description} is created and attached under the
 * {@code fo:declarations} element; later calls return the cached element.
 *
 * @return the element under which metadata properties are stored
 */
protected Element getOrCreatePropertiesRoot()
{
    if ( propertiesRoot == null )
    {
        // See http://xmlgraphics.apache.org/fop/0.95/metadata.html
        final Element xmpMetaElement = document.createElementNS(
                "adobe:ns:meta", "x:xmpmeta" );
        declarations.appendChild( xmpMetaElement );

        final Element rdfElement = document.createElementNS( NS_RDF,
                "rdf:RDF" );
        xmpMetaElement.appendChild( rdfElement );

        propertiesRoot = document.createElementNS( NS_RDF,
                "rdf:Description" );
        rdfElement.appendChild( propertiesRoot );
    }
    return propertiesRoot;
}
/**
 * Stores the given value as the Dublin Core {@code creator} metadata
 * property by delegating to {@code setDublinCoreProperty}.
 *
 * @param value
 *            creator (author) text to store
 */
public void setCreator( String value )
{
setDublinCoreProperty( "creator", value );
}
/**
 * Stores the given value as the XMP {@code CreatorTool} metadata property
 * by delegating to {@code setXmpProperty}.
 *
 * @param value
 *            creator tool text to store
 */
public void setCreatorTool( String value )
{
setXmpProperty( "CreatorTool", value );
}
/**
 * Stores the given value as the Dublin Core {@code description} metadata
 * property. When a property element is produced, it is additionally
 * tagged with {@code xml:lang="x-default"}.
 *
 * @param value
 *            description text to store
 */
public void setDescription( String value )
{
    final Element description = setDublinCoreProperty( "description",
            value );
    if ( description == null )
    {
        // no element was stored, so there is nothing to annotate
        return;
    }

    description.setAttributeNS( "http://www.w3.org/XML/1998/namespace",
            "xml:lang", "x-default" );
}
/**
 * Stores a metadata property in the Dublin Core namespace
 * ({@code http://purl.org/dc/elements/1.1/}) using the {@code dc} prefix.
 * The work is delegated to {@code setProperty}.
 *
 * @param name
 *            local name of the property, without prefix
 * @param value
 *            text content for the property
 * @return whatever {@code setProperty} returns: the newly created element,
 *         or {@code null} when no element was created
 */
public Element setDublinCoreProperty( String name, String value )
{
return setProperty( "http://purl.org/dc/elements/1.1/", "dc", name,
value );
}
/**
 * Stores the given value as the {@code Keywords} metadata property in the
 * PDF namespace by delegating to {@code setPdfProperty}.
 *
 * @param value
 *            keywords text to store
 */
public void setKeywords( String value )
{
setPdfProperty( "Keywords", value );
}
/**
 * Stores a metadata property in the Adobe PDF namespace
 * ({@code http://ns.adobe.com/pdf/1.3/}) using the {@code pdf} prefix.
 * The work is delegated to {@code setProperty}.
 *
 * @param name
 *            local name of the property, without prefix
 * @param value
 *            text content for the property
 * @return whatever {@code setProperty} returns: the newly created element,
 *         or {@code null} when no element was created
 */
public Element setPdfProperty( String name, String value )
{
return setProperty( "http://ns.adobe.com/pdf/1.3/", "pdf", name, value );
}
/**
 * Stores the given value as the {@code Producer} metadata property in the
 * PDF namespace by delegating to {@code setPdfProperty}.
 *
 * @param value
 *            producer text to store
 */
public void setProducer( String value )
{
setPdfProperty( "Producer", value );
}
/**
 * Replaces a metadata property under the properties root element.
 * <p>
 * The first existing child element whose namespace URI and local name
 * match the arguments is removed. A new element named
 * {@code prefix:name} is then appended with the value as its text
 * content, but only when {@code WordToFoUtils.isNotEmpty} accepts the
 * value.
 *
 * @param namespace
 *            namespace URI of the property element
 * @param prefix
 *            namespace prefix used for the qualified element name
 * @param name
 *            local name of the property element
 * @param value
 *            text content for the property
 * @return the newly appended element, or {@code null} when no element was
 *         created
 */
protected Element setProperty( String namespace, String prefix,
        String name, String value )
{
    final Element container = getOrCreatePropertiesRoot();

    // Drop the first previously stored element with the same identity.
    final NodeList children = container.getChildNodes();
    for ( int index = 0; index < children.getLength(); index++ )
    {
        final Node candidate = children.item( index );
        if ( candidate.getNodeType() != Node.ELEMENT_NODE )
        {
            continue;
        }

        final Element existing = (Element) candidate;
        final boolean sameProperty = WordToFoUtils.isNotEmpty( existing
                .getNamespaceURI() )
                && WordToFoUtils.isNotEmpty( existing.getLocalName() )
                && namespace.equals( existing.getNamespaceURI() )
                && name.equals( existing.getLocalName() );
        if ( sameProperty )
        {
            container.removeChild( existing );
            break;
        }
    }

    if ( !WordToFoUtils.isNotEmpty( value ) )
    {
        return null;
    }

    final Element created = document.createElementNS( namespace, prefix
            + ":" + name );
    created.appendChild( document.createTextNode( value ) );
    container.appendChild( created );
    return created;
}
/**
 * Stores the given value as the Dublin Core {@code subject} metadata
 * property by delegating to {@code setDublinCoreProperty}.
 * <p>
 * This method previously wrote to the {@code title} property, so calling
 * it replaced whatever {@code setTitle} had stored. It now uses its own
 * {@code subject} element and leaves the title untouched.
 * <p>
 * NOTE(review): confirm how the downstream XSL-FO processor maps
 * {@code dc:subject} into the generated document's metadata.
 *
 * @param value
 *            subject text to store
 */
public void setSubject( String value )
{
    setDublinCoreProperty( "subject", value );
}
/**
 * Stores the given value as the Dublin Core {@code title} metadata
 * property by delegating to {@code setDublinCoreProperty}.
 *
 * @param value
 *            title text to store
 */
public void setTitle( String value )
{
setDublinCoreProperty( "title", value );
}
/**
 * Stores a metadata property in the XMP basic namespace
 * ({@code http://ns.adobe.com/xap/1.0/}) using the {@code xmp} prefix.
 * The work is delegated to {@code setProperty}.
 *
 * @param name
 *            local name of the property, without prefix
 * @param value
 *            text content for the property
 * @return whatever {@code setProperty} returns: the newly created element,
 *         or {@code null} when no element was created
 */
public Element setXmpProperty( String name, String value )
{
return setProperty( "http://ns.adobe.com/xap/1.0/", "xmp", name, value );
}
}

View File

@ -28,6 +28,9 @@ public class HtmlDocumentFacade
protected final Element head;
protected final Element html;
protected Element title;
protected Text titleText;
public HtmlDocumentFacade( Document document )
{
this.document = document;
@ -42,6 +45,29 @@ public class HtmlDocumentFacade
html.appendChild( body );
}
/**
 * Appends a {@code meta} element named {@code author} with the given
 * content to the document head, via {@code addMeta}. Each call appends a
 * new element.
 *
 * @param value
 *            author text for the {@code content} attribute
 */
public void addAuthor( String value )
{
addMeta( "author", value );
}
/**
 * Appends a {@code meta} element named {@code description} with the given
 * content to the document head, via {@code addMeta}. Each call appends a
 * new element.
 *
 * @param value
 *            description text for the {@code content} attribute
 */
public void addDescription( String value )
{
addMeta( "description", value );
}
/**
 * Appends a {@code meta} element named {@code keywords} with the given
 * content to the document head, via {@code addMeta}. Each call appends a
 * new element.
 *
 * @param value
 *            keywords text for the {@code content} attribute
 */
public void addKeywords( String value )
{
addMeta( "keywords", value );
}
/**
 * Creates a {@code meta} element carrying the given {@code name} and
 * {@code content} attributes and appends it to the document head.
 * Existing {@code meta} elements are left untouched.
 *
 * @param name
 *            value for the {@code name} attribute
 * @param value
 *            value for the {@code content} attribute
 */
public void addMeta( final String name, String value )
{
    final Element metaElement = document.createElement( "meta" );
    metaElement.setAttribute( "name", name );
    metaElement.setAttribute( "content", value );

    head.appendChild( metaElement );
}
public Element createHeader1()
{
return document.createElement( "h1" );
@ -119,4 +145,31 @@ public class HtmlDocumentFacade
return head;
}
/**
 * Returns the current document title.
 *
 * @return the text content of the title text node, or {@code null} when
 *         no {@code title} element is present
 */
public String getTitle()
{
    return title == null ? null : titleText.getTextContent();
}
/**
 * Sets, replaces or clears the document title.
 * <p>
 * When {@code WordToHtmlUtils.isEmpty} reports the value as empty, any
 * existing {@code title} element is removed from the head and no new one
 * is created. Previously the removal was immediately followed by creation
 * of a fresh, empty {@code title} element, so the title could never be
 * cleared. Otherwise the {@code title} element is created on first use, or
 * the text of the existing one is updated in place.
 *
 * @param titleText
 *            new title text; an empty value removes the title
 */
public void setTitle( String titleText )
{
    if ( WordToHtmlUtils.isEmpty( titleText ) )
    {
        if ( this.title != null )
        {
            this.head.removeChild( this.title );
            this.title = null;
            this.titleText = null;
        }
        // Nothing to show: do not fall through and re-create the element.
        return;
    }

    if ( this.title == null )
    {
        this.title = document.createElement( "title" );
        this.titleText = document.createTextNode( titleText );
        this.title.appendChild( this.titleText );
        this.head.appendChild( this.title );
        return;
    }

    this.titleText.setData( titleText );
}
}

View File

@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.usermodel.CharacterRun;
@ -230,6 +231,23 @@ public class WordToFoConverter extends AbstractWordConverter
inline.appendChild( textNode );
}
/**
 * Copies the Word document's summary properties into the XSL-FO document
 * metadata.
 * <p>
 * Each property is forwarded only when {@code WordToHtmlUtils.isNotEmpty}
 * accepts it: title, author, keywords and comments are stored as title,
 * creator, keywords and description respectively.
 *
 * @param summaryInformation
 *            summary properties to export; dereferenced unconditionally, so
 *            it must not be {@code null}
 */
@Override
protected void processDocumentInformation(
        SummaryInformation summaryInformation )
{
    final String title = summaryInformation.getTitle();
    if ( WordToHtmlUtils.isNotEmpty( title ) )
    {
        foDocumentFacade.setTitle( title );
    }

    final String author = summaryInformation.getAuthor();
    if ( WordToHtmlUtils.isNotEmpty( author ) )
    {
        foDocumentFacade.setCreator( author );
    }

    final String keywords = summaryInformation.getKeywords();
    if ( WordToHtmlUtils.isNotEmpty( keywords ) )
    {
        foDocumentFacade.setKeywords( keywords );
    }

    final String comments = summaryInformation.getComments();
    if ( WordToHtmlUtils.isNotEmpty( comments ) )
    {
        foDocumentFacade.setDescription( comments );
    }
}
protected void processHyperlink( HWPFDocumentCore hwpfDocument,
Element currentBlock, Paragraph paragraph,
List<CharacterRun> characterRuns, int currentTableLevel,
@ -368,7 +386,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
"xsl-region-body" );
processSectionParagraphes( wordDocument, flow, section, Integer.MIN_VALUE );
processSectionParagraphes( wordDocument, flow, section,
Integer.MIN_VALUE );
}
protected void processTable( HWPFDocumentCore wordDocument, Element flow,

View File

@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.usermodel.CharacterRun;
@ -207,6 +208,24 @@ public class WordToHtmlConverter extends AbstractWordConverter
span.appendChild( textNode );
}
/**
 * Copies the Word document's summary properties into the HTML document.
 * <p>
 * Each property is forwarded only when {@code WordToHtmlUtils.isNotEmpty}
 * accepts it: the title becomes the document title, while author, keywords
 * and comments are added as author, keywords and description entries.
 *
 * @param summaryInformation
 *            summary properties to export; dereferenced unconditionally, so
 *            it must not be {@code null}
 */
@Override
protected void processDocumentInformation(
        SummaryInformation summaryInformation )
{
    final String title = summaryInformation.getTitle();
    if ( WordToHtmlUtils.isNotEmpty( title ) )
    {
        htmlDocumentFacade.setTitle( title );
    }

    final String author = summaryInformation.getAuthor();
    if ( WordToHtmlUtils.isNotEmpty( author ) )
    {
        htmlDocumentFacade.addAuthor( author );
    }

    final String keywords = summaryInformation.getKeywords();
    if ( WordToHtmlUtils.isNotEmpty( keywords ) )
    {
        htmlDocumentFacade.addKeywords( keywords );
    }

    final String comments = summaryInformation.getComments();
    if ( WordToHtmlUtils.isNotEmpty( comments ) )
    {
        htmlDocumentFacade.addDescription( comments );
    }
}
protected void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Paragraph paragraph,
List<CharacterRun> characterRuns, int currentTableLevel,
@ -326,7 +345,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
div.setAttribute( "style", getSectionStyle( section ) );
htmlDocumentFacade.body.appendChild( div );
processSectionParagraphes( wordDocument, div, section, Integer.MIN_VALUE );
processSectionParagraphes( wordDocument, div, section,
Integer.MIN_VALUE );
}
@Override

View File

@ -62,6 +62,16 @@ public class TestWordToFoConverter extends TestCase
return result;
}
/**
 * Converts {@code documentProperties.doc} to XSL-FO text and checks that
 * the output contains the expected title and keywords metadata elements.
 */
public void testDocumentProperties() throws Exception
{
    final String expectedTitle = "<dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">This is document title</dc:title>";
    final String expectedKeywords = "<pdf:Keywords xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">This is document keywords</pdf:Keywords>";

    final String foText = getFoText( "documentProperties.doc" );

    assertTrue( foText.contains( expectedTitle ) );
    assertTrue( foText.contains( expectedKeywords ) );
}
public void testEquation() throws Exception
{
final String sampleFileName = "equation.doc";

View File

@ -30,7 +30,7 @@ import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
/**
* Test cases for {@link WordToFoConverter}
* Test cases for {@link WordToHtmlConverter}
*
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/
@ -81,6 +81,15 @@ public class TestWordToHtmlConverter extends TestCase
assertTrue( result.contains( "<table>" ) );
}
/**
 * Converts {@code documentProperties.doc} to HTML text and checks that the
 * output contains the expected title element and keywords meta element.
 */
public void testDocumentProperties() throws Exception
{
    final String expectedTitle = "<title>This is document title</title>";
    final String expectedKeywords = "<meta content=\"This is document keywords\" name=\"keywords\">";

    final String htmlText = getHtmlText( "documentProperties.doc" );

    assertTrue( htmlText.contains( expectedTitle ) );
    assertTrue( htmlText.contains( expectedKeywords ) );
}
public void testEquation() throws Exception
{
String result = getHtmlText( "equation.doc" );