output document properties to html and pdf

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143314 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-06 09:37:32 +00:00
parent fc3c8fcf10
commit 67ff6e3513
8 changed files with 271 additions and 3 deletions

View File

@ -30,6 +30,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource; import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle; import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormatter; import org.apache.poi.hssf.usermodel.HSSFDataFormatter;
@ -347,6 +348,23 @@ public class ExcelToHtmlConverter
return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0; return ExcelToHtmlUtils.isEmpty( value ) && cellStyleIndex == 0;
} }
/**
 * Copies the non-empty summary properties of the workbook into the HTML
 * document: the title becomes the document title, while author, keywords
 * and comments are added through the facade's author, keywords and
 * description helpers respectively.
 *
 * @param summaryInformation
 *            summary information of the workbook being converted; the
 *            caller is expected to pass a non-null value
 */
protected void processDocumentInformation(
        SummaryInformation summaryInformation ) {
    final String docTitle = summaryInformation.getTitle();
    if ( ExcelToHtmlUtils.isNotEmpty( docTitle ) ) {
        htmlDocumentFacade.setTitle( docTitle );
    }

    final String docAuthor = summaryInformation.getAuthor();
    if ( ExcelToHtmlUtils.isNotEmpty( docAuthor ) ) {
        htmlDocumentFacade.addAuthor( docAuthor );
    }

    final String docKeywords = summaryInformation.getKeywords();
    if ( ExcelToHtmlUtils.isNotEmpty( docKeywords ) ) {
        htmlDocumentFacade.addKeywords( docKeywords );
    }

    // Document comments are exposed as the HTML description
    final String docComments = summaryInformation.getComments();
    if ( ExcelToHtmlUtils.isNotEmpty( docComments ) ) {
        htmlDocumentFacade.addDescription( docComments );
    }
}
protected boolean processRow( HSSFRow row, Element tableRowElement ) protected boolean processRow( HSSFRow row, Element tableRowElement )
{ {
boolean emptyRow = true; boolean emptyRow = true;
@ -451,6 +469,13 @@ public class ExcelToHtmlConverter
public void processWorkbook( HSSFWorkbook workbook ) public void processWorkbook( HSSFWorkbook workbook )
{ {
final SummaryInformation summaryInformation = workbook
.getSummaryInformation();
if ( summaryInformation != null )
{
processDocumentInformation( summaryInformation );
}
for ( short i = 0; i < workbook.getNumCellStyles(); i++ ) for ( short i = 0; i < workbook.getNumCellStyles(); i++ )
{ {
HSSFCellStyle cellStyle = workbook.getCellStyleAt( i ); HSSFCellStyle cellStyle = workbook.getCellStyleAt( i );

View File

@ -20,6 +20,7 @@ import java.util.List;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.model.ListFormatOverride; import org.apache.poi.hwpf.model.ListFormatOverride;
@ -127,6 +128,13 @@ public abstract class AbstractWordConverter
public void processDocument( HWPFDocumentCore wordDocument ) public void processDocument( HWPFDocumentCore wordDocument )
{ {
final SummaryInformation summaryInformation = wordDocument
.getSummaryInformation();
if ( summaryInformation != null )
{
processDocumentInformation( summaryInformation );
}
final Range range = wordDocument.getRange(); final Range range = wordDocument.getRange();
for ( int s = 0; s < range.numSections(); s++ ) for ( int s = 0; s < range.numSections(); s++ )
{ {
@ -134,6 +142,9 @@ public abstract class AbstractWordConverter
} }
} }
/**
 * Hook for exporting document-level properties to the output document.
 * Called from {@link #processDocument(HWPFDocumentCore)} before any
 * section is processed, and only when the document's summary information
 * is not <tt>null</tt>.
 *
 * @param summaryInformation
 *            summary information of the Word document being converted
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
protected void processField( HWPFDocumentCore wordDocument, protected void processField( HWPFDocumentCore wordDocument,
Element currentBlock, Paragraph paragraph, int currentTableLevel, Element currentBlock, Paragraph paragraph, int currentTableLevel,
List<CharacterRun> characterRuns, int beginMark, int separatorMark, List<CharacterRun> characterRuns, int beginMark, int separatorMark,

View File

@ -18,14 +18,20 @@ package org.apache.poi.hwpf.converter;
import org.w3c.dom.Document; import org.w3c.dom.Document;
import org.w3c.dom.Element; import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text; import org.w3c.dom.Text;
public class FoDocumentFacade public class FoDocumentFacade
{ {
private static final String NS_RDF = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format"; private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
protected final Element declarations;
protected final Document document; protected final Document document;
protected final Element layoutMasterSet; protected final Element layoutMasterSet;
protected Element propertiesRoot;
protected final Element root; protected final Element root;
public FoDocumentFacade( Document document ) public FoDocumentFacade( Document document )
@ -38,6 +44,9 @@ public class FoDocumentFacade
layoutMasterSet = document.createElementNS( NS_XSLFO, layoutMasterSet = document.createElementNS( NS_XSLFO,
"fo:layout-master-set" ); "fo:layout-master-set" );
root.appendChild( layoutMasterSet ); root.appendChild( layoutMasterSet );
declarations = document.createElementNS( NS_XSLFO, "fo:declarations" );
root.appendChild( declarations );
} }
public Element addFlowToPageSequence( final Element pageSequence, public Element addFlowToPageSequence( final Element pageSequence,
@ -198,4 +207,116 @@ public class FoDocumentFacade
return document; return document;
} }
/**
 * Returns the <tt>rdf:Description</tt> element that holds the document
 * properties, building the surrounding metadata structure on first use.
 * <p>
 * The first call appends an <tt>x:xmpmeta</tt> element to
 * <tt>declarations</tt>, nests an <tt>rdf:RDF</tt> element inside it and
 * an <tt>rdf:Description</tt> inside that; the latter is cached in
 * <tt>propertiesRoot</tt> and returned by every later call.
 *
 * @return the cached or newly created properties root element
 */
protected Element getOrCreatePropertiesRoot() {
    if ( propertiesRoot == null ) {
        // See http://xmlgraphics.apache.org/fop/0.95/metadata.html
        final Element metaWrapper = document.createElementNS(
                "adobe:ns:meta", "x:xmpmeta" );
        declarations.appendChild( metaWrapper );

        final Element rdfRoot = document.createElementNS( NS_RDF, "rdf:RDF" );
        metaWrapper.appendChild( rdfRoot );

        propertiesRoot = document.createElementNS( NS_RDF,
                "rdf:Description" );
        rdfRoot.appendChild( propertiesRoot );
    }
    return propertiesRoot;
}
/**
 * Stores the document creator as the Dublin Core <tt>creator</tt>
 * property.
 *
 * @param value
 *            creator to store
 */
public void setCreator( String value ) {
    final String propertyName = "creator";
    setDublinCoreProperty( propertyName, value );
}
/**
 * Stores the creating application as the XMP <tt>CreatorTool</tt>
 * property.
 *
 * @param value
 *            creator tool to store
 */
public void setCreatorTool( String value ) {
    final String propertyName = "CreatorTool";
    setXmpProperty( propertyName, value );
}
/**
 * Stores the document description as the Dublin Core
 * <tt>description</tt> property and, when an element was actually
 * created, marks it with <tt>xml:lang="x-default"</tt>.
 *
 * @param value
 *            description to store
 */
public void setDescription( String value ) {
    final Element description = setDublinCoreProperty( "description", value );
    if ( description == null ) {
        // no element was created, so there is nothing to annotate
        return;
    }
    description.setAttributeNS( "http://www.w3.org/XML/1998/namespace",
            "xml:lang", "x-default" );
}
/**
 * Sets a property in the Dublin Core namespace
 * (<tt>http://purl.org/dc/elements/1.1/</tt>, prefix <tt>dc</tt>).
 *
 * @param name
 *            local name of the property, e.g. <tt>title</tt>
 * @param value
 *            property value
 * @return whatever {@link #setProperty(String, String, String, String)}
 *         returns for this property
 */
public Element setDublinCoreProperty( String name, String value ) {
    final String dcNamespace = "http://purl.org/dc/elements/1.1/";
    final String dcPrefix = "dc";
    return setProperty( dcNamespace, dcPrefix, name, value );
}
/**
 * Stores the document keywords as the PDF <tt>Keywords</tt> property.
 *
 * @param value
 *            keywords to store
 */
public void setKeywords( String value ) {
    final String propertyName = "Keywords";
    setPdfProperty( propertyName, value );
}
/**
 * Sets a property in the Adobe PDF namespace
 * (<tt>http://ns.adobe.com/pdf/1.3/</tt>, prefix <tt>pdf</tt>).
 *
 * @param name
 *            local name of the property, e.g. <tt>Keywords</tt>
 * @param value
 *            property value
 * @return whatever {@link #setProperty(String, String, String, String)}
 *         returns for this property
 */
public Element setPdfProperty( String name, String value ) {
    final String pdfNamespace = "http://ns.adobe.com/pdf/1.3/";
    final String pdfPrefix = "pdf";
    return setProperty( pdfNamespace, pdfPrefix, name, value );
}
/**
 * Stores the producer as the PDF <tt>Producer</tt> property.
 *
 * @param value
 *            producer to store
 */
public void setProducer( String value ) {
    final String propertyName = "Producer";
    setPdfProperty( propertyName, value );
}
/**
 * Replaces a metadata property under the properties root.
 * <p>
 * The first existing child element whose namespace URI and local name
 * match is removed. If <tt>value</tt> is not empty, a new element named
 * <tt>prefix:name</tt> in <tt>namespace</tt> with <tt>value</tt> as its
 * text is appended; otherwise the property is simply left removed.
 *
 * @param namespace
 *            namespace URI of the property
 * @param prefix
 *            namespace prefix used for the qualified element name
 * @param name
 *            local name of the property
 * @param value
 *            new value; an empty value only removes the property
 * @return the newly created property element, or <tt>null</tt> if
 *         <tt>value</tt> was empty
 */
protected Element setProperty( String namespace, String prefix,
        String name, String value ) {
    final Element container = getOrCreatePropertiesRoot();

    // Drop the previous occurrence (first match only) of this property
    for ( Node node = container.getFirstChild(); node != null;
            node = node.getNextSibling() ) {
        if ( node.getNodeType() != Node.ELEMENT_NODE ) {
            continue;
        }

        final Element candidate = (Element) node;
        final String candidateNamespace = candidate.getNamespaceURI();
        final String candidateName = candidate.getLocalName();

        final boolean qualified = WordToFoUtils.isNotEmpty( candidateNamespace )
                && WordToFoUtils.isNotEmpty( candidateName );
        if ( qualified && namespace.equals( candidateNamespace )
                && name.equals( candidateName ) ) {
            container.removeChild( candidate );
            break;
        }
    }

    if ( !WordToFoUtils.isNotEmpty( value ) ) {
        return null;
    }

    final Element created = document.createElementNS( namespace, prefix
            + ":" + name );
    created.appendChild( document.createTextNode( value ) );
    container.appendChild( created );
    return created;
}
/**
 * Stores the document subject as the Dublin Core <tt>subject</tt>
 * property.
 * <p>
 * The subject is written to <tt>dc:subject</tt>, not <tt>dc:title</tt>,
 * so that setting a subject never replaces a title stored through
 * {@link #setTitle(String)}.
 *
 * @param value
 *            subject to store
 */
public void setSubject( String value )
{
    setDublinCoreProperty( "subject", value );
}
/**
 * Stores the document title as the Dublin Core <tt>title</tt> property.
 *
 * @param value
 *            title to store
 */
public void setTitle( String value ) {
    final String propertyName = "title";
    setDublinCoreProperty( propertyName, value );
}
/**
 * Sets a property in the XMP basic namespace
 * (<tt>http://ns.adobe.com/xap/1.0/</tt>, prefix <tt>xmp</tt>).
 *
 * @param name
 *            local name of the property, e.g. <tt>CreatorTool</tt>
 * @param value
 *            property value
 * @return whatever {@link #setProperty(String, String, String, String)}
 *         returns for this property
 */
public Element setXmpProperty( String name, String value ) {
    final String xmpNamespace = "http://ns.adobe.com/xap/1.0/";
    final String xmpPrefix = "xmp";
    return setProperty( xmpNamespace, xmpPrefix, name, value );
}
} }

View File

@ -28,6 +28,9 @@ public class HtmlDocumentFacade
protected final Element head; protected final Element head;
protected final Element html; protected final Element html;
protected Element title;
protected Text titleText;
public HtmlDocumentFacade( Document document ) public HtmlDocumentFacade( Document document )
{ {
this.document = document; this.document = document;
@ -42,6 +45,29 @@ public class HtmlDocumentFacade
html.appendChild( body ); html.appendChild( body );
} }
/**
 * Adds a <tt>meta</tt> element named <tt>author</tt> to the document
 * head.
 *
 * @param value
 *            content of the meta element
 */
public void addAuthor( String value ) {
    final String metaName = "author";
    addMeta( metaName, value );
}
/**
 * Adds a <tt>meta</tt> element named <tt>description</tt> to the document
 * head.
 *
 * @param value
 *            content of the meta element
 */
public void addDescription( String value ) {
    final String metaName = "description";
    addMeta( metaName, value );
}
/**
 * Adds a <tt>meta</tt> element named <tt>keywords</tt> to the document
 * head.
 *
 * @param value
 *            content of the meta element
 */
public void addKeywords( String value ) {
    final String metaName = "keywords";
    addMeta( metaName, value );
}
/**
 * Appends a new <tt>meta</tt> element with the given <tt>name</tt> and
 * <tt>content</tt> attributes to the document head. Existing meta
 * elements are left untouched, so repeated calls add repeated elements.
 *
 * @param name
 *            value of the <tt>name</tt> attribute
 * @param value
 *            value of the <tt>content</tt> attribute
 */
public void addMeta( final String name, String value ) {
    final Element metaElement = document.createElement( "meta" );
    metaElement.setAttribute( "name", name );
    metaElement.setAttribute( "content", value );

    head.appendChild( metaElement );
}
public Element createHeader1() public Element createHeader1()
{ {
return document.createElement( "h1" ); return document.createElement( "h1" );
@ -119,4 +145,31 @@ public class HtmlDocumentFacade
return head; return head;
} }
/**
 * Returns the current document title.
 *
 * @return text content of the title text node, or <tt>null</tt> if no
 *         title element is currently set
 */
public String getTitle() {
    return title == null ? null : titleText.getTextContent();
}
/**
 * Sets, replaces or removes the document title.
 * <p>
 * An empty value removes the existing <tt>title</tt> element from the
 * head (if any) and leaves the document without a title, so that
 * {@link #getTitle()} returns <tt>null</tt> afterwards. A non-empty value
 * creates the <tt>title</tt> element on first use and updates its text
 * node on later calls.
 *
 * @param titleText
 *            new title text; an empty value clears the title
 */
public void setTitle( String titleText )
{
    if ( WordToHtmlUtils.isEmpty( titleText ) )
    {
        if ( this.title != null )
        {
            this.head.removeChild( this.title );
            this.title = null;
            this.titleText = null;
        }
        // Stop here: falling through would immediately re-create an
        // empty <title> element (or pass null to the DOM text node).
        return;
    }

    if ( this.title == null )
    {
        this.title = document.createElement( "title" );
        this.titleText = document.createTextNode( titleText );
        this.title.appendChild( this.titleText );
        this.head.appendChild( this.title );
        return;
    }

    this.titleText.setData( titleText );
}
} }

View File

@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource; import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.CharacterRun;
@ -230,6 +231,23 @@ public class WordToFoConverter extends AbstractWordConverter
inline.appendChild( textNode ); inline.appendChild( textNode );
} }
/**
 * Copies the non-empty summary properties of the Word document into the
 * XSL-FO document metadata: title, author (stored as creator), keywords
 * and comments (stored as description).
 *
 * @param summaryInformation
 *            summary information of the document being converted
 */
@Override
protected void processDocumentInformation(
        SummaryInformation summaryInformation ) {
    // NOTE(review): the emptiness checks use WordToHtmlUtils although this
    // is the FO converter; FoDocumentFacade uses WordToFoUtils for the
    // same check -- consider aligning after confirming both are equivalent.
    final String docTitle = summaryInformation.getTitle();
    if ( WordToHtmlUtils.isNotEmpty( docTitle ) ) {
        foDocumentFacade.setTitle( docTitle );
    }

    final String docAuthor = summaryInformation.getAuthor();
    if ( WordToHtmlUtils.isNotEmpty( docAuthor ) ) {
        foDocumentFacade.setCreator( docAuthor );
    }

    final String docKeywords = summaryInformation.getKeywords();
    if ( WordToHtmlUtils.isNotEmpty( docKeywords ) ) {
        foDocumentFacade.setKeywords( docKeywords );
    }

    final String docComments = summaryInformation.getComments();
    if ( WordToHtmlUtils.isNotEmpty( docComments ) ) {
        foDocumentFacade.setDescription( docComments );
    }
}
protected void processHyperlink( HWPFDocumentCore hwpfDocument, protected void processHyperlink( HWPFDocumentCore hwpfDocument,
Element currentBlock, Paragraph paragraph, Element currentBlock, Paragraph paragraph,
List<CharacterRun> characterRuns, int currentTableLevel, List<CharacterRun> characterRuns, int currentTableLevel,
@ -368,7 +386,8 @@ public class WordToFoConverter extends AbstractWordConverter
Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence, Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
"xsl-region-body" ); "xsl-region-body" );
processSectionParagraphes( wordDocument, flow, section, Integer.MIN_VALUE ); processSectionParagraphes( wordDocument, flow, section,
Integer.MIN_VALUE );
} }
protected void processTable( HWPFDocumentCore wordDocument, Element flow, protected void processTable( HWPFDocumentCore wordDocument, Element flow,

View File

@ -28,6 +28,7 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource; import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.CharacterRun;
@ -207,6 +208,24 @@ public class WordToHtmlConverter extends AbstractWordConverter
span.appendChild( textNode ); span.appendChild( textNode );
} }
/**
 * Copies the non-empty summary properties of the Word document into the
 * HTML document: the title becomes the document title, while author,
 * keywords and comments are added through the facade's author, keywords
 * and description helpers respectively.
 *
 * @param summaryInformation
 *            summary information of the document being converted
 */
@Override
protected void processDocumentInformation(
        SummaryInformation summaryInformation ) {
    final String docTitle = summaryInformation.getTitle();
    if ( WordToHtmlUtils.isNotEmpty( docTitle ) ) {
        htmlDocumentFacade.setTitle( docTitle );
    }

    final String docAuthor = summaryInformation.getAuthor();
    if ( WordToHtmlUtils.isNotEmpty( docAuthor ) ) {
        htmlDocumentFacade.addAuthor( docAuthor );
    }

    final String docKeywords = summaryInformation.getKeywords();
    if ( WordToHtmlUtils.isNotEmpty( docKeywords ) ) {
        htmlDocumentFacade.addKeywords( docKeywords );
    }

    // Document comments are exposed as the HTML description
    final String docComments = summaryInformation.getComments();
    if ( WordToHtmlUtils.isNotEmpty( docComments ) ) {
        htmlDocumentFacade.addDescription( docComments );
    }
}
protected void processHyperlink( HWPFDocumentCore wordDocument, protected void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Paragraph paragraph, Element currentBlock, Paragraph paragraph,
List<CharacterRun> characterRuns, int currentTableLevel, List<CharacterRun> characterRuns, int currentTableLevel,
@ -326,7 +345,8 @@ public class WordToHtmlConverter extends AbstractWordConverter
div.setAttribute( "style", getSectionStyle( section ) ); div.setAttribute( "style", getSectionStyle( section ) );
htmlDocumentFacade.body.appendChild( div ); htmlDocumentFacade.body.appendChild( div );
processSectionParagraphes( wordDocument, div, section, Integer.MIN_VALUE ); processSectionParagraphes( wordDocument, div, section,
Integer.MIN_VALUE );
} }
@Override @Override

View File

@ -62,6 +62,16 @@ public class TestWordToFoConverter extends TestCase
return result; return result;
} }
/**
 * Converts <tt>documentProperties.doc</tt> to XSL-FO and checks that the
 * document title and keywords show up as <tt>dc:title</tt> and
 * <tt>pdf:Keywords</tt> elements in the output.
 */
public void testDocumentProperties() throws Exception {
    final String expectedTitle = "<dc:title xmlns:dc=\"http://purl.org/dc/elements/1.1/\">This is document title</dc:title>";
    final String expectedKeywords = "<pdf:Keywords xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">This is document keywords</pdf:Keywords>";

    final String foText = getFoText( "documentProperties.doc" );

    assertTrue( foText.contains( expectedTitle ) );
    assertTrue( foText.contains( expectedKeywords ) );
}
public void testEquation() throws Exception public void testEquation() throws Exception
{ {
final String sampleFileName = "equation.doc"; final String sampleFileName = "equation.doc";

View File

@ -30,7 +30,7 @@ import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
/** /**
* Test cases for {@link WordToFoConverter} * Test cases for {@link WordToHtmlConverter}
* *
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
*/ */
@ -81,6 +81,15 @@ public class TestWordToHtmlConverter extends TestCase
assertTrue( result.contains( "<table>" ) ); assertTrue( result.contains( "<table>" ) );
} }
/**
 * Converts <tt>documentProperties.doc</tt> to HTML and checks that the
 * document title and keywords show up as a <tt>title</tt> element and a
 * <tt>keywords</tt> meta element in the output.
 */
public void testDocumentProperties() throws Exception {
    final String expectedTitle = "<title>This is document title</title>";
    final String expectedKeywords = "<meta content=\"This is document keywords\" name=\"keywords\">";

    final String htmlText = getHtmlText( "documentProperties.doc" );

    assertTrue( htmlText.contains( expectedTitle ) );
    assertTrue( htmlText.contains( expectedKeywords ) );
}
public void testEquation() throws Exception public void testEquation() throws Exception
{ {
String result = getHtmlText( "equation.doc" ); String result = getHtmlText( "equation.doc" );