compact HTML output of WordToHtmlConverter

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1148269 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-19 10:51:23 +00:00
parent 1ba8c3e781
commit 92e5199a95
3 changed files with 89 additions and 72 deletions

View File

@ -70,20 +70,20 @@ public class WordToHtmlConverter extends AbstractWordConverter
float bottomMargin = section.getMarginBottom() / TWIPS_PER_INCH; float bottomMargin = section.getMarginBottom() / TWIPS_PER_INCH;
String style = "margin: " + topMargin + "in " + rightMargin + "in " String style = "margin: " + topMargin + "in " + rightMargin + "in "
+ bottomMargin + "in " + leftMargin + "in; "; + bottomMargin + "in " + leftMargin + "in;";
if ( section.getNumColumns() > 1 ) if ( section.getNumColumns() > 1 )
{ {
style += "column-count: " + ( section.getNumColumns() ) + "; "; style += "column-count: " + ( section.getNumColumns() ) + ";";
if ( section.isColumnsEvenlySpaced() ) if ( section.isColumnsEvenlySpaced() )
{ {
float distance = section.getDistanceBetweenColumns() float distance = section.getDistanceBetweenColumns()
/ TWIPS_PER_INCH; / TWIPS_PER_INCH;
style += "column-gap: " + distance + "in; "; style += "column-gap: " + distance + "in;";
} }
else else
{ {
style += "column-gap: 0.25in; "; style += "column-gap: 0.25in;";
} }
} }
return style; return style;
@ -160,6 +160,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
public Document getDocument() public Document getDocument()
{ {
htmlDocumentFacade.updateStylesheet();
return htmlDocumentFacade.getDocument(); return htmlDocumentFacade.getDocument();
} }
@ -178,24 +179,25 @@ public class WordToHtmlConverter extends AbstractWordConverter
&& !WordToHtmlUtils.equals( triplet.fontName, && !WordToHtmlUtils.equals( triplet.fontName,
blockProperies.pFontName ) ) blockProperies.pFontName ) )
{ {
style.append( "font-family: " + triplet.fontName + "; " ); style.append( "font-family:" + triplet.fontName + ";" );
} }
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize ) if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
{ {
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " ); style.append( "font-size:" + characterRun.getFontSize() / 2 + "pt;" );
} }
if ( triplet.bold ) if ( triplet.bold )
{ {
style.append( "font-weight: bold; " ); style.append( "font-weight:bold;" );
} }
if ( triplet.italic ) if ( triplet.italic )
{ {
style.append( "font-style: italic; " ); style.append( "font-style:italic;" );
} }
WordToHtmlUtils.addCharactersProperties( characterRun, style ); WordToHtmlUtils.addCharactersProperties( characterRun, style );
if ( style.length() != 0 ) if ( style.length() != 0 )
span.setAttribute( "style", style.toString() ); span.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
span.getTagName(), "s", style.toString() ) );
Text textNode = htmlDocumentFacade.createText( text ); Text textNode = htmlDocumentFacade.createText( text );
span.appendChild( textNode ); span.appendChild( textNode );
@ -312,22 +314,28 @@ public class WordToHtmlConverter extends AbstractWordConverter
float visibleHeight = Math.max( 0, imageHeight - cropTop float visibleHeight = Math.max( 0, imageHeight - cropTop
- cropBottom ); - cropBottom );
root = htmlDocumentFacade.document.createElement( "div" ); root = htmlDocumentFacade.createBlock();
root.setAttribute( "style", "vertical-align:text-bottom;width:" root.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
+ visibleWidth + "in;height:" + visibleHeight + "in;" ); root.getTagName(), "d", "vertical-align:text-bottom;width:"
+ visibleWidth + "in;height:" + visibleHeight
+ "in;" ) );
// complex // complex
Element inner = htmlDocumentFacade.document.createElement( "div" ); Element inner = htmlDocumentFacade.createBlock();
inner.setAttribute( "style", "position:relative;width:" inner.setAttribute( "class", htmlDocumentFacade
+ visibleWidth + "in;height:" + visibleHeight .getOrCreateCssClass( inner.getTagName(), "d",
+ "in;overflow:hidden;" ); "position:relative;width:" + visibleWidth
+ "in;height:" + visibleHeight
+ "in;overflow:hidden;" ) );
root.appendChild( inner ); root.appendChild( inner );
Element image = htmlDocumentFacade.document.createElement( "img" ); Element image = htmlDocumentFacade.document.createElement( "img" );
image.setAttribute( "src", imageSourcePath ); image.setAttribute( "src", imageSourcePath );
image.setAttribute( "style", "position:absolute;left:-" + cropLeft image.setAttribute( "class", htmlDocumentFacade
+ ";top:-" + cropTop + ";width:" + imageWidth .getOrCreateCssClass( image.getTagName(), "i",
+ "in;height:" + imageHeight + "in;" ); "position:absolute;left:-" + cropLeft + ";top:-"
+ cropTop + ";width:" + imageWidth
+ "in;height:" + imageHeight + "in;" ) );
inner.appendChild( image ); inner.appendChild( image );
style.append( "overflow:hidden;" ); style.append( "overflow:hidden;" );
@ -414,7 +422,10 @@ public class WordToHtmlConverter extends AbstractWordConverter
} }
if ( style.length() > 0 ) if ( style.length() > 0 )
pElement.setAttribute( "style", style.toString() ); pElement.setAttribute(
"class",
htmlDocumentFacade.getOrCreateCssClass(
pElement.getTagName(), "p", style.toString() ) );
return; return;
} }
@ -422,8 +433,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
protected void processSection( HWPFDocumentCore wordDocument, protected void processSection( HWPFDocumentCore wordDocument,
Section section, int sectionCounter ) Section section, int sectionCounter )
{ {
Element div = htmlDocumentFacade.document.createElement( "div" ); Element div = htmlDocumentFacade.createBlock();
div.setAttribute( "style", getSectionStyle( section ) ); div.setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
div.getTagName(), "d", getSectionStyle( section ) ) );
htmlDocumentFacade.body.appendChild( div ); htmlDocumentFacade.body.appendChild( div );
processSectionParagraphes( wordDocument, div, section, processSectionParagraphes( wordDocument, div, section,
@ -434,8 +446,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
protected void processSingleSection( HWPFDocumentCore wordDocument, protected void processSingleSection( HWPFDocumentCore wordDocument,
Section section ) Section section )
{ {
htmlDocumentFacade.body.setAttribute( "style", htmlDocumentFacade.body
getSectionStyle( section ) ); .setAttribute( "class", htmlDocumentFacade.getOrCreateCssClass(
"body", "b", getSectionStyle( section ) ) );
processSectionParagraphes( wordDocument, htmlDocumentFacade.body, processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
section, Integer.MIN_VALUE ); section, Integer.MIN_VALUE );
@ -538,15 +551,19 @@ public class WordToHtmlConverter extends AbstractWordConverter
.createParagraph() ); .createParagraph() );
} }
if ( tableCellStyle.length() > 0 ) if ( tableCellStyle.length() > 0 )
tableCellElement.setAttribute( "style", tableCellElement.setAttribute( "class", htmlDocumentFacade
tableCellStyle.toString() ); .getOrCreateCssClass(
tableCellElement.getTagName(),
tableCellElement.getTagName(),
tableCellStyle.toString() ) );
tableRowElement.appendChild( tableCellElement ); tableRowElement.appendChild( tableCellElement );
} }
if ( tableRowStyle.length() > 0 ) if ( tableRowStyle.length() > 0 )
tableRowElement tableRowElement.setAttribute( "class", htmlDocumentFacade
.setAttribute( "style", tableRowStyle.toString() ); .getOrCreateCssClass( "tr", "r",
tableRowStyle.toString() ) );
if ( tableRow.isTableHeader() ) if ( tableRow.isTableHeader() )
{ {

View File

@ -26,7 +26,7 @@ public class WordToHtmlUtils extends AbstractWordUtils
{ {
public static void addBold( final boolean bold, StringBuilder style ) public static void addBold( final boolean bold, StringBuilder style )
{ {
style.append( "font-weight: " + ( bold ? "bold" : "normal" ) + ";" ); style.append( "font-weight:" + ( bold ? "bold" : "normal" ) + ";" );
} }
public static void addBorder( BorderCode borderCode, String where, public static void addBorder( BorderCode borderCode, String where,
@ -37,21 +37,21 @@ public class WordToHtmlUtils extends AbstractWordUtils
if ( isEmpty( where ) ) if ( isEmpty( where ) )
{ {
style.append( "border-style: " + getBorderType( borderCode ) + "; " ); style.append( "border:" );
style.append( "border-color: " + getColor( borderCode.getColor() )
+ "; " );
style.append( "border-width: " + getBorderWidth( borderCode )
+ "; " );
} }
else else
{ {
style.append( "border-" + where + "-style: " style.append( "border-" );
+ getBorderType( borderCode ) + "; " ); style.append( where );
style.append( "border-" + where + "-color: "
+ getColor( borderCode.getColor() ) + "; " );
style.append( "border-" + where + "-width: "
+ getBorderWidth( borderCode ) + "; " );
} }
style.append( ":" );
style.append( getBorderWidth( borderCode ) );
style.append( ' ' );
style.append( getBorderType( borderCode ) );
style.append( ' ' );
style.append( getColor( borderCode.getColor() ) );
style.append( ';' );
} }
public static void addCharactersProperties( public static void addCharactersProperties(
@ -61,43 +61,43 @@ public class WordToHtmlUtils extends AbstractWordUtils
if ( characterRun.isCapitalized() ) if ( characterRun.isCapitalized() )
{ {
style.append( "text-transform: uppercase; " ); style.append( "text-transform:uppercase;" );
} }
if ( characterRun.isHighlighted() ) if ( characterRun.isHighlighted() )
{ {
style.append( "background-color: " style.append( "background-color:"
+ getColor( characterRun.getHighlightedColor() ) + "; " ); + getColor( characterRun.getHighlightedColor() ) + ";" );
} }
if ( characterRun.isStrikeThrough() ) if ( characterRun.isStrikeThrough() )
{ {
style.append( "text-decoration: line-through; " ); style.append( "text-decoration:line-through;" );
} }
if ( characterRun.isShadowed() ) if ( characterRun.isShadowed() )
{ {
style.append( "text-shadow: " + characterRun.getFontSize() / 24 style.append( "text-shadow:" + characterRun.getFontSize() / 24
+ "pt; " ); + "pt;" );
} }
if ( characterRun.isSmallCaps() ) if ( characterRun.isSmallCaps() )
{ {
style.append( "font-variant: small-caps; " ); style.append( "font-variant:small-caps;" );
} }
if ( characterRun.getSubSuperScriptIndex() == 1 ) if ( characterRun.getSubSuperScriptIndex() == 1 )
{ {
style.append( "baseline-shift: super; " ); style.append( "baseline-shift:super;" );
style.append( "font-size: smaller; " ); style.append( "font-size:smaller;" );
} }
if ( characterRun.getSubSuperScriptIndex() == 2 ) if ( characterRun.getSubSuperScriptIndex() == 2 )
{ {
style.append( "baseline-shift: sub; " ); style.append( "baseline-shift:sub;" );
style.append( "font-size: smaller; " ); style.append( "font-size:smaller;" );
} }
if ( characterRun.getUnderlineCode() > 0 ) if ( characterRun.getUnderlineCode() > 0 )
{ {
style.append( "text-decoration: underline; " ); style.append( "text-decoration:underline;" );
} }
if ( characterRun.isVanished() ) if ( characterRun.isVanished() )
{ {
style.append( "visibility: hidden; " ); style.append( "visibility:hidden;" );
} }
} }
@ -107,12 +107,12 @@ public class WordToHtmlUtils extends AbstractWordUtils
if ( isEmpty( fontFamily ) ) if ( isEmpty( fontFamily ) )
return; return;
style.append( "font-family: " + fontFamily + "; " ); style.append( "font-family:" + fontFamily + ";" );
} }
public static void addFontSize( final int fontSize, StringBuilder style ) public static void addFontSize( final int fontSize, StringBuilder style )
{ {
style.append( "font-size: " + fontSize + "pt; " ); style.append( "font-size:" + fontSize + "pt;" );
} }
public static void addIndent( Paragraph paragraph, StringBuilder style ) public static void addIndent( Paragraph paragraph, StringBuilder style )
@ -130,7 +130,7 @@ public class WordToHtmlUtils extends AbstractWordUtils
if ( twipsValue == 0 ) if ( twipsValue == 0 )
return; return;
style.append( cssName + ": " + ( twipsValue / TWIPS_PER_PT ) + "pt; " ); style.append( cssName + ":" + ( twipsValue / TWIPS_PER_PT ) + "pt;" );
} }
public static void addJustification( Paragraph paragraph, public static void addJustification( Paragraph paragraph,
@ -138,7 +138,7 @@ public class WordToHtmlUtils extends AbstractWordUtils
{ {
String justification = getJustification( paragraph.getJustification() ); String justification = getJustification( paragraph.getJustification() );
if ( isNotEmpty( justification ) ) if ( isNotEmpty( justification ) )
style.append( "text-align: " + justification + "; " ); style.append( "text-align:" + justification + ";" );
} }
public static void addParagraphProperties( Paragraph paragraph, public static void addParagraphProperties( Paragraph paragraph,
@ -154,20 +154,20 @@ public class WordToHtmlUtils extends AbstractWordUtils
if ( paragraph.pageBreakBefore() ) if ( paragraph.pageBreakBefore() )
{ {
style.append( "break-before: page; " ); style.append( "break-before:page;" );
} }
style.append( "hyphenate: " style.append( "hyphenate:"
+ ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + "; " ); + ( paragraph.isAutoHyphenated() ? "auto" : "none" ) + ";" );
if ( paragraph.keepOnPage() ) if ( paragraph.keepOnPage() )
{ {
style.append( "keep-together.within-page: always; " ); style.append( "keep-together.within-page:always;" );
} }
if ( paragraph.keepWithNext() ) if ( paragraph.keepWithNext() )
{ {
style.append( "keep-with-next.within-page: always; " ); style.append( "keep-with-next.within-page:always;" );
} }
} }
@ -175,12 +175,12 @@ public class WordToHtmlUtils extends AbstractWordUtils
TableCell tableCell, boolean toppest, boolean bottomest, TableCell tableCell, boolean toppest, boolean bottomest,
boolean leftest, boolean rightest, StringBuilder style ) boolean leftest, boolean rightest, StringBuilder style )
{ {
style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH ) style.append( "width:" + ( tableCell.getWidth() / TWIPS_PER_INCH )
+ "in; " ); + "in;" );
style.append( "padding-start: " style.append( "padding-start:"
+ ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " ); + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in;" );
style.append( "padding-end: " style.append( "padding-end:"
+ ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " ); + ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in;" );
BorderCode top = tableCell.getBrcTop() != null BorderCode top = tableCell.getBrcTop() != null
&& tableCell.getBrcTop().getBorderType() != 0 ? tableCell && tableCell.getBrcTop().getBorderType() != 0 ? tableCell
@ -211,12 +211,12 @@ public class WordToHtmlUtils extends AbstractWordUtils
{ {
if ( tableRow.getRowHeight() > 0 ) if ( tableRow.getRowHeight() > 0 )
{ {
style.append( "height: " style.append( "height:"
+ ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " ); + ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in;" );
} }
if ( !tableRow.cantSplit() ) if ( !tableRow.cantSplit() )
{ {
style.append( "keep-together: always; " ); style.append( "keep-together:always;" );
} }
} }

View File

@ -92,7 +92,7 @@ public class TestWordToHtmlConverter extends TestCase
public void testAIOOBTap() throws Exception public void testAIOOBTap() throws Exception
{ {
String result = getHtmlText( "AIOOB-Tap.doc" ); String result = getHtmlText( "AIOOB-Tap.doc" );
assertContains( result.substring( 0, 2000 ), "<table>" ); assertContains( result.substring( 0, 6000 ), "<table>" );
} }
public void testBug33519() throws Exception public void testBug33519() throws Exception