diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 9cf29f193..2f740f513 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -19,6 +19,8 @@ package org.apache.poi.hwpf.converter; import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.poi.hwpf.converter.FontReplacer.Triplet; + import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; @@ -50,8 +52,25 @@ public abstract class AbstractWordConverter private static final POILogger logger = POILogFactory .getLogger( AbstractWordConverter.class ); + private FontReplacer fontReplacer = new DefaultFontReplacer(); + + protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) + { + Triplet original = new Triplet(); + original.bold = characterRun.isBold(); + original.italic = characterRun.isItalic(); + original.fontName = characterRun.getFontName(); + Triplet updated = getFontReplacer().update( original ); + return updated; + } + public abstract Document getDocument(); + public FontReplacer getFontReplacer() + { + return fontReplacer; + } + protected abstract void outputCharacters( Element block, CharacterRun characterRun, String text ); @@ -144,25 +163,6 @@ public abstract class AbstractWordConverter return haveAnyText; } - public void processDocument( HWPFDocumentCore wordDocument ) - { - final SummaryInformation summaryInformation = wordDocument - .getSummaryInformation(); - if ( summaryInformation != null ) - { - processDocumentInformation( summaryInformation ); - } - - final Range range = wordDocument.getRange(); - for ( int s = 0; s < range.numSections(); s++ ) - { - processSection( wordDocument, range.getSection( s ), s ); - } - } - - protected abstract void 
processDocumentInformation( - SummaryInformation summaryInformation ); - protected void processDeadField( HWPFDocumentCore wordDocument, Element currentBlock, Range range, int currentTableLevel, int beginMark, int separatorMark, int endMark ) @@ -195,6 +195,97 @@ public abstract class AbstractWordConverter return; } + public void processDocument( HWPFDocumentCore wordDocument ) + { + final SummaryInformation summaryInformation = wordDocument + .getSummaryInformation(); + if ( summaryInformation != null ) + { + processDocumentInformation( summaryInformation ); + } + + final Range range = wordDocument.getRange(); + for ( int s = 0; s < range.numSections(); s++ ) + { + processSection( wordDocument, range.getSection( s ), s ); + } + } + + protected abstract void processDocumentInformation( + SummaryInformation summaryInformation ); + + protected void processField( HWPFDocument hwpfDocument, Range parentRange, + int currentTableLevel, Field field, Element currentBlock ) + { + switch ( field.getType() ) + { + case 37: // page reference + { + final Range firstSubrange = field.firstSubrange( parentRange ); + if ( firstSubrange != null ) + { + String formula = firstSubrange.text(); + Pattern pagerefPattern = Pattern + .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" ); + Matcher matcher = pagerefPattern.matcher( formula ); + if ( matcher.find() ) + { + String pageref = matcher.group( 1 ); + processPageref( hwpfDocument, currentBlock, + field.secondSubrange( parentRange ), + currentTableLevel, pageref ); + return; + } + } + break; + } + case 88: // hyperlink + { + final Range firstSubrange = field.firstSubrange( parentRange ); + if ( firstSubrange != null ) + { + String formula = firstSubrange.text(); + Pattern hyperlinkPattern = Pattern + .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" ); + Matcher matcher = hyperlinkPattern.matcher( formula ); + if ( matcher.find() ) + { + String hyperlink = matcher.group( 1 ); + processHyperlink( 
hwpfDocument, currentBlock, + field.secondSubrange( parentRange ), + currentTableLevel, hyperlink ); + return; + } + } + break; + } + } + + logger.log( POILogger.WARN, parentRange + " contains " + field + + " with unsupported type or format" ); + processCharacters( hwpfDocument, currentTableLevel, + field.secondSubrange( parentRange ), currentBlock ); + } + + protected Field processField( HWPFDocumentCore wordDocument, + Range charactersRange, int currentTableLevel, int startOffset, + Element currentBlock ) + { + if ( !( wordDocument instanceof HWPFDocument ) ) + return null; + + HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; + Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset( + FieldsTables.PLCFFLDMOM, startOffset ); + if ( field == null ) + return null; + + processField( hwpfDocument, charactersRange, currentTableLevel, field, + currentBlock ); + + return field; + } + protected abstract void processHyperlink( HWPFDocumentCore wordDocument, Element currentBlock, Range textRange, int currentTableLevel, String hyperlink ); @@ -292,76 +383,9 @@ public abstract class AbstractWordConverter protected abstract void processTable( HWPFDocumentCore wordDocument, Element flow, Table table ); - protected Field processField( HWPFDocumentCore wordDocument, - Range charactersRange, int currentTableLevel, int startOffset, - Element currentBlock ) + public void setFontReplacer( FontReplacer fontReplacer ) { - if ( !( wordDocument instanceof HWPFDocument ) ) - return null; - - HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; - Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset( - FieldsTables.PLCFFLDMOM, startOffset ); - if ( field == null ) - return null; - - processField( hwpfDocument, charactersRange, currentTableLevel, field, - currentBlock ); - - return field; - } - - protected void processField( HWPFDocument hwpfDocument, Range parentRange, - int currentTableLevel, Field field, Element currentBlock ) - { - switch ( 
field.getType() ) - { - case 37: // page reference - { - final Range firstSubrange = field.firstSubrange( parentRange ); - if ( firstSubrange != null ) - { - String formula = firstSubrange.text(); - Pattern pagerefPattern = Pattern - .compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" ); - Matcher matcher = pagerefPattern.matcher( formula ); - if ( matcher.find() ) - { - String pageref = matcher.group( 1 ); - processPageref( hwpfDocument, currentBlock, - field.secondSubrange( parentRange ), - currentTableLevel, pageref ); - return; - } - } - break; - } - case 88: // hyperlink - { - final Range firstSubrange = field.firstSubrange( parentRange ); - if ( firstSubrange != null ) - { - String formula = firstSubrange.text(); - Pattern hyperlinkPattern = Pattern - .compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" ); - Matcher matcher = hyperlinkPattern.matcher( formula ); - if ( matcher.find() ) - { - String hyperlink = matcher.group( 1 ); - processHyperlink( hwpfDocument, currentBlock, - field.secondSubrange( parentRange ), - currentTableLevel, hyperlink ); - return; - } - } - break; - } - } - - logger.log( POILogger.WARN, parentRange + " contains " + field - + " with unsupported type or format" ); - processCharacters( hwpfDocument, currentTableLevel, - field.secondSubrange( parentRange ), currentBlock ); + this.fontReplacer = fontReplacer; } protected int tryDeadField( HWPFDocumentCore wordDocument, Range range, diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java index b74ddc043..79c5f7e64 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java @@ -98,7 +98,7 @@ public class AbstractWordUtils return "solid"; } } - + public static String getBorderWidth( BorderCode borderCode ) { int lineWidth = borderCode.getLineWidth(); @@ -320,4 
+320,18 @@ public class AbstractWordUtils } } + static String substringBeforeLast( String str, String separator ) + { + if ( isEmpty( str ) || isEmpty( separator ) ) + { + return str; + } + int pos = str.lastIndexOf( separator ); + if ( pos == -1 ) + { + return str; + } + return str.substring( 0, pos ); + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java new file mode 100644 index 000000000..b99816e6d --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/DefaultFontReplacer.java @@ -0,0 +1,81 @@ +package org.apache.poi.hwpf.converter; + +/** + * Default {@link FontReplacer}. Drops a trailing " Regular" from the font + * name, rewrites the trailing Russian style words for bold and italic as + * " Bold" / " Italic", and maps the "Times ..." / "Times-..." variants to + * plain "Times" with the matching bold and italic flags. + */ +public class DefaultFontReplacer implements FontReplacer +{ + /** + * Normalizes the given triplet in place and returns the same instance. A + * null or empty font name is left untouched. + */ + public Triplet update( Triplet original ) + { + if ( AbstractWordUtils.isNotEmpty( original.fontName ) ) + { + String fontName = original.fontName; + + if ( fontName.endsWith( " Regular" ) ) + fontName = AbstractWordUtils.substringBeforeLast( fontName, + " Regular" ); + + if ( fontName + .endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" ) ) + fontName = AbstractWordUtils + .substringBeforeLast( fontName, + " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" ) + + " Bold"; + + if ( fontName + .endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" ) ) + fontName = AbstractWordUtils + .substringBeforeLast( + fontName, + " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" ) + + " Bold Italic"; + + if ( fontName.endsWith( " \u041A\u0443\u0440\u0441\u0438\u0432" ) ) + fontName = AbstractWordUtils.substringBeforeLast( fontName, + " \u041A\u0443\u0440\u0441\u0438\u0432" ) + " Italic"; + + original.fontName = fontName; + } + + if ( AbstractWordUtils.isNotEmpty( original.fontName ) ) + { + if ( "Times Regular".equals( original.fontName ) + || "Times-Regular".equals( original.fontName ) ) + { + 
original.fontName = "Times"; + original.bold = false; + original.italic = false; + } + if ( "Times Bold".equals( original.fontName ) + || "Times-Bold".equals( original.fontName ) ) + { + original.fontName = "Times"; + original.bold = true; + original.italic = false; + } + if ( "Times Italic".equals( original.fontName ) + || "Times-Italic".equals( original.fontName ) ) + { + original.fontName = "Times"; + original.bold = false; + original.italic = true; + } + if ( "Times Bold Italic".equals( original.fontName ) + || "Times-BoldItalic".equals( original.fontName ) ) + { + original.fontName = "Times"; + original.bold = true; + original.italic = true; + } + } + + return original; + } +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java new file mode 100644 index 000000000..51ec26d67 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/FontReplacer.java @@ -0,0 +1,13 @@ +package org.apache.poi.hwpf.converter; + +public interface FontReplacer +{ + public class Triplet + { + public String fontName; + public boolean bold; + public boolean italic; + } + + public Triplet update( Triplet original ); +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index 3be52d8d0..e1f47d738 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.hwpf.converter.FontReplacer.Triplet; + import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; @@ -206,16 +208,19 @@ public class WordToFoConverter extends 
AbstractWordConverter { BlockProperies blockProperies = this.blocksProperies.peek(); Element inline = foDocumentFacade.createInline(); - if ( characterRun.isBold() != blockProperies.pBold ) + + Triplet triplet = getCharacterRunTriplet( characterRun ); + + if ( triplet.bold != blockProperies.pBold ) { - WordToFoUtils.setBold( inline, characterRun.isBold() ); + WordToFoUtils.setBold( inline, triplet.bold ); } - if ( characterRun.isItalic() != blockProperies.pItalic ) + if ( triplet.italic != blockProperies.pItalic ) { - WordToFoUtils.setItalic( inline, characterRun.isItalic() ); + WordToFoUtils.setItalic( inline, triplet.italic ); } - if ( characterRun.getFontName() != null - && !AbstractWordUtils.equals( characterRun.getFontName(), + if ( WordToFoUtils.isNotEmpty( triplet.fontName ) + && !WordToFoUtils.equals( triplet.fontName, blockProperies.pFontName ) ) { - WordToFoUtils.setFontFamily( inline, characterRun.getFontName() ); + WordToFoUtils.setFontFamily( inline, triplet.fontName ); @@ -318,24 +323,17 @@ public class WordToFoConverter extends AbstractWordConverter } { - final String pFontName; - final int pFontSize; - final boolean pBold; - final boolean pItalic; - { - CharacterRun characterRun = paragraph.getCharacterRun( 0 ); - pFontSize = characterRun.getFontSize() / 2; - pFontName = characterRun.getFontName(); - pBold = characterRun.isBold(); - pItalic = characterRun.isItalic(); - } - WordToFoUtils.setFontFamily( block, pFontName ); - WordToFoUtils.setFontSize( block, pFontSize ); - WordToFoUtils.setBold( block, pBold ); - WordToFoUtils.setItalic( block, pItalic ); + CharacterRun characterRun = paragraph.getCharacterRun( 0 ); + int pFontSize = characterRun.getFontSize() / 2; + Triplet triplet = getCharacterRunTriplet( characterRun ); - blocksProperies.push( new BlockProperies( pFontName, pFontSize, - pBold, pItalic ) ); + WordToFoUtils.setFontFamily( block, triplet.fontName ); + WordToFoUtils.setFontSize( block, pFontSize ); + WordToFoUtils.setBold( block, triplet.bold ); + WordToFoUtils.setItalic( block, triplet.italic 
); + + blocksProperies.push( new BlockProperies( triplet.fontName, + pFontSize, triplet.bold, triplet.italic ) ); } try { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index 2355413c8..4c8e03f29 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.hwpf.converter.FontReplacer.Triplet; + import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocumentCore; @@ -189,16 +191,26 @@ public class WordToHtmlConverter extends AbstractWordConverter StringBuilder style = new StringBuilder(); BlockProperies blockProperies = this.blocksProperies.peek(); - if ( characterRun.getFontName() != null - && !WordToHtmlUtils.equals( characterRun.getFontName(), + Triplet triplet = getCharacterRunTriplet( characterRun ); + + if ( WordToHtmlUtils.isNotEmpty( triplet.fontName ) + && !WordToHtmlUtils.equals( triplet.fontName, blockProperies.pFontName ) ) { - style.append( "font-family: " + characterRun.getFontName() + "; " ); + style.append( "font-family: " + triplet.fontName + "; " ); } if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize ) { style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " ); } + if ( triplet.bold ) + { + style.append( "font-weight: bold; " ); + } + if ( triplet.italic ) + { + style.append( "font-style: italic; " ); + } WordToHtmlUtils.addCharactersProperties( characterRun, style ); if ( style.length() != 0 ) @@ -299,8 +311,9 @@ public class WordToHtmlConverter extends AbstractWordConverter final CharacterRun characterRun = paragraph.getCharacterRun( 0 ); if ( characterRun != null 
) { + Triplet triplet = getCharacterRunTriplet(characterRun); pFontSize = characterRun.getFontSize() / 2; - pFontName = characterRun.getFontName(); + pFontName = triplet.fontName; WordToHtmlUtils.addFontFamily( pFontName, style ); WordToHtmlUtils.addFontSize( pFontSize, style ); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java index 80e13a453..f257ed3c0 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java @@ -63,15 +63,6 @@ public class WordToHtmlUtils extends AbstractWordUtils final CharacterProperties clonedProperties = characterRun .cloneProperties(); - if ( characterRun.isBold() ) - { - style.append( "font-weight: bold; " ); - } - if ( characterRun.isItalic() ) - { - style.append( "font-style: italic; " ); - } - addBorder( clonedProperties.getBrc(), EMPTY, style ); if ( characterRun.isCapitalized() )