add font replacer interface and default implementation

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-12 14:17:57 +00:00
parent 0a631075ec
commit 5ee8f707be
7 changed files with 249 additions and 125 deletions

View File

@ -19,6 +19,8 @@ package org.apache.poi.hwpf.converter;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
@ -50,8 +52,25 @@ public abstract class AbstractWordConverter
private static final POILogger logger = POILogFactory
.getLogger( AbstractWordConverter.class );
/**
 * Replacer applied to every font triplet built by
 * getCharacterRunTriplet(); starts out as a DefaultFontReplacer.
 */
private FontReplacer fontReplacer = new DefaultFontReplacer();
/**
 * Collects the bold flag, italic flag and font name of a character run into
 * a {@link Triplet} and hands it to the current font replacer.
 *
 * @param characterRun
 *            run whose font attributes are read
 * @return whatever the font replacer returns for the collected triplet
 */
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
{
    final Triplet fromRun = new Triplet();
    fromRun.bold = characterRun.isBold();
    fromRun.italic = characterRun.isItalic();
    fromRun.fontName = characterRun.getFontName();

    return getFontReplacer().update( fromRun );
}
public abstract Document getDocument();
/**
 * @return the font replacer currently held by this converter
 */
public FontReplacer getFontReplacer()
{
    return this.fontReplacer;
}
protected abstract void outputCharacters( Element block,
CharacterRun characterRun, String text );
@ -144,25 +163,6 @@ public abstract class AbstractWordConverter
return haveAnyText;
}
/**
 * Converts a whole document: first the summary information (when the
 * document has one), then every section of the document range in order.
 *
 * @param wordDocument
 *            document to convert
 */
public void processDocument( HWPFDocumentCore wordDocument )
{
    final SummaryInformation docInfo = wordDocument.getSummaryInformation();
    if ( docInfo != null )
        processDocumentInformation( docInfo );

    final Range wholeRange = wordDocument.getRange();
    int sectionIndex = 0;
    // numSections() is re-read on every pass, exactly like a for-loop bound
    while ( sectionIndex < wholeRange.numSections() )
    {
        processSection( wordDocument, wholeRange.getSection( sectionIndex ),
                sectionIndex );
        sectionIndex++;
    }
}
/**
 * Hook invoked by processDocument() with the document's summary
 * information; it is only called when that information is not null.
 *
 * @param summaryInformation
 *            non-null summary information of the document being processed
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
protected void processDeadField( HWPFDocumentCore wordDocument,
Element currentBlock, Range range, int currentTableLevel,
int beginMark, int separatorMark, int endMark )
@ -195,6 +195,97 @@ public abstract class AbstractWordConverter
return;
}
/**
 * Converts a whole document: first the summary information (when the
 * document has one), then every section of the document range in order.
 *
 * @param wordDocument
 *            document to convert
 */
public void processDocument( HWPFDocumentCore wordDocument )
{
    final SummaryInformation docInfo = wordDocument.getSummaryInformation();
    if ( docInfo != null )
        processDocumentInformation( docInfo );

    final Range wholeRange = wordDocument.getRange();
    int sectionIndex = 0;
    // numSections() is re-read on every pass, exactly like a for-loop bound
    while ( sectionIndex < wholeRange.numSections() )
    {
        processSection( wordDocument, wholeRange.getSection( sectionIndex ),
                sectionIndex );
        sectionIndex++;
    }
}
/**
 * Hook invoked by processDocument() with the document's summary
 * information; it is only called when that information is not null.
 *
 * @param summaryInformation
 *            non-null summary information of the document being processed
 */
protected abstract void processDocumentInformation(
SummaryInformation summaryInformation );
/**
 * Dispatches a field by its numeric type. Type 37 (page reference) and type
 * 88 (hyperlink) are recognised by matching the text of the field's first
 * subrange against a fixed pattern; on a match the second subrange is passed
 * to processPageref() / processHyperlink() together with the captured
 * target. Every other case (unknown type, missing first subrange, text that
 * does not match) is logged as a warning and the second subrange is
 * processed as ordinary characters.
 *
 * @param hwpfDocument
 *            document that owns the field
 * @param parentRange
 *            range the field's subranges are resolved against
 * @param currentTableLevel
 *            table nesting level forwarded to the delegate methods
 * @param field
 *            field to process
 * @param currentBlock
 *            element that receives the output
 */
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
        int currentTableLevel, Field field, Element currentBlock )
{
    final int fieldType = field.getType();

    if ( fieldType == 37 ) // page reference
    {
        final Range headRange = field.firstSubrange( parentRange );
        if ( headRange != null )
        {
            final String headText = headRange.text();
            final Matcher pagerefMatcher = Pattern.compile(
                    "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" )
                    .matcher( headText );
            if ( pagerefMatcher.find() )
            {
                final String target = pagerefMatcher.group( 1 );
                processPageref( hwpfDocument, currentBlock,
                        field.secondSubrange( parentRange ),
                        currentTableLevel, target );
                return;
            }
        }
    }
    else if ( fieldType == 88 ) // hyperlink
    {
        final Range headRange = field.firstSubrange( parentRange );
        if ( headRange != null )
        {
            final String headText = headRange.text();
            final Matcher hyperlinkMatcher = Pattern.compile(
                    "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" )
                    .matcher( headText );
            if ( hyperlinkMatcher.find() )
            {
                final String target = hyperlinkMatcher.group( 1 );
                processHyperlink( hwpfDocument, currentBlock,
                        field.secondSubrange( parentRange ),
                        currentTableLevel, target );
                return;
            }
        }
    }

    // Nothing above handled the field: warn and emit its content as text.
    logger.log( POILogger.WARN, parentRange + " contains " + field
            + " with unsupported type or format" );
    processCharacters( hwpfDocument, currentTableLevel,
            field.secondSubrange( parentRange ), currentBlock );
}
/**
 * Looks up the field that starts at the given offset and, when one exists,
 * processes it through the HWPFDocument overload of processField().
 *
 * @param wordDocument
 *            document being converted; only HWPFDocument instances are
 *            searched for fields
 * @param charactersRange
 *            range forwarded as the parent range of the field
 * @param currentTableLevel
 *            table nesting level forwarded to the field processing
 * @param startOffset
 *            offset used for the field lookup
 * @param currentBlock
 *            element that receives the output
 * @return the processed field, or null when the document is not an
 *         HWPFDocument or no field was found at the offset
 */
protected Field processField( HWPFDocumentCore wordDocument,
        Range charactersRange, int currentTableLevel, int startOffset,
        Element currentBlock )
{
    if ( wordDocument instanceof HWPFDocument )
    {
        final HWPFDocument document = (HWPFDocument) wordDocument;
        final Field found = document.getFieldsTables()
                .lookupFieldByStartOffset( FieldsTables.PLCFFLDMOM,
                        startOffset );
        if ( found != null )
        {
            processField( document, charactersRange, currentTableLevel,
                    found, currentBlock );
        }
        return found;
    }
    return null;
}
/**
 * Hook invoked by processField() for a hyperlink field (type 88) whose
 * first-subrange text matched the HYPERLINK pattern.
 *
 * @param wordDocument
 *            document that owns the field
 * @param currentBlock
 *            element that receives the output
 * @param textRange
 *            second subrange of the field
 * @param currentTableLevel
 *            table nesting level of the caller
 * @param hyperlink
 *            text captured between the quotes of the HYPERLINK pattern
 */
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
Element currentBlock, Range textRange, int currentTableLevel,
String hyperlink );
@ -292,76 +383,9 @@ public abstract class AbstractWordConverter
protected abstract void processTable( HWPFDocumentCore wordDocument,
Element flow, Table table );
protected Field processField( HWPFDocumentCore wordDocument,
Range charactersRange, int currentTableLevel, int startOffset,
Element currentBlock )
public void setFontReplacer( FontReplacer fontReplacer )
{
if ( !( wordDocument instanceof HWPFDocument ) )
return null;
HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
FieldsTables.PLCFFLDMOM, startOffset );
if ( field == null )
return null;
processField( hwpfDocument, charactersRange, currentTableLevel, field,
currentBlock );
return field;
}
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
int currentTableLevel, Field field, Element currentBlock )
{
switch ( field.getType() )
{
case 37: // page reference
{
final Range firstSubrange = field.firstSubrange( parentRange );
if ( firstSubrange != null )
{
String formula = firstSubrange.text();
Pattern pagerefPattern = Pattern
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
Matcher matcher = pagerefPattern.matcher( formula );
if ( matcher.find() )
{
String pageref = matcher.group( 1 );
processPageref( hwpfDocument, currentBlock,
field.secondSubrange( parentRange ),
currentTableLevel, pageref );
return;
}
}
break;
}
case 88: // hyperlink
{
final Range firstSubrange = field.firstSubrange( parentRange );
if ( firstSubrange != null )
{
String formula = firstSubrange.text();
Pattern hyperlinkPattern = Pattern
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
Matcher matcher = hyperlinkPattern.matcher( formula );
if ( matcher.find() )
{
String hyperlink = matcher.group( 1 );
processHyperlink( hwpfDocument, currentBlock,
field.secondSubrange( parentRange ),
currentTableLevel, hyperlink );
return;
}
}
break;
}
}
logger.log( POILogger.WARN, parentRange + " contains " + field
+ " with unsupported type or format" );
processCharacters( hwpfDocument, currentTableLevel,
field.secondSubrange( parentRange ), currentBlock );
this.fontReplacer = fontReplacer;
}
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,

View File

@ -320,4 +320,18 @@ public class AbstractWordUtils
}
}
/**
 * Returns the part of {@code str} that precedes the last occurrence of
 * {@code separator}.
 *
 * @param str
 *            string to cut
 * @param separator
 *            text to search for, starting from the end
 * @return the leading part of {@code str}; {@code str} itself when either
 *         argument is empty according to isEmpty() or the separator does not
 *         occur
 */
static String substringBeforeLast( String str, String separator )
{
    if ( !isEmpty( str ) && !isEmpty( separator ) )
    {
        final int cutAt = str.lastIndexOf( separator );
        // lastIndexOf yields -1 when absent, otherwise a non-negative index
        if ( cutAt >= 0 )
        {
            return str.substring( 0, cutAt );
        }
    }
    return str;
}
}

View File

@ -0,0 +1,71 @@
package org.apache.poi.hwpf.converter;
/**
 * Default {@link FontReplacer}: rewrites style suffixes in a font name and
 * maps the "Times" style variants to the plain family name plus bold/italic
 * flags.
 * <p>
 * Fix: the guards around the replacement logic were inverted
 * ({@code !isNotEmpty}), so every real font name skipped the logic and a
 * {@code null} font name ran into {@code fontName.endsWith(...)} and threw a
 * NullPointerException. The logic now runs only when a font name is present.
 */
public class DefaultFontReplacer implements FontReplacer
{
    private static final String SUFFIX_REGULAR = " Regular";

    /** Russian " Полужирный" (bold) suffix. */
    private static final String SUFFIX_RU_BOLD = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439";

    /** Russian " Полужирный Курсив" (bold italic) suffix. */
    private static final String SUFFIX_RU_BOLD_ITALIC = " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432";

    /** Russian " Курсив" (italic) suffix. */
    private static final String SUFFIX_RU_ITALIC = " \u041A\u0443\u0440\u0441\u0438\u0432";

    /**
     * Normalises the given triplet in place.
     *
     * @param original
     *            triplet to normalise; it is modified and returned
     * @return the same {@code original} instance; untouched when its font
     *         name is null or empty
     */
    public Triplet update( Triplet original )
    {
        // A single guard is enough: the second stage only compares against
        // non-empty literals, so it can never match an empty name.
        if ( AbstractWordUtils.isNotEmpty( original.fontName ) )
        {
            original.fontName = translateStyleSuffix( original.fontName );
            applyTimesVariant( original );
        }
        return original;
    }

    /**
     * Drops a trailing " Regular" and replaces the Russian style suffixes
     * with " Bold", " Bold Italic" and " Italic". The checks run in the same
     * sequence as before, each one on the result of the previous one.
     */
    private static String translateStyleSuffix( String fontName )
    {
        String result = fontName;
        if ( result.endsWith( SUFFIX_REGULAR ) )
        {
            result = AbstractWordUtils.substringBeforeLast( result,
                    SUFFIX_REGULAR );
        }
        if ( result.endsWith( SUFFIX_RU_BOLD ) )
        {
            result = AbstractWordUtils.substringBeforeLast( result,
                    SUFFIX_RU_BOLD ) + " Bold";
        }
        if ( result.endsWith( SUFFIX_RU_BOLD_ITALIC ) )
        {
            result = AbstractWordUtils.substringBeforeLast( result,
                    SUFFIX_RU_BOLD_ITALIC ) + " Bold Italic";
        }
        if ( result.endsWith( SUFFIX_RU_ITALIC ) )
        {
            result = AbstractWordUtils.substringBeforeLast( result,
                    SUFFIX_RU_ITALIC ) + " Italic";
        }
        return result;
    }

    /**
     * Maps the "Times ..." / "Times-..." style variants to the family name
     * "Times" and sets the bold and italic flags accordingly. Other names
     * leave the triplet unchanged.
     */
    private static void applyTimesVariant( Triplet triplet )
    {
        final String name = triplet.fontName;
        if ( "Times Regular".equals( name ) || "Times-Regular".equals( name ) )
        {
            setTimes( triplet, false, false );
        }
        else if ( "Times Bold".equals( name ) || "Times-Bold".equals( name ) )
        {
            setTimes( triplet, true, false );
        }
        else if ( "Times Italic".equals( name )
                || "Times-Italic".equals( name ) )
        {
            setTimes( triplet, false, true );
        }
        else if ( "Times Bold Italic".equals( name )
                || "Times-BoldItalic".equals( name ) )
        {
            setTimes( triplet, true, true );
        }
    }

    /** Stores the "Times" family name together with the given style flags. */
    private static void setTimes( Triplet triplet, boolean bold,
            boolean italic )
    {
        triplet.fontName = "Times";
        triplet.bold = bold;
        triplet.italic = italic;
    }
}

View File

@ -0,0 +1,13 @@
package org.apache.poi.hwpf.converter;
public interface FontReplacer
{
public class Triplet
{
public String fontName;
public boolean bold;
public boolean italic;
}
public Triplet update( Triplet original );
}

View File

@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
@ -206,16 +208,19 @@ public class WordToFoConverter extends AbstractWordConverter
{
BlockProperies blockProperies = this.blocksProperies.peek();
Element inline = foDocumentFacade.createInline();
if ( characterRun.isBold() != blockProperies.pBold )
Triplet triplet = getCharacterRunTriplet( characterRun );
if ( triplet.bold != blockProperies.pBold )
{
WordToFoUtils.setBold( inline, characterRun.isBold() );
WordToFoUtils.setBold( inline, triplet.bold );
}
if ( characterRun.isItalic() != blockProperies.pItalic )
if ( triplet.italic != blockProperies.pItalic )
{
WordToFoUtils.setItalic( inline, characterRun.isItalic() );
WordToFoUtils.setItalic( inline, triplet.italic );
}
if ( characterRun.getFontName() != null
&& !AbstractWordUtils.equals( characterRun.getFontName(),
if ( WordToFoUtils.isNotEmpty( triplet.fontName )
&& !WordToFoUtils.equals( triplet.fontName,
blockProperies.pFontName ) )
{
WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
@ -317,25 +322,18 @@ public class WordToFoConverter extends AbstractWordConverter
return;
}
{
final String pFontName;
final int pFontSize;
final boolean pBold;
final boolean pItalic;
{
CharacterRun characterRun = paragraph.getCharacterRun( 0 );
pFontSize = characterRun.getFontSize() / 2;
pFontName = characterRun.getFontName();
pBold = characterRun.isBold();
pItalic = characterRun.isItalic();
}
WordToFoUtils.setFontFamily( block, pFontName );
WordToFoUtils.setFontSize( block, pFontSize );
WordToFoUtils.setBold( block, pBold );
WordToFoUtils.setItalic( block, pItalic );
int pFontSize = characterRun.getFontSize() / 2;
Triplet triplet = getCharacterRunTriplet( characterRun );
blocksProperies.push( new BlockProperies( pFontName, pFontSize,
pBold, pItalic ) );
WordToFoUtils.setFontFamily( block, triplet.fontName );
WordToFoUtils.setFontSize( block, pFontSize );
WordToFoUtils.setBold( block, triplet.bold );
WordToFoUtils.setItalic( block, triplet.italic );
blocksProperies.push( new BlockProperies( triplet.fontName,
pFontSize, triplet.bold, triplet.italic ) );
}
try
{

View File

@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
@ -189,16 +191,26 @@ public class WordToHtmlConverter extends AbstractWordConverter
StringBuilder style = new StringBuilder();
BlockProperies blockProperies = this.blocksProperies.peek();
if ( characterRun.getFontName() != null
&& !WordToHtmlUtils.equals( characterRun.getFontName(),
Triplet triplet = getCharacterRunTriplet( characterRun );
if ( WordToHtmlUtils.isNotEmpty( triplet.fontName )
&& !WordToHtmlUtils.equals( triplet.fontName,
blockProperies.pFontName ) )
{
style.append( "font-family: " + characterRun.getFontName() + "; " );
style.append( "font-family: " + triplet.fontName + "; " );
}
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
{
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
}
if ( triplet.bold )
{
style.append( "font-weight: bold; " );
}
if ( triplet.italic )
{
style.append( "font-style: italic; " );
}
WordToHtmlUtils.addCharactersProperties( characterRun, style );
if ( style.length() != 0 )
@ -299,8 +311,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
if ( characterRun != null )
{
Triplet triplet = getCharacterRunTriplet(characterRun);
pFontSize = characterRun.getFontSize() / 2;
pFontName = characterRun.getFontName();
pFontName = triplet.fontName;
WordToHtmlUtils.addFontFamily( pFontName, style );
WordToHtmlUtils.addFontSize( pFontSize, style );
}

View File

@ -63,15 +63,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
final CharacterProperties clonedProperties = characterRun
.cloneProperties();
if ( characterRun.isBold() )
{
style.append( "font-weight: bold; " );
}
if ( characterRun.isItalic() )
{
style.append( "font-style: italic; " );
}
addBorder( clonedProperties.getBrc(), EMPTY, style );
if ( characterRun.isCapitalized() )