add font replacer interface and default implementation
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1145604 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0a631075ec
commit
5ee8f707be
@ -19,6 +19,8 @@ package org.apache.poi.hwpf.converter;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
@ -50,8 +52,25 @@ public abstract class AbstractWordConverter
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( AbstractWordConverter.class );
|
||||
|
||||
private FontReplacer fontReplacer = new DefaultFontReplacer();
|
||||
|
||||
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
|
||||
{
|
||||
Triplet original = new Triplet();
|
||||
original.bold = characterRun.isBold();
|
||||
original.italic = characterRun.isItalic();
|
||||
original.fontName = characterRun.getFontName();
|
||||
Triplet updated = getFontReplacer().update( original );
|
||||
return updated;
|
||||
}
|
||||
|
||||
public abstract Document getDocument();
|
||||
|
||||
public FontReplacer getFontReplacer()
|
||||
{
|
||||
return fontReplacer;
|
||||
}
|
||||
|
||||
protected abstract void outputCharacters( Element block,
|
||||
CharacterRun characterRun, String text );
|
||||
|
||||
@ -144,25 +163,6 @@ public abstract class AbstractWordConverter
|
||||
return haveAnyText;
|
||||
}
|
||||
|
||||
public void processDocument( HWPFDocumentCore wordDocument )
|
||||
{
|
||||
final SummaryInformation summaryInformation = wordDocument
|
||||
.getSummaryInformation();
|
||||
if ( summaryInformation != null )
|
||||
{
|
||||
processDocumentInformation( summaryInformation );
|
||||
}
|
||||
|
||||
final Range range = wordDocument.getRange();
|
||||
for ( int s = 0; s < range.numSections(); s++ )
|
||||
{
|
||||
processSection( wordDocument, range.getSection( s ), s );
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract void processDocumentInformation(
|
||||
SummaryInformation summaryInformation );
|
||||
|
||||
protected void processDeadField( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Range range, int currentTableLevel,
|
||||
int beginMark, int separatorMark, int endMark )
|
||||
@ -195,6 +195,97 @@ public abstract class AbstractWordConverter
|
||||
return;
|
||||
}
|
||||
|
||||
public void processDocument( HWPFDocumentCore wordDocument )
|
||||
{
|
||||
final SummaryInformation summaryInformation = wordDocument
|
||||
.getSummaryInformation();
|
||||
if ( summaryInformation != null )
|
||||
{
|
||||
processDocumentInformation( summaryInformation );
|
||||
}
|
||||
|
||||
final Range range = wordDocument.getRange();
|
||||
for ( int s = 0; s < range.numSections(); s++ )
|
||||
{
|
||||
processSection( wordDocument, range.getSection( s ), s );
|
||||
}
|
||||
}
|
||||
|
||||
protected abstract void processDocumentInformation(
|
||||
SummaryInformation summaryInformation );
|
||||
|
||||
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
|
||||
int currentTableLevel, Field field, Element currentBlock )
|
||||
{
|
||||
switch ( field.getType() )
|
||||
{
|
||||
case 37: // page reference
|
||||
{
|
||||
final Range firstSubrange = field.firstSubrange( parentRange );
|
||||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
Matcher matcher = pagerefPattern.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
{
|
||||
String pageref = matcher.group( 1 );
|
||||
processPageref( hwpfDocument, currentBlock,
|
||||
field.secondSubrange( parentRange ),
|
||||
currentTableLevel, pageref );
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 88: // hyperlink
|
||||
{
|
||||
final Range firstSubrange = field.firstSubrange( parentRange );
|
||||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Matcher matcher = hyperlinkPattern.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
{
|
||||
String hyperlink = matcher.group( 1 );
|
||||
processHyperlink( hwpfDocument, currentBlock,
|
||||
field.secondSubrange( parentRange ),
|
||||
currentTableLevel, hyperlink );
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.WARN, parentRange + " contains " + field
|
||||
+ " with unsupported type or format" );
|
||||
processCharacters( hwpfDocument, currentTableLevel,
|
||||
field.secondSubrange( parentRange ), currentBlock );
|
||||
}
|
||||
|
||||
protected Field processField( HWPFDocumentCore wordDocument,
|
||||
Range charactersRange, int currentTableLevel, int startOffset,
|
||||
Element currentBlock )
|
||||
{
|
||||
if ( !( wordDocument instanceof HWPFDocument ) )
|
||||
return null;
|
||||
|
||||
HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
|
||||
Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
|
||||
FieldsTables.PLCFFLDMOM, startOffset );
|
||||
if ( field == null )
|
||||
return null;
|
||||
|
||||
processField( hwpfDocument, charactersRange, currentTableLevel, field,
|
||||
currentBlock );
|
||||
|
||||
return field;
|
||||
}
|
||||
|
||||
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Range textRange, int currentTableLevel,
|
||||
String hyperlink );
|
||||
@ -292,76 +383,9 @@ public abstract class AbstractWordConverter
|
||||
protected abstract void processTable( HWPFDocumentCore wordDocument,
|
||||
Element flow, Table table );
|
||||
|
||||
protected Field processField( HWPFDocumentCore wordDocument,
|
||||
Range charactersRange, int currentTableLevel, int startOffset,
|
||||
Element currentBlock )
|
||||
public void setFontReplacer( FontReplacer fontReplacer )
|
||||
{
|
||||
if ( !( wordDocument instanceof HWPFDocument ) )
|
||||
return null;
|
||||
|
||||
HWPFDocument hwpfDocument = (HWPFDocument) wordDocument;
|
||||
Field field = hwpfDocument.getFieldsTables().lookupFieldByStartOffset(
|
||||
FieldsTables.PLCFFLDMOM, startOffset );
|
||||
if ( field == null )
|
||||
return null;
|
||||
|
||||
processField( hwpfDocument, charactersRange, currentTableLevel, field,
|
||||
currentBlock );
|
||||
|
||||
return field;
|
||||
}
|
||||
|
||||
protected void processField( HWPFDocument hwpfDocument, Range parentRange,
|
||||
int currentTableLevel, Field field, Element currentBlock )
|
||||
{
|
||||
switch ( field.getType() )
|
||||
{
|
||||
case 37: // page reference
|
||||
{
|
||||
final Range firstSubrange = field.firstSubrange( parentRange );
|
||||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
Matcher matcher = pagerefPattern.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
{
|
||||
String pageref = matcher.group( 1 );
|
||||
processPageref( hwpfDocument, currentBlock,
|
||||
field.secondSubrange( parentRange ),
|
||||
currentTableLevel, pageref );
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 88: // hyperlink
|
||||
{
|
||||
final Range firstSubrange = field.firstSubrange( parentRange );
|
||||
if ( firstSubrange != null )
|
||||
{
|
||||
String formula = firstSubrange.text();
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Matcher matcher = hyperlinkPattern.matcher( formula );
|
||||
if ( matcher.find() )
|
||||
{
|
||||
String hyperlink = matcher.group( 1 );
|
||||
processHyperlink( hwpfDocument, currentBlock,
|
||||
field.secondSubrange( parentRange ),
|
||||
currentTableLevel, hyperlink );
|
||||
return;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
logger.log( POILogger.WARN, parentRange + " contains " + field
|
||||
+ " with unsupported type or format" );
|
||||
processCharacters( hwpfDocument, currentTableLevel,
|
||||
field.secondSubrange( parentRange ), currentBlock );
|
||||
this.fontReplacer = fontReplacer;
|
||||
}
|
||||
|
||||
protected int tryDeadField( HWPFDocumentCore wordDocument, Range range,
|
||||
|
@ -320,4 +320,18 @@ public class AbstractWordUtils
|
||||
}
|
||||
}
|
||||
|
||||
static String substringBeforeLast( String str, String separator )
|
||||
{
|
||||
if ( isEmpty( str ) || isEmpty( separator ) )
|
||||
{
|
||||
return str;
|
||||
}
|
||||
int pos = str.lastIndexOf( separator );
|
||||
if ( pos == -1 )
|
||||
{
|
||||
return str;
|
||||
}
|
||||
return str.substring( 0, pos );
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,71 @@
|
||||
package org.apache.poi.hwpf.converter;
|
||||
|
||||
public class DefaultFontReplacer implements FontReplacer
|
||||
{
|
||||
public Triplet update( Triplet original )
|
||||
{
|
||||
if ( !AbstractWordUtils.isNotEmpty( original.fontName ) )
|
||||
{
|
||||
String fontName = original.fontName;
|
||||
|
||||
if ( fontName.endsWith( " Regular" ) )
|
||||
fontName = AbstractWordUtils.substringBeforeLast( fontName,
|
||||
" Regular" );
|
||||
|
||||
if ( fontName
|
||||
.endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" ) )
|
||||
fontName = AbstractWordUtils
|
||||
.substringBeforeLast( fontName,
|
||||
" \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439" )
|
||||
+ " Bold";
|
||||
|
||||
if ( fontName
|
||||
.endsWith( " \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" ) )
|
||||
fontName = AbstractWordUtils
|
||||
.substringBeforeLast(
|
||||
fontName,
|
||||
" \u041F\u043E\u043B\u0443\u0436\u0438\u0440\u043D\u044B\u0439 \u041A\u0443\u0440\u0441\u0438\u0432" )
|
||||
+ " Bold Italic";
|
||||
|
||||
if ( fontName.endsWith( " \u041A\u0443\u0440\u0441\u0438\u0432" ) )
|
||||
fontName = AbstractWordUtils.substringBeforeLast( fontName,
|
||||
" \u041A\u0443\u0440\u0441\u0438\u0432" ) + " Italic";
|
||||
|
||||
original.fontName = fontName;
|
||||
}
|
||||
|
||||
if ( !AbstractWordUtils.isNotEmpty( original.fontName ) )
|
||||
{
|
||||
if ( "Times Regular".equals( original.fontName )
|
||||
|| "Times-Regular".equals( original.fontName ) )
|
||||
{
|
||||
original.fontName = "Times";
|
||||
original.bold = false;
|
||||
original.italic = false;
|
||||
}
|
||||
if ( "Times Bold".equals( original.fontName )
|
||||
|| "Times-Bold".equals( original.fontName ) )
|
||||
{
|
||||
original.fontName = "Times";
|
||||
original.bold = true;
|
||||
original.italic = false;
|
||||
}
|
||||
if ( "Times Italic".equals( original.fontName )
|
||||
|| "Times-Italic".equals( original.fontName ) )
|
||||
{
|
||||
original.fontName = "Times";
|
||||
original.bold = false;
|
||||
original.italic = true;
|
||||
}
|
||||
if ( "Times Bold Italic".equals( original.fontName )
|
||||
|| "Times-BoldItalic".equals( original.fontName ) )
|
||||
{
|
||||
original.fontName = "Times";
|
||||
original.bold = true;
|
||||
original.italic = true;
|
||||
}
|
||||
}
|
||||
|
||||
return original;
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
package org.apache.poi.hwpf.converter;
|
||||
|
||||
/**
 * Hook that lets a converter adjust the font name and bold/italic flags of
 * a character run before they are written to the output.
 */
public interface FontReplacer
{
    /**
     * Plain mutable holder for a font name and its bold/italic flags.
     */
    class Triplet
    {
        public String fontName;

        public boolean bold;

        public boolean italic;
    }

    /**
     * Produces the triplet to use in place of {@code original}.
     *
     * @param original
     *            triplet read from the source document
     * @return the triplet to use; implementations may modify and return the
     *         passed instance
     */
    Triplet update( Triplet original );
}
|
@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
@ -206,16 +208,19 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
{
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
Element inline = foDocumentFacade.createInline();
|
||||
if ( characterRun.isBold() != blockProperies.pBold )
|
||||
|
||||
Triplet triplet = getCharacterRunTriplet( characterRun );
|
||||
|
||||
if ( triplet.bold != blockProperies.pBold )
|
||||
{
|
||||
WordToFoUtils.setBold( inline, characterRun.isBold() );
|
||||
WordToFoUtils.setBold( inline, triplet.bold );
|
||||
}
|
||||
if ( characterRun.isItalic() != blockProperies.pItalic )
|
||||
if ( triplet.italic != blockProperies.pItalic )
|
||||
{
|
||||
WordToFoUtils.setItalic( inline, characterRun.isItalic() );
|
||||
WordToFoUtils.setItalic( inline, triplet.italic );
|
||||
}
|
||||
if ( characterRun.getFontName() != null
|
||||
&& !AbstractWordUtils.equals( characterRun.getFontName(),
|
||||
if ( WordToFoUtils.isNotEmpty( triplet.fontName )
|
||||
&& !WordToFoUtils.equals( triplet.fontName,
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
|
||||
@ -317,25 +322,18 @@ public class WordToFoConverter extends AbstractWordConverter
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final boolean pBold;
|
||||
final boolean pItalic;
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( 0 );
|
||||
pFontSize = characterRun.getFontSize() / 2;
|
||||
pFontName = characterRun.getFontName();
|
||||
pBold = characterRun.isBold();
|
||||
pItalic = characterRun.isItalic();
|
||||
}
|
||||
WordToFoUtils.setFontFamily( block, pFontName );
|
||||
WordToFoUtils.setFontSize( block, pFontSize );
|
||||
WordToFoUtils.setBold( block, pBold );
|
||||
WordToFoUtils.setItalic( block, pItalic );
|
||||
int pFontSize = characterRun.getFontSize() / 2;
|
||||
Triplet triplet = getCharacterRunTriplet( characterRun );
|
||||
|
||||
blocksProperies.push( new BlockProperies( pFontName, pFontSize,
|
||||
pBold, pItalic ) );
|
||||
WordToFoUtils.setFontFamily( block, triplet.fontName );
|
||||
WordToFoUtils.setFontSize( block, pFontSize );
|
||||
WordToFoUtils.setBold( block, triplet.bold );
|
||||
WordToFoUtils.setItalic( block, triplet.italic );
|
||||
|
||||
blocksProperies.push( new BlockProperies( triplet.fontName,
|
||||
pFontSize, triplet.bold, triplet.italic ) );
|
||||
}
|
||||
try
|
||||
{
|
||||
|
@ -27,6 +27,8 @@ import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
@ -189,16 +191,26 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
|
||||
StringBuilder style = new StringBuilder();
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
if ( characterRun.getFontName() != null
|
||||
&& !WordToHtmlUtils.equals( characterRun.getFontName(),
|
||||
Triplet triplet = getCharacterRunTriplet( characterRun );
|
||||
|
||||
if ( WordToHtmlUtils.isNotEmpty( triplet.fontName )
|
||||
&& !WordToHtmlUtils.equals( triplet.fontName,
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
style.append( "font-family: " + characterRun.getFontName() + "; " );
|
||||
style.append( "font-family: " + triplet.fontName + "; " );
|
||||
}
|
||||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
|
||||
{
|
||||
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
|
||||
}
|
||||
if ( triplet.bold )
|
||||
{
|
||||
style.append( "font-weight: bold; " );
|
||||
}
|
||||
if ( triplet.italic )
|
||||
{
|
||||
style.append( "font-style: italic; " );
|
||||
}
|
||||
|
||||
WordToHtmlUtils.addCharactersProperties( characterRun, style );
|
||||
if ( style.length() != 0 )
|
||||
@ -299,8 +311,9 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
||||
final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
|
||||
if ( characterRun != null )
|
||||
{
|
||||
Triplet triplet = getCharacterRunTriplet(characterRun);
|
||||
pFontSize = characterRun.getFontSize() / 2;
|
||||
pFontName = characterRun.getFontName();
|
||||
pFontName = triplet.fontName;
|
||||
WordToHtmlUtils.addFontFamily( pFontName, style );
|
||||
WordToHtmlUtils.addFontSize( pFontSize, style );
|
||||
}
|
||||
|
@ -63,15 +63,6 @@ public class WordToHtmlUtils extends AbstractWordUtils
|
||||
final CharacterProperties clonedProperties = characterRun
|
||||
.cloneProperties();
|
||||
|
||||
if ( characterRun.isBold() )
|
||||
{
|
||||
style.append( "font-weight: bold; " );
|
||||
}
|
||||
if ( characterRun.isItalic() )
|
||||
{
|
||||
style.append( "font-style: italic; " );
|
||||
}
|
||||
|
||||
addBorder( clonedProperties.getBrc(), EMPTY, style );
|
||||
|
||||
if ( characterRun.isCapitalized() )
|
||||
|
Loading…
Reference in New Issue
Block a user