diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 11f13b34f..3a0c978ba 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -342,8 +342,8 @@ public abstract class AbstractWordConverter bookmarkStack.addAll( bookmarks ); try { - Range subrange = new Range( structure.start, structure.end, - range ) + int end = Math.min( range.getEndOffset(), structure.end ); + Range subrange = new Range( structure.start, end, range ) { @Override public String toString() @@ -372,7 +372,7 @@ public abstract class AbstractWordConverter + structure.structure.getClass() ); } - previous = structure.end; + previous = Math.min( range.getEndOffset(), structure.end ); } if ( previous != range.getStartOffset() ) @@ -865,6 +865,9 @@ public abstract class AbstractWordConverter return false; } + protected abstract void processPageBreak( HWPFDocumentCore wordDocument, + Element flow ); + protected abstract void processPageref( HWPFDocumentCore wordDocument, Element currentBlock, Range textRange, int currentTableLevel, String pageref ); @@ -903,6 +906,11 @@ public abstract class AbstractWordConverter continue; } + if ( paragraph.text().equals( "\u000c" ) ) + { + processPageBreak( wordDocument, flow ); + } + if ( paragraph.getIlfo() != currentListInfo ) { currentListInfo = paragraph.getIlfo(); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java index 455eb21b0..02c79665f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordUtils.java @@ -130,7 +130,7 @@ public class AbstractWordUtils return true; } - static void compactChildNodes( Element parentElement, String childTagName ) + static void compactChildNodesR( Element parentElement, String childTagName ) { NodeList childNodes = parentElement.getChildNodes(); for ( int i = 0; i < childNodes.getLength() - 1; i++ ) @@ -146,6 +146,16 @@ public class AbstractWordUtils child2.getParentNode().removeChild( child2 ); i--; } + + childNodes = parentElement.getChildNodes(); + for ( int i = 0; i < childNodes.getLength() - 1; i++ ) + { + Node child = childNodes.item( i ); + if ( child instanceof Element ) + { + compactChildNodesR( (Element) child, childTagName ); + } + } } static boolean equals( String str1, String str2 ) @@ -320,10 +330,12 @@ public class AbstractWordUtils if ( argbValue == -1 ) throw new IllegalArgumentException( "This colorref is empty" ); - int value = argbValue & 0x00FFFFFF; + int bgrValue = argbValue & 0x00FFFFFF; + int rgbValue = ( bgrValue & 0x0000FF ) << 16 | ( bgrValue & 0x00FF00 ) + | ( bgrValue & 0xFF0000 ) >> 16; // http://www.w3.org/TR/REC-html40/types.html#h-6.5 - switch ( value ) + switch ( rgbValue ) { case 0xFFFFFF: return "white"; @@ -360,7 +372,7 @@ public class AbstractWordUtils } StringBuilder result = new StringBuilder( "#" ); - String hex = Integer.toHexString( value ); + String hex = Integer.toHexString( rgbValue ); for ( int i = hex.length(); i < 6; i++ ) { result.append( '0' ); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index 7e3e4dfe7..90cf8aff2 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -50,6 +50,8 @@ import org.apache.poi.util.POILogFactory; import org.apache.poi.util.POILogger; import org.w3c.dom.Document; import org.w3c.dom.Element; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; import org.w3c.dom.Text; /** @@ -119,6 +121,8 @@ public class WordToFoConverter extends AbstractWordConverter private AtomicInteger internalLinkCounter = new AtomicInteger( 0 ); + private boolean outputCharactersLanguage = false; + private Set usedIds = new LinkedHashSet(); /** @@ -202,6 +206,11 @@ public class WordToFoConverter extends AbstractWordConverter return foDocumentFacade.getDocument(); } + public boolean isOutputCharactersLanguage() + { + return outputCharactersLanguage; + } + @Override protected void outputCharacters( Element block, CharacterRun characterRun, String text ) @@ -211,11 +220,15 @@ public class WordToFoConverter extends AbstractWordConverter Triplet triplet = getCharacterRunTriplet( characterRun ); if ( WordToFoUtils.isNotEmpty( triplet.fontName ) ) - WordToFoUtils.setFontFamily( inline, characterRun.getFontName() ); + WordToFoUtils.setFontFamily( inline, triplet.fontName ); WordToFoUtils.setBold( inline, triplet.bold ); WordToFoUtils.setItalic( inline, triplet.italic ); WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 ); WordToFoUtils.setCharactersProperties( characterRun, inline ); + + if ( isOutputCharactersLanguage() ) + WordToFoUtils.setLanguage( characterRun, inline ); + block.appendChild( inline ); Text textNode = foDocumentFacade.createText( text ); @@ -411,6 +424,32 @@ public class WordToFoConverter extends AbstractWordConverter block.appendChild( foDocumentFacade.createBlock() ); } + @Override + protected void processPageBreak( HWPFDocumentCore wordDocument, Element flow ) + { + Element block = null; + NodeList childNodes = flow.getChildNodes(); + if ( childNodes.getLength() > 0 ) + { + Node lastChild = childNodes.item( childNodes.getLength() - 1 ); + if ( lastChild instanceof Element ) + { + Element lastElement = (Element) lastChild; + if ( !lastElement.hasAttribute( "break-after" ) ) + { + block = lastElement; + } + } + } + + if ( block == null ) + { + block = foDocumentFacade.createBlock(); + flow.appendChild( block ); + } + block.setAttribute( "break-after", "page" ); + } + protected void processPageref( HWPFDocumentCore hwpfDocument, Element currentBlock, Range textRange, int currentTableLevel, String pageref ) @@ -606,4 +645,9 @@ public class WordToFoConverter extends AbstractWordConverter return true; } + public void setOutputCharactersLanguage( boolean outputCharactersLanguage ) + { + this.outputCharactersLanguage = outputCharactersLanguage; + } + } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java index f80f59c68..c79a3d611 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java @@ -30,7 +30,7 @@ public class WordToFoUtils extends AbstractWordUtils { static void compactInlines( Element blockElement ) { - compactChildNodes( blockElement, "fo:inline" ); + compactChildNodesR( blockElement, "fo:inline" ); } public static void setBold( final Element element, final boolean bold ) @@ -82,12 +82,6 @@ public class WordToFoUtils extends AbstractWordUtils inline.setAttribute( "opacity", getOpacity( characterRun.getIco24() ) ); } - if ( characterRun.getLanguageCode() != 0 ) - { - final String language = getLanguage( characterRun.getLanguageCode() ); - if ( isNotEmpty( language ) ) - inline.setAttribute( "language", language ); - } if ( characterRun.isCapitalized() ) { inline.setAttribute( "text-transform", "uppercase" ); @@ -206,6 +200,17 @@ public class WordToFoUtils extends AbstractWordUtils element.setAttribute( "text-align", justification ); } + public static void setLanguage( final CharacterRun characterRun, + final Element inline ) + { + if ( characterRun.getLanguageCode() != 0 ) + { + final String language = getLanguage( characterRun.getLanguageCode() ); + if ( isNotEmpty( language ) ) + inline.setAttribute( "language", language ); + } + } + public static void setParagraphProperties( Paragraph paragraph, Element block ) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index 59e20e1a0..c33776495 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -495,6 +495,12 @@ public class WordToHtmlConverter extends AbstractWordConverter } } + @Override + protected void processPageBreak( HWPFDocumentCore wordDocument, Element flow ) + { + flow.appendChild( htmlDocumentFacade.createLineBreak() ); + } + protected void processPageref( HWPFDocumentCore hwpfDocument, Element currentBlock, Range textRange, int currentTableLevel, String pageref ) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java index b36e1db83..02f85ffbd 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlUtils.java @@ -233,7 +233,7 @@ public class WordToHtmlUtils extends AbstractWordUtils static void compactSpans( Element pElement ) { - compactChildNodes( pElement, "span" ); + compactChildNodesR( pElement, "span" ); } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java index 5998a2b90..57ec92f8e 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToTextConverter.java @@ -380,6 +380,14 @@ public class WordToTextConverter extends AbstractWordConverter } } + @Override + protected void processPageBreak( HWPFDocumentCore wordDocument, Element flow ) + { + Element block = textDocumentFacade.createBlock(); + block.appendChild( textDocumentFacade.createText( "\n" ) ); + flow.appendChild( block ); + } + @Override protected void processPageref( HWPFDocumentCore wordDocument, Element currentBlock, Range textRange, int currentTableLevel,