add Word-to-HTML extractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1142765 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2b97a034fe
commit
4fbd693851
@ -0,0 +1,365 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.model.ListFormatOverride;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.hwpf.usermodel.Section;
|
||||
import org.apache.poi.hwpf.usermodel.Table;
|
||||
import org.apache.poi.hwpf.usermodel.TableIterator;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
public abstract class AbstractWordExtractor
|
||||
{
|
||||
// Character code 7. processCharacters strips it from the end of a run's text
// when the current table level is non-zero.
private static final byte BEL_MARK = 7;
|
||||
|
||||
// Character code 19: a run whose text starts with it is treated as the
// beginning of a field and handed to tryField.
private static final byte FIELD_BEGIN_MARK = 19;
|
||||
|
||||
// Character code 21: a run whose text starts with it closes a field.
private static final byte FIELD_END_MARK = 21;
|
||||
|
||||
// Character code 20: a run whose text starts with it separates the part of a
// field examined by processField (HYPERLINK / PAGEREF) from the part that is output.
private static final byte FIELD_SEPARATOR_MARK = 20;
|
||||
|
||||
// Class-wide logger; used in this class for WARN-level diagnostics.
private static final POILogger logger = POILogFactory
|
||||
.getLogger( AbstractWordExtractor.class );
|
||||
|
||||
/**
 * Returns the DOM document associated with this extractor.
 * NOTE(review): presumably the document that receives the converted output;
 * confirm against the concrete subclasses.
 *
 * @return the DOM document
 */
public abstract Document getDocument();
|
||||
|
||||
/**
 * Emits the text of one character run into the given element.
 * processCharacters calls this for every run that is not a picture, a field
 * mark, or a special / embedded-object run, after it has removed a trailing
 * "\r" (or, at a non-zero table level, a trailing BEL character) from the text.
 *
 * @param block element that receives the output
 * @param characterRun run the text was taken from
 * @param text run text to output
 */
protected abstract void outputCharacters( Element block,
|
||||
CharacterRun characterRun, String text );
|
||||
|
||||
protected boolean processCharacters( HWPFDocumentCore hwpfDocument,
|
||||
int currentTableLevel, Paragraph paragraph, final Element block,
|
||||
List<CharacterRun> characterRuns, final int start, final int end )
|
||||
{
|
||||
boolean haveAnyText = false;
|
||||
|
||||
for ( int c = start; c < end; c++ )
|
||||
{
|
||||
CharacterRun characterRun = characterRuns.get( c );
|
||||
|
||||
if ( characterRun == null )
|
||||
throw new AssertionError();
|
||||
|
||||
if ( hwpfDocument instanceof HWPFDocument
|
||||
&& ( (HWPFDocument) hwpfDocument ).getPicturesTable()
|
||||
.hasPicture( characterRun ) )
|
||||
{
|
||||
HWPFDocument newFormat = (HWPFDocument) hwpfDocument;
|
||||
Picture picture = newFormat.getPicturesTable().extractPicture(
|
||||
characterRun, true );
|
||||
|
||||
processImage( block, characterRun.text().charAt( 0 ) == 0x01,
|
||||
picture );
|
||||
continue;
|
||||
}
|
||||
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
||||
{
|
||||
int skipTo = tryField( hwpfDocument, paragraph,
|
||||
currentTableLevel, characterRuns, c, block );
|
||||
|
||||
if ( skipTo != c )
|
||||
{
|
||||
c = skipTo;
|
||||
continue;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( characterRun.isSpecialCharacter() || characterRun.isObj()
|
||||
|| characterRun.isOle2() )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( text.endsWith( "\r" )
|
||||
|| ( text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0 ) )
|
||||
text = text.substring( 0, text.length() - 1 );
|
||||
|
||||
outputCharacters( block, characterRun, text );
|
||||
|
||||
haveAnyText |= text.trim().length() != 0;
|
||||
}
|
||||
|
||||
return haveAnyText;
|
||||
}
|
||||
|
||||
public void processDocument( HWPFDocumentCore wordDocument )
|
||||
{
|
||||
final Range range = wordDocument.getRange();
|
||||
for ( int s = 0; s < range.numSections(); s++ )
|
||||
{
|
||||
processSection( wordDocument, range.getSection( s ), s );
|
||||
}
|
||||
}
|
||||
|
||||
protected void processField( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
List<CharacterRun> characterRuns, int beginMark, int separatorMark,
|
||||
int endMark )
|
||||
{
|
||||
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
|
||||
if ( separatorMark - beginMark > 1 )
|
||||
{
|
||||
int index = beginMark + 1;
|
||||
CharacterRun firstAfterBegin = null;
|
||||
while ( index < separatorMark )
|
||||
{
|
||||
firstAfterBegin = paragraph.getCharacterRun( index );
|
||||
if ( firstAfterBegin == null )
|
||||
{
|
||||
logger.log( POILogger.WARN,
|
||||
"Paragraph " + paragraph.getStartOffset() + "--"
|
||||
+ paragraph.getEndOffset()
|
||||
+ " contains null CharacterRun #" + index );
|
||||
index++;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if ( firstAfterBegin != null )
|
||||
{
|
||||
final Matcher hyperlinkMatcher = hyperlinkPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( hyperlinkMatcher.matches() )
|
||||
{
|
||||
String hyperlink = hyperlinkMatcher.group( 1 );
|
||||
processHyperlink( wordDocument, currentBlock, paragraph,
|
||||
characterRuns, currentTableLevel, hyperlink,
|
||||
separatorMark + 1, endMark );
|
||||
return;
|
||||
}
|
||||
|
||||
final Matcher pagerefMatcher = pagerefPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( pagerefMatcher.matches() )
|
||||
{
|
||||
String pageref = pagerefMatcher.group( 1 );
|
||||
processPageref( wordDocument, currentBlock, paragraph,
|
||||
characterRuns, currentTableLevel, pageref,
|
||||
separatorMark + 1, endMark );
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
||||
for ( int i = beginMark; i <= endMark; i++ )
|
||||
{
|
||||
debug.append( "\t" );
|
||||
debug.append( paragraph.getCharacterRun( i ) );
|
||||
debug.append( "\n" );
|
||||
}
|
||||
logger.log( POILogger.WARN, debug );
|
||||
|
||||
// just output field value
|
||||
if ( separatorMark + 1 < endMark )
|
||||
processCharacters( wordDocument, currentTableLevel, paragraph,
|
||||
currentBlock, characterRuns, separatorMark + 1, endMark );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
/**
 * Handles a HYPERLINK field. processField calls this when the first non-null
 * run after the field-begin mark matches {@code HYPERLINK "..."}.
 *
 * @param wordDocument document being converted
 * @param currentBlock element that receives the output
 * @param paragraph paragraph containing the field
 * @param characterRuns character runs passed through from processField
 * @param currentTableLevel current table nesting level
 * @param hyperlink text captured between the quotes of the HYPERLINK instruction
 * @param i processField passes the index following the field-separator mark
 * @param endMark index of the field-end mark
 */
protected abstract void processHyperlink( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String hyperlink, int i, int endMark );
|
||||
|
||||
/**
 * Handles a picture. processCharacters calls this for a run that the
 * document's pictures table reports as having a picture.
 *
 * @param currentBlock element that receives the output
 * @param inlined processCharacters passes {@code true} when the first
 *        character of the run text is 0x01
 * @param picture picture extracted from the pictures table for that run
 */
protected abstract void processImage( Element currentBlock,
|
||||
boolean inlined, Picture picture );
|
||||
|
||||
/**
 * Handles a PAGEREF field. processField calls this when the first non-null
 * run after the field-begin mark matches {@code PAGEREF <name> \h}.
 *
 * @param wordDocument document being converted
 * @param currentBlock element that receives the output
 * @param paragraph paragraph containing the field
 * @param characterRuns character runs passed through from processField
 * @param currentTableLevel current table nesting level
 * @param pageref reference name captured from the PAGEREF instruction
 * @param beginTextInclusive processField passes the index following the
 *        field-separator mark
 * @param endTextExclusive processField passes the index of the field-end mark
 */
protected abstract void processPageref( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String pageref, int beginTextInclusive, int endTextExclusive );
|
||||
|
||||
/**
 * Handles one paragraph. processSectionParagraphes calls this for every
 * paragraph that is neither the start of a table nor at a different table
 * level than the current one.
 *
 * @param wordDocument document being converted
 * @param parentFopElement element that receives the output
 *        (processSectionParagraphes passes its {@code flow} argument)
 * @param currentTableLevel current table nesting level
 * @param paragraph paragraph to process
 * @param bulletText list label computed by AbstractWordUtils.getBulletText,
 *        or the empty string when the paragraph has no list info or the
 *        document has no list tables
 */
protected abstract void processParagraph( HWPFDocumentCore wordDocument,
|
||||
Element parentFopElement, int currentTableLevel,
|
||||
Paragraph paragraph, String bulletText );
|
||||
|
||||
/**
 * Handles one section. processDocument calls this once per section of the
 * document range; processSingleSection calls it with index 0.
 *
 * @param wordDocument document being converted
 * @param section section to process
 * @param s zero-based index of the section as passed by the caller
 */
protected abstract void processSection( HWPFDocumentCore wordDocument,
|
||||
Section section, int s );
|
||||
|
||||
protected void processSectionParagraphes( HWPFDocumentCore wordDocument,
|
||||
Element flow, Range range, int currentTableLevel )
|
||||
{
|
||||
final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
|
||||
for ( TableIterator tableIterator = AbstractWordUtils.newTableIterator(
|
||||
range, currentTableLevel + 1 ); tableIterator.hasNext(); )
|
||||
{
|
||||
Table next = tableIterator.next();
|
||||
allTables.put( Integer.valueOf( next.getStartOffset() ), next );
|
||||
}
|
||||
|
||||
final ListTables listTables = wordDocument.getListTables();
|
||||
int currentListInfo = 0;
|
||||
|
||||
final int paragraphs = range.numParagraphs();
|
||||
for ( int p = 0; p < paragraphs; p++ )
|
||||
{
|
||||
Paragraph paragraph = range.getParagraph( p );
|
||||
|
||||
if ( allTables.containsKey( Integer.valueOf( paragraph
|
||||
.getStartOffset() ) ) )
|
||||
{
|
||||
Table table = allTables.get( Integer.valueOf( paragraph
|
||||
.getStartOffset() ) );
|
||||
processTable( wordDocument, flow, table, currentTableLevel + 1 );
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( paragraph.isInTable()
|
||||
&& paragraph.getTableLevel() != currentTableLevel )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( paragraph.getIlfo() != currentListInfo )
|
||||
{
|
||||
currentListInfo = paragraph.getIlfo();
|
||||
}
|
||||
|
||||
if ( currentListInfo != 0 )
|
||||
{
|
||||
if ( listTables != null )
|
||||
{
|
||||
final ListFormatOverride listFormatOverride = listTables
|
||||
.getOverride( paragraph.getIlfo() );
|
||||
|
||||
String label = AbstractWordUtils.getBulletText( listTables,
|
||||
paragraph, listFormatOverride.getLsid() );
|
||||
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, label );
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log( POILogger.WARN,
|
||||
"Paragraph #" + paragraph.getStartOffset() + "-"
|
||||
+ paragraph.getEndOffset()
|
||||
+ " has reference to list structure #"
|
||||
+ currentListInfo
|
||||
+ ", but listTables not defined in file" );
|
||||
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, AbstractWordUtils.EMPTY );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, AbstractWordUtils.EMPTY );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected void processSingleSection( HWPFDocumentCore wordDocument,
|
||||
Section section )
|
||||
{
|
||||
processSection( wordDocument, section, 0 );
|
||||
}
|
||||
|
||||
/**
 * Handles one table. processSectionParagraphes calls this when a paragraph's
 * start offset equals the start offset of a table found one level below the
 * current table level.
 *
 * @param wordDocument document being converted
 * @param flow element that receives the output
 * @param table table to process
 * @param newTableLevel processSectionParagraphes passes its current table
 *        level plus one
 */
protected abstract void processTable( HWPFDocumentCore wordDocument,
|
||||
Element flow, Table table, int newTableLevel );
|
||||
|
||||
protected int tryField( HWPFDocumentCore wordDocument, Paragraph paragraph,
|
||||
int currentTableLevel, List<CharacterRun> characterRuns,
|
||||
int beginMark, Element currentBlock )
|
||||
{
|
||||
int separatorMark = -1;
|
||||
int endMark = -1;
|
||||
for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( c );
|
||||
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
if ( separatorMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
||||
separatorMark = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
if ( endMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
||||
endMark = c;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( separatorMark == -1 || endMark == -1 )
|
||||
return beginMark;
|
||||
|
||||
processField( wordDocument, currentBlock, paragraph, currentTableLevel,
|
||||
characterRuns, beginMark, separatorMark, endMark );
|
||||
|
||||
return endMark;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,404 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.HWPFOldDocument;
|
||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||
import org.apache.poi.hwpf.model.CHPX;
|
||||
import org.apache.poi.hwpf.model.ListLevel;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.hwpf.usermodel.Section;
|
||||
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
||||
import org.apache.poi.hwpf.usermodel.TableIterator;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
public class AbstractWordUtils
|
||||
{
|
||||
// Shared empty-string constant (returned by getBulletText when a list level has no number text).
static final String EMPTY = "";
|
||||
|
||||
// Class-wide logger; closeQuietly reports close failures through it.
private static final POILogger logger = POILogFactory
|
||||
.getLogger( AbstractWordUtils.class );
|
||||
|
||||
// Number of twips in one inch.
public static final float TWIPS_PER_INCH = 1440.0f;
|
||||
// Number of twips in one point.
public static final int TWIPS_PER_PT = 20;
|
||||
|
||||
static void closeQuietly( final Closeable closeable )
|
||||
{
|
||||
try
|
||||
{
|
||||
closeable.close();
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
logger.log( POILogger.ERROR, "Unable to close resource: " + exc,
|
||||
exc );
|
||||
}
|
||||
}
|
||||
|
||||
static boolean equals( String str1, String str2 )
|
||||
{
|
||||
return str1 == null ? str2 == null : str1.equals( str2 );
|
||||
}
|
||||
|
||||
// XXX incorporate into Range
|
||||
static List<CharacterRun> findCharacterRuns( Range range )
|
||||
{
|
||||
final int min = range.getStartOffset();
|
||||
final int max = range.getEndOffset();
|
||||
|
||||
List<CharacterRun> result = new ArrayList<CharacterRun>();
|
||||
List<CHPX> chpxs = getCharacters( range );
|
||||
for ( int i = 0; i < chpxs.size(); i++ )
|
||||
{
|
||||
CHPX chpx = chpxs.get( i );
|
||||
if ( chpx == null )
|
||||
continue;
|
||||
|
||||
if ( Math.max( min, chpx.getStart() ) <= Math.min( max,
|
||||
chpx.getEnd() ) )
|
||||
{
|
||||
final CharacterRun characterRun = getCharacterRun( range, chpx );
|
||||
|
||||
if ( characterRun == null )
|
||||
continue;
|
||||
|
||||
result.add( characterRun );
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
public static String getBorderType( BorderCode borderCode )
|
||||
{
|
||||
if ( borderCode == null )
|
||||
throw new IllegalArgumentException( "borderCode is null" );
|
||||
|
||||
switch ( borderCode.getBorderType() )
|
||||
{
|
||||
case 1:
|
||||
case 2:
|
||||
return "solid";
|
||||
case 3:
|
||||
return "double";
|
||||
case 5:
|
||||
return "solid";
|
||||
case 6:
|
||||
return "dotted";
|
||||
case 7:
|
||||
case 8:
|
||||
return "dashed";
|
||||
case 9:
|
||||
return "dotted";
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
case 16:
|
||||
case 17:
|
||||
case 18:
|
||||
case 19:
|
||||
return "double";
|
||||
case 20:
|
||||
return "solid";
|
||||
case 21:
|
||||
return "double";
|
||||
case 22:
|
||||
return "dashed";
|
||||
case 23:
|
||||
return "dashed";
|
||||
case 24:
|
||||
return "ridge";
|
||||
case 25:
|
||||
return "grooved";
|
||||
default:
|
||||
return "solid";
|
||||
}
|
||||
}
|
||||
|
||||
public static String getBorderWidth( BorderCode borderCode )
|
||||
{
|
||||
int lineWidth = borderCode.getLineWidth();
|
||||
int pt = lineWidth / 8;
|
||||
int pte = lineWidth - pt * 8;
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
stringBuilder.append( pt );
|
||||
stringBuilder.append( "." );
|
||||
stringBuilder.append( 1000 / 8 * pte );
|
||||
stringBuilder.append( "pt" );
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String getBulletText( ListTables listTables,
|
||||
Paragraph paragraph, int listId )
|
||||
{
|
||||
final ListLevel listLevel = listTables.getLevel( listId,
|
||||
paragraph.getIlvl() );
|
||||
|
||||
if ( listLevel.getNumberText() == null )
|
||||
return EMPTY;
|
||||
|
||||
StringBuffer bulletBuffer = new StringBuffer();
|
||||
char[] xst = listLevel.getNumberText().toCharArray();
|
||||
for ( char element : xst )
|
||||
{
|
||||
if ( element < 9 )
|
||||
{
|
||||
ListLevel numLevel = listTables.getLevel( listId, element );
|
||||
|
||||
int num = numLevel.getStartAt();
|
||||
bulletBuffer.append( NumberFormatter.getNumber( num,
|
||||
listLevel.getNumberFormat() ) );
|
||||
|
||||
if ( numLevel == listLevel )
|
||||
{
|
||||
numLevel.setStartAt( numLevel.getStartAt() + 1 );
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
bulletBuffer.append( element );
|
||||
}
|
||||
}
|
||||
|
||||
byte follow = getIxchFollow( listLevel );
|
||||
switch ( follow )
|
||||
{
|
||||
case 0:
|
||||
bulletBuffer.append( "\t" );
|
||||
break;
|
||||
case 1:
|
||||
bulletBuffer.append( " " );
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return bulletBuffer.toString();
|
||||
}
|
||||
|
||||
private static CharacterRun getCharacterRun( Range range, CHPX chpx )
|
||||
{
|
||||
try
|
||||
{
|
||||
Method method = Range.class.getDeclaredMethod( "getCharacterRun",
|
||||
CHPX.class );
|
||||
method.setAccessible( true );
|
||||
return (CharacterRun) method.invoke( range, chpx );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
throw new Error( exc );
|
||||
}
|
||||
}
|
||||
|
||||
private static List<CHPX> getCharacters( Range range )
|
||||
{
|
||||
try
|
||||
{
|
||||
Field field = Range.class.getDeclaredField( "_characters" );
|
||||
field.setAccessible( true );
|
||||
return (List<CHPX>) field.get( range );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
throw new Error( exc );
|
||||
}
|
||||
}
|
||||
|
||||
public static String getColor( int ico )
|
||||
{
|
||||
switch ( ico )
|
||||
{
|
||||
case 1:
|
||||
return "black";
|
||||
case 2:
|
||||
return "blue";
|
||||
case 3:
|
||||
return "cyan";
|
||||
case 4:
|
||||
return "green";
|
||||
case 5:
|
||||
return "magenta";
|
||||
case 6:
|
||||
return "red";
|
||||
case 7:
|
||||
return "yellow";
|
||||
case 8:
|
||||
return "white";
|
||||
case 9:
|
||||
return "darkblue";
|
||||
case 10:
|
||||
return "darkcyan";
|
||||
case 11:
|
||||
return "darkgreen";
|
||||
case 12:
|
||||
return "darkmagenta";
|
||||
case 13:
|
||||
return "darkred";
|
||||
case 14:
|
||||
return "darkyellow";
|
||||
case 15:
|
||||
return "darkgray";
|
||||
case 16:
|
||||
return "lightgray";
|
||||
default:
|
||||
return "black";
|
||||
}
|
||||
}
|
||||
|
||||
public static byte getIxchFollow( ListLevel listLevel )
|
||||
{
|
||||
try
|
||||
{
|
||||
Field field = ListLevel.class.getDeclaredField( "_ixchFollow" );
|
||||
field.setAccessible( true );
|
||||
return ( (Byte) field.get( listLevel ) ).byteValue();
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
throw new Error( exc );
|
||||
}
|
||||
}
|
||||
|
||||
public static String getJustification( int js )
|
||||
{
|
||||
switch ( js )
|
||||
{
|
||||
case 0:
|
||||
return "start";
|
||||
case 1:
|
||||
return "center";
|
||||
case 2:
|
||||
return "end";
|
||||
case 3:
|
||||
case 4:
|
||||
return "justify";
|
||||
case 5:
|
||||
return "center";
|
||||
case 6:
|
||||
return "left";
|
||||
case 7:
|
||||
return "start";
|
||||
case 8:
|
||||
return "end";
|
||||
case 9:
|
||||
return "justify";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public static String getListItemNumberLabel( int number, int format )
|
||||
{
|
||||
|
||||
if ( format != 0 )
|
||||
System.err.println( "NYI: toListItemNumberLabel(): " + format );
|
||||
|
||||
return String.valueOf( number );
|
||||
}
|
||||
|
||||
public static SectionProperties getSectionProperties( Section section )
|
||||
{
|
||||
try
|
||||
{
|
||||
Field field = Section.class.getDeclaredField( "_props" );
|
||||
field.setAccessible( true );
|
||||
return (SectionProperties) field.get( section );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
throw new Error( exc );
|
||||
}
|
||||
}
|
||||
|
||||
static boolean isEmpty( String str )
|
||||
{
|
||||
return str == null || str.length() == 0;
|
||||
}
|
||||
|
||||
static boolean isNotEmpty( String str )
|
||||
{
|
||||
return !isEmpty( str );
|
||||
}
|
||||
|
||||
public static HWPFDocumentCore loadDoc( File docFile ) throws IOException
|
||||
{
|
||||
final FileInputStream istream = new FileInputStream( docFile );
|
||||
try
|
||||
{
|
||||
return loadDoc( istream );
|
||||
}
|
||||
finally
|
||||
{
|
||||
closeQuietly( istream );
|
||||
}
|
||||
}
|
||||
|
||||
public static HWPFDocumentCore loadDoc( InputStream inputStream )
|
||||
throws IOException
|
||||
{
|
||||
final POIFSFileSystem poifsFileSystem = HWPFDocumentCore
|
||||
.verifyAndBuildPOIFS( inputStream );
|
||||
try
|
||||
{
|
||||
return new HWPFDocument( poifsFileSystem );
|
||||
}
|
||||
catch ( OldWordFileFormatException exc )
|
||||
{
|
||||
return new HWPFOldDocument( poifsFileSystem );
|
||||
}
|
||||
}
|
||||
|
||||
public static TableIterator newTableIterator( Range range, int level )
|
||||
{
|
||||
try
|
||||
{
|
||||
Constructor<TableIterator> constructor = TableIterator.class
|
||||
.getDeclaredConstructor( Range.class, int.class );
|
||||
constructor.setAccessible( true );
|
||||
return constructor.newInstance( range, Integer.valueOf( level ) );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
throw new Error( exc );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,37 +1,34 @@
|
||||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
public abstract class AbstractToFoExtractor
|
||||
public class FoDocumentFacade
|
||||
{
|
||||
|
||||
private static final String NS_XSLFO = "http://www.w3.org/1999/XSL/Format";
|
||||
|
||||
protected final Document document;
|
||||
protected final Element layoutMasterSet;
|
||||
protected final Element root;
|
||||
|
||||
public AbstractToFoExtractor( Document document )
|
||||
public FoDocumentFacade( Document document )
|
||||
{
|
||||
this.document = document;
|
||||
|
||||
@ -43,7 +40,7 @@ public abstract class AbstractToFoExtractor
|
||||
root.appendChild( layoutMasterSet );
|
||||
}
|
||||
|
||||
protected Element addFlowToPageSequence( final Element pageSequence,
|
||||
public Element addFlowToPageSequence( final Element pageSequence,
|
||||
String flowName )
|
||||
{
|
||||
final Element flow = document.createElementNS( NS_XSLFO, "fo:flow" );
|
||||
@ -53,28 +50,28 @@ public abstract class AbstractToFoExtractor
|
||||
return flow;
|
||||
}
|
||||
|
||||
protected Element addListItem( Element listBlock )
|
||||
public Element addListItem( Element listBlock )
|
||||
{
|
||||
Element result = createListItem();
|
||||
listBlock.appendChild( result );
|
||||
return result;
|
||||
}
|
||||
|
||||
protected Element addListItemBody( Element listItem )
|
||||
public Element addListItemBody( Element listItem )
|
||||
{
|
||||
Element result = createListItemBody();
|
||||
listItem.appendChild( result );
|
||||
return result;
|
||||
}
|
||||
|
||||
protected Element addListItemLabel( Element listItem, String text )
|
||||
public Element addListItemLabel( Element listItem, String text )
|
||||
{
|
||||
Element result = createListItemLabel( text );
|
||||
listItem.appendChild( result );
|
||||
return result;
|
||||
}
|
||||
|
||||
protected Element addPageSequence( String pageMaster )
|
||||
public Element addPageSequence( String pageMaster )
|
||||
{
|
||||
final Element pageSequence = document.createElementNS( NS_XSLFO,
|
||||
"fo:page-sequence" );
|
||||
@ -83,7 +80,7 @@ public abstract class AbstractToFoExtractor
|
||||
return pageSequence;
|
||||
}
|
||||
|
||||
protected Element addRegionBody( Element pageMaster )
|
||||
public Element addRegionBody( Element pageMaster )
|
||||
{
|
||||
final Element regionBody = document.createElementNS( NS_XSLFO,
|
||||
"fo:region-body" );
|
||||
@ -92,7 +89,7 @@ public abstract class AbstractToFoExtractor
|
||||
return regionBody;
|
||||
}
|
||||
|
||||
protected Element addSimplePageMaster( String masterName )
|
||||
public Element addSimplePageMaster( String masterName )
|
||||
{
|
||||
final Element simplePageMaster = document.createElementNS( NS_XSLFO,
|
||||
"fo:simple-page-master" );
|
||||
@ -110,7 +107,7 @@ public abstract class AbstractToFoExtractor
|
||||
return basicLink;
|
||||
}
|
||||
|
||||
protected Element createBasicLinkInternal( String internalDestination )
|
||||
public Element createBasicLinkInternal( String internalDestination )
|
||||
{
|
||||
final Element basicLink = document.createElementNS( NS_XSLFO,
|
||||
"fo:basic-link" );
|
||||
@ -118,12 +115,12 @@ public abstract class AbstractToFoExtractor
|
||||
return basicLink;
|
||||
}
|
||||
|
||||
protected Element createBlock()
|
||||
public Element createBlock()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:block" );
|
||||
}
|
||||
|
||||
protected Element createExternalGraphic( String source )
|
||||
public Element createExternalGraphic( String source )
|
||||
{
|
||||
Element result = document.createElementNS( NS_XSLFO,
|
||||
"fo:external-graphic" );
|
||||
@ -131,32 +128,32 @@ public abstract class AbstractToFoExtractor
|
||||
return result;
|
||||
}
|
||||
|
||||
protected Element createInline()
|
||||
public Element createInline()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:inline" );
|
||||
}
|
||||
|
||||
protected Element createLeader()
|
||||
public Element createLeader()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:leader" );
|
||||
}
|
||||
|
||||
protected Element createListBlock()
|
||||
public Element createListBlock()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:list-block" );
|
||||
}
|
||||
|
||||
protected Element createListItem()
|
||||
public Element createListItem()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:list-item" );
|
||||
}
|
||||
|
||||
protected Element createListItemBody()
|
||||
public Element createListItemBody()
|
||||
{
|
||||
return document.createElementNS( NS_XSLFO, "fo:list-item-body" );
|
||||
}
|
||||
|
||||
protected Element createListItemLabel( String text )
|
||||
public Element createListItemLabel( String text )
|
||||
{
|
||||
Element result = document.createElementNS( NS_XSLFO,
|
||||
"fo:list-item-label" );
|
@ -0,0 +1,107 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
/**
 * Thin facade over a DOM {@link Document} for building HTML.
 * The constructor creates the {@code html} root with a {@code head} and a
 * {@code body} child; the factory methods create detached elements that the
 * caller attaches where needed.
 */
public class HtmlDocumentFacade
{

    protected final Element body;
    protected final Document document;
    protected final Element head;
    protected final Element html;

    /**
     * Builds the html / head / body skeleton inside the given document.
     *
     * @param document document that receives the {@code html} root element
     */
    public HtmlDocumentFacade( Document document )
    {
        this.document = document;

        this.html = document.createElement( "html" );
        document.appendChild( this.html );

        this.body = document.createElement( "body" );
        this.head = document.createElement( "head" );

        this.html.appendChild( this.head );
        this.html.appendChild( this.body );
    }

    /** Creates a detached element with the given tag name. */
    private Element element( String tagName )
    {
        return document.createElement( tagName );
    }

    /**
     * Creates an {@code a} element whose {@code href} is the given destination.
     *
     * @param internalDestination value of the {@code href} attribute
     * @return the new, detached link element
     */
    public Element createHyperlink( String internalDestination )
    {
        final Element link = element( "a" );
        link.setAttribute( "href", internalDestination );
        return link;
    }

    /** @return a new, detached {@code li} element */
    public Element createListItem()
    {
        return element( "li" );
    }

    /** @return a new, detached {@code p} element */
    public Element createParagraph()
    {
        return element( "p" );
    }

    /** @return a new, detached {@code table} element */
    public Element createTable()
    {
        return element( "table" );
    }

    /** @return a new, detached {@code tbody} element */
    public Element createTableBody()
    {
        return element( "tbody" );
    }

    /** @return a new, detached {@code td} element */
    public Element createTableCell()
    {
        return element( "td" );
    }

    /** @return a new, detached {@code thead} element */
    public Element createTableHeader()
    {
        return element( "thead" );
    }

    /** @return a new, detached {@code th} element */
    public Element createTableHeaderCell()
    {
        return element( "th" );
    }

    /** @return a new, detached {@code tr} element */
    public Element createTableRow()
    {
        return element( "tr" );
    }

    /**
     * Creates a text node.
     *
     * @param data text content
     * @return the new, detached text node
     */
    public Text createText( String data )
    {
        return document.createTextNode( data );
    }

    /** @return a new, detached {@code ul} element */
    public Element createUnorderedList()
    {
        return element( "ul" );
    }

    /** @return the document passed to the constructor */
    public Document getDocument()
    {
        return document;
    }

}
|
@ -1,32 +1,27 @@
|
||||
/*
|
||||
* ====================================================================
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
* ====================================================================
|
||||
*/
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Stack;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
@ -36,8 +31,10 @@ import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.model.ListFormatOverride;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
@ -54,12 +51,10 @@ import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
import static org.apache.poi.hwpf.extractor.WordToFoUtils.TWIPS_PER_INCH;
|
||||
|
||||
/**
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
public class WordToFoExtractor extends AbstractWordExtractor
|
||||
{
|
||||
|
||||
/**
|
||||
@ -84,35 +79,55 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
}
|
||||
}
|
||||
|
||||
private static final byte BEL_MARK = 7;
|
||||
|
||||
private static final byte FIELD_BEGIN_MARK = 19;
|
||||
|
||||
private static final byte FIELD_END_MARK = 21;
|
||||
|
||||
private static final byte FIELD_SEPARATOR_MARK = 20;
|
||||
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( WordToFoExtractor.class );
|
||||
|
||||
private static HWPFDocument loadDoc( File docFile ) throws IOException
|
||||
public static String getBorderType( BorderCode borderCode )
|
||||
{
|
||||
final FileInputStream istream = new FileInputStream( docFile );
|
||||
try
|
||||
if ( borderCode == null )
|
||||
throw new IllegalArgumentException( "borderCode is null" );
|
||||
|
||||
switch ( borderCode.getBorderType() )
|
||||
{
|
||||
return new HWPFDocument( istream );
|
||||
}
|
||||
finally
|
||||
{
|
||||
try
|
||||
{
|
||||
istream.close();
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
logger.log( POILogger.ERROR,
|
||||
"Unable to close FileInputStream: " + exc, exc );
|
||||
}
|
||||
case 1:
|
||||
case 2:
|
||||
return "solid";
|
||||
case 3:
|
||||
return "double";
|
||||
case 5:
|
||||
return "solid";
|
||||
case 6:
|
||||
return "dotted";
|
||||
case 7:
|
||||
case 8:
|
||||
return "dashed";
|
||||
case 9:
|
||||
return "dotted";
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
case 16:
|
||||
case 17:
|
||||
case 18:
|
||||
case 19:
|
||||
return "double";
|
||||
case 20:
|
||||
return "solid";
|
||||
case 21:
|
||||
return "double";
|
||||
case 22:
|
||||
return "dashed";
|
||||
case 23:
|
||||
return "dashed";
|
||||
case 24:
|
||||
return "ridge";
|
||||
case 25:
|
||||
return "grooved";
|
||||
default:
|
||||
return "solid";
|
||||
}
|
||||
}
|
||||
|
||||
@ -160,7 +175,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
|
||||
static Document process( File docFile ) throws Exception
|
||||
{
|
||||
final HWPFDocument hwpfDocument = loadDoc( docFile );
|
||||
final HWPFDocumentCore hwpfDocument = WordToFoUtils.loadDoc( docFile );
|
||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
@ -170,6 +185,8 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
|
||||
private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
|
||||
|
||||
protected final FoDocumentFacade foDocumentFacade;
|
||||
|
||||
/**
|
||||
* Creates new instance of {@link WordToFoExtractor}. Can be used for output
|
||||
* several {@link HWPFDocument}s into single FO document.
|
||||
@ -180,27 +197,28 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
*/
|
||||
public WordToFoExtractor( Document document )
|
||||
{
|
||||
super( document );
|
||||
this.foDocumentFacade = new FoDocumentFacade( document );
|
||||
}
|
||||
|
||||
protected String createPageMaster( SectionProperties sep, String type,
|
||||
int section )
|
||||
{
|
||||
float height = sep.getYaPage() / TWIPS_PER_INCH;
|
||||
float width = sep.getXaPage() / TWIPS_PER_INCH;
|
||||
float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
|
||||
float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
|
||||
float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
|
||||
float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
|
||||
float height = sep.getYaPage() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
float width = sep.getXaPage() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
float leftMargin = sep.getDxaLeft() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
float rightMargin = sep.getDxaRight() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
float topMargin = sep.getDyaTop() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
float bottomMargin = sep.getDyaBottom() / WordToFoUtils.TWIPS_PER_INCH;
|
||||
|
||||
// add these to the header
|
||||
String pageMasterName = type + "-page" + section;
|
||||
|
||||
Element pageMaster = addSimplePageMaster( pageMasterName );
|
||||
Element pageMaster = foDocumentFacade
|
||||
.addSimplePageMaster( pageMasterName );
|
||||
pageMaster.setAttribute( "page-height", height + "in" );
|
||||
pageMaster.setAttribute( "page-width", width + "in" );
|
||||
|
||||
Element regionBody = addRegionBody( pageMaster );
|
||||
Element regionBody = foDocumentFacade.addRegionBody( pageMaster );
|
||||
regionBody.setAttribute( "margin", topMargin + "in " + rightMargin
|
||||
+ "in " + bottomMargin + "in " + leftMargin + "in" );
|
||||
|
||||
@ -216,12 +234,13 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
|
||||
if ( sep.getCcolM1() > 0 )
|
||||
{
|
||||
regionBody
|
||||
.setAttribute( "column-count", "" + (sep.getCcolM1() + 1) );
|
||||
regionBody.setAttribute( "column-count", ""
|
||||
+ ( sep.getCcolM1() + 1 ) );
|
||||
if ( sep.getFEvenlySpaced() )
|
||||
{
|
||||
regionBody.setAttribute( "column-gap",
|
||||
(sep.getDxaColumns() / TWIPS_PER_INCH) + "in" );
|
||||
( sep.getDxaColumns() / WordToFoUtils.TWIPS_PER_INCH )
|
||||
+ "in" );
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -232,171 +251,55 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
return pageMasterName;
|
||||
}
|
||||
|
||||
protected boolean processCharacters( HWPFDocument hwpfDocument,
|
||||
int currentTableLevel, Paragraph paragraph, final Element block,
|
||||
final int start, final int end )
|
||||
public Document getDocument()
|
||||
{
|
||||
boolean haveAnyText = false;
|
||||
|
||||
for ( int c = start; c < end; c++ )
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( c );
|
||||
|
||||
if ( hwpfDocument.getPicturesTable().hasPicture( characterRun ) )
|
||||
{
|
||||
Picture picture = hwpfDocument.getPicturesTable()
|
||||
.extractPicture( characterRun, true );
|
||||
|
||||
processImage( block, characterRun.text().charAt( 0 ) == 0x01,
|
||||
picture );
|
||||
continue;
|
||||
}
|
||||
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_BEGIN_MARK )
|
||||
{
|
||||
int skipTo = tryField( hwpfDocument, paragraph,
|
||||
currentTableLevel, c, block );
|
||||
|
||||
if ( skipTo != c )
|
||||
{
|
||||
c = skipTo;
|
||||
continue;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
// shall not appear without FIELD_BEGIN_MARK
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( characterRun.isSpecialCharacter() || characterRun.isObj()
|
||||
|| characterRun.isOle2() )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
Element inline = createInline();
|
||||
if ( characterRun.isBold() != blockProperies.pBold )
|
||||
{
|
||||
WordToFoUtils.setBold( inline, characterRun.isBold() );
|
||||
}
|
||||
if ( characterRun.isItalic() != blockProperies.pItalic )
|
||||
{
|
||||
WordToFoUtils.setItalic( inline, characterRun.isItalic() );
|
||||
}
|
||||
if ( !WordToFoUtils.equals( characterRun.getFontName(),
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
WordToFoUtils
|
||||
.setFontFamily( inline, characterRun.getFontName() );
|
||||
}
|
||||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
|
||||
{
|
||||
WordToFoUtils.setFontSize( inline,
|
||||
characterRun.getFontSize() / 2 );
|
||||
}
|
||||
WordToFoUtils.setCharactersProperties( characterRun, inline );
|
||||
block.appendChild( inline );
|
||||
|
||||
if ( text.endsWith( "\r" )
|
||||
|| (text.charAt( text.length() - 1 ) == BEL_MARK && currentTableLevel != 0) )
|
||||
text = text.substring( 0, text.length() - 1 );
|
||||
|
||||
Text textNode = createText( text );
|
||||
inline.appendChild( textNode );
|
||||
|
||||
haveAnyText |= text.trim().length() != 0;
|
||||
}
|
||||
|
||||
return haveAnyText;
|
||||
return foDocumentFacade.getDocument();
|
||||
}
|
||||
|
||||
public void processDocument( HWPFDocument hwpfDocument )
|
||||
@Override
|
||||
protected void outputCharacters( Element block, CharacterRun characterRun,
|
||||
String text )
|
||||
{
|
||||
final Range range = hwpfDocument.getRange();
|
||||
|
||||
for ( int s = 0; s < range.numSections(); s++ )
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
Element inline = foDocumentFacade.createInline();
|
||||
if ( characterRun.isBold() != blockProperies.pBold )
|
||||
{
|
||||
processSection( hwpfDocument, range.getSection( s ), s );
|
||||
WordToFoUtils.setBold( inline, characterRun.isBold() );
|
||||
}
|
||||
if ( characterRun.isItalic() != blockProperies.pItalic )
|
||||
{
|
||||
WordToFoUtils.setItalic( inline, characterRun.isItalic() );
|
||||
}
|
||||
if ( characterRun.getFontName() != null
|
||||
&& !AbstractWordUtils.equals( characterRun.getFontName(),
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
WordToFoUtils.setFontFamily( inline, characterRun.getFontName() );
|
||||
}
|
||||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
|
||||
{
|
||||
WordToFoUtils.setFontSize( inline, characterRun.getFontSize() / 2 );
|
||||
}
|
||||
WordToFoUtils.setCharactersProperties( characterRun, inline );
|
||||
block.appendChild( inline );
|
||||
|
||||
Text textNode = foDocumentFacade.createText( text );
|
||||
inline.appendChild( textNode );
|
||||
}
|
||||
|
||||
protected void processField( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
int beginMark, int separatorMark, int endMark )
|
||||
{
|
||||
|
||||
Pattern hyperlinkPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*HYPERLINK \"(.*)\"[ \\t\\r\\n]*" );
|
||||
Pattern pagerefPattern = Pattern
|
||||
.compile( "[ \\t\\r\\n]*PAGEREF ([^ ]*)[ \\t\\r\\n]*\\\\h[ \\t\\r\\n]*" );
|
||||
|
||||
if ( separatorMark - beginMark > 1 )
|
||||
{
|
||||
CharacterRun firstAfterBegin = paragraph
|
||||
.getCharacterRun( beginMark + 1 );
|
||||
|
||||
final Matcher hyperlinkMatcher = hyperlinkPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( hyperlinkMatcher.matches() )
|
||||
{
|
||||
String hyperlink = hyperlinkMatcher.group( 1 );
|
||||
processHyperlink( hwpfDocument, currentBlock, paragraph,
|
||||
currentTableLevel, hyperlink, separatorMark + 1,
|
||||
endMark );
|
||||
return;
|
||||
}
|
||||
|
||||
final Matcher pagerefMatcher = pagerefPattern
|
||||
.matcher( firstAfterBegin.text() );
|
||||
if ( pagerefMatcher.matches() )
|
||||
{
|
||||
String pageref = pagerefMatcher.group( 1 );
|
||||
processPageref( hwpfDocument, currentBlock, paragraph,
|
||||
currentTableLevel, pageref, separatorMark + 1, endMark );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder debug = new StringBuilder( "Unsupported field type: \n" );
|
||||
for ( int i = beginMark; i <= endMark; i++ )
|
||||
{
|
||||
debug.append( "\t" );
|
||||
debug.append( paragraph.getCharacterRun( i ) );
|
||||
debug.append( "\n" );
|
||||
}
|
||||
logger.log( POILogger.WARN, debug );
|
||||
|
||||
// just output field value
|
||||
if ( separatorMark + 1 < endMark )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
currentBlock, separatorMark + 1, endMark );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
protected void processHyperlink( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
protected void processHyperlink( HWPFDocumentCore hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String hyperlink, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = createBasicLinkExternal( hyperlink );
|
||||
Element basicLink = foDocumentFacade
|
||||
.createBasicLinkExternal( hyperlink );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
basicLink, beginTextInclusive, endTextExclusive );
|
||||
basicLink, characterRuns, beginTextInclusive,
|
||||
endTextExclusive );
|
||||
}
|
||||
|
||||
/**
|
||||
@ -422,27 +325,30 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
Picture picture )
|
||||
{
|
||||
// no default implementation -- skip
|
||||
currentBlock.appendChild( document.createComment( "Image link to '"
|
||||
+ picture.suggestFullFileName() + "' can be here" ) );
|
||||
currentBlock.appendChild( foDocumentFacade.getDocument().createComment(
|
||||
"Image link to '" + picture.suggestFullFileName()
|
||||
+ "' can be here" ) );
|
||||
}
|
||||
|
||||
protected void processPageref( HWPFDocument hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph, int currentTableLevel,
|
||||
protected void processPageref( HWPFDocumentCore hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String pageref, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = createBasicLinkInternal( pageref );
|
||||
Element basicLink = foDocumentFacade.createBasicLinkInternal( pageref );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
basicLink, beginTextInclusive, endTextExclusive );
|
||||
basicLink, characterRuns, beginTextInclusive,
|
||||
endTextExclusive );
|
||||
}
|
||||
|
||||
protected void processParagraph( HWPFDocument hwpfDocument,
|
||||
protected void processParagraph( HWPFDocumentCore hwpfDocument,
|
||||
Element parentFopElement, int currentTableLevel,
|
||||
Paragraph paragraph, String bulletText )
|
||||
{
|
||||
final Element block = createBlock();
|
||||
final Element block = foDocumentFacade.createBlock();
|
||||
parentFopElement.appendChild( block );
|
||||
|
||||
WordToFoUtils.setParagraphProperties( paragraph, block );
|
||||
@ -480,21 +386,23 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
|
||||
if ( WordToFoUtils.isNotEmpty( bulletText ) )
|
||||
{
|
||||
Element inline = createInline();
|
||||
Element inline = foDocumentFacade.createInline();
|
||||
block.appendChild( inline );
|
||||
|
||||
Text textNode = createText( bulletText );
|
||||
Text textNode = foDocumentFacade.createText( bulletText );
|
||||
inline.appendChild( textNode );
|
||||
|
||||
haveAnyText |= bulletText.trim().length() != 0;
|
||||
}
|
||||
|
||||
List<CharacterRun> characterRuns = WordToFoUtils
|
||||
.findCharacterRuns( paragraph );
|
||||
haveAnyText = processCharacters( hwpfDocument, currentTableLevel,
|
||||
paragraph, block, 0, charRuns );
|
||||
paragraph, block, characterRuns, 0, characterRuns.size() );
|
||||
|
||||
if ( !haveAnyText )
|
||||
{
|
||||
Element leader = createLeader();
|
||||
Element leader = foDocumentFacade.createLeader();
|
||||
block.appendChild( leader );
|
||||
}
|
||||
}
|
||||
@ -506,20 +414,21 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
return;
|
||||
}
|
||||
|
||||
protected void processSection( HWPFDocument hwpfDocument, Section section,
|
||||
int sectionCounter )
|
||||
protected void processSection( HWPFDocumentCore wordDocument,
|
||||
Section section, int sectionCounter )
|
||||
{
|
||||
String regularPage = createPageMaster(
|
||||
WordToFoUtils.getSectionProperties( section ), "page",
|
||||
sectionCounter );
|
||||
|
||||
Element pageSequence = addPageSequence( regularPage );
|
||||
Element flow = addFlowToPageSequence( pageSequence, "xsl-region-body" );
|
||||
Element pageSequence = foDocumentFacade.addPageSequence( regularPage );
|
||||
Element flow = foDocumentFacade.addFlowToPageSequence( pageSequence,
|
||||
"xsl-region-body" );
|
||||
|
||||
processSectionParagraphes( hwpfDocument, flow, section, 0 );
|
||||
processSectionParagraphes( wordDocument, flow, section, 0 );
|
||||
}
|
||||
|
||||
protected void processSectionParagraphes( HWPFDocument hwpfDocument,
|
||||
protected void processSectionParagraphes( HWPFDocument wordDocument,
|
||||
Element flow, Range range, int currentTableLevel )
|
||||
{
|
||||
final Map<Integer, Table> allTables = new HashMap<Integer, Table>();
|
||||
@ -530,7 +439,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
allTables.put( Integer.valueOf( next.getStartOffset() ), next );
|
||||
}
|
||||
|
||||
final ListTables listTables = hwpfDocument.getListTables();
|
||||
final ListTables listTables = wordDocument.getListTables();
|
||||
int currentListInfo = 0;
|
||||
|
||||
final int paragraphs = range.numParagraphs();
|
||||
@ -543,7 +452,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
{
|
||||
Table table = allTables.get( Integer.valueOf( paragraph
|
||||
.getStartOffset() ) );
|
||||
processTable( hwpfDocument, flow, table, currentTableLevel + 1 );
|
||||
processTable( wordDocument, flow, table, currentTableLevel + 1 );
|
||||
continue;
|
||||
}
|
||||
|
||||
@ -568,7 +477,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
String label = WordToFoUtils.getBulletText( listTables,
|
||||
paragraph, listFormatOverride.getLsid() );
|
||||
|
||||
processParagraph( hwpfDocument, flow, currentTableLevel,
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, label );
|
||||
}
|
||||
else
|
||||
@ -580,24 +489,24 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
+ currentListInfo
|
||||
+ ", but listTables not defined in file" );
|
||||
|
||||
processParagraph( hwpfDocument, flow, currentTableLevel,
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, WordToFoUtils.EMPTY );
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
processParagraph( hwpfDocument, flow, currentTableLevel,
|
||||
processParagraph( wordDocument, flow, currentTableLevel,
|
||||
paragraph, WordToFoUtils.EMPTY );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected void processTable( HWPFDocument hwpfDocument, Element flow,
|
||||
protected void processTable( HWPFDocumentCore wordDocument, Element flow,
|
||||
Table table, int thisTableLevel )
|
||||
{
|
||||
Element tableHeader = createTableHeader();
|
||||
Element tableBody = createTableBody();
|
||||
Element tableHeader = foDocumentFacade.createTableHeader();
|
||||
Element tableBody = foDocumentFacade.createTableBody();
|
||||
|
||||
final int tableRows = table.numRows();
|
||||
|
||||
@ -611,7 +520,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
{
|
||||
TableRow tableRow = table.getRow( r );
|
||||
|
||||
Element tableRowElement = createTableRow();
|
||||
Element tableRowElement = foDocumentFacade.createTableRow();
|
||||
WordToFoUtils.setTableRowProperties( tableRow, tableRowElement );
|
||||
|
||||
final int rowCells = tableRow.numCells();
|
||||
@ -626,7 +535,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
&& !tableCell.isFirstVerticallyMerged() )
|
||||
continue;
|
||||
|
||||
Element tableCellElement = createTableCell();
|
||||
Element tableCellElement = foDocumentFacade.createTableCell();
|
||||
WordToFoUtils.setTableCellProperties( tableRow, tableCell,
|
||||
tableCellElement, r == 0, r == tableRows - 1, c == 0,
|
||||
c == rowCells - 1 );
|
||||
@ -649,9 +558,9 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
{
|
||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
||||
{
|
||||
tableCellElement
|
||||
.setAttribute( "number-columns-spanned", ""
|
||||
+ (maxColumns - c) );
|
||||
tableCellElement.setAttribute(
|
||||
"number-columns-spanned", ""
|
||||
+ ( maxColumns - c ) );
|
||||
}
|
||||
}
|
||||
|
||||
@ -673,12 +582,13 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
+ count );
|
||||
}
|
||||
|
||||
processSectionParagraphes( hwpfDocument, tableCellElement,
|
||||
processSectionParagraphes( wordDocument, tableCellElement,
|
||||
tableCell, thisTableLevel );
|
||||
|
||||
if ( !tableCellElement.hasChildNodes() )
|
||||
{
|
||||
tableCellElement.appendChild( createBlock() );
|
||||
tableCellElement.appendChild( foDocumentFacade
|
||||
.createBlock() );
|
||||
}
|
||||
|
||||
tableRowElement.appendChild( tableCellElement );
|
||||
@ -694,7 +604,7 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
}
|
||||
}
|
||||
|
||||
final Element tableElement = createTable();
|
||||
final Element tableElement = foDocumentFacade.createTable();
|
||||
if ( tableHeader.hasChildNodes() )
|
||||
{
|
||||
tableElement.appendChild( tableHeader );
|
||||
@ -714,51 +624,4 @@ public class WordToFoExtractor extends AbstractToFoExtractor
|
||||
}
|
||||
}
|
||||
|
||||
protected int tryField( HWPFDocument hwpfDocument, Paragraph paragraph,
|
||||
int currentTableLevel, int beginMark, Element currentBlock )
|
||||
{
|
||||
int separatorMark = -1;
|
||||
int endMark = -1;
|
||||
for ( int c = beginMark + 1; c < paragraph.numCharacterRuns(); c++ )
|
||||
{
|
||||
CharacterRun characterRun = paragraph.getCharacterRun( c );
|
||||
|
||||
String text = characterRun.text();
|
||||
if ( text.getBytes().length == 0 )
|
||||
continue;
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_SEPARATOR_MARK )
|
||||
{
|
||||
if ( separatorMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
||||
separatorMark = c;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ( text.getBytes()[0] == FIELD_END_MARK )
|
||||
{
|
||||
if ( endMark != -1 )
|
||||
{
|
||||
// double;
|
||||
return beginMark;
|
||||
}
|
||||
|
||||
endMark = c;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if ( separatorMark == -1 || endMark == -1 )
|
||||
return beginMark;
|
||||
|
||||
processField( hwpfDocument, currentBlock, paragraph, currentTableLevel,
|
||||
beginMark, separatorMark, endMark );
|
||||
|
||||
return endMark;
|
||||
}
|
||||
}
|
||||
|
@ -1,489 +1,323 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Field;
|
||||
|
||||
import org.apache.poi.hwpf.model.ListLevel;
|
||||
import org.apache.poi.hwpf.model.ListTables;
|
||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterProperties;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
import org.apache.poi.hwpf.usermodel.Range;
|
||||
import org.apache.poi.hwpf.usermodel.Section;
|
||||
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
||||
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||
import org.apache.poi.hwpf.usermodel.TableIterator;
|
||||
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
public class WordToFoUtils {
|
||||
static final String EMPTY = "";
|
||||
|
||||
public static final float TWIPS_PER_INCH = 1440.0f;
|
||||
|
||||
public static final int TWIPS_PER_PT = 20;
|
||||
|
||||
static boolean equals(String str1, String str2) {
|
||||
return str1 == null ? str2 == null : str1.equals(str2);
|
||||
public class WordToFoUtils extends AbstractWordUtils
|
||||
{
|
||||
public static void setBold( final Element element, final boolean bold )
|
||||
{
|
||||
element.setAttribute( "font-weight", bold ? "bold" : "normal" );
|
||||
}
|
||||
|
||||
public static String getBorderType(BorderCode borderCode) {
|
||||
if (borderCode == null)
|
||||
throw new IllegalArgumentException("borderCode is null");
|
||||
public static void setBorder( Element element, BorderCode borderCode,
|
||||
String where )
|
||||
{
|
||||
if ( element == null )
|
||||
throw new IllegalArgumentException( "element is null" );
|
||||
|
||||
switch (borderCode.getBorderType()) {
|
||||
case 1:
|
||||
case 2:
|
||||
return "solid";
|
||||
case 3:
|
||||
return "double";
|
||||
case 5:
|
||||
return "solid";
|
||||
case 6:
|
||||
return "dotted";
|
||||
case 7:
|
||||
case 8:
|
||||
return "dashed";
|
||||
case 9:
|
||||
return "dotted";
|
||||
case 10:
|
||||
case 11:
|
||||
case 12:
|
||||
case 13:
|
||||
case 14:
|
||||
case 15:
|
||||
case 16:
|
||||
case 17:
|
||||
case 18:
|
||||
case 19:
|
||||
return "double";
|
||||
case 20:
|
||||
return "solid";
|
||||
case 21:
|
||||
return "double";
|
||||
case 22:
|
||||
return "dashed";
|
||||
case 23:
|
||||
return "dashed";
|
||||
case 24:
|
||||
return "ridge";
|
||||
case 25:
|
||||
return "grooved";
|
||||
default:
|
||||
return "solid";
|
||||
}
|
||||
}
|
||||
if ( borderCode == null || borderCode.getBorderType() == 0 )
|
||||
return;
|
||||
|
||||
public static String getBorderWidth(BorderCode borderCode) {
|
||||
int lineWidth = borderCode.getLineWidth();
|
||||
int pt = lineWidth / 8;
|
||||
int pte = lineWidth - pt * 8;
|
||||
|
||||
StringBuilder stringBuilder = new StringBuilder();
|
||||
stringBuilder.append(pt);
|
||||
stringBuilder.append(".");
|
||||
stringBuilder.append(1000 / 8 * pte);
|
||||
stringBuilder.append("pt");
|
||||
return stringBuilder.toString();
|
||||
}
|
||||
|
||||
public static String getBulletText(ListTables listTables,
|
||||
Paragraph paragraph, int listId) {
|
||||
final ListLevel listLevel = listTables.getLevel(listId,
|
||||
paragraph.getIlvl());
|
||||
|
||||
if (listLevel.getNumberText() == null)
|
||||
return EMPTY;
|
||||
|
||||
StringBuffer bulletBuffer = new StringBuffer();
|
||||
char[] xst = listLevel.getNumberText().toCharArray();
|
||||
for (char element : xst) {
|
||||
if (element < 9) {
|
||||
ListLevel numLevel = listTables.getLevel(listId, element);
|
||||
|
||||
int num = numLevel.getStartAt();
|
||||
bulletBuffer.append(NumberFormatter.getNumber(num,
|
||||
listLevel.getNumberFormat()));
|
||||
|
||||
if (numLevel == listLevel) {
|
||||
numLevel.setStartAt(numLevel.getStartAt() + 1);
|
||||
}
|
||||
|
||||
} else {
|
||||
bulletBuffer.append(element);
|
||||
}
|
||||
}
|
||||
|
||||
byte follow = getIxchFollow(listLevel);
|
||||
switch (follow) {
|
||||
case 0:
|
||||
bulletBuffer.append("\t");
|
||||
break;
|
||||
case 1:
|
||||
bulletBuffer.append(" ");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return bulletBuffer.toString();
|
||||
}
|
||||
|
||||
public static String getColor(int ico) {
|
||||
switch (ico) {
|
||||
case 1:
|
||||
return "black";
|
||||
case 2:
|
||||
return "blue";
|
||||
case 3:
|
||||
return "cyan";
|
||||
case 4:
|
||||
return "green";
|
||||
case 5:
|
||||
return "magenta";
|
||||
case 6:
|
||||
return "red";
|
||||
case 7:
|
||||
return "yellow";
|
||||
case 8:
|
||||
return "white";
|
||||
case 9:
|
||||
return "darkblue";
|
||||
case 10:
|
||||
return "darkcyan";
|
||||
case 11:
|
||||
return "darkgreen";
|
||||
case 12:
|
||||
return "darkmagenta";
|
||||
case 13:
|
||||
return "darkred";
|
||||
case 14:
|
||||
return "darkyellow";
|
||||
case 15:
|
||||
return "darkgray";
|
||||
case 16:
|
||||
return "lightgray";
|
||||
default:
|
||||
return "black";
|
||||
}
|
||||
}
|
||||
|
||||
public static byte getIxchFollow(ListLevel listLevel) {
|
||||
try {
|
||||
Field field = ListLevel.class.getDeclaredField("_ixchFollow");
|
||||
field.setAccessible(true);
|
||||
return ((Byte) field.get(listLevel)).byteValue();
|
||||
} catch (Exception exc) {
|
||||
throw new Error(exc);
|
||||
}
|
||||
}
|
||||
|
||||
public static String getJustification(int js) {
|
||||
switch (js) {
|
||||
case 0:
|
||||
return "start";
|
||||
case 1:
|
||||
return "center";
|
||||
case 2:
|
||||
return "end";
|
||||
case 3:
|
||||
case 4:
|
||||
return "justify";
|
||||
case 5:
|
||||
return "center";
|
||||
case 6:
|
||||
return "left";
|
||||
case 7:
|
||||
return "start";
|
||||
case 8:
|
||||
return "end";
|
||||
case 9:
|
||||
return "justify";
|
||||
if ( isEmpty( where ) )
|
||||
{
|
||||
element.setAttribute( "border-style", getBorderType( borderCode ) );
|
||||
element.setAttribute( "border-color",
|
||||
getColor( borderCode.getColor() ) );
|
||||
element.setAttribute( "border-width", getBorderWidth( borderCode ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
element.setAttribute( "border-" + where + "-style",
|
||||
getBorderType( borderCode ) );
|
||||
element.setAttribute( "border-" + where + "-color",
|
||||
getColor( borderCode.getColor() ) );
|
||||
element.setAttribute( "border-" + where + "-width",
|
||||
getBorderWidth( borderCode ) );
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
public static String getListItemNumberLabel(int number, int format) {
|
||||
|
||||
if (format != 0)
|
||||
System.err.println("NYI: toListItemNumberLabel(): " + format);
|
||||
|
||||
return String.valueOf(number);
|
||||
}
|
||||
|
||||
public static SectionProperties getSectionProperties(Section section) {
|
||||
try {
|
||||
Field field = Section.class.getDeclaredField("_props");
|
||||
field.setAccessible(true);
|
||||
return (SectionProperties) field.get(section);
|
||||
} catch (Exception exc) {
|
||||
throw new Error(exc);
|
||||
}
|
||||
}
|
||||
|
||||
static boolean isEmpty(String str) {
|
||||
return str == null || str.length() == 0;
|
||||
}
|
||||
|
||||
static boolean isNotEmpty(String str) {
|
||||
return !isEmpty(str);
|
||||
}
|
||||
|
||||
public static TableIterator newTableIterator(Range range, int level) {
|
||||
try {
|
||||
Constructor<TableIterator> constructor = TableIterator.class
|
||||
.getDeclaredConstructor(Range.class, int.class);
|
||||
constructor.setAccessible(true);
|
||||
return constructor.newInstance(range, Integer.valueOf(level));
|
||||
} catch (Exception exc) {
|
||||
throw new Error(exc);
|
||||
}
|
||||
}
|
||||
|
||||
public static void setBold(final Element element, final boolean bold) {
|
||||
element.setAttribute("font-weight", bold ? "bold" : "normal");
|
||||
}
|
||||
|
||||
public static void setBorder(Element element, BorderCode borderCode,
|
||||
String where) {
|
||||
if (element == null)
|
||||
throw new IllegalArgumentException("element is null");
|
||||
|
||||
if (borderCode == null)
|
||||
return;
|
||||
|
||||
if (isEmpty(where)) {
|
||||
element.setAttribute("border-style", getBorderType(borderCode));
|
||||
element.setAttribute("border-color",
|
||||
getColor(borderCode.getColor()));
|
||||
element.setAttribute("border-width", getBorderWidth(borderCode));
|
||||
} else {
|
||||
element.setAttribute("border-" + where + "-style",
|
||||
getBorderType(borderCode));
|
||||
element.setAttribute("border-" + where + "-color",
|
||||
getColor(borderCode.getColor()));
|
||||
element.setAttribute("border-" + where + "-width",
|
||||
getBorderWidth(borderCode));
|
||||
}
|
||||
}
|
||||
|
||||
public static void setCharactersProperties(final CharacterRun characterRun,
|
||||
final Element inline) {
|
||||
public static void setCharactersProperties(
|
||||
final CharacterRun characterRun, final Element inline )
|
||||
{
|
||||
final CharacterProperties clonedProperties = characterRun
|
||||
.cloneProperties();
|
||||
StringBuilder textDecorations = new StringBuilder();
|
||||
|
||||
setBorder(inline, clonedProperties.getBrc(), EMPTY);
|
||||
setBorder( inline, clonedProperties.getBrc(), EMPTY );
|
||||
|
||||
if (characterRun.isCapitalized()) {
|
||||
inline.setAttribute("text-transform", "uppercase");
|
||||
if ( characterRun.isCapitalized() )
|
||||
{
|
||||
inline.setAttribute( "text-transform", "uppercase" );
|
||||
}
|
||||
if (characterRun.isHighlighted()) {
|
||||
inline.setAttribute("background-color",
|
||||
getColor(clonedProperties.getIcoHighlight()));
|
||||
if ( characterRun.isHighlighted() )
|
||||
{
|
||||
inline.setAttribute( "background-color",
|
||||
getColor( clonedProperties.getIcoHighlight() ) );
|
||||
}
|
||||
if (characterRun.isStrikeThrough()) {
|
||||
if (textDecorations.length() > 0)
|
||||
textDecorations.append(" ");
|
||||
textDecorations.append("line-through");
|
||||
if ( characterRun.isStrikeThrough() )
|
||||
{
|
||||
if ( textDecorations.length() > 0 )
|
||||
textDecorations.append( " " );
|
||||
textDecorations.append( "line-through" );
|
||||
}
|
||||
if (characterRun.isShadowed()) {
|
||||
inline.setAttribute("text-shadow", characterRun.getFontSize() / 24
|
||||
+ "pt");
|
||||
if ( characterRun.isShadowed() )
|
||||
{
|
||||
inline.setAttribute( "text-shadow", characterRun.getFontSize() / 24
|
||||
+ "pt" );
|
||||
}
|
||||
if (characterRun.isSmallCaps()) {
|
||||
inline.setAttribute("font-variant", "small-caps");
|
||||
if ( characterRun.isSmallCaps() )
|
||||
{
|
||||
inline.setAttribute( "font-variant", "small-caps" );
|
||||
}
|
||||
if (characterRun.getSubSuperScriptIndex() == 1) {
|
||||
inline.setAttribute("baseline-shift", "super");
|
||||
inline.setAttribute("font-size", "smaller");
|
||||
if ( characterRun.getSubSuperScriptIndex() == 1 )
|
||||
{
|
||||
inline.setAttribute( "baseline-shift", "super" );
|
||||
inline.setAttribute( "font-size", "smaller" );
|
||||
}
|
||||
if (characterRun.getSubSuperScriptIndex() == 2) {
|
||||
inline.setAttribute("baseline-shift", "sub");
|
||||
inline.setAttribute("font-size", "smaller");
|
||||
if ( characterRun.getSubSuperScriptIndex() == 2 )
|
||||
{
|
||||
inline.setAttribute( "baseline-shift", "sub" );
|
||||
inline.setAttribute( "font-size", "smaller" );
|
||||
}
|
||||
if (characterRun.getUnderlineCode() > 0) {
|
||||
if (textDecorations.length() > 0)
|
||||
textDecorations.append(" ");
|
||||
textDecorations.append("underline");
|
||||
if ( characterRun.getUnderlineCode() > 0 )
|
||||
{
|
||||
if ( textDecorations.length() > 0 )
|
||||
textDecorations.append( " " );
|
||||
textDecorations.append( "underline" );
|
||||
}
|
||||
if (characterRun.isVanished()) {
|
||||
inline.setAttribute("visibility", "hidden");
|
||||
if ( characterRun.isVanished() )
|
||||
{
|
||||
inline.setAttribute( "visibility", "hidden" );
|
||||
}
|
||||
if (textDecorations.length() > 0) {
|
||||
inline.setAttribute("text-decoration", textDecorations.toString());
|
||||
if ( textDecorations.length() > 0 )
|
||||
{
|
||||
inline.setAttribute( "text-decoration", textDecorations.toString() );
|
||||
}
|
||||
}
|
||||
|
||||
public static void setFontFamily(final Element element,
|
||||
final String fontFamily) {
|
||||
element.setAttribute("font-family", fontFamily);
|
||||
public static void setFontFamily( final Element element,
|
||||
final String fontFamily )
|
||||
{
|
||||
if ( isEmpty( fontFamily ) )
|
||||
return;
|
||||
|
||||
element.setAttribute( "font-family", fontFamily );
|
||||
}
|
||||
|
||||
public static void setFontSize(final Element element, final int fontSize) {
|
||||
element.setAttribute("font-size", String.valueOf(fontSize));
|
||||
public static void setFontSize( final Element element, final int fontSize )
|
||||
{
|
||||
element.setAttribute( "font-size", String.valueOf( fontSize ) );
|
||||
}
|
||||
|
||||
public static void setIndent(Paragraph paragraph, Element block) {
|
||||
if (paragraph.getFirstLineIndent() != 0) {
|
||||
block.setAttribute(
|
||||
"text-indent",
|
||||
String.valueOf(paragraph.getFirstLineIndent()
|
||||
/ TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
}
|
||||
if (paragraph.getIndentFromLeft() != 0) {
|
||||
block.setAttribute(
|
||||
"start-indent",
|
||||
String.valueOf(paragraph.getIndentFromLeft() / TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
}
|
||||
if (paragraph.getIndentFromRight() != 0) {
|
||||
block.setAttribute(
|
||||
"end-indent",
|
||||
String.valueOf(paragraph.getIndentFromRight()
|
||||
/ TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
}
|
||||
if (paragraph.getSpacingBefore() != 0) {
|
||||
block.setAttribute("space-before",
|
||||
String.valueOf(paragraph.getSpacingBefore() / TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
}
|
||||
if (paragraph.getSpacingAfter() != 0) {
|
||||
block.setAttribute("space-after",
|
||||
String.valueOf(paragraph.getSpacingAfter() / TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
}
|
||||
public static void setIndent( Paragraph paragraph, Element block )
|
||||
{
|
||||
if ( paragraph.getFirstLineIndent() != 0 )
|
||||
{
|
||||
block.setAttribute(
|
||||
"text-indent",
|
||||
String.valueOf( paragraph.getFirstLineIndent()
|
||||
/ TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
if ( paragraph.getIndentFromLeft() != 0 )
|
||||
{
|
||||
block.setAttribute(
|
||||
"start-indent",
|
||||
String.valueOf( paragraph.getIndentFromLeft()
|
||||
/ TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
if ( paragraph.getIndentFromRight() != 0 )
|
||||
{
|
||||
block.setAttribute(
|
||||
"end-indent",
|
||||
String.valueOf( paragraph.getIndentFromRight()
|
||||
/ TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
if ( paragraph.getSpacingBefore() != 0 )
|
||||
{
|
||||
block.setAttribute(
|
||||
"space-before",
|
||||
String.valueOf( paragraph.getSpacingBefore() / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
if ( paragraph.getSpacingAfter() != 0 )
|
||||
{
|
||||
block.setAttribute( "space-after",
|
||||
String.valueOf( paragraph.getSpacingAfter() / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
}
|
||||
|
||||
public static void setItalic(final Element element, final boolean italic) {
|
||||
element.setAttribute("font-style", italic ? "italic" : "normal");
|
||||
public static void setItalic( final Element element, final boolean italic )
|
||||
{
|
||||
element.setAttribute( "font-style", italic ? "italic" : "normal" );
|
||||
}
|
||||
|
||||
public static void setJustification(Paragraph paragraph,
|
||||
final Element element) {
|
||||
String justification = getJustification(paragraph.getJustification());
|
||||
if (isNotEmpty(justification))
|
||||
element.setAttribute("text-align", justification);
|
||||
public static void setJustification( Paragraph paragraph,
|
||||
final Element element )
|
||||
{
|
||||
String justification = getJustification( paragraph.getJustification() );
|
||||
if ( isNotEmpty( justification ) )
|
||||
element.setAttribute( "text-align", justification );
|
||||
}
|
||||
|
||||
public static void setParagraphProperties(Paragraph paragraph, Element block) {
|
||||
setIndent(paragraph, block);
|
||||
setJustification(paragraph, block);
|
||||
public static void setParagraphProperties( Paragraph paragraph,
|
||||
Element block )
|
||||
{
|
||||
setIndent( paragraph, block );
|
||||
setJustification( paragraph, block );
|
||||
|
||||
setBorder(block, paragraph.getBottomBorder(), "bottom");
|
||||
setBorder(block, paragraph.getLeftBorder(), "left");
|
||||
setBorder(block, paragraph.getRightBorder(), "right");
|
||||
setBorder(block, paragraph.getTopBorder(), "top");
|
||||
setBorder( block, paragraph.getBottomBorder(), "bottom" );
|
||||
setBorder( block, paragraph.getLeftBorder(), "left" );
|
||||
setBorder( block, paragraph.getRightBorder(), "right" );
|
||||
setBorder( block, paragraph.getTopBorder(), "top" );
|
||||
|
||||
if (paragraph.pageBreakBefore()) {
|
||||
block.setAttribute("break-before", "page");
|
||||
}
|
||||
if ( paragraph.pageBreakBefore() )
|
||||
{
|
||||
block.setAttribute( "break-before", "page" );
|
||||
}
|
||||
|
||||
block.setAttribute("hyphenate",
|
||||
String.valueOf(paragraph.isAutoHyphenated()));
|
||||
block.setAttribute( "hyphenate",
|
||||
String.valueOf( paragraph.isAutoHyphenated() ) );
|
||||
|
||||
if (paragraph.keepOnPage()) {
|
||||
block.setAttribute("keep-together.within-page", "always");
|
||||
}
|
||||
if ( paragraph.keepOnPage() )
|
||||
{
|
||||
block.setAttribute( "keep-together.within-page", "always" );
|
||||
}
|
||||
|
||||
if (paragraph.keepWithNext()) {
|
||||
block.setAttribute("keep-with-next.within-page", "always");
|
||||
}
|
||||
if ( paragraph.keepWithNext() )
|
||||
{
|
||||
block.setAttribute( "keep-with-next.within-page", "always" );
|
||||
}
|
||||
|
||||
block.setAttribute("linefeed-treatment", "preserve");
|
||||
block.setAttribute("white-space-collapse", "false");
|
||||
block.setAttribute( "linefeed-treatment", "preserve" );
|
||||
block.setAttribute( "white-space-collapse", "false" );
|
||||
}
|
||||
|
||||
public static void setPictureProperties(Picture picture,
|
||||
Element graphicElement) {
|
||||
public static void setPictureProperties( Picture picture,
|
||||
Element graphicElement )
|
||||
{
|
||||
final int aspectRatioX = picture.getAspectRatioX();
|
||||
final int aspectRatioY = picture.getAspectRatioY();
|
||||
|
||||
if (aspectRatioX > 0) {
|
||||
graphicElement.setAttribute("content-width", ((picture.getDxaGoal()
|
||||
* aspectRatioX / 100) / WordToFoUtils.TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
} else
|
||||
graphicElement.setAttribute("content-width",
|
||||
(picture.getDxaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
|
||||
|
||||
if (aspectRatioY > 0)
|
||||
if ( aspectRatioX > 0 )
|
||||
{
|
||||
graphicElement
|
||||
.setAttribute("content-height", ((picture.getDyaGoal()
|
||||
* aspectRatioY / 100) / WordToFoUtils.TWIPS_PER_PT)
|
||||
+ "pt");
|
||||
.setAttribute( "content-width", ( ( picture.getDxaGoal()
|
||||
* aspectRatioX / 100 ) / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
else
|
||||
graphicElement.setAttribute("content-height",
|
||||
(picture.getDyaGoal() / WordToFoUtils.TWIPS_PER_PT) + "pt");
|
||||
graphicElement.setAttribute( "content-width",
|
||||
( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
|
||||
|
||||
if (aspectRatioX <= 0 || aspectRatioY <= 0) {
|
||||
graphicElement.setAttribute("scaling", "uniform");
|
||||
} else {
|
||||
graphicElement.setAttribute("scaling", "non-uniform");
|
||||
if ( aspectRatioY > 0 )
|
||||
graphicElement
|
||||
.setAttribute( "content-height", ( ( picture.getDyaGoal()
|
||||
* aspectRatioY / 100 ) / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
else
|
||||
graphicElement.setAttribute( "content-height",
|
||||
( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
|
||||
|
||||
if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
|
||||
{
|
||||
graphicElement.setAttribute( "scaling", "uniform" );
|
||||
}
|
||||
else
|
||||
{
|
||||
graphicElement.setAttribute( "scaling", "non-uniform" );
|
||||
}
|
||||
|
||||
graphicElement.setAttribute("vertical-align", "text-bottom");
|
||||
graphicElement.setAttribute( "vertical-align", "text-bottom" );
|
||||
|
||||
if (picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
|
||||
if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
|
||||
|| picture.getDyaCropBottom() != 0
|
||||
|| picture.getDxaCropLeft() != 0) {
|
||||
int rectTop = picture.getDyaCropTop() / WordToFoUtils.TWIPS_PER_PT;
|
||||
int rectRight = picture.getDxaCropRight()
|
||||
/ WordToFoUtils.TWIPS_PER_PT;
|
||||
int rectBottom = picture.getDyaCropBottom()
|
||||
/ WordToFoUtils.TWIPS_PER_PT;
|
||||
int rectLeft = picture.getDxaCropLeft()
|
||||
/ WordToFoUtils.TWIPS_PER_PT;
|
||||
graphicElement.setAttribute("clip", "rect(" + rectTop + "pt, "
|
||||
|| picture.getDxaCropLeft() != 0 )
|
||||
{
|
||||
int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
|
||||
int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
|
||||
int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
|
||||
int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
|
||||
graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
|
||||
+ rectRight + "pt, " + rectBottom + "pt, " + rectLeft
|
||||
+ "pt)");
|
||||
graphicElement.setAttribute("oveerflow", "hidden");
|
||||
+ "pt)" );
|
||||
graphicElement.setAttribute( "oveerflow", "hidden" );
|
||||
}
|
||||
}
|
||||
|
||||
public static void setTableCellProperties(TableRow tableRow,
|
||||
TableCell tableCell, Element element, boolean toppest,
|
||||
boolean bottomest, boolean leftest, boolean rightest) {
|
||||
element.setAttribute("width", (tableCell.getWidth() / TWIPS_PER_INCH)
|
||||
+ "in");
|
||||
element.setAttribute("padding-start",
|
||||
(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
|
||||
element.setAttribute("padding-end",
|
||||
(tableRow.getGapHalf() / TWIPS_PER_INCH) + "in");
|
||||
public static void setTableCellProperties( TableRow tableRow,
|
||||
TableCell tableCell, Element element, boolean toppest,
|
||||
boolean bottomest, boolean leftest, boolean rightest )
|
||||
{
|
||||
element.setAttribute( "width", ( tableCell.getWidth() / TWIPS_PER_INCH )
|
||||
+ "in" );
|
||||
element.setAttribute( "padding-start",
|
||||
( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
|
||||
element.setAttribute( "padding-end",
|
||||
( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in" );
|
||||
|
||||
BorderCode top = tableCell.getBrcTop() != null ? tableCell.getBrcTop()
|
||||
: toppest ? tableRow.getTopBorder() : tableRow
|
||||
.getHorizontalBorder();
|
||||
BorderCode bottom = tableCell.getBrcBottom() != null ? tableCell
|
||||
.getBrcBottom() : bottomest ? tableRow.getBottomBorder()
|
||||
: tableRow.getHorizontalBorder();
|
||||
BorderCode top = tableCell.getBrcTop() != null
|
||||
&& tableCell.getBrcTop().getBorderType() != 0 ? tableCell
|
||||
.getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
|
||||
.getHorizontalBorder();
|
||||
BorderCode bottom = tableCell.getBrcBottom() != null
|
||||
&& tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
|
||||
.getBrcBottom() : bottomest ? tableRow.getBottomBorder()
|
||||
: tableRow.getHorizontalBorder();
|
||||
|
||||
BorderCode left = tableCell.getBrcLeft() != null ? tableCell
|
||||
.getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
|
||||
.getVerticalBorder();
|
||||
BorderCode right = tableCell.getBrcRight() != null ? tableCell
|
||||
.getBrcRight() : rightest ? tableRow.getRightBorder()
|
||||
: tableRow.getVerticalBorder();
|
||||
BorderCode left = tableCell.getBrcLeft() != null
|
||||
&& tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
|
||||
.getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
|
||||
.getVerticalBorder();
|
||||
BorderCode right = tableCell.getBrcRight() != null
|
||||
&& tableCell.getBrcRight().getBorderType() != 0 ? tableCell
|
||||
.getBrcRight() : rightest ? tableRow.getRightBorder()
|
||||
: tableRow.getVerticalBorder();
|
||||
|
||||
setBorder(element, bottom, "bottom");
|
||||
setBorder(element, left, "left");
|
||||
setBorder(element, right, "right");
|
||||
setBorder(element, top, "top");
|
||||
setBorder( element, bottom, "bottom" );
|
||||
setBorder( element, left, "left" );
|
||||
setBorder( element, right, "right" );
|
||||
setBorder( element, top, "top" );
|
||||
}
|
||||
|
||||
public static void setTableRowProperties(TableRow tableRow,
|
||||
Element tableRowElement) {
|
||||
if (tableRow.getRowHeight() > 0) {
|
||||
tableRowElement.setAttribute("height",
|
||||
(tableRow.getRowHeight() / TWIPS_PER_INCH) + "in");
|
||||
}
|
||||
if (!tableRow.cantSplit()) {
|
||||
tableRowElement.setAttribute("keep-together", "always");
|
||||
}
|
||||
public static void setTableRowProperties( TableRow tableRow,
|
||||
Element tableRowElement )
|
||||
{
|
||||
if ( tableRow.getRowHeight() > 0 )
|
||||
{
|
||||
tableRowElement.setAttribute( "height",
|
||||
( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in" );
|
||||
}
|
||||
if ( !tableRow.cantSplit() )
|
||||
{
|
||||
tableRowElement.setAttribute( "keep-together", "always" );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,475 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStream;
import java.util.List;
import java.util.Stack;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
import org.apache.poi.hwpf.usermodel.Section;
|
||||
import org.apache.poi.hwpf.usermodel.SectionProperties;
|
||||
import org.apache.poi.hwpf.usermodel.Table;
|
||||
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
import org.w3c.dom.Document;
|
||||
import org.w3c.dom.Element;
|
||||
import org.w3c.dom.Text;
|
||||
|
||||
import static org.apache.poi.hwpf.extractor.AbstractWordUtils.TWIPS_PER_INCH;
|
||||
|
||||
/**
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class WordToHtmlExtractor extends AbstractWordExtractor
|
||||
{
|
||||
|
||||
/**
|
||||
* Holds properties values, applied to current <tt>p</tt> element. Those
|
||||
* properties shall not be doubled in children <tt>span</tt> elements.
|
||||
*/
|
||||
private static class BlockProperies
|
||||
{
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
|
||||
public BlockProperies( String pFontName, int pFontSize )
|
||||
{
|
||||
this.pFontName = pFontName;
|
||||
this.pFontSize = pFontSize;
|
||||
}
|
||||
}
|
||||
|
||||
private static final POILogger logger = POILogFactory
|
||||
.getLogger( WordToHtmlExtractor.class );
|
||||
|
||||
private static String getSectionStyle( Section section )
|
||||
{
|
||||
SectionProperties sep = WordToHtmlUtils.getSectionProperties( section );
|
||||
|
||||
float leftMargin = sep.getDxaLeft() / TWIPS_PER_INCH;
|
||||
float rightMargin = sep.getDxaRight() / TWIPS_PER_INCH;
|
||||
float topMargin = sep.getDyaTop() / TWIPS_PER_INCH;
|
||||
float bottomMargin = sep.getDyaBottom() / TWIPS_PER_INCH;
|
||||
|
||||
String style = "margin: " + topMargin + "in " + rightMargin + "in "
|
||||
+ bottomMargin + "in " + leftMargin + "in; ";
|
||||
|
||||
if ( sep.getCcolM1() > 0 )
|
||||
{
|
||||
style += "column-count: " + ( sep.getCcolM1() + 1 ) + "; ";
|
||||
if ( sep.getFEvenlySpaced() )
|
||||
{
|
||||
style += "column-gap: "
|
||||
+ ( sep.getDxaColumns() / TWIPS_PER_INCH ) + "in; ";
|
||||
}
|
||||
else
|
||||
{
|
||||
style += "column-gap: 0.25in; ";
|
||||
}
|
||||
}
|
||||
return style;
|
||||
}
|
||||
|
||||
/**
|
||||
* Java main() interface to interact with WordToHtmlExtractor
|
||||
*
|
||||
* <p>
|
||||
* Usage: WordToHtmlExtractor infile outfile
|
||||
* </p>
|
||||
* Where infile is an input .doc file ( Word 95-2007) which will be rendered
|
||||
* as HTML into outfile
|
||||
*/
|
||||
public static void main( String[] args )
|
||||
{
|
||||
if ( args.length < 2 )
|
||||
{
|
||||
System.err
|
||||
.println( "Usage: WordToHtmlExtractor <inputFile.doc> <saveTo.html>" );
|
||||
return;
|
||||
}
|
||||
|
||||
System.out.println( "Converting " + args[0] );
|
||||
System.out.println( "Saving output to " + args[1] );
|
||||
try
|
||||
{
|
||||
Document doc = WordToHtmlExtractor.process( new File( args[0] ) );
|
||||
|
||||
FileWriter out = new FileWriter( args[1] );
|
||||
DOMSource domSource = new DOMSource( doc );
|
||||
StreamResult streamResult = new StreamResult( out );
|
||||
|
||||
TransformerFactory tf = TransformerFactory.newInstance();
|
||||
Transformer serializer = tf.newTransformer();
|
||||
// TODO set encoding from a command argument
|
||||
serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
|
||||
serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
serializer.setOutputProperty( OutputKeys.METHOD, "html" );
|
||||
serializer.transform( domSource, streamResult );
|
||||
out.close();
|
||||
}
|
||||
catch ( Exception e )
|
||||
{
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
static Document process( File docFile ) throws Exception
|
||||
{
|
||||
final HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc( docFile );
|
||||
WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
wordToHtmlExtractor.processDocument( wordDocument );
|
||||
return wordToHtmlExtractor.getDocument();
|
||||
}
|
||||
|
||||
private final Stack<BlockProperies> blocksProperies = new Stack<BlockProperies>();
|
||||
|
||||
private final HtmlDocumentFacade htmlDocumentFacade;
|
||||
|
||||
/**
|
||||
* Creates new instance of {@link WordToHtmlExtractor}. Can be used for
|
||||
* output several {@link HWPFDocument}s into single HTML document.
|
||||
*
|
||||
* @param document
|
||||
* XML DOM Document used as HTML document
|
||||
*/
|
||||
public WordToHtmlExtractor( Document document )
|
||||
{
|
||||
this.htmlDocumentFacade = new HtmlDocumentFacade( document );
|
||||
}
|
||||
|
||||
public Document getDocument()
|
||||
{
|
||||
return htmlDocumentFacade.getDocument();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void outputCharacters( Element pElement,
|
||||
CharacterRun characterRun, String text )
|
||||
{
|
||||
Element span = htmlDocumentFacade.document.createElement( "span" );
|
||||
pElement.appendChild( span );
|
||||
|
||||
StringBuilder style = new StringBuilder();
|
||||
BlockProperies blockProperies = this.blocksProperies.peek();
|
||||
if ( characterRun.getFontName() != null
|
||||
&& !WordToHtmlUtils.equals( characterRun.getFontName(),
|
||||
blockProperies.pFontName ) )
|
||||
{
|
||||
style.append( "font-family: " + characterRun.getFontName() + "; " );
|
||||
}
|
||||
if ( characterRun.getFontSize() / 2 != blockProperies.pFontSize )
|
||||
{
|
||||
style.append( "font-size: " + characterRun.getFontSize() / 2 + "; " );
|
||||
}
|
||||
|
||||
WordToHtmlUtils.addCharactersProperties( characterRun, style );
|
||||
if ( style.length() != 0 )
|
||||
span.setAttribute( "style", style.toString() );
|
||||
|
||||
Text textNode = htmlDocumentFacade.createText( text );
|
||||
span.appendChild( textNode );
|
||||
}
|
||||
|
||||
protected void processHyperlink( HWPFDocumentCore wordDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String hyperlink, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = htmlDocumentFacade.createHyperlink( hyperlink );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( wordDocument, currentTableLevel, paragraph,
|
||||
basicLink, characterRuns, beginTextInclusive,
|
||||
endTextExclusive );
|
||||
}
|
||||
|
||||
/**
|
||||
* This method shall store image bytes in external file and convert it if
|
||||
* necessary. Images shall be stored using PNG format. Other formats may be
|
||||
* not supported by user browser.
|
||||
* <p>
|
||||
* Please note the
|
||||
* {@link WordToHtmlUtils#setPictureProperties(Picture, Element)} method.
|
||||
*
|
||||
* @param currentBlock
|
||||
* currently processed HTML element, like <tt>p</tt>. Shall be
|
||||
* used as parent of newly created <tt>img</tt>
|
||||
* @param inlined
|
||||
* if image is inlined
|
||||
* @param picture
|
||||
* HWPF object, contained picture data and properties
|
||||
*/
|
||||
protected void processImage( Element currentBlock, boolean inlined,
|
||||
Picture picture )
|
||||
{
|
||||
// no default implementation -- skip
|
||||
currentBlock.appendChild( htmlDocumentFacade.document
|
||||
.createComment( "Image link to '"
|
||||
+ picture.suggestFullFileName() + "' can be here" ) );
|
||||
}
|
||||
|
||||
protected void processPageref( HWPFDocumentCore hwpfDocument,
|
||||
Element currentBlock, Paragraph paragraph,
|
||||
List<CharacterRun> characterRuns, int currentTableLevel,
|
||||
String pageref, int beginTextInclusive, int endTextExclusive )
|
||||
{
|
||||
Element basicLink = htmlDocumentFacade.createHyperlink( "#" + pageref );
|
||||
currentBlock.appendChild( basicLink );
|
||||
|
||||
if ( beginTextInclusive < endTextExclusive )
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
basicLink, characterRuns, beginTextInclusive,
|
||||
endTextExclusive );
|
||||
}
|
||||
|
||||
protected void processParagraph( HWPFDocumentCore hwpfDocument,
|
||||
Element parentFopElement, int currentTableLevel,
|
||||
Paragraph paragraph, String bulletText )
|
||||
{
|
||||
final Element pElement = htmlDocumentFacade.createParagraph();
|
||||
parentFopElement.appendChild( pElement );
|
||||
|
||||
StringBuilder style = new StringBuilder();
|
||||
WordToHtmlUtils.addParagraphProperties( paragraph, style );
|
||||
|
||||
final int charRuns = paragraph.numCharacterRuns();
|
||||
|
||||
if ( charRuns == 0 )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
{
|
||||
final String pFontName;
|
||||
final int pFontSize;
|
||||
final CharacterRun characterRun = paragraph.getCharacterRun( 0 );
|
||||
if ( characterRun != null )
|
||||
{
|
||||
pFontSize = characterRun.getFontSize() / 2;
|
||||
pFontName = characterRun.getFontName();
|
||||
WordToHtmlUtils.addFontFamily( pFontName, style );
|
||||
WordToHtmlUtils.addFontSize( pFontSize, style );
|
||||
}
|
||||
else
|
||||
{
|
||||
pFontSize = -1;
|
||||
pFontName = WordToHtmlUtils.EMPTY;
|
||||
}
|
||||
blocksProperies.push( new BlockProperies( pFontName, pFontSize ) );
|
||||
}
|
||||
try
|
||||
{
|
||||
if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
|
||||
{
|
||||
Text textNode = htmlDocumentFacade.createText( bulletText );
|
||||
pElement.appendChild( textNode );
|
||||
}
|
||||
|
||||
List<CharacterRun> characterRuns = WordToHtmlUtils
|
||||
.findCharacterRuns( paragraph );
|
||||
processCharacters( hwpfDocument, currentTableLevel, paragraph,
|
||||
pElement, characterRuns, 0, characterRuns.size() );
|
||||
}
|
||||
finally
|
||||
{
|
||||
blocksProperies.pop();
|
||||
}
|
||||
|
||||
if ( style.length() > 0 )
|
||||
pElement.setAttribute( "style", style.toString() );
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
protected void processSection( HWPFDocumentCore wordDocument,
|
||||
Section section, int sectionCounter )
|
||||
{
|
||||
Element div = htmlDocumentFacade.document.createElement( "div" );
|
||||
div.setAttribute( "style", getSectionStyle( section ) );
|
||||
htmlDocumentFacade.body.appendChild( div );
|
||||
|
||||
processSectionParagraphes( wordDocument, div, section, 0 );
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void processSingleSection( HWPFDocumentCore wordDocument,
|
||||
Section section )
|
||||
{
|
||||
htmlDocumentFacade.body.setAttribute( "style",
|
||||
getSectionStyle( section ) );
|
||||
|
||||
processSectionParagraphes( wordDocument, htmlDocumentFacade.body,
|
||||
section, 0 );
|
||||
}
|
||||
|
||||
protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
|
||||
Table table, int thisTableLevel )
|
||||
{
|
||||
Element tableHeader = htmlDocumentFacade.createTableHeader();
|
||||
Element tableBody = htmlDocumentFacade.createTableBody();
|
||||
|
||||
final int tableRows = table.numRows();
|
||||
|
||||
int maxColumns = Integer.MIN_VALUE;
|
||||
for ( int r = 0; r < tableRows; r++ )
|
||||
{
|
||||
maxColumns = Math.max( maxColumns, table.getRow( r ).numCells() );
|
||||
}
|
||||
|
||||
for ( int r = 0; r < tableRows; r++ )
|
||||
{
|
||||
TableRow tableRow = table.getRow( r );
|
||||
|
||||
Element tableRowElement = htmlDocumentFacade.createTableRow();
|
||||
StringBuilder tableRowStyle = new StringBuilder();
|
||||
WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );
|
||||
|
||||
final int rowCells = tableRow.numCells();
|
||||
for ( int c = 0; c < rowCells; c++ )
|
||||
{
|
||||
TableCell tableCell = tableRow.getCell( c );
|
||||
|
||||
if ( tableCell.isMerged() && !tableCell.isFirstMerged() )
|
||||
continue;
|
||||
|
||||
if ( tableCell.isVerticallyMerged()
|
||||
&& !tableCell.isFirstVerticallyMerged() )
|
||||
continue;
|
||||
|
||||
Element tableCellElement;
|
||||
if ( tableRow.isTableHeader() )
|
||||
{
|
||||
tableCellElement = htmlDocumentFacade
|
||||
.createTableHeaderCell();
|
||||
}
|
||||
else
|
||||
{
|
||||
tableCellElement = htmlDocumentFacade.createTableCell();
|
||||
}
|
||||
StringBuilder tableCellStyle = new StringBuilder();
|
||||
WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
|
||||
r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
|
||||
tableCellStyle );
|
||||
|
||||
if ( tableCell.isFirstMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
for ( int c1 = c; c1 < rowCells; c1++ )
|
||||
{
|
||||
TableCell nextCell = tableRow.getCell( c1 );
|
||||
if ( nextCell.isMerged() )
|
||||
count++;
|
||||
if ( !nextCell.isMerged() )
|
||||
break;
|
||||
}
|
||||
tableCellElement.setAttribute( "colspan", "" + count );
|
||||
}
|
||||
else
|
||||
{
|
||||
if ( c == rowCells - 1 && c != maxColumns - 1 )
|
||||
{
|
||||
tableCellElement.setAttribute( "colspan", ""
|
||||
+ ( maxColumns - c ) );
|
||||
}
|
||||
}
|
||||
|
||||
if ( tableCell.isFirstVerticallyMerged() )
|
||||
{
|
||||
int count = 0;
|
||||
for ( int r1 = r; r1 < tableRows; r1++ )
|
||||
{
|
||||
TableRow nextRow = table.getRow( r1 );
|
||||
if ( nextRow.numCells() < c )
|
||||
break;
|
||||
TableCell nextCell = nextRow.getCell( c );
|
||||
if ( nextCell.isVerticallyMerged() )
|
||||
count++;
|
||||
if ( !nextCell.isVerticallyMerged() )
|
||||
break;
|
||||
}
|
||||
tableCellElement.setAttribute( "rowspan", "" + count );
|
||||
}
|
||||
|
||||
processSectionParagraphes( hwpfDocument, tableCellElement,
|
||||
tableCell, thisTableLevel );
|
||||
|
||||
if ( !tableCellElement.hasChildNodes() )
|
||||
{
|
||||
tableCellElement.appendChild( htmlDocumentFacade
|
||||
.createParagraph() );
|
||||
}
|
||||
if ( tableCellStyle.length() > 0 )
|
||||
tableCellElement.setAttribute( "style",
|
||||
tableCellStyle.toString() );
|
||||
|
||||
tableRowElement.appendChild( tableCellElement );
|
||||
}
|
||||
|
||||
if ( tableRowStyle.length() > 0 )
|
||||
tableRowElement
|
||||
.setAttribute( "style", tableRowStyle.toString() );
|
||||
|
||||
if ( tableRow.isTableHeader() )
|
||||
{
|
||||
tableHeader.appendChild( tableRowElement );
|
||||
}
|
||||
else
|
||||
{
|
||||
tableBody.appendChild( tableRowElement );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
final Element tableElement = htmlDocumentFacade.createTable();
|
||||
if ( tableHeader.hasChildNodes() )
|
||||
{
|
||||
tableElement.appendChild( tableHeader );
|
||||
}
|
||||
if ( tableBody.hasChildNodes() )
|
||||
{
|
||||
tableElement.appendChild( tableBody );
|
||||
flow.appendChild( tableElement );
|
||||
}
|
||||
else
|
||||
{
|
||||
logger.log(
|
||||
POILogger.WARN,
|
||||
"Table without body starting on offset "
|
||||
+ table.getStartOffset() + " -- "
|
||||
+ table.getEndOffset() );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,292 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import org.apache.poi.hwpf.usermodel.BorderCode;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterProperties;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
import org.apache.poi.hwpf.usermodel.TableCell;
|
||||
import org.apache.poi.hwpf.usermodel.TableRow;
|
||||
import org.w3c.dom.Element;
|
||||
|
||||
public class WordToHtmlUtils extends AbstractWordUtils
|
||||
{
|
||||
public static void addBold( final boolean bold, StringBuilder style )
|
||||
{
|
||||
style.append( "font-weight: " + ( bold ? "bold" : "normal" ) + ";" );
|
||||
}
|
||||
|
||||
public static void addBorder( BorderCode borderCode, String where,
|
||||
StringBuilder style )
|
||||
{
|
||||
if ( borderCode == null || borderCode.getBorderType() == 0 )
|
||||
return;
|
||||
|
||||
if ( isEmpty( where ) )
|
||||
{
|
||||
style.append( "border-style: " + getBorderType( borderCode ) + "; " );
|
||||
style.append( "border-color: " + getColor( borderCode.getColor() )
|
||||
+ "; " );
|
||||
style.append( "border-width: " + getBorderWidth( borderCode )
|
||||
+ "; " );
|
||||
}
|
||||
else
|
||||
{
|
||||
style.append( "border-" + where + "-style: "
|
||||
+ getBorderType( borderCode ) + "; " );
|
||||
style.append( "border-" + where + "-color: "
|
||||
+ getColor( borderCode.getColor() ) + "; " );
|
||||
style.append( "border-" + where + "-width: "
|
||||
+ getBorderWidth( borderCode ) + "; " );
|
||||
}
|
||||
}
|
||||
|
||||
public static void addCharactersProperties(
|
||||
final CharacterRun characterRun, StringBuilder style )
|
||||
{
|
||||
final CharacterProperties clonedProperties = characterRun
|
||||
.cloneProperties();
|
||||
|
||||
if ( characterRun.isBold() )
|
||||
{
|
||||
style.append( "font-weight: bold; " );
|
||||
}
|
||||
if ( characterRun.isItalic() )
|
||||
{
|
||||
style.append( "font-style: italic; " );
|
||||
}
|
||||
|
||||
addBorder( clonedProperties.getBrc(), EMPTY, style );
|
||||
|
||||
if ( characterRun.isCapitalized() )
|
||||
{
|
||||
style.append( "text-transform: uppercase; " );
|
||||
}
|
||||
if ( characterRun.isHighlighted() )
|
||||
{
|
||||
style.append( "background-color: "
|
||||
+ getColor( clonedProperties.getIcoHighlight() ) + "; " );
|
||||
}
|
||||
if ( characterRun.isStrikeThrough() )
|
||||
{
|
||||
style.append( "text-decoration: line-through; " );
|
||||
}
|
||||
if ( characterRun.isShadowed() )
|
||||
{
|
||||
style.append( "text-shadow: " + characterRun.getFontSize() / 24
|
||||
+ "pt; " );
|
||||
}
|
||||
if ( characterRun.isSmallCaps() )
|
||||
{
|
||||
style.append( "font-variant: small-caps; " );
|
||||
}
|
||||
if ( characterRun.getSubSuperScriptIndex() == 1 )
|
||||
{
|
||||
style.append( "baseline-shift: super; " );
|
||||
style.append( "font-size: smaller; " );
|
||||
}
|
||||
if ( characterRun.getSubSuperScriptIndex() == 2 )
|
||||
{
|
||||
style.append( "baseline-shift: sub; " );
|
||||
style.append( "font-size: smaller; " );
|
||||
}
|
||||
if ( characterRun.getUnderlineCode() > 0 )
|
||||
{
|
||||
style.append( "text-decoration: underline; " );
|
||||
}
|
||||
if ( characterRun.isVanished() )
|
||||
{
|
||||
style.append( "visibility: hidden; " );
|
||||
}
|
||||
}
|
||||
|
||||
public static void addFontFamily( final String fontFamily,
|
||||
StringBuilder style )
|
||||
{
|
||||
if ( isEmpty( fontFamily ) )
|
||||
return;
|
||||
|
||||
style.append( "font-family: " + fontFamily );
|
||||
}
|
||||
|
||||
public static void addFontSize( final int fontSize, StringBuilder style )
|
||||
{
|
||||
style.append( "font-size: " + fontSize );
|
||||
}
|
||||
|
||||
public static void addIndent( Paragraph paragraph, StringBuilder style )
|
||||
{
|
||||
addIndent( style, "text-indent", paragraph.getFirstLineIndent() );
|
||||
addIndent( style, "start-indent", paragraph.getIndentFromLeft() );
|
||||
addIndent( style, "end-indent", paragraph.getIndentFromRight() );
|
||||
addIndent( style, "space-before", paragraph.getSpacingBefore() );
|
||||
addIndent( style, "space-after", paragraph.getSpacingAfter() );
|
||||
}
|
||||
|
||||
private static void addIndent( StringBuilder style, final String cssName,
|
||||
final int twipsValue )
|
||||
{
|
||||
if ( twipsValue == 0 )
|
||||
return;
|
||||
|
||||
style.append( cssName + ": " + ( twipsValue / TWIPS_PER_PT ) + "pt; " );
|
||||
}
|
||||
|
||||
public static void addJustification( Paragraph paragraph,
|
||||
final StringBuilder style )
|
||||
{
|
||||
String justification = getJustification( paragraph.getJustification() );
|
||||
if ( isNotEmpty( justification ) )
|
||||
style.append( "text-align: " + justification + "; " );
|
||||
}
|
||||
|
||||
public static void addParagraphProperties( Paragraph paragraph,
|
||||
StringBuilder style )
|
||||
{
|
||||
addIndent( paragraph, style );
|
||||
addJustification( paragraph, style );
|
||||
|
||||
addBorder( paragraph.getBottomBorder(), "bottom", style );
|
||||
addBorder( paragraph.getLeftBorder(), "left", style );
|
||||
addBorder( paragraph.getRightBorder(), "right", style );
|
||||
addBorder( paragraph.getTopBorder(), "top", style );
|
||||
|
||||
if ( paragraph.pageBreakBefore() )
|
||||
{
|
||||
style.append( "break-before: page; " );
|
||||
}
|
||||
|
||||
style.append( "hyphenate: " + paragraph.isAutoHyphenated() + "; " );
|
||||
|
||||
if ( paragraph.keepOnPage() )
|
||||
{
|
||||
style.append( "keep-together.within-page: always; " );
|
||||
}
|
||||
|
||||
if ( paragraph.keepWithNext() )
|
||||
{
|
||||
style.append( "keep-with-next.within-page: always; " );
|
||||
}
|
||||
|
||||
style.append( "linefeed-treatment: preserve; " );
|
||||
style.append( "white-space-collapse: false; " );
|
||||
}
|
||||
|
||||
public static void addTableCellProperties( TableRow tableRow,
|
||||
TableCell tableCell, boolean toppest, boolean bottomest,
|
||||
boolean leftest, boolean rightest, StringBuilder style )
|
||||
{
|
||||
style.append( "width: " + ( tableCell.getWidth() / TWIPS_PER_INCH )
|
||||
+ "in; " );
|
||||
style.append( "padding-start: "
|
||||
+ ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
|
||||
style.append( "padding-end: "
|
||||
+ ( tableRow.getGapHalf() / TWIPS_PER_INCH ) + "in; " );
|
||||
|
||||
BorderCode top = tableCell.getBrcTop() != null
|
||||
&& tableCell.getBrcTop().getBorderType() != 0 ? tableCell
|
||||
.getBrcTop() : toppest ? tableRow.getTopBorder() : tableRow
|
||||
.getHorizontalBorder();
|
||||
BorderCode bottom = tableCell.getBrcBottom() != null
|
||||
&& tableCell.getBrcBottom().getBorderType() != 0 ? tableCell
|
||||
.getBrcBottom() : bottomest ? tableRow.getBottomBorder()
|
||||
: tableRow.getHorizontalBorder();
|
||||
|
||||
BorderCode left = tableCell.getBrcLeft() != null
|
||||
&& tableCell.getBrcLeft().getBorderType() != 0 ? tableCell
|
||||
.getBrcLeft() : leftest ? tableRow.getLeftBorder() : tableRow
|
||||
.getVerticalBorder();
|
||||
BorderCode right = tableCell.getBrcRight() != null
|
||||
&& tableCell.getBrcRight().getBorderType() != 0 ? tableCell
|
||||
.getBrcRight() : rightest ? tableRow.getRightBorder()
|
||||
: tableRow.getVerticalBorder();
|
||||
|
||||
addBorder( bottom, "bottom", style );
|
||||
addBorder( left, "left", style );
|
||||
addBorder( right, "right", style );
|
||||
addBorder( top, "top", style );
|
||||
}
|
||||
|
||||
public static void addTableRowProperties( TableRow tableRow,
|
||||
StringBuilder style )
|
||||
{
|
||||
if ( tableRow.getRowHeight() > 0 )
|
||||
{
|
||||
style.append( "height: "
|
||||
+ ( tableRow.getRowHeight() / TWIPS_PER_INCH ) + "in; " );
|
||||
}
|
||||
if ( !tableRow.cantSplit() )
|
||||
{
|
||||
style.append( "keep-together: always; " );
|
||||
}
|
||||
}
|
||||
|
||||
public static void setPictureProperties( Picture picture,
|
||||
Element graphicElement )
|
||||
{
|
||||
final int aspectRatioX = picture.getAspectRatioX();
|
||||
final int aspectRatioY = picture.getAspectRatioY();
|
||||
|
||||
if ( aspectRatioX > 0 )
|
||||
{
|
||||
graphicElement
|
||||
.setAttribute( "content-width", ( ( picture.getDxaGoal()
|
||||
* aspectRatioX / 100 ) / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
}
|
||||
else
|
||||
graphicElement.setAttribute( "content-width",
|
||||
( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" );
|
||||
|
||||
if ( aspectRatioY > 0 )
|
||||
graphicElement
|
||||
.setAttribute( "content-height", ( ( picture.getDyaGoal()
|
||||
* aspectRatioY / 100 ) / TWIPS_PER_PT )
|
||||
+ "pt" );
|
||||
else
|
||||
graphicElement.setAttribute( "content-height",
|
||||
( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" );
|
||||
|
||||
if ( aspectRatioX <= 0 || aspectRatioY <= 0 )
|
||||
{
|
||||
graphicElement.setAttribute( "scaling", "uniform" );
|
||||
}
|
||||
else
|
||||
{
|
||||
graphicElement.setAttribute( "scaling", "non-uniform" );
|
||||
}
|
||||
|
||||
graphicElement.setAttribute( "vertical-align", "text-bottom" );
|
||||
|
||||
if ( picture.getDyaCropTop() != 0 || picture.getDxaCropRight() != 0
|
||||
|| picture.getDyaCropBottom() != 0
|
||||
|| picture.getDxaCropLeft() != 0 )
|
||||
{
|
||||
int rectTop = picture.getDyaCropTop() / TWIPS_PER_PT;
|
||||
int rectRight = picture.getDxaCropRight() / TWIPS_PER_PT;
|
||||
int rectBottom = picture.getDyaCropBottom() / TWIPS_PER_PT;
|
||||
int rectLeft = picture.getDxaCropLeft() / TWIPS_PER_PT;
|
||||
graphicElement.setAttribute( "clip", "rect(" + rectTop + "pt, "
|
||||
+ rectRight + "pt, " + rectBottom + "pt, " + rectLeft
|
||||
+ "pt)" );
|
||||
graphicElement.setAttribute( "oveerflow", "hidden" );
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,114 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.TestSuite;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocumentCore;
|
||||
|
||||
public class TestWordToExtractorSuite
|
||||
{
|
||||
/**
|
||||
* YK: a quick hack to exclude failing documents from the suite.
|
||||
*/
|
||||
private static List<String> failingFiles = Arrays.asList();
|
||||
|
||||
public static Test suite()
|
||||
{
|
||||
TestSuite suite = new TestSuite();
|
||||
|
||||
File directory = POIDataSamples.getDocumentInstance().getFile(
|
||||
"../document" );
|
||||
for ( final File child : directory.listFiles( new FilenameFilter()
|
||||
{
|
||||
public boolean accept( File dir, String name )
|
||||
{
|
||||
return name.endsWith( ".doc" ) && !failingFiles.contains( name );
|
||||
}
|
||||
} ) )
|
||||
{
|
||||
final String name = child.getName();
|
||||
|
||||
suite.addTest( new TestCase( name + " [FO]" )
|
||||
{
|
||||
public void runTest() throws Exception
|
||||
{
|
||||
test( child, false );
|
||||
}
|
||||
} );
|
||||
suite.addTest( new TestCase( name + " [HTML]" )
|
||||
{
|
||||
public void runTest() throws Exception
|
||||
{
|
||||
test( child, true );
|
||||
}
|
||||
} );
|
||||
|
||||
}
|
||||
|
||||
return suite;
|
||||
}
|
||||
|
||||
protected static void test( File child, boolean html ) throws Exception
|
||||
{
|
||||
HWPFDocumentCore hwpfDocument;
|
||||
try
|
||||
{
|
||||
hwpfDocument = AbstractWordUtils.loadDoc( child );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
// unable to parse file -- not WordToFoExtractor fault
|
||||
return;
|
||||
}
|
||||
|
||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
wordToFoExtractor.processDocument( hwpfDocument );
|
||||
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
|
||||
Transformer transformer = TransformerFactory.newInstance()
|
||||
.newTransformer();
|
||||
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
|
||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
transformer.transform(
|
||||
new DOMSource( wordToFoExtractor.getDocument() ),
|
||||
new StreamResult( stringWriter ) );
|
||||
|
||||
if ( html )
|
||||
transformer.setOutputProperty( OutputKeys.METHOD, "html" );
|
||||
|
||||
// no exceptions
|
||||
}
|
||||
}
|
@ -1,92 +0,0 @@
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FilenameFilter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import org.apache.poi.EncryptedDocumentException;
|
||||
|
||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||
|
||||
import junit.framework.Test;
|
||||
import junit.framework.TestCase;
|
||||
import junit.framework.TestSuite;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
public class TestWordToFoExtractorSuite
|
||||
{
|
||||
/**
|
||||
* YK: a quick hack to exclude failing documents from the suite.
|
||||
*
|
||||
* WordToFoExtractor stumbles on Bug33519.doc with a NPE
|
||||
*/
|
||||
private static List<String> failingFiles = Arrays.asList("Bug33519.doc");
|
||||
|
||||
public static Test suite() {
|
||||
TestSuite suite = new TestSuite();
|
||||
|
||||
File directory = POIDataSamples.getDocumentInstance().getFile(
|
||||
"../document");
|
||||
for (final File child : directory.listFiles(new FilenameFilter() {
|
||||
public boolean accept(File dir, String name) {
|
||||
return name.endsWith(".doc") && !failingFiles.contains(name);
|
||||
}
|
||||
})) {
|
||||
final String name = child.getName();
|
||||
suite.addTest(new TestCase(name) {
|
||||
public void runTest() throws Exception {
|
||||
test(child);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
return suite;
|
||||
}
|
||||
|
||||
protected static void test( File child ) throws Exception
|
||||
{
|
||||
HWPFDocument hwpfDocument;
|
||||
FileInputStream fileInputStream = new FileInputStream( child );
|
||||
try
|
||||
{
|
||||
hwpfDocument = new HWPFDocument( fileInputStream );
|
||||
}
|
||||
catch ( Exception exc )
|
||||
{
|
||||
// unable to parse file -- not WordToFoExtractor fault
|
||||
return;
|
||||
}
|
||||
finally
|
||||
{
|
||||
fileInputStream.close();
|
||||
}
|
||||
|
||||
WordToFoExtractor wordToFoExtractor = new WordToFoExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
wordToFoExtractor.processDocument( hwpfDocument );
|
||||
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
|
||||
Transformer transformer = TransformerFactory.newInstance()
|
||||
.newTransformer();
|
||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
transformer.transform(
|
||||
new DOMSource( wordToFoExtractor.getDocument() ),
|
||||
new StreamResult( stringWriter ) );
|
||||
// no exceptions
|
||||
}
|
||||
}
|
@ -0,0 +1,95 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.hwpf.extractor;
|
||||
|
||||
import java.io.StringWriter;
|
||||
|
||||
import javax.xml.parsers.DocumentBuilderFactory;
|
||||
import javax.xml.transform.OutputKeys;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerFactory;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.stream.StreamResult;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
|
||||
/**
|
||||
* Test cases for {@link WordToFoExtractor}
|
||||
*
|
||||
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||
*/
|
||||
public class TestWordToHtmlExtractor extends TestCase
|
||||
{
|
||||
private static String getHtmlText( final String sampleFileName )
|
||||
throws Exception
|
||||
{
|
||||
HWPFDocument hwpfDocument = new HWPFDocument( POIDataSamples
|
||||
.getDocumentInstance().openResourceAsStream( sampleFileName ) );
|
||||
|
||||
WordToHtmlExtractor wordToHtmlExtractor = new WordToHtmlExtractor(
|
||||
DocumentBuilderFactory.newInstance().newDocumentBuilder()
|
||||
.newDocument() );
|
||||
wordToHtmlExtractor.processDocument( hwpfDocument );
|
||||
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
|
||||
Transformer transformer = TransformerFactory.newInstance()
|
||||
.newTransformer();
|
||||
transformer.setOutputProperty( OutputKeys.INDENT, "yes" );
|
||||
transformer.setOutputProperty( OutputKeys.ENCODING, "utf-8" );
|
||||
transformer.setOutputProperty( OutputKeys.METHOD, "html" );
|
||||
transformer.transform(
|
||||
new DOMSource( wordToHtmlExtractor.getDocument() ),
|
||||
new StreamResult( stringWriter ) );
|
||||
|
||||
String result = stringWriter.toString();
|
||||
return result;
|
||||
}
|
||||
|
||||
public void testBug46610_2() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "Bug46610_2.doc" );
|
||||
assertTrue( result
|
||||
.contains( "012345678911234567892123456789312345678941234567890123456789112345678921234567893123456789412345678" ) );
|
||||
}
|
||||
|
||||
public void testEquation() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "equation.doc" );
|
||||
|
||||
assertTrue( result
|
||||
.contains( "<!--Image link to '0.emf' can be here-->" ) );
|
||||
}
|
||||
|
||||
public void testHyperlink() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "hyperlink.doc" );
|
||||
|
||||
assertTrue( result.contains( "<a href=\"http://testuri.org/\">" ) );
|
||||
assertTrue( result.contains( "Hyperlink text" ) );
|
||||
}
|
||||
|
||||
public void testPageref() throws Exception
|
||||
{
|
||||
String result = getHtmlText( "pageref.doc" );
|
||||
|
||||
assertTrue( result.contains( "<a href=\"#userref\">" ) );
|
||||
assertTrue( result.contains( "1" ) );
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user