Simplify the API for processing parts of a Word file, such as including page headers / footers in plain text and HTML output

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1156823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-08-11 20:50:42 +00:00
parent 79ecde0c88
commit 8fdb655c4a
4 changed files with 79 additions and 56 deletions

View File

@ -132,6 +132,16 @@ public abstract class AbstractWordConverter
private PicturesManager picturesManager; private PicturesManager picturesManager;
/**
* Hook for special actions that need to run after processing is complete,
* such as updating stylesheets or building the document notes list. Usually
* it is called once, but it is okay to call it several times.
* <p>
* The default implementation does nothing; subclasses override it as needed.
*/
protected void afterProcess()
{
// by default no such actions are needed
}
protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
{ {
Triplet original = new Triplet(); Triplet original = new Triplet();
@ -594,7 +604,17 @@ public abstract class AbstractWordConverter
processDocumentInformation( summaryInformation ); processDocumentInformation( summaryInformation );
} }
processDocumentPart( wordDocument, wordDocument.getRange() ); final Range docRange = wordDocument.getRange();
if ( docRange.numSections() == 1 )
{
processSingleSection( wordDocument, docRange.getSection( 0 ) );
afterProcess();
return;
}
processDocumentPart( wordDocument, docRange );
afterProcess();
} }
protected abstract void processDocumentInformation( protected abstract void processDocumentInformation(
@ -603,12 +623,6 @@ public abstract class AbstractWordConverter
protected void processDocumentPart( HWPFDocumentCore wordDocument, protected void processDocumentPart( HWPFDocumentCore wordDocument,
final Range range ) final Range range )
{ {
if ( range.numSections() == 1 )
{
processSingleSection( wordDocument, range.getSection( 0 ) );
return;
}
for ( int s = 0; s < range.numSections(); s++ ) for ( int s = 0; s < range.numSections(); s++ )
{ {
processSection( wordDocument, range.getSection( s ), s ); processSection( wordDocument, range.getSection( s ), s );

View File

@ -180,6 +180,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
this.htmlDocumentFacade = new HtmlDocumentFacade( document ); this.htmlDocumentFacade = new HtmlDocumentFacade( document );
} }
/**
* Post-processing step for the HTML converter: if a {@code notes} node has
* been created, appends it to the body obtained from the HTML document
* facade, then calls {@code updateStylesheet()} on the facade.
*/
@Override
protected void afterProcess()
{
// notes is null when no notes node was created, so there is nothing to attach
if ( notes != null )
htmlDocumentFacade.getBody().appendChild( notes );
htmlDocumentFacade.updateStylesheet();
}
public Document getDocument() public Document getDocument()
{ {
return htmlDocumentFacade.getDocument(); return htmlDocumentFacade.getDocument();
@ -241,17 +250,6 @@ public class WordToHtmlConverter extends AbstractWordConverter
processCharacters( wordDocument, currentTableLevel, range, parent ); processCharacters( wordDocument, currentTableLevel, range, parent );
} }
@Override
public void processDocument( HWPFDocumentCore wordDocument )
{
super.processDocument( wordDocument );
if ( notes != null )
htmlDocumentFacade.getBody().appendChild( notes );
htmlDocumentFacade.updateStylesheet();
}
@Override @Override
protected void processDocumentInformation( protected void processDocumentInformation(
SummaryInformation summaryInformation ) SummaryInformation summaryInformation )
@ -270,6 +268,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
.addDescription( summaryInformation.getComments() ); .addDescription( summaryInformation.getComments() );
} }
/**
* Processes the given range through the superclass implementation and then
* runs {@link #afterProcess()}, so the post-processing actions are applied
* whenever a document part is converted through this public entry point.
*
* @param wordDocument
*            the document the range is passed along with
* @param range
*            the part of the document to process
*/
@Override
public void processDocumentPart( HWPFDocumentCore wordDocument, Range range )
{
super.processDocumentPart( wordDocument, range );
afterProcess();
}
@Override @Override
protected void processDrawnObject( HWPFDocument doc, protected void processDrawnObject( HWPFDocument doc,
CharacterRun characterRun, OfficeDrawing officeDrawing, CharacterRun characterRun, OfficeDrawing officeDrawing,

View File

@ -166,6 +166,13 @@ public class WordToTextConverter extends AbstractWordConverter
this.textDocumentFacade = new TextDocumentFacade( document ); this.textDocumentFacade = new TextDocumentFacade( document );
} }
/**
* Post-processing step for the text converter: if a {@code notes} node has
* been created, appends it to the body obtained from the text document
* facade.
*/
@Override
protected void afterProcess()
{
// notes is null when no notes node was created, so there is nothing to attach
if ( notes != null )
textDocumentFacade.getBody().appendChild( notes );
}
public Document getDocument() public Document getDocument()
{ {
return textDocumentFacade.getDocument(); return textDocumentFacade.getDocument();
@ -208,15 +215,6 @@ public class WordToTextConverter extends AbstractWordConverter
processCharacters( wordDocument, currentTableLevel, range, currentBlock ); processCharacters( wordDocument, currentTableLevel, range, currentBlock );
} }
@Override
public void processDocument( HWPFDocumentCore wordDocument )
{
super.processDocument( wordDocument );
if ( notes != null )
textDocumentFacade.getBody().appendChild( notes );
}
@Override @Override
protected void processDocumentInformation( protected void processDocumentInformation(
SummaryInformation summaryInformation ) SummaryInformation summaryInformation )
@ -241,6 +239,14 @@ public class WordToTextConverter extends AbstractWordConverter
} }
} }
/**
* Processes the given range through the superclass implementation and then
* runs {@link #afterProcess()}, so the post-processing actions are applied
* whenever a document part is converted through this public entry point.
*
* @param wordDocument
*            the document the range is passed along with
* @param range
*            the part of the document to process
*/
@Override
public void processDocumentPart( HWPFDocumentCore wordDocument,
Range range )
{
super.processDocumentPart( wordDocument, range );
afterProcess();
}
@Override @Override
protected void processDrawnObject( HWPFDocument doc, protected void processDrawnObject( HWPFDocument doc,
CharacterRun characterRun, OfficeDrawing officeDrawing, CharacterRun characterRun, OfficeDrawing officeDrawing,

View File

@ -20,14 +20,10 @@ package org.apache.poi.hwpf.extractor;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.poi.hwpf.converter.WordToTextConverter;
import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.converter.WordToTextConverter;
import org.apache.poi.hwpf.usermodel.HeaderStories; import org.apache.poi.hwpf.usermodel.HeaderStories;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Range;
@ -72,6 +68,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @deprecated Use {@link #WordExtractor(DirectoryNode)} instead * @deprecated Use {@link #WordExtractor(DirectoryNode)} instead
*/ */
@Deprecated @Deprecated
@SuppressWarnings( "unused" )
public WordExtractor( DirectoryNode dir, POIFSFileSystem fs ) public WordExtractor( DirectoryNode dir, POIFSFileSystem fs )
throws IOException throws IOException
{ {
@ -290,34 +287,35 @@ public final class WordExtractor extends POIOLE2TextExtractor
{ {
try try
{ {
final StringWriter stringWriter = new StringWriter(); WordToTextConverter wordToTextConverter = new WordToTextConverter();
@SuppressWarnings( "unused" )
WordToTextConverter wordToTextConverter = new WordToTextConverter()
{
{
HeaderStories hs = new HeaderStories( doc ); HeaderStories hs = new HeaderStories( doc );
if ( hs.getFirstHeaderSubrange() != null ) if ( hs.getFirstHeaderSubrange() != null )
processDocumentPart( doc, hs.getFirstHeaderSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getFirstHeaderSubrange() );
if ( hs.getEvenHeaderSubrange() != null ) if ( hs.getEvenHeaderSubrange() != null )
processDocumentPart( doc, hs.getEvenHeaderSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getEvenHeaderSubrange() );
if ( hs.getOddHeaderSubrange() != null ) if ( hs.getOddHeaderSubrange() != null )
processDocumentPart( doc, hs.getOddHeaderSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getOddHeaderSubrange() );
processDocument( doc ); wordToTextConverter.processDocument( doc );
processDocumentPart( doc, doc.getMainTextboxRange() ); wordToTextConverter.processDocumentPart( doc,
doc.getMainTextboxRange() );
if ( hs.getFirstFooterSubrange() != null ) if ( hs.getFirstFooterSubrange() != null )
processDocumentPart( doc, hs.getFirstFooterSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getFirstFooterSubrange() );
if ( hs.getEvenFooterSubrange() != null ) if ( hs.getEvenFooterSubrange() != null )
processDocumentPart( doc, hs.getEvenFooterSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getEvenFooterSubrange() );
if ( hs.getOddFooterSubrange() != null ) if ( hs.getOddFooterSubrange() != null )
processDocumentPart( doc, hs.getOddFooterSubrange() ); wordToTextConverter.processDocumentPart( doc,
hs.getOddFooterSubrange() );
stringWriter.append( getText() ); return wordToTextConverter.getText();
}
};
return stringWriter.toString();
} }
catch ( Exception exc ) catch ( Exception exc )
{ {