simplify API for processing parts of a Word file, such as including page headers / footers in plain text and HTML output
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1156823 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
79ecde0c88
commit
8fdb655c4a
@ -132,6 +132,16 @@ public abstract class AbstractWordConverter
|
|||||||
|
|
||||||
private PicturesManager picturesManager;
|
private PicturesManager picturesManager;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Special actions that need to be called after processing complete, like
|
||||||
|
* updating stylesheets or building document notes list. Usually they are
|
||||||
|
* called once, but it's okay to call them several times.
|
||||||
|
*/
|
||||||
|
protected void afterProcess()
|
||||||
|
{
|
||||||
|
// by default no such actions needed
|
||||||
|
}
|
||||||
|
|
||||||
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
|
protected Triplet getCharacterRunTriplet( CharacterRun characterRun )
|
||||||
{
|
{
|
||||||
Triplet original = new Triplet();
|
Triplet original = new Triplet();
|
||||||
@ -594,7 +604,17 @@ public abstract class AbstractWordConverter
|
|||||||
processDocumentInformation( summaryInformation );
|
processDocumentInformation( summaryInformation );
|
||||||
}
|
}
|
||||||
|
|
||||||
processDocumentPart( wordDocument, wordDocument.getRange() );
|
final Range docRange = wordDocument.getRange();
|
||||||
|
|
||||||
|
if ( docRange.numSections() == 1 )
|
||||||
|
{
|
||||||
|
processSingleSection( wordDocument, docRange.getSection( 0 ) );
|
||||||
|
afterProcess();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
processDocumentPart( wordDocument, docRange );
|
||||||
|
afterProcess();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected abstract void processDocumentInformation(
|
protected abstract void processDocumentInformation(
|
||||||
@ -603,12 +623,6 @@ public abstract class AbstractWordConverter
|
|||||||
protected void processDocumentPart( HWPFDocumentCore wordDocument,
|
protected void processDocumentPart( HWPFDocumentCore wordDocument,
|
||||||
final Range range )
|
final Range range )
|
||||||
{
|
{
|
||||||
if ( range.numSections() == 1 )
|
|
||||||
{
|
|
||||||
processSingleSection( wordDocument, range.getSection( 0 ) );
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
for ( int s = 0; s < range.numSections(); s++ )
|
for ( int s = 0; s < range.numSections(); s++ )
|
||||||
{
|
{
|
||||||
processSection( wordDocument, range.getSection( s ), s );
|
processSection( wordDocument, range.getSection( s ), s );
|
||||||
|
@ -180,6 +180,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
this.htmlDocumentFacade = new HtmlDocumentFacade( document );
|
this.htmlDocumentFacade = new HtmlDocumentFacade( document );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void afterProcess()
|
||||||
|
{
|
||||||
|
if ( notes != null )
|
||||||
|
htmlDocumentFacade.getBody().appendChild( notes );
|
||||||
|
|
||||||
|
htmlDocumentFacade.updateStylesheet();
|
||||||
|
}
|
||||||
|
|
||||||
public Document getDocument()
|
public Document getDocument()
|
||||||
{
|
{
|
||||||
return htmlDocumentFacade.getDocument();
|
return htmlDocumentFacade.getDocument();
|
||||||
@ -241,17 +250,6 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
processCharacters( wordDocument, currentTableLevel, range, parent );
|
processCharacters( wordDocument, currentTableLevel, range, parent );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void processDocument( HWPFDocumentCore wordDocument )
|
|
||||||
{
|
|
||||||
super.processDocument( wordDocument );
|
|
||||||
|
|
||||||
if ( notes != null )
|
|
||||||
htmlDocumentFacade.getBody().appendChild( notes );
|
|
||||||
|
|
||||||
htmlDocumentFacade.updateStylesheet();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processDocumentInformation(
|
protected void processDocumentInformation(
|
||||||
SummaryInformation summaryInformation )
|
SummaryInformation summaryInformation )
|
||||||
@ -270,6 +268,13 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
.addDescription( summaryInformation.getComments() );
|
.addDescription( summaryInformation.getComments() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void processDocumentPart( HWPFDocumentCore wordDocument, Range range )
|
||||||
|
{
|
||||||
|
super.processDocumentPart( wordDocument, range );
|
||||||
|
afterProcess();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processDrawnObject( HWPFDocument doc,
|
protected void processDrawnObject( HWPFDocument doc,
|
||||||
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
||||||
|
@ -166,6 +166,13 @@ public class WordToTextConverter extends AbstractWordConverter
|
|||||||
this.textDocumentFacade = new TextDocumentFacade( document );
|
this.textDocumentFacade = new TextDocumentFacade( document );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void afterProcess()
|
||||||
|
{
|
||||||
|
if ( notes != null )
|
||||||
|
textDocumentFacade.getBody().appendChild( notes );
|
||||||
|
}
|
||||||
|
|
||||||
public Document getDocument()
|
public Document getDocument()
|
||||||
{
|
{
|
||||||
return textDocumentFacade.getDocument();
|
return textDocumentFacade.getDocument();
|
||||||
@ -208,15 +215,6 @@ public class WordToTextConverter extends AbstractWordConverter
|
|||||||
processCharacters( wordDocument, currentTableLevel, range, currentBlock );
|
processCharacters( wordDocument, currentTableLevel, range, currentBlock );
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public void processDocument( HWPFDocumentCore wordDocument )
|
|
||||||
{
|
|
||||||
super.processDocument( wordDocument );
|
|
||||||
|
|
||||||
if ( notes != null )
|
|
||||||
textDocumentFacade.getBody().appendChild( notes );
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processDocumentInformation(
|
protected void processDocumentInformation(
|
||||||
SummaryInformation summaryInformation )
|
SummaryInformation summaryInformation )
|
||||||
@ -241,6 +239,14 @@ public class WordToTextConverter extends AbstractWordConverter
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void processDocumentPart( HWPFDocumentCore wordDocument,
|
||||||
|
Range range )
|
||||||
|
{
|
||||||
|
super.processDocumentPart( wordDocument, range );
|
||||||
|
afterProcess();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processDrawnObject( HWPFDocument doc,
|
protected void processDrawnObject( HWPFDocument doc,
|
||||||
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
||||||
|
@ -20,14 +20,10 @@ package org.apache.poi.hwpf.extractor;
|
|||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.StringWriter;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
import org.apache.poi.hwpf.converter.WordToTextConverter;
|
|
||||||
|
|
||||||
import org.apache.poi.POIOLE2TextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
|
import org.apache.poi.hwpf.converter.WordToTextConverter;
|
||||||
import org.apache.poi.hwpf.usermodel.HeaderStories;
|
import org.apache.poi.hwpf.usermodel.HeaderStories;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
@ -72,6 +68,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @deprecated Use {@link #WordExtractor(DirectoryNode)} instead
|
* @deprecated Use {@link #WordExtractor(DirectoryNode)} instead
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
|
@SuppressWarnings( "unused" )
|
||||||
public WordExtractor( DirectoryNode dir, POIFSFileSystem fs )
|
public WordExtractor( DirectoryNode dir, POIFSFileSystem fs )
|
||||||
throws IOException
|
throws IOException
|
||||||
{
|
{
|
||||||
@ -290,34 +287,35 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
final StringWriter stringWriter = new StringWriter();
|
WordToTextConverter wordToTextConverter = new WordToTextConverter();
|
||||||
@SuppressWarnings( "unused" )
|
|
||||||
WordToTextConverter wordToTextConverter = new WordToTextConverter()
|
|
||||||
{
|
|
||||||
{
|
|
||||||
HeaderStories hs = new HeaderStories( doc );
|
HeaderStories hs = new HeaderStories( doc );
|
||||||
|
|
||||||
if ( hs.getFirstHeaderSubrange() != null )
|
if ( hs.getFirstHeaderSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getFirstHeaderSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getFirstHeaderSubrange() );
|
||||||
if ( hs.getEvenHeaderSubrange() != null )
|
if ( hs.getEvenHeaderSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getEvenHeaderSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getEvenHeaderSubrange() );
|
||||||
if ( hs.getOddHeaderSubrange() != null )
|
if ( hs.getOddHeaderSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getOddHeaderSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getOddHeaderSubrange() );
|
||||||
|
|
||||||
processDocument( doc );
|
wordToTextConverter.processDocument( doc );
|
||||||
processDocumentPart( doc, doc.getMainTextboxRange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
doc.getMainTextboxRange() );
|
||||||
|
|
||||||
if ( hs.getFirstFooterSubrange() != null )
|
if ( hs.getFirstFooterSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getFirstFooterSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getFirstFooterSubrange() );
|
||||||
if ( hs.getEvenFooterSubrange() != null )
|
if ( hs.getEvenFooterSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getEvenFooterSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getEvenFooterSubrange() );
|
||||||
if ( hs.getOddFooterSubrange() != null )
|
if ( hs.getOddFooterSubrange() != null )
|
||||||
processDocumentPart( doc, hs.getOddFooterSubrange() );
|
wordToTextConverter.processDocumentPart( doc,
|
||||||
|
hs.getOddFooterSubrange() );
|
||||||
|
|
||||||
stringWriter.append( getText() );
|
return wordToTextConverter.getText();
|
||||||
}
|
|
||||||
};
|
|
||||||
return stringWriter.toString();
|
|
||||||
}
|
}
|
||||||
catch ( Exception exc )
|
catch ( Exception exc )
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user