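Don't wrap RuntimeException in WordExtractor.getText(): rethrow it unchanged instead of nesting it inside a new RuntimeException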
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1780455 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
37b53744c0
commit
400bdc08b7
@ -38,8 +38,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||||||
*
|
*
|
||||||
* @author Nick Burch
|
* @author Nick Burch
|
||||||
*/
|
*/
|
||||||
public final class WordExtractor extends POIOLE2TextExtractor
|
public final class WordExtractor extends POIOLE2TextExtractor {
|
||||||
{
|
|
||||||
private HWPFDocument doc;
|
private HWPFDocument doc;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -48,8 +47,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @param is
|
* @param is
|
||||||
* InputStream containing the word file
|
* InputStream containing the word file
|
||||||
*/
|
*/
|
||||||
public WordExtractor( InputStream is ) throws IOException
|
public WordExtractor( InputStream is ) throws IOException {
|
||||||
{
|
|
||||||
this( HWPFDocument.verifyAndBuildPOIFS( is ) );
|
this( HWPFDocument.verifyAndBuildPOIFS( is ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,13 +57,11 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @param fs
|
* @param fs
|
||||||
* POIFSFileSystem containing the word file
|
* POIFSFileSystem containing the word file
|
||||||
*/
|
*/
|
||||||
public WordExtractor( POIFSFileSystem fs ) throws IOException
|
public WordExtractor( POIFSFileSystem fs ) throws IOException {
|
||||||
{
|
|
||||||
this( new HWPFDocument( fs ) );
|
this( new HWPFDocument( fs ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
public WordExtractor( DirectoryNode dir ) throws IOException
|
public WordExtractor( DirectoryNode dir ) throws IOException {
|
||||||
{
|
|
||||||
this( new HWPFDocument( dir ) );
|
this( new HWPFDocument( dir ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -75,8 +71,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @param doc
|
* @param doc
|
||||||
* The HWPFDocument to extract from
|
* The HWPFDocument to extract from
|
||||||
*/
|
*/
|
||||||
public WordExtractor( HWPFDocument doc )
|
public WordExtractor( HWPFDocument doc ) {
|
||||||
{
|
|
||||||
super( doc );
|
super( doc );
|
||||||
this.doc = doc;
|
this.doc = doc;
|
||||||
}
|
}
|
||||||
@ -85,10 +80,8 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* Command line extractor, so people will stop moaning that they can't just
|
* Command line extractor, so people will stop moaning that they can't just
|
||||||
* run this.
|
* run this.
|
||||||
*/
|
*/
|
||||||
public static void main( String[] args ) throws IOException
|
public static void main( String[] args ) throws IOException {
|
||||||
{
|
if ( args.length == 0 ) {
|
||||||
if ( args.length == 0 )
|
|
||||||
{
|
|
||||||
System.err.println( "Use:" );
|
System.err.println( "Use:" );
|
||||||
System.err
|
System.err
|
||||||
.println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
|
.println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
|
||||||
@ -109,19 +102,15 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* Get the text from the word file, as an array with one String per
|
* Get the text from the word file, as an array with one String per
|
||||||
* paragraph
|
* paragraph
|
||||||
*/
|
*/
|
||||||
public String[] getParagraphText()
|
public String[] getParagraphText() {
|
||||||
{
|
|
||||||
String[] ret;
|
String[] ret;
|
||||||
|
|
||||||
// Extract using the model code
|
// Extract using the model code
|
||||||
try
|
try {
|
||||||
{
|
|
||||||
Range r = doc.getRange();
|
Range r = doc.getRange();
|
||||||
|
|
||||||
ret = getParagraphText( r );
|
ret = getParagraphText( r );
|
||||||
}
|
} catch ( Exception e ) {
|
||||||
catch ( Exception e )
|
|
||||||
{
|
|
||||||
// Something's up with turning the text pieces into paragraphs
|
// Something's up with turning the text pieces into paragraphs
|
||||||
// Fall back to ripping out the text pieces
|
// Fall back to ripping out the text pieces
|
||||||
ret = new String[1];
|
ret = new String[1];
|
||||||
@ -131,46 +120,39 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getFootnoteText()
|
public String[] getFootnoteText() {
|
||||||
{
|
|
||||||
Range r = doc.getFootnoteRange();
|
Range r = doc.getFootnoteRange();
|
||||||
|
|
||||||
return getParagraphText( r );
|
return getParagraphText( r );
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getMainTextboxText()
|
public String[] getMainTextboxText() {
|
||||||
{
|
|
||||||
Range r = doc.getMainTextboxRange();
|
Range r = doc.getMainTextboxRange();
|
||||||
|
|
||||||
return getParagraphText( r );
|
return getParagraphText( r );
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getEndnoteText()
|
public String[] getEndnoteText() {
|
||||||
{
|
|
||||||
Range r = doc.getEndnoteRange();
|
Range r = doc.getEndnoteRange();
|
||||||
|
|
||||||
return getParagraphText( r );
|
return getParagraphText( r );
|
||||||
}
|
}
|
||||||
|
|
||||||
public String[] getCommentsText()
|
public String[] getCommentsText() {
|
||||||
{
|
|
||||||
Range r = doc.getCommentsRange();
|
Range r = doc.getCommentsRange();
|
||||||
|
|
||||||
return getParagraphText( r );
|
return getParagraphText( r );
|
||||||
}
|
}
|
||||||
|
|
||||||
protected static String[] getParagraphText( Range r )
|
protected static String[] getParagraphText( Range r ) {
|
||||||
{
|
|
||||||
String[] ret;
|
String[] ret;
|
||||||
ret = new String[r.numParagraphs()];
|
ret = new String[r.numParagraphs()];
|
||||||
for ( int i = 0; i < ret.length; i++ )
|
for ( int i = 0; i < ret.length; i++ ) {
|
||||||
{
|
|
||||||
Paragraph p = r.getParagraph( i );
|
Paragraph p = r.getParagraph( i );
|
||||||
ret[i] = p.text();
|
ret[i] = p.text();
|
||||||
|
|
||||||
// Fix the line ending
|
// Fix the line ending
|
||||||
if ( ret[i].endsWith( "\r" ) )
|
if ( ret[i].endsWith( "\r" )) {
|
||||||
{
|
|
||||||
ret[i] = ret[i] + "\n";
|
ret[i] = ret[i] + "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -180,25 +162,23 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
/**
|
/**
|
||||||
* Add the header/footer text, if it's not empty
|
* Add the header/footer text, if it's not empty
|
||||||
*/
|
*/
|
||||||
private void appendHeaderFooter( String text, StringBuffer out )
|
private void appendHeaderFooter( String text, StringBuffer out ) {
|
||||||
{
|
|
||||||
if ( text == null || text.length() == 0 )
|
if ( text == null || text.length() == 0 )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
text = text.replace( '\r', '\n' );
|
text = text.replace( '\r', '\n' );
|
||||||
if ( !text.endsWith( "\n" ) )
|
if ( !text.endsWith( "\n" ))
|
||||||
{
|
{
|
||||||
out.append( text );
|
out.append( text );
|
||||||
out.append( '\n' );
|
out.append( '\n' );
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ( text.endsWith( "\n\n" ) )
|
if ( text.endsWith( "\n\n" ))
|
||||||
{
|
{
|
||||||
out.append( text.substring( 0, text.length() - 1 ) );
|
out.append( text.substring( 0, text.length() - 1 ));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
out.append( text );
|
out.append( text );
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -206,21 +186,17 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @deprecated 3.8 beta 4
|
* @deprecated 3.8 beta 4
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public String getHeaderText()
|
public String getHeaderText() {
|
||||||
{
|
|
||||||
HeaderStories hs = new HeaderStories( doc );
|
HeaderStories hs = new HeaderStories( doc );
|
||||||
|
|
||||||
StringBuffer ret = new StringBuffer();
|
StringBuffer ret = new StringBuffer();
|
||||||
if ( hs.getFirstHeader() != null )
|
if ( hs.getFirstHeader() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getFirstHeader(), ret );
|
appendHeaderFooter( hs.getFirstHeader(), ret );
|
||||||
}
|
}
|
||||||
if ( hs.getEvenHeader() != null )
|
if ( hs.getEvenHeader() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getEvenHeader(), ret );
|
appendHeaderFooter( hs.getEvenHeader(), ret );
|
||||||
}
|
}
|
||||||
if ( hs.getOddHeader() != null )
|
if ( hs.getOddHeader() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getOddHeader(), ret );
|
appendHeaderFooter( hs.getOddHeader(), ret );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -232,21 +208,17 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* @deprecated 3.8 beta 4
|
* @deprecated 3.8 beta 4
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Deprecated
|
||||||
public String getFooterText()
|
public String getFooterText() {
|
||||||
{
|
|
||||||
HeaderStories hs = new HeaderStories( doc );
|
HeaderStories hs = new HeaderStories( doc );
|
||||||
|
|
||||||
StringBuffer ret = new StringBuffer();
|
StringBuffer ret = new StringBuffer();
|
||||||
if ( hs.getFirstFooter() != null )
|
if ( hs.getFirstFooter() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getFirstFooter(), ret );
|
appendHeaderFooter( hs.getFirstFooter(), ret );
|
||||||
}
|
}
|
||||||
if ( hs.getEvenFooter() != null )
|
if ( hs.getEvenFooter() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getEvenFooter(), ret );
|
appendHeaderFooter( hs.getEvenFooter(), ret );
|
||||||
}
|
}
|
||||||
if ( hs.getOddFooter() != null )
|
if ( hs.getOddFooter() != null ) {
|
||||||
{
|
|
||||||
appendHeaderFooter( hs.getOddFooter(), ret );
|
appendHeaderFooter( hs.getOddFooter(), ret );
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,16 +230,14 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* crud, but will work in cases where the text piece -> paragraph mapping is
|
* crud, but will work in cases where the text piece -> paragraph mapping is
|
||||||
* broken. Fast too.
|
* broken. Fast too.
|
||||||
*/
|
*/
|
||||||
public String getTextFromPieces()
|
public String getTextFromPieces() {
|
||||||
{
|
|
||||||
String text = doc.getDocumentText();
|
String text = doc.getDocumentText();
|
||||||
|
|
||||||
// Fix line endings (Note - won't get all of them
|
// Fix line endings (Note - won't get all of them
|
||||||
text = text.replaceAll( "\r\r\r", "\r\n\r\n\r\n" );
|
text = text.replaceAll( "\r\r\r", "\r\n\r\n\r\n" );
|
||||||
text = text.replaceAll( "\r\r", "\r\n\r\n" );
|
text = text.replaceAll( "\r\r", "\r\n\r\n" );
|
||||||
|
|
||||||
if ( text.endsWith( "\r" ) )
|
if ( text.endsWith( "\r" )) {
|
||||||
{
|
|
||||||
text += "\n";
|
text += "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -278,42 +248,40 @@ public final class WordExtractor extends POIOLE2TextExtractor
|
|||||||
* Grab the text, based on the WordToTextConverter. Shouldn't include any
|
* Grab the text, based on the WordToTextConverter. Shouldn't include any
|
||||||
* crud, but slower than getTextFromPieces().
|
* crud, but slower than getTextFromPieces().
|
||||||
*/
|
*/
|
||||||
public String getText()
|
public String getText() {
|
||||||
{
|
try {
|
||||||
try
|
|
||||||
{
|
|
||||||
WordToTextConverter wordToTextConverter = new WordToTextConverter();
|
WordToTextConverter wordToTextConverter = new WordToTextConverter();
|
||||||
|
|
||||||
HeaderStories hs = new HeaderStories( doc );
|
HeaderStories hs = new HeaderStories(doc);
|
||||||
|
|
||||||
if ( hs.getFirstHeaderSubrange() != null )
|
if (hs.getFirstHeaderSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getFirstHeaderSubrange() );
|
hs.getFirstHeaderSubrange());
|
||||||
if ( hs.getEvenHeaderSubrange() != null )
|
if (hs.getEvenHeaderSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getEvenHeaderSubrange() );
|
hs.getEvenHeaderSubrange());
|
||||||
if ( hs.getOddHeaderSubrange() != null )
|
if (hs.getOddHeaderSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getOddHeaderSubrange() );
|
hs.getOddHeaderSubrange());
|
||||||
|
|
||||||
wordToTextConverter.processDocument( doc );
|
wordToTextConverter.processDocument(doc);
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
doc.getMainTextboxRange() );
|
doc.getMainTextboxRange());
|
||||||
|
|
||||||
if ( hs.getFirstFooterSubrange() != null )
|
if (hs.getFirstFooterSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getFirstFooterSubrange() );
|
hs.getFirstFooterSubrange());
|
||||||
if ( hs.getEvenFooterSubrange() != null )
|
if (hs.getEvenFooterSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getEvenFooterSubrange() );
|
hs.getEvenFooterSubrange());
|
||||||
if ( hs.getOddFooterSubrange() != null )
|
if (hs.getOddFooterSubrange() != null)
|
||||||
wordToTextConverter.processDocumentPart( doc,
|
wordToTextConverter.processDocumentPart(doc,
|
||||||
hs.getOddFooterSubrange() );
|
hs.getOddFooterSubrange());
|
||||||
|
|
||||||
return wordToTextConverter.getText();
|
return wordToTextConverter.getText();
|
||||||
}
|
} catch (RuntimeException e) {
|
||||||
catch ( Exception exc )
|
throw e;
|
||||||
{
|
} catch ( Exception exc ) {
|
||||||
throw new RuntimeException( exc );
|
throw new RuntimeException( exc );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user