Don't wrap RuntimeException

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1780455 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Dominik Stadler 2017-01-26 19:58:58 +00:00
parent 37b53744c0
commit 400bdc08b7
1 changed file with 58 additions and 90 deletions

View File

@ -38,8 +38,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
* *
* @author Nick Burch * @author Nick Burch
*/ */
public final class WordExtractor extends POIOLE2TextExtractor public final class WordExtractor extends POIOLE2TextExtractor {
{
private HWPFDocument doc; private HWPFDocument doc;
/** /**
@ -48,8 +47,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @param is * @param is
* InputStream containing the word file * InputStream containing the word file
*/ */
public WordExtractor( InputStream is ) throws IOException public WordExtractor( InputStream is ) throws IOException {
{
this( HWPFDocument.verifyAndBuildPOIFS( is ) ); this( HWPFDocument.verifyAndBuildPOIFS( is ) );
} }
@ -59,13 +57,11 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @param fs * @param fs
* POIFSFileSystem containing the word file * POIFSFileSystem containing the word file
*/ */
public WordExtractor( POIFSFileSystem fs ) throws IOException public WordExtractor( POIFSFileSystem fs ) throws IOException {
{
this( new HWPFDocument( fs ) ); this( new HWPFDocument( fs ) );
} }
public WordExtractor( DirectoryNode dir ) throws IOException public WordExtractor( DirectoryNode dir ) throws IOException {
{
this( new HWPFDocument( dir ) ); this( new HWPFDocument( dir ) );
} }
@ -75,8 +71,7 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @param doc * @param doc
* The HWPFDocument to extract from * The HWPFDocument to extract from
*/ */
public WordExtractor( HWPFDocument doc ) public WordExtractor( HWPFDocument doc ) {
{
super( doc ); super( doc );
this.doc = doc; this.doc = doc;
} }
@ -85,10 +80,8 @@ public final class WordExtractor extends POIOLE2TextExtractor
* Command line extractor, so people will stop moaning that they can't just * Command line extractor, so people will stop moaning that they can't just
* run this. * run this.
*/ */
public static void main( String[] args ) throws IOException public static void main( String[] args ) throws IOException {
{ if ( args.length == 0 ) {
if ( args.length == 0 )
{
System.err.println( "Use:" ); System.err.println( "Use:" );
System.err System.err
.println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" ); .println( " java org.apache.poi.hwpf.extractor.WordExtractor <filename>" );
@ -109,19 +102,15 @@ public final class WordExtractor extends POIOLE2TextExtractor
* Get the text from the word file, as an array with one String per * Get the text from the word file, as an array with one String per
* paragraph * paragraph
*/ */
public String[] getParagraphText() public String[] getParagraphText() {
{
String[] ret; String[] ret;
// Extract using the model code // Extract using the model code
try try {
{
Range r = doc.getRange(); Range r = doc.getRange();
ret = getParagraphText( r ); ret = getParagraphText( r );
} } catch ( Exception e ) {
catch ( Exception e )
{
// Something's up with turning the text pieces into paragraphs // Something's up with turning the text pieces into paragraphs
// Fall back to ripping out the text pieces // Fall back to ripping out the text pieces
ret = new String[1]; ret = new String[1];
@ -131,46 +120,39 @@ public final class WordExtractor extends POIOLE2TextExtractor
return ret; return ret;
} }
public String[] getFootnoteText() public String[] getFootnoteText() {
{
Range r = doc.getFootnoteRange(); Range r = doc.getFootnoteRange();
return getParagraphText( r ); return getParagraphText( r );
} }
public String[] getMainTextboxText() public String[] getMainTextboxText() {
{
Range r = doc.getMainTextboxRange(); Range r = doc.getMainTextboxRange();
return getParagraphText( r ); return getParagraphText( r );
} }
public String[] getEndnoteText() public String[] getEndnoteText() {
{
Range r = doc.getEndnoteRange(); Range r = doc.getEndnoteRange();
return getParagraphText( r ); return getParagraphText( r );
} }
public String[] getCommentsText() public String[] getCommentsText() {
{
Range r = doc.getCommentsRange(); Range r = doc.getCommentsRange();
return getParagraphText( r ); return getParagraphText( r );
} }
protected static String[] getParagraphText( Range r ) protected static String[] getParagraphText( Range r ) {
{
String[] ret; String[] ret;
ret = new String[r.numParagraphs()]; ret = new String[r.numParagraphs()];
for ( int i = 0; i < ret.length; i++ ) for ( int i = 0; i < ret.length; i++ ) {
{
Paragraph p = r.getParagraph( i ); Paragraph p = r.getParagraph( i );
ret[i] = p.text(); ret[i] = p.text();
// Fix the line ending // Fix the line ending
if ( ret[i].endsWith( "\r" ) ) if ( ret[i].endsWith( "\r" )) {
{
ret[i] = ret[i] + "\n"; ret[i] = ret[i] + "\n";
} }
} }
@ -180,25 +162,23 @@ public final class WordExtractor extends POIOLE2TextExtractor
/** /**
* Add the header/footer text, if it's not empty * Add the header/footer text, if it's not empty
*/ */
private void appendHeaderFooter( String text, StringBuffer out ) private void appendHeaderFooter( String text, StringBuffer out ) {
{
if ( text == null || text.length() == 0 ) if ( text == null || text.length() == 0 )
return; return;
text = text.replace( '\r', '\n' ); text = text.replace( '\r', '\n' );
if ( !text.endsWith( "\n" ) ) if ( !text.endsWith( "\n" ))
{ {
out.append( text ); out.append( text );
out.append( '\n' ); out.append( '\n' );
return; return;
} }
if ( text.endsWith( "\n\n" ) ) if ( text.endsWith( "\n\n" ))
{ {
out.append( text.substring( 0, text.length() - 1 ) ); out.append( text.substring( 0, text.length() - 1 ));
return; return;
} }
out.append( text ); out.append( text );
return;
} }
/** /**
@ -206,21 +186,17 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @deprecated 3.8 beta 4 * @deprecated 3.8 beta 4
*/ */
@Deprecated @Deprecated
public String getHeaderText() public String getHeaderText() {
{
HeaderStories hs = new HeaderStories( doc ); HeaderStories hs = new HeaderStories( doc );
StringBuffer ret = new StringBuffer(); StringBuffer ret = new StringBuffer();
if ( hs.getFirstHeader() != null ) if ( hs.getFirstHeader() != null ) {
{
appendHeaderFooter( hs.getFirstHeader(), ret ); appendHeaderFooter( hs.getFirstHeader(), ret );
} }
if ( hs.getEvenHeader() != null ) if ( hs.getEvenHeader() != null ) {
{
appendHeaderFooter( hs.getEvenHeader(), ret ); appendHeaderFooter( hs.getEvenHeader(), ret );
} }
if ( hs.getOddHeader() != null ) if ( hs.getOddHeader() != null ) {
{
appendHeaderFooter( hs.getOddHeader(), ret ); appendHeaderFooter( hs.getOddHeader(), ret );
} }
@ -232,21 +208,17 @@ public final class WordExtractor extends POIOLE2TextExtractor
* @deprecated 3.8 beta 4 * @deprecated 3.8 beta 4
*/ */
@Deprecated @Deprecated
public String getFooterText() public String getFooterText() {
{
HeaderStories hs = new HeaderStories( doc ); HeaderStories hs = new HeaderStories( doc );
StringBuffer ret = new StringBuffer(); StringBuffer ret = new StringBuffer();
if ( hs.getFirstFooter() != null ) if ( hs.getFirstFooter() != null ) {
{
appendHeaderFooter( hs.getFirstFooter(), ret ); appendHeaderFooter( hs.getFirstFooter(), ret );
} }
if ( hs.getEvenFooter() != null ) if ( hs.getEvenFooter() != null ) {
{
appendHeaderFooter( hs.getEvenFooter(), ret ); appendHeaderFooter( hs.getEvenFooter(), ret );
} }
if ( hs.getOddFooter() != null ) if ( hs.getOddFooter() != null ) {
{
appendHeaderFooter( hs.getOddFooter(), ret ); appendHeaderFooter( hs.getOddFooter(), ret );
} }
@ -258,16 +230,14 @@ public final class WordExtractor extends POIOLE2TextExtractor
* crud, but will work in cases where the text piece -> paragraph mapping is * crud, but will work in cases where the text piece -> paragraph mapping is
* broken. Fast too. * broken. Fast too.
*/ */
public String getTextFromPieces() public String getTextFromPieces() {
{
String text = doc.getDocumentText(); String text = doc.getDocumentText();
// Fix line endings (Note - won't get all of them // Fix line endings (Note - won't get all of them
text = text.replaceAll( "\r\r\r", "\r\n\r\n\r\n" ); text = text.replaceAll( "\r\r\r", "\r\n\r\n\r\n" );
text = text.replaceAll( "\r\r", "\r\n\r\n" ); text = text.replaceAll( "\r\r", "\r\n\r\n" );
if ( text.endsWith( "\r" ) ) if ( text.endsWith( "\r" )) {
{
text += "\n"; text += "\n";
} }
@ -278,42 +248,40 @@ public final class WordExtractor extends POIOLE2TextExtractor
* Grab the text, based on the WordToTextConverter. Shouldn't include any * Grab the text, based on the WordToTextConverter. Shouldn't include any
* crud, but slower than getTextFromPieces(). * crud, but slower than getTextFromPieces().
*/ */
public String getText() public String getText() {
{ try {
try
{
WordToTextConverter wordToTextConverter = new WordToTextConverter(); WordToTextConverter wordToTextConverter = new WordToTextConverter();
HeaderStories hs = new HeaderStories( doc ); HeaderStories hs = new HeaderStories(doc);
if ( hs.getFirstHeaderSubrange() != null ) if (hs.getFirstHeaderSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getFirstHeaderSubrange() ); hs.getFirstHeaderSubrange());
if ( hs.getEvenHeaderSubrange() != null ) if (hs.getEvenHeaderSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getEvenHeaderSubrange() ); hs.getEvenHeaderSubrange());
if ( hs.getOddHeaderSubrange() != null ) if (hs.getOddHeaderSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getOddHeaderSubrange() ); hs.getOddHeaderSubrange());
wordToTextConverter.processDocument( doc ); wordToTextConverter.processDocument(doc);
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
doc.getMainTextboxRange() ); doc.getMainTextboxRange());
if ( hs.getFirstFooterSubrange() != null ) if (hs.getFirstFooterSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getFirstFooterSubrange() ); hs.getFirstFooterSubrange());
if ( hs.getEvenFooterSubrange() != null ) if (hs.getEvenFooterSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getEvenFooterSubrange() ); hs.getEvenFooterSubrange());
if ( hs.getOddFooterSubrange() != null ) if (hs.getOddFooterSubrange() != null)
wordToTextConverter.processDocumentPart( doc, wordToTextConverter.processDocumentPart(doc,
hs.getOddFooterSubrange() ); hs.getOddFooterSubrange());
return wordToTextConverter.getText(); return wordToTextConverter.getText();
} } catch (RuntimeException e) {
catch ( Exception exc ) throw e;
{ } catch ( Exception exc ) {
throw new RuntimeException( exc ); throw new RuntimeException( exc );
} }
} }