add ability to dump text pieces

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143734 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-07 09:51:42 +00:00
parent 4c9a35ee4d
commit 92e9201b77

View File

@ -17,13 +17,17 @@
package org.apache.poi.hwpf.dev; package org.apache.poi.hwpf.dev;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore; import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.model.FileInformationBlock; import org.apache.poi.hwpf.model.FileInformationBlock;
import org.apache.poi.hwpf.model.PAPX; import org.apache.poi.hwpf.model.PAPX;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.sprm.SprmIterator; import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
@ -43,12 +47,18 @@ public final class HWPFLister
if ( args.length == 0 ) if ( args.length == 0 )
{ {
System.err.println( "Use:" ); System.err.println( "Use:" );
System.err.println( " HWPFLister <filename> " System.err
+ "[--papx] [--papxProperties] " .println( "\tHWPFLister <filename>\n"
+ "[--paragraphs] [--paragraphsSprms] [--paragraphsText]" ); + "\t\t[--textPieces] [--textPiecesText]\n"
+ "\t\t[--papx] [--papxProperties]\n"
+ "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n"
+ "\t\t[--writereadback]\n" );
System.exit( 1 ); System.exit( 1 );
} }
boolean outputTextPieces = false;
boolean outputTextPiecesText = false;
boolean outputParagraphs = false; boolean outputParagraphs = false;
boolean outputParagraphsSprms = false; boolean outputParagraphsSprms = false;
boolean outputParagraphsText = false; boolean outputParagraphsText = false;
@ -56,8 +66,15 @@ public final class HWPFLister
boolean outputPapx = false; boolean outputPapx = false;
boolean outputPapxProperties = false; boolean outputPapxProperties = false;
boolean writereadback = false;
for ( String arg : Arrays.asList( args ).subList( 1, args.length ) ) for ( String arg : Arrays.asList( args ).subList( 1, args.length ) )
{ {
if ( "--textPieces".equals( arg ) )
outputTextPieces = true;
if ( "--textPiecesText".equals( arg ) )
outputTextPiecesText = true;
if ( "--paragraphs".equals( arg ) ) if ( "--paragraphs".equals( arg ) )
outputParagraphs = true; outputParagraphs = true;
if ( "--paragraphsSprms".equals( arg ) ) if ( "--paragraphsSprms".equals( arg ) )
@ -69,12 +86,24 @@ public final class HWPFLister
outputPapx = true; outputPapx = true;
if ( "--papxProperties".equals( arg ) ) if ( "--papxProperties".equals( arg ) )
outputPapxProperties = true; outputPapxProperties = true;
if ( "--writereadback".equals( arg ) )
writereadback = true;
} }
HWPFLister lister = new HWPFLister( new HWPFDocument( HWPFDocument doc = new HWPFDocument( new FileInputStream( args[0] ) );
new FileInputStream( args[0] ) ) ); if ( writereadback )
doc = writeOutAndReadBack( doc );
HWPFLister lister = new HWPFLister( doc );
lister.dumpFIB(); lister.dumpFIB();
if ( outputTextPieces )
{
System.out.println( "== Text pieces ==" );
lister.dumpTextPieces( outputTextPiecesText );
}
if ( outputParagraphs ) if ( outputParagraphs )
{ {
System.out.println( "== Paragraphs ==" ); System.out.println( "== Paragraphs ==" );
@ -89,6 +118,22 @@ public final class HWPFLister
} }
} }
/**
 * Round-trips a document through memory: the document is written into an
 * in-memory byte buffer and a new {@link HWPFDocument} is then constructed
 * from the bytes that were produced.
 *
 * @param original
 *            the document to serialize
 * @return a new document instance parsed from the serialized bytes
 * @throws RuntimeException
 *             wrapping any {@link IOException} raised while writing the
 *             document or while reading it back
 */
private static HWPFDocument writeOutAndReadBack( HWPFDocument original )
{
    try
    {
        // Serialize into a growable in-memory buffer (4 KiB initial size).
        ByteArrayOutputStream buffer = new ByteArrayOutputStream( 4096 );
        original.write( buffer );

        // Parse a fresh document from exactly the bytes just written.
        byte[] serialized = buffer.toByteArray();
        return new HWPFDocument( new ByteArrayInputStream( serialized ) );
    }
    catch ( IOException ioe )
    {
        throw new RuntimeException( ioe );
    }
}
private final HWPFDocumentCore _doc; private final HWPFDocumentCore _doc;
public HWPFLister( HWPFDocumentCore doc ) public HWPFLister( HWPFDocumentCore doc )
@ -139,4 +184,17 @@ public final class HWPFLister
System.out.println( paragraph.text() ); System.out.println( paragraph.text() );
} }
} }
/**
 * Prints every text piece found in the document's text table to standard
 * output, one piece per line.
 *
 * @param withText
 *            when {@code true}, each piece is followed by an additional
 *            line holding a tab character and the content of the piece's
 *            string buffer
 */
public void dumpTextPieces( boolean withText )
{
    for ( TextPiece piece : _doc.getTextTable().getTextPieces() )
    {
        System.out.println( piece );
        if ( !withText )
        {
            // Only the piece description was requested.
            continue;
        }
        System.out.println( "\t" + piece.getStringBuffer() );
    }
}
} }