allow to dump pictures

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1147420 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Sergey Vladimirov 2011-07-16 12:36:55 +00:00
parent 09e3d77cd6
commit 572c611187

View File

@ -23,8 +23,12 @@ import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap; import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map; import java.util.Map;
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFDocument;
@ -33,15 +37,21 @@ import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.model.CHPX; import org.apache.poi.hwpf.model.CHPX;
import org.apache.poi.hwpf.model.FileInformationBlock; import org.apache.poi.hwpf.model.FileInformationBlock;
import org.apache.poi.hwpf.model.GenericPropertyNode;
import org.apache.poi.hwpf.model.PAPFormattedDiskPage;
import org.apache.poi.hwpf.model.PAPX; import org.apache.poi.hwpf.model.PAPX;
import org.apache.poi.hwpf.model.PlexOfCps;
import org.apache.poi.hwpf.model.StyleSheet; import org.apache.poi.hwpf.model.StyleSheet;
import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.sprm.SprmIterator; import org.apache.poi.hwpf.sprm.SprmIterator;
import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.sprm.SprmOperation;
import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range; import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.poifs.common.POIFSConstants;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.IOUtils; import org.apache.poi.util.IOUtils;
import org.apache.poi.util.LittleEndian;
/** /**
* Used by developers to list out key information on a HWPF file. End users will * Used by developers to list out key information on a HWPF file. End users will
@ -88,18 +98,19 @@ public final class HWPFLister
System.err System.err
.println( "\tHWPFLister <filename>\n" .println( "\tHWPFLister <filename>\n"
+ "\t\t[--textPieces] [--textPiecesText]\n" + "\t\t[--textPieces] [--textPiecesText]\n"
+ "\t\t[--textRuns] [--textRunsSprms]\n" + "\t\t[--chpx] [--chpxProperties] [--chpxSprms]\n"
+ "\t\t[--papx] [--papxProperties]\n" + "\t\t[--papx] [--papxProperties]\n"
+ "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n" + "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n"
+ "\t\t[--writereadback]\n" ); + "\t\t[--pictures]\n" + "\t\t[--writereadback]\n" );
System.exit( 1 ); System.exit( 1 );
} }
boolean outputTextPieces = false; boolean outputTextPieces = false;
boolean outputTextPiecesText = false; boolean outputTextPiecesText = false;
boolean outputTextRuns = false; boolean outputChpx = false;
boolean outputTextRunsSprms = false; boolean outputChpxProperties = false;
boolean outputChpxSprms = false;
boolean outputParagraphs = false; boolean outputParagraphs = false;
boolean outputParagraphsSprms = false; boolean outputParagraphsSprms = false;
@ -108,6 +119,8 @@ public final class HWPFLister
boolean outputPapx = false; boolean outputPapx = false;
boolean outputPapxProperties = false; boolean outputPapxProperties = false;
boolean outputPictures = false;
boolean writereadback = false; boolean writereadback = false;
for ( String arg : Arrays.asList( args ).subList( 1, args.length ) ) for ( String arg : Arrays.asList( args ).subList( 1, args.length ) )
@ -117,10 +130,12 @@ public final class HWPFLister
if ( "--textPiecesText".equals( arg ) ) if ( "--textPiecesText".equals( arg ) )
outputTextPiecesText = true; outputTextPiecesText = true;
if ( "--textRuns".equals( arg ) ) if ( "--chpx".equals( arg ) )
outputTextRuns = true; outputChpx = true;
if ( "--textRunsSprms".equals( arg ) ) if ( "--chpxProperties".equals( arg ) )
outputTextRunsSprms = true; outputChpxProperties = true;
if ( "--chpxSprms".equals( arg ) )
outputChpxSprms = true;
if ( "--paragraphs".equals( arg ) ) if ( "--paragraphs".equals( arg ) )
outputParagraphs = true; outputParagraphs = true;
@ -134,6 +149,9 @@ public final class HWPFLister
if ( "--papxProperties".equals( arg ) ) if ( "--papxProperties".equals( arg ) )
outputPapxProperties = true; outputPapxProperties = true;
if ( "--pictures".equals( arg ) )
outputPictures = true;
if ( "--writereadback".equals( arg ) ) if ( "--writereadback".equals( arg ) )
writereadback = true; writereadback = true;
} }
@ -151,10 +169,16 @@ public final class HWPFLister
lister.dumpTextPieces( outputTextPiecesText ); lister.dumpTextPieces( outputTextPiecesText );
} }
if ( outputTextRuns ) if ( outputChpx )
{ {
System.out.println( "== Text runs ==" ); System.out.println( "== CHPX ==" );
lister.dumpChpx( outputTextRunsSprms ); lister.dumpChpx( outputChpxProperties, outputChpxSprms );
}
if ( outputPapx )
{
System.out.println( "== PAPX ==" );
lister.dumpPapx( outputPapxProperties );
} }
if ( outputParagraphs ) if ( outputParagraphs )
@ -167,10 +191,10 @@ public final class HWPFLister
outputParagraphsText ); outputParagraphsText );
} }
if ( !outputParagraphs && outputPapx ) if ( outputPictures )
{ {
System.out.println( "== PAPX ==" ); System.out.println( "== PICTURES ==" );
lister.dumpPapx( outputPapxProperties ); lister.dumpPictures();
} }
} }
@ -240,13 +264,13 @@ public final class HWPFLister
this.text = builder.toString(); this.text = builder.toString();
} }
public void dumpChpx( boolean withSprms ) public void dumpChpx( boolean withProperties, boolean withSprms )
{ {
for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() ) for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() )
{ {
System.out.println( chpx ); System.out.println( chpx );
if ( false ) if ( withProperties )
{ {
System.out.println( chpx.getCharacterProperties( System.out.println( chpx.getCharacterProperties(
_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) ); _doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) );
@ -282,26 +306,90 @@ public final class HWPFLister
System.out.println( fib ); System.out.println( fib );
} }
public void dumpPapx( boolean withProperties ) public void dumpPapx( boolean withProperties ) throws Exception
{ {
for ( PAPX papx : _doc.getParagraphTable().getParagraphs() ) if ( _doc instanceof HWPFDocument )
{ {
System.out.println( papx ); System.out.println( "binary PAP pages " );
if ( withProperties ) HWPFDocument doc = (HWPFDocument) _doc;
System.out.println( papx.getParagraphProperties( _doc
.getStyleSheet() ) );
if ( true ) Field fMainStream = HWPFDocumentCore.class
.getDeclaredField( "_mainStream" );
fMainStream.setAccessible( true );
byte[] mainStream = (byte[]) fMainStream.get( _doc );
PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc
.getFileInformationBlock().getFcPlcfbtePapx(), doc
.getFileInformationBlock().getLcbPlcfbtePapx(), 4 );
List<PAPX> papxs = new ArrayList<PAPX>();
int length = binTable.length();
for ( int x = 0; x < length; x++ )
{ {
GenericPropertyNode node = binTable.getProperty( x );
int pageNum = LittleEndian.getInt( node.getBytes() );
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
* pageNum;
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
mainStream, doc.getDataStream(), pageOffset,
doc.getTextTable(), false );
System.out.println( "* PFKP: " + pfkp );
for ( PAPX papx : pfkp.getPAPXs() )
{
System.out.println( "** " + papx );
papxs.add( papx );
if ( papx != null && true )
{
SprmIterator sprmIt = new SprmIterator(
papx.getGrpprl(), 2 );
while ( sprmIt.hasNext() )
{
SprmOperation sprm = sprmIt.next();
System.out.println( "*** " + sprm.toString() );
}
}
}
}
Collections.sort( papxs );
System.out.println( "* Sorted by END" );
for ( PAPX papx : papxs )
{
System.out.println( "** " + papx );
SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 ); SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
while ( sprmIt.hasNext() ) while ( sprmIt.hasNext() )
{ {
SprmOperation sprm = sprmIt.next(); SprmOperation sprm = sprmIt.next();
System.out.println( "\t" + sprm.toString() ); System.out.println( "*** " + sprm.toString() );
} }
} }
} }
// for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
// {
// System.out.println( papx );
//
// if ( withProperties )
// System.out.println( papx.getParagraphProperties( _doc
// .getStyleSheet() ) );
//
// if ( true )
// {
// SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
// while ( sprmIt.hasNext() )
// {
// SprmOperation sprm = sprmIt.next();
// System.out.println( "\t" + sprm.toString() );
// }
// }
// }
} }
public void dumpParagraphs( boolean dumpAssotiatedPapx ) public void dumpParagraphs( boolean dumpAssotiatedPapx )
@ -356,6 +444,22 @@ public final class HWPFLister
} }
} }
private void dumpPictures()
{
if ( _doc instanceof HWPFOldDocument )
{
System.out.println( "Word 95 not supported so far" );
return;
}
List<Picture> allPictures = ( (HWPFDocument) _doc ).getPicturesTable()
.getAllPictures();
for ( Picture picture : allPictures )
{
System.out.println( picture.toString() );
}
}
public void dumpTextPieces( boolean withText ) public void dumpTextPieces( boolean withText )
{ {
for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() ) for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )