From 572c6111875ac4d8b0e8a6570d1ea4d361b2b681 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Sat, 16 Jul 2011 12:36:55 +0000 Subject: [PATCH] allow to dump pictures git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1147420 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hwpf/dev/HWPFLister.java | 152 +++++++++++++++--- 1 file changed, 128 insertions(+), 24 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java index c80035cbb..aceb51572 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/dev/HWPFLister.java @@ -23,8 +23,12 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.lang.reflect.Field; +import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import org.apache.poi.hwpf.HWPFDocument; @@ -33,15 +37,21 @@ import org.apache.poi.hwpf.HWPFOldDocument; import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.hwpf.model.CHPX; import org.apache.poi.hwpf.model.FileInformationBlock; +import org.apache.poi.hwpf.model.GenericPropertyNode; +import org.apache.poi.hwpf.model.PAPFormattedDiskPage; import org.apache.poi.hwpf.model.PAPX; +import org.apache.poi.hwpf.model.PlexOfCps; import org.apache.poi.hwpf.model.StyleSheet; import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.sprm.SprmIterator; import org.apache.poi.hwpf.sprm.SprmOperation; import org.apache.poi.hwpf.usermodel.Paragraph; +import org.apache.poi.hwpf.usermodel.Picture; import org.apache.poi.hwpf.usermodel.Range; +import org.apache.poi.poifs.common.POIFSConstants; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.util.IOUtils; +import org.apache.poi.util.LittleEndian; /** * Used by developers to list out key information on a HWPF file. End users will @@ -88,18 +98,19 @@ public final class HWPFLister System.err .println( "\tHWPFLister \n" + "\t\t[--textPieces] [--textPiecesText]\n" - + "\t\t[--textRuns] [--textRunsSprms]\n" + + "\t\t[--chpx] [--chpxProperties] [--chpxSprms]\n" + "\t\t[--papx] [--papxProperties]\n" + "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n" - + "\t\t[--writereadback]\n" ); + + "\t\t[--pictures]\n" + "\t\t[--writereadback]\n" ); System.exit( 1 ); } boolean outputTextPieces = false; boolean outputTextPiecesText = false; - boolean outputTextRuns = false; - boolean outputTextRunsSprms = false; + boolean outputChpx = false; + boolean outputChpxProperties = false; + boolean outputChpxSprms = false; boolean outputParagraphs = false; boolean outputParagraphsSprms = false; @@ -108,6 +119,8 @@ public final class HWPFLister boolean outputPapx = false; boolean outputPapxProperties = false; + boolean outputPictures = false; + boolean writereadback = false; for ( String arg : Arrays.asList( args ).subList( 1, args.length ) ) @@ -117,10 +130,12 @@ public final class HWPFLister if ( "--textPiecesText".equals( arg ) ) outputTextPiecesText = true; - if ( "--textRuns".equals( arg ) ) - outputTextRuns = true; - if ( "--textRunsSprms".equals( arg ) ) - outputTextRunsSprms = true; + if ( "--chpx".equals( arg ) ) + outputChpx = true; + if ( "--chpxProperties".equals( arg ) ) + outputChpxProperties = true; + if ( "--chpxSprms".equals( arg ) ) + outputChpxSprms = true; if ( "--paragraphs".equals( arg ) ) outputParagraphs = true; @@ -134,6 +149,9 @@ public final class HWPFLister if ( "--papxProperties".equals( arg ) ) outputPapxProperties = true; + if ( "--pictures".equals( arg ) ) + outputPictures = true; + if ( "--writereadback".equals( arg ) ) writereadback = true; } @@ -151,10 +169,16 @@ public final class HWPFLister lister.dumpTextPieces( outputTextPiecesText ); } - if ( outputTextRuns ) + if ( outputChpx ) { - System.out.println( "== Text runs ==" ); - lister.dumpChpx( outputTextRunsSprms ); + System.out.println( "== CHPX ==" ); + lister.dumpChpx( outputChpxProperties, outputChpxSprms ); + } + + if ( outputPapx ) + { + System.out.println( "== PAPX ==" ); + lister.dumpPapx( outputPapxProperties ); } if ( outputParagraphs ) @@ -167,10 +191,10 @@ public final class HWPFLister outputParagraphsText ); } - if ( !outputParagraphs && outputPapx ) + if ( outputPictures ) { - System.out.println( "== PAPX ==" ); - lister.dumpPapx( outputPapxProperties ); + System.out.println( "== PICTURES ==" ); + lister.dumpPictures(); } } @@ -240,13 +264,13 @@ public final class HWPFLister this.text = builder.toString(); } - public void dumpChpx( boolean withSprms ) + public void dumpChpx( boolean withProperties, boolean withSprms ) { for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() ) { System.out.println( chpx ); - if ( false ) + if ( withProperties ) { System.out.println( chpx.getCharacterProperties( _doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) ); @@ -282,26 +306,90 @@ public final class HWPFLister System.out.println( fib ); } - public void dumpPapx( boolean withProperties ) + public void dumpPapx( boolean withProperties ) throws Exception { - for ( PAPX papx : _doc.getParagraphTable().getParagraphs() ) + if ( _doc instanceof HWPFDocument ) { - System.out.println( papx ); + System.out.println( "binary PAP pages " ); - if ( withProperties ) - System.out.println( papx.getParagraphProperties( _doc - .getStyleSheet() ) ); + HWPFDocument doc = (HWPFDocument) _doc; - if ( true ) + Field fMainStream = HWPFDocumentCore.class + .getDeclaredField( "_mainStream" ); + fMainStream.setAccessible( true ); + byte[] mainStream = (byte[]) fMainStream.get( _doc ); + + PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc + .getFileInformationBlock().getFcPlcfbtePapx(), doc + .getFileInformationBlock().getLcbPlcfbtePapx(), 4 ); + + List papxs = new ArrayList(); + + int length = binTable.length(); + for ( int x = 0; x < length; x++ ) { + GenericPropertyNode node = binTable.getProperty( x ); + + int pageNum = LittleEndian.getInt( node.getBytes() ); + int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE + * pageNum; + + PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage( + mainStream, doc.getDataStream(), pageOffset, + doc.getTextTable(), false ); + + System.out.println( "* PFKP: " + pfkp ); + + for ( PAPX papx : pfkp.getPAPXs() ) + { + System.out.println( "** " + papx ); + papxs.add( papx ); + if ( papx != null && true ) + { + SprmIterator sprmIt = new SprmIterator( + papx.getGrpprl(), 2 ); + while ( sprmIt.hasNext() ) + { + SprmOperation sprm = sprmIt.next(); + System.out.println( "*** " + sprm.toString() ); + } + } + + } + } + + Collections.sort( papxs ); + System.out.println( "* Sorted by END" ); + for ( PAPX papx : papxs ) + { + System.out.println( "** " + papx ); SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 ); while ( sprmIt.hasNext() ) { SprmOperation sprm = sprmIt.next(); - System.out.println( "\t" + sprm.toString() ); + System.out.println( "*** " + sprm.toString() ); } } } + + // for ( PAPX papx : _doc.getParagraphTable().getParagraphs() ) + // { + // System.out.println( papx ); + // + // if ( withProperties ) + // System.out.println( papx.getParagraphProperties( _doc + // .getStyleSheet() ) ); + // + // if ( true ) + // { + // SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 ); + // while ( sprmIt.hasNext() ) + // { + // SprmOperation sprm = sprmIt.next(); + // System.out.println( "\t" + sprm.toString() ); + // } + // } + // } } public void dumpParagraphs( boolean dumpAssotiatedPapx ) @@ -356,6 +444,22 @@ public final class HWPFLister } } + private void dumpPictures() + { + if ( _doc instanceof HWPFOldDocument ) + { + System.out.println( "Word 95 not supported so far" ); + return; + } + + List allPictures = ( (HWPFDocument) _doc ).getPicturesTable() + .getAllPictures(); + for ( Picture picture : allPictures ) + { + System.out.println( picture.toString() ); + } + } + public void dumpTextPieces( boolean withText ) { for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )