allow to dump pictures
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1147420 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
09e3d77cd6
commit
572c611187
@ -23,8 +23,12 @@ import java.io.File;
|
|||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.LinkedHashMap;
|
import java.util.LinkedHashMap;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
@ -33,15 +37,21 @@ import org.apache.poi.hwpf.HWPFOldDocument;
|
|||||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||||
import org.apache.poi.hwpf.model.CHPX;
|
import org.apache.poi.hwpf.model.CHPX;
|
||||||
import org.apache.poi.hwpf.model.FileInformationBlock;
|
import org.apache.poi.hwpf.model.FileInformationBlock;
|
||||||
|
import org.apache.poi.hwpf.model.GenericPropertyNode;
|
||||||
|
import org.apache.poi.hwpf.model.PAPFormattedDiskPage;
|
||||||
import org.apache.poi.hwpf.model.PAPX;
|
import org.apache.poi.hwpf.model.PAPX;
|
||||||
|
import org.apache.poi.hwpf.model.PlexOfCps;
|
||||||
import org.apache.poi.hwpf.model.StyleSheet;
|
import org.apache.poi.hwpf.model.StyleSheet;
|
||||||
import org.apache.poi.hwpf.model.TextPiece;
|
import org.apache.poi.hwpf.model.TextPiece;
|
||||||
import org.apache.poi.hwpf.sprm.SprmIterator;
|
import org.apache.poi.hwpf.sprm.SprmIterator;
|
||||||
import org.apache.poi.hwpf.sprm.SprmOperation;
|
import org.apache.poi.hwpf.sprm.SprmOperation;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
|
import org.apache.poi.poifs.common.POIFSConstants;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
import org.apache.poi.util.IOUtils;
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Used by developers to list out key information on a HWPF file. End users will
|
* Used by developers to list out key information on a HWPF file. End users will
|
||||||
@ -88,18 +98,19 @@ public final class HWPFLister
|
|||||||
System.err
|
System.err
|
||||||
.println( "\tHWPFLister <filename>\n"
|
.println( "\tHWPFLister <filename>\n"
|
||||||
+ "\t\t[--textPieces] [--textPiecesText]\n"
|
+ "\t\t[--textPieces] [--textPiecesText]\n"
|
||||||
+ "\t\t[--textRuns] [--textRunsSprms]\n"
|
+ "\t\t[--chpx] [--chpxProperties] [--chpxSprms]\n"
|
||||||
+ "\t\t[--papx] [--papxProperties]\n"
|
+ "\t\t[--papx] [--papxProperties]\n"
|
||||||
+ "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n"
|
+ "\t\t[--paragraphs] [--paragraphsSprms] [--paragraphsText]\n"
|
||||||
+ "\t\t[--writereadback]\n" );
|
+ "\t\t[--pictures]\n" + "\t\t[--writereadback]\n" );
|
||||||
System.exit( 1 );
|
System.exit( 1 );
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean outputTextPieces = false;
|
boolean outputTextPieces = false;
|
||||||
boolean outputTextPiecesText = false;
|
boolean outputTextPiecesText = false;
|
||||||
|
|
||||||
boolean outputTextRuns = false;
|
boolean outputChpx = false;
|
||||||
boolean outputTextRunsSprms = false;
|
boolean outputChpxProperties = false;
|
||||||
|
boolean outputChpxSprms = false;
|
||||||
|
|
||||||
boolean outputParagraphs = false;
|
boolean outputParagraphs = false;
|
||||||
boolean outputParagraphsSprms = false;
|
boolean outputParagraphsSprms = false;
|
||||||
@ -108,6 +119,8 @@ public final class HWPFLister
|
|||||||
boolean outputPapx = false;
|
boolean outputPapx = false;
|
||||||
boolean outputPapxProperties = false;
|
boolean outputPapxProperties = false;
|
||||||
|
|
||||||
|
boolean outputPictures = false;
|
||||||
|
|
||||||
boolean writereadback = false;
|
boolean writereadback = false;
|
||||||
|
|
||||||
for ( String arg : Arrays.asList( args ).subList( 1, args.length ) )
|
for ( String arg : Arrays.asList( args ).subList( 1, args.length ) )
|
||||||
@ -117,10 +130,12 @@ public final class HWPFLister
|
|||||||
if ( "--textPiecesText".equals( arg ) )
|
if ( "--textPiecesText".equals( arg ) )
|
||||||
outputTextPiecesText = true;
|
outputTextPiecesText = true;
|
||||||
|
|
||||||
if ( "--textRuns".equals( arg ) )
|
if ( "--chpx".equals( arg ) )
|
||||||
outputTextRuns = true;
|
outputChpx = true;
|
||||||
if ( "--textRunsSprms".equals( arg ) )
|
if ( "--chpxProperties".equals( arg ) )
|
||||||
outputTextRunsSprms = true;
|
outputChpxProperties = true;
|
||||||
|
if ( "--chpxSprms".equals( arg ) )
|
||||||
|
outputChpxSprms = true;
|
||||||
|
|
||||||
if ( "--paragraphs".equals( arg ) )
|
if ( "--paragraphs".equals( arg ) )
|
||||||
outputParagraphs = true;
|
outputParagraphs = true;
|
||||||
@ -134,6 +149,9 @@ public final class HWPFLister
|
|||||||
if ( "--papxProperties".equals( arg ) )
|
if ( "--papxProperties".equals( arg ) )
|
||||||
outputPapxProperties = true;
|
outputPapxProperties = true;
|
||||||
|
|
||||||
|
if ( "--pictures".equals( arg ) )
|
||||||
|
outputPictures = true;
|
||||||
|
|
||||||
if ( "--writereadback".equals( arg ) )
|
if ( "--writereadback".equals( arg ) )
|
||||||
writereadback = true;
|
writereadback = true;
|
||||||
}
|
}
|
||||||
@ -151,10 +169,16 @@ public final class HWPFLister
|
|||||||
lister.dumpTextPieces( outputTextPiecesText );
|
lister.dumpTextPieces( outputTextPiecesText );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( outputTextRuns )
|
if ( outputChpx )
|
||||||
{
|
{
|
||||||
System.out.println( "== Text runs ==" );
|
System.out.println( "== CHPX ==" );
|
||||||
lister.dumpChpx( outputTextRunsSprms );
|
lister.dumpChpx( outputChpxProperties, outputChpxSprms );
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( outputPapx )
|
||||||
|
{
|
||||||
|
System.out.println( "== PAPX ==" );
|
||||||
|
lister.dumpPapx( outputPapxProperties );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( outputParagraphs )
|
if ( outputParagraphs )
|
||||||
@ -167,10 +191,10 @@ public final class HWPFLister
|
|||||||
outputParagraphsText );
|
outputParagraphsText );
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( !outputParagraphs && outputPapx )
|
if ( outputPictures )
|
||||||
{
|
{
|
||||||
System.out.println( "== PAPX ==" );
|
System.out.println( "== PICTURES ==" );
|
||||||
lister.dumpPapx( outputPapxProperties );
|
lister.dumpPictures();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,13 +264,13 @@ public final class HWPFLister
|
|||||||
this.text = builder.toString();
|
this.text = builder.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void dumpChpx( boolean withSprms )
|
public void dumpChpx( boolean withProperties, boolean withSprms )
|
||||||
{
|
{
|
||||||
for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() )
|
for ( CHPX chpx : _doc.getCharacterTable().getTextRuns() )
|
||||||
{
|
{
|
||||||
System.out.println( chpx );
|
System.out.println( chpx );
|
||||||
|
|
||||||
if ( false )
|
if ( withProperties )
|
||||||
{
|
{
|
||||||
System.out.println( chpx.getCharacterProperties(
|
System.out.println( chpx.getCharacterProperties(
|
||||||
_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) );
|
_doc.getStyleSheet(), (short) StyleSheet.NIL_STYLE ) );
|
||||||
@ -282,26 +306,90 @@ public final class HWPFLister
|
|||||||
System.out.println( fib );
|
System.out.println( fib );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void dumpPapx( boolean withProperties )
|
public void dumpPapx( boolean withProperties ) throws Exception
|
||||||
{
|
{
|
||||||
for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
|
if ( _doc instanceof HWPFDocument )
|
||||||
{
|
{
|
||||||
System.out.println( papx );
|
System.out.println( "binary PAP pages " );
|
||||||
|
|
||||||
if ( withProperties )
|
HWPFDocument doc = (HWPFDocument) _doc;
|
||||||
System.out.println( papx.getParagraphProperties( _doc
|
|
||||||
.getStyleSheet() ) );
|
|
||||||
|
|
||||||
if ( true )
|
Field fMainStream = HWPFDocumentCore.class
|
||||||
|
.getDeclaredField( "_mainStream" );
|
||||||
|
fMainStream.setAccessible( true );
|
||||||
|
byte[] mainStream = (byte[]) fMainStream.get( _doc );
|
||||||
|
|
||||||
|
PlexOfCps binTable = new PlexOfCps( doc.getTableStream(), doc
|
||||||
|
.getFileInformationBlock().getFcPlcfbtePapx(), doc
|
||||||
|
.getFileInformationBlock().getLcbPlcfbtePapx(), 4 );
|
||||||
|
|
||||||
|
List<PAPX> papxs = new ArrayList<PAPX>();
|
||||||
|
|
||||||
|
int length = binTable.length();
|
||||||
|
for ( int x = 0; x < length; x++ )
|
||||||
{
|
{
|
||||||
|
GenericPropertyNode node = binTable.getProperty( x );
|
||||||
|
|
||||||
|
int pageNum = LittleEndian.getInt( node.getBytes() );
|
||||||
|
int pageOffset = POIFSConstants.SMALLER_BIG_BLOCK_SIZE
|
||||||
|
* pageNum;
|
||||||
|
|
||||||
|
PAPFormattedDiskPage pfkp = new PAPFormattedDiskPage(
|
||||||
|
mainStream, doc.getDataStream(), pageOffset,
|
||||||
|
doc.getTextTable(), false );
|
||||||
|
|
||||||
|
System.out.println( "* PFKP: " + pfkp );
|
||||||
|
|
||||||
|
for ( PAPX papx : pfkp.getPAPXs() )
|
||||||
|
{
|
||||||
|
System.out.println( "** " + papx );
|
||||||
|
papxs.add( papx );
|
||||||
|
if ( papx != null && true )
|
||||||
|
{
|
||||||
|
SprmIterator sprmIt = new SprmIterator(
|
||||||
|
papx.getGrpprl(), 2 );
|
||||||
|
while ( sprmIt.hasNext() )
|
||||||
|
{
|
||||||
|
SprmOperation sprm = sprmIt.next();
|
||||||
|
System.out.println( "*** " + sprm.toString() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Collections.sort( papxs );
|
||||||
|
System.out.println( "* Sorted by END" );
|
||||||
|
for ( PAPX papx : papxs )
|
||||||
|
{
|
||||||
|
System.out.println( "** " + papx );
|
||||||
SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
|
SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
|
||||||
while ( sprmIt.hasNext() )
|
while ( sprmIt.hasNext() )
|
||||||
{
|
{
|
||||||
SprmOperation sprm = sprmIt.next();
|
SprmOperation sprm = sprmIt.next();
|
||||||
System.out.println( "\t" + sprm.toString() );
|
System.out.println( "*** " + sprm.toString() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for ( PAPX papx : _doc.getParagraphTable().getParagraphs() )
|
||||||
|
// {
|
||||||
|
// System.out.println( papx );
|
||||||
|
//
|
||||||
|
// if ( withProperties )
|
||||||
|
// System.out.println( papx.getParagraphProperties( _doc
|
||||||
|
// .getStyleSheet() ) );
|
||||||
|
//
|
||||||
|
// if ( true )
|
||||||
|
// {
|
||||||
|
// SprmIterator sprmIt = new SprmIterator( papx.getGrpprl(), 2 );
|
||||||
|
// while ( sprmIt.hasNext() )
|
||||||
|
// {
|
||||||
|
// SprmOperation sprm = sprmIt.next();
|
||||||
|
// System.out.println( "\t" + sprm.toString() );
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
public void dumpParagraphs( boolean dumpAssotiatedPapx )
|
public void dumpParagraphs( boolean dumpAssotiatedPapx )
|
||||||
@ -356,6 +444,22 @@ public final class HWPFLister
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Prints the string form of every picture obtained from the document's
 * pictures table to standard output, one picture per line.
 * <p>
 * When the wrapped document is an {@code HWPFOldDocument}, a "not supported"
 * notice is printed instead and nothing else is listed.
 */
private void dumpPictures()
|
||||||
|
{
|
||||||
|
// Old-format documents are rejected up front, before the cast further down.
if ( _doc instanceof HWPFOldDocument )
|
||||||
|
{
|
||||||
|
System.out.println( "Word 95 not supported so far" );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): the unchecked cast presumes _doc is an HWPFDocument whenever
// it is not an HWPFOldDocument -- confirm against where _doc is assigned.
List<Picture> allPictures = ( (HWPFDocument) _doc ).getPicturesTable()
|
||||||
|
.getAllPictures();
|
||||||
|
for ( Picture picture : allPictures )
|
||||||
|
{
|
||||||
|
System.out.println( picture.toString() );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void dumpTextPieces( boolean withText )
|
public void dumpTextPieces( boolean withText )
|
||||||
{
|
{
|
||||||
for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )
|
for ( TextPiece textPiece : _doc.getTextTable().getTextPieces() )
|
||||||
|
Loading…
Reference in New Issue
Block a user