Improve documentation of some of the HWPF picture stuff, and add unit tests for images of embedded documents

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@995807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2010-09-10 14:37:45 +00:00
parent 2b9a63d38d
commit 96b0aea310
5 changed files with 149 additions and 23 deletions

View File

@ -87,9 +87,12 @@ public final class PicturesTable
* @param run
*/
public boolean hasPicture(CharacterRun run) {
// NOTE(review): the next two conditions look like the before and after forms of the
// same check (diff markers appear to have been lost) - confirm which one applies.
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
// Only special-character runs that are not flagged as obj, OLE2 or data are candidates
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData()) {
// Image should be in its own run, or in a run with the end-of-special marker
if("\u0001".equals(run.text()) || "\u0001\u0015".equals(run.text())) {
// The final answer comes from checking the block at the run's picture offset
return isBlockContainsImage(run.getPicOffset());
}
}
// Every run that fails the checks above is reported as having no picture
return false;
}

View File

@ -141,6 +141,7 @@ public final class CharacterSprmUncompressor
// undocumented till 0x30
case 0x11:
// sprmCFWebHidden
break;
case 0x12:
break;
@ -149,16 +150,21 @@ public final class CharacterSprmUncompressor
case 0x14:
break;
case 0x15:
// sprmCRsidProp
break;
case 0x16:
// sprmCRsidText
break;
case 0x17:
// sprmCRsidRMDel
break;
case 0x18:
// sprmCFSpecVanish
break;
case 0x19:
break;
case 0x1a:
// sprmCFMathPr
break;
case 0x1b:
break;
@ -236,8 +242,7 @@ public final class CharacterSprmUncompressor
}
return;
case 0x34:
// undocumented
// sprmCKcd
break;
case 0x35:
newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ()));
@ -443,8 +448,7 @@ public final class CharacterSprmUncompressor
newCHP.setFtcOther ((short) sprm.getOperand());
break;
case 0x52:
// undocumented
// sprmCCharScale
break;
case 0x53:
newCHP.setFDStrike (getFlag (sprm.getOperand()));
@ -471,23 +475,28 @@ public final class CharacterSprmUncompressor
case 0x59:
newCHP.setSfxtText ((byte) sprm.getOperand());
break;
// undocumented till 0x61
case 0x5a:
// sprmCFBiDi
break;
case 0x5b:
break;
case 0x5c:
// sprmCFBoldBi
break;
case 0x5d:
// sprmCFItalicBi
break;
case 0x5e:
// sprmCFtcBi
break;
case 0x5f:
// sprmCLidBi
break;
case 0x60:
// sprmCIcoBi
break;
case 0x61:
// sprmCHpsBi
break;
case 0x62:
byte[] xstDispFldRMark = new byte[32];
@ -512,14 +521,11 @@ public final class CharacterSprmUncompressor
newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset()));
break;
case 0x67:
// Obsolete
break;
case 0x68:
// sprmCFUsePgsuSettings
break;
// undocumented till 0x6c
case 0x69:
break;
case 0x6a:
@ -540,6 +546,18 @@ public final class CharacterSprmUncompressor
case 0x70:
newCHP.setIco24 (sprm.getOperand());
break;
case 0x71:
// sprmCShd
break;
case 0x72:
// sprmCBrc
break;
case 0x73:
// sprmCRgLid0
break;
case 0x74:
// sprmCRgLid1
break;
}
}

View File

@ -465,7 +465,13 @@ public final class CharacterRun
_chpx.updateSprm(SPRM_PICLOCATION, offset);
}
/**
* Does the picture offset represent a picture
* or binary data?
* If the fData flag is set, then the picture offset refers to
* a NilPICFAndBinData structure, otherwise to a
* PICFAndOfficeArtData structure.
*/
public boolean isData()
{
return _props.isFData();

View File

@ -37,9 +37,11 @@ public final class Picture
// public static final int FILENAME_OFFSET = 0x7C;
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
static final int MFPMM_OFFSET = 0x6;
static final int BLOCK_TYPE_OFFSET = 0xE;
static final int PICF_OFFSET = 0x0;
static final int PICT_HEADER_OFFSET = 0x4;
static final int MFPMM_OFFSET = 0x6;
static final int PICF_SHAPE_OFFSET = 0xE;
static final int PICMD_OFFSET = 0x1C;
static final int UNKNOWN_HEADER_SIZE = 0x49;
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
@ -87,10 +89,6 @@ public final class Picture
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
// if (fileName==null || fileName.length()==0) {
// fileName = "clipboard";
// }
if (fillBytes)
{
@ -353,11 +351,20 @@ public final class Picture
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
{
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
int realPicoffset = dataBlockStartOffset;
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
// Skip over the PICT block
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
// Now the PICTF1
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
if(MM_TYPE == 0x66) {
// Skip the stPicName
int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
PICTF1BlockOffset += 1 + cchPicName;
}
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;

View File

@ -21,10 +21,11 @@ import java.util.List;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.POIDataSamples;
/**
* Test the picture handling
@ -169,4 +170,95 @@ public final class TestPictures extends TestCase {
doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
}
/**
 * Checks that the preview pictures of embedded documents can be read.
 * When another office document is embedded into Word, a rendered
 * "icon" picture of that document is stored alongside it. The image
 * is re-created when the embedded document is edited, and otherwise
 * used as-is to speed things up.
 */
public void testEmbededDocumentIcon() throws Exception {
    // The sample holds two embedded Excel files, an embedded PowerPoint
    // file and an embedded Word file, in that order
    HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc");

    // Loading all the pictures must not throw
    doc.getPicturesTable().getAllPictures();
    PicturesTable pictureTable = doc.getPicturesTable();

    Range r = doc.getRange();
    assertEquals(1, r.numSections());
    assertEquals(5, r.numParagraphs());

    // Expectations per paragraph: number of character runs, full text,
    // and the index of the single run holding a picture (-1 for none)
    int[] expectedRunCounts = { 2, 5, 6, 6, 6 };
    String[] expectedTexts = {
        "I have lots of embedded files in me\r",
        "\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r",
        "\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r"
    };
    int[] pictureRunIndexes = { -1, 3, 3, 3, 3 };

    for (int paraIdx = 0; paraIdx < expectedRunCounts.length; paraIdx++) {
        Paragraph p = r.getParagraph(paraIdx);
        assertEquals(expectedRunCounts[paraIdx], p.numCharacterRuns());
        assertEquals(expectedTexts[paraIdx], p.text());

        // Every run in the paragraph is checked; only the marked one has a picture
        for (int runIdx = 0; runIdx < expectedRunCounts[paraIdx]; runIdx++) {
            boolean shouldHavePicture = (runIdx == pictureRunIndexes[paraIdx]);
            assertEquals(shouldHavePicture, pictureTable.hasPicture(p.getCharacterRun(runIdx)));
        }
    }

    // Look at the pictures table: one entry per embedded document
    List<Picture> pictures = pictureTable.getAllPictures();
    assertEquals(4, pictures.size());

    // Expected suggested file names, in table order
    String[] expectedFileNames = { "0", "469", "8c7", "10a8" };
    for (int picIdx = 0; picIdx < expectedFileNames.length; picIdx++) {
        Picture picture = pictures.get(picIdx);
        assertEquals("", picture.suggestFileExtension());
        assertEquals(expectedFileNames[picIdx], picture.suggestFullFileName());
        assertEquals("image/unknown", picture.getMimeType());
    }
}
}