Improve documentation of some of the HWPF picture stuff, and add unit tests for images of embedded documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@995807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2b9a63d38d
commit
96b0aea310
@ -87,9 +87,12 @@ public final class PicturesTable
|
||||
* @param run
|
||||
*/
|
||||
public boolean hasPicture(CharacterRun run) {
|
||||
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
|
||||
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData()) {
|
||||
// Image should be in its own run, or in a run with the end-of-special marker
|
||||
if("\u0001".equals(run.text()) || "\u0001\u0015".equals(run.text())) {
|
||||
return isBlockContainsImage(run.getPicOffset());
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -141,6 +141,7 @@ public final class CharacterSprmUncompressor
|
||||
// undocumented till 0x30
|
||||
|
||||
case 0x11:
|
||||
// sprmCFWebHidden
|
||||
break;
|
||||
case 0x12:
|
||||
break;
|
||||
@ -149,16 +150,21 @@ public final class CharacterSprmUncompressor
|
||||
case 0x14:
|
||||
break;
|
||||
case 0x15:
|
||||
// sprmCRsidProp
|
||||
break;
|
||||
case 0x16:
|
||||
// sprmCRsidText
|
||||
break;
|
||||
case 0x17:
|
||||
// sprmCRsidRMDel
|
||||
break;
|
||||
case 0x18:
|
||||
// sprmCFSpecVanish
|
||||
break;
|
||||
case 0x19:
|
||||
break;
|
||||
case 0x1a:
|
||||
// sprmCFMathPr
|
||||
break;
|
||||
case 0x1b:
|
||||
break;
|
||||
@ -236,8 +242,7 @@ public final class CharacterSprmUncompressor
|
||||
}
|
||||
return;
|
||||
case 0x34:
|
||||
|
||||
// undocumented
|
||||
// sprmCKcd
|
||||
break;
|
||||
case 0x35:
|
||||
newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ()));
|
||||
@ -443,8 +448,7 @@ public final class CharacterSprmUncompressor
|
||||
newCHP.setFtcOther ((short) sprm.getOperand());
|
||||
break;
|
||||
case 0x52:
|
||||
|
||||
// undocumented
|
||||
// sprmCCharScale
|
||||
break;
|
||||
case 0x53:
|
||||
newCHP.setFDStrike (getFlag (sprm.getOperand()));
|
||||
@ -471,23 +475,28 @@ public final class CharacterSprmUncompressor
|
||||
case 0x59:
|
||||
newCHP.setSfxtText ((byte) sprm.getOperand());
|
||||
break;
|
||||
|
||||
// undocumented till 0x61
|
||||
case 0x5a:
|
||||
// sprmCFBiDi
|
||||
break;
|
||||
case 0x5b:
|
||||
break;
|
||||
case 0x5c:
|
||||
// sprmCFBoldBi
|
||||
break;
|
||||
case 0x5d:
|
||||
// sprmCFItalicBi
|
||||
break;
|
||||
case 0x5e:
|
||||
// sprmCFtcBi
|
||||
break;
|
||||
case 0x5f:
|
||||
// sprmCLidBi
|
||||
break;
|
||||
case 0x60:
|
||||
// sprmCIcoBi
|
||||
break;
|
||||
case 0x61:
|
||||
// sprmCHpsBi
|
||||
break;
|
||||
case 0x62:
|
||||
byte[] xstDispFldRMark = new byte[32];
|
||||
@ -512,14 +521,11 @@ public final class CharacterSprmUncompressor
|
||||
newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset()));
|
||||
break;
|
||||
case 0x67:
|
||||
|
||||
// Obsolete
|
||||
break;
|
||||
case 0x68:
|
||||
// sprmCFUsePgsuSettings
|
||||
break;
|
||||
|
||||
// undocumented till 0x6c
|
||||
|
||||
case 0x69:
|
||||
break;
|
||||
case 0x6a:
|
||||
@ -540,6 +546,18 @@ public final class CharacterSprmUncompressor
|
||||
case 0x70:
|
||||
newCHP.setIco24 (sprm.getOperand());
|
||||
break;
|
||||
case 0x71:
|
||||
// sprmCShd
|
||||
break;
|
||||
case 0x72:
|
||||
// sprmCBrc
|
||||
break;
|
||||
case 0x73:
|
||||
// sprmCRgLid0
|
||||
break;
|
||||
case 0x74:
|
||||
// sprmCRgLid1
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,7 +465,13 @@ public final class CharacterRun
|
||||
_chpx.updateSprm(SPRM_PICLOCATION, offset);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Does the picture offset represent a picture
|
||||
* or binary data?
|
||||
* If it's set, then the picture offset refers to
|
||||
* a NilPICFAndBinData structure, otherwise to a
|
||||
* PICFAndOfficeArtData
|
||||
*/
|
||||
public boolean isData()
|
||||
{
|
||||
return _props.isFData();
|
||||
|
@ -37,9 +37,11 @@ public final class Picture
|
||||
|
||||
// public static final int FILENAME_OFFSET = 0x7C;
|
||||
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||
static final int MFPMM_OFFSET = 0x6;
|
||||
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||
static final int PICF_OFFSET = 0x0;
|
||||
static final int PICT_HEADER_OFFSET = 0x4;
|
||||
static final int MFPMM_OFFSET = 0x6;
|
||||
static final int PICF_SHAPE_OFFSET = 0xE;
|
||||
static final int PICMD_OFFSET = 0x1C;
|
||||
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
||||
|
||||
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
|
||||
@ -87,10 +89,6 @@ public final class Picture
|
||||
|
||||
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
|
||||
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
|
||||
// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
|
||||
// if (fileName==null || fileName.length()==0) {
|
||||
// fileName = "clipboard";
|
||||
// }
|
||||
|
||||
if (fillBytes)
|
||||
{
|
||||
@ -353,11 +351,20 @@ public final class Picture
|
||||
|
||||
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
||||
{
|
||||
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||
int realPicoffset = dataBlockStartOffset;
|
||||
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||
|
||||
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
|
||||
// Skip over the PICT block
|
||||
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
|
||||
|
||||
// Now the PICTF1
|
||||
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
||||
short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
|
||||
if(MM_TYPE == 0x66) {
|
||||
// Skip the stPicName
|
||||
int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
|
||||
PICTF1BlockOffset += 1 + cchPicName;
|
||||
}
|
||||
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
||||
|
||||
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
||||
|
@ -21,10 +21,11 @@ import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.POIDataSamples;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
import org.apache.poi.hwpf.model.PicturesTable;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.POIDataSamples;
|
||||
|
||||
/**
|
||||
* Test the picture handling
|
||||
@ -169,4 +170,95 @@ public final class TestPictures extends TestCase {
|
||||
doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
|
||||
}
|
||||
|
||||
/**
|
||||
* When you embed another office document into Word, it stores
|
||||
* a rendered "icon" picture of what that document looks like.
|
||||
* This image is re-created when you edit the embedded document,
|
||||
* then used as-is to speed things up.
|
||||
* Check that we can properly read one of these
|
||||
*/
|
||||
public void testEmbededDocumentIcon() throws Exception {
|
||||
// This file has two embedded Excel files, an embedded PowerPoint
|
||||
// file and an embedded Word file, in that order
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc");
|
||||
|
||||
// Check we don't break loading the pictures
|
||||
doc.getPicturesTable().getAllPictures();
|
||||
PicturesTable pictureTable = doc.getPicturesTable();
|
||||
|
||||
// Check the text, and its embedded images
|
||||
Paragraph p;
|
||||
Range r = doc.getRange();
|
||||
assertEquals(1, r.numSections());
|
||||
assertEquals(5, r.numParagraphs());
|
||||
|
||||
p = r.getParagraph(0);
|
||||
assertEquals(2, p.numCharacterRuns());
|
||||
assertEquals("I have lots of embedded files in me\r", p.text());
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||
|
||||
p = r.getParagraph(1);
|
||||
assertEquals(5, p.numCharacterRuns());
|
||||
assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||
|
||||
p = r.getParagraph(2);
|
||||
assertEquals(6, p.numCharacterRuns());
|
||||
assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||
|
||||
p = r.getParagraph(3);
|
||||
assertEquals(6, p.numCharacterRuns());
|
||||
assertEquals("\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r", p.text());
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||
|
||||
p = r.getParagraph(4);
|
||||
assertEquals(6, p.numCharacterRuns());
|
||||
assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text());
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||
|
||||
// Look at the pictures table
|
||||
List<Picture> pictures = pictureTable.getAllPictures();
|
||||
assertEquals(4, pictures.size());
|
||||
|
||||
Picture picture = pictures.get(0);
|
||||
assertEquals("", picture.suggestFileExtension());
|
||||
assertEquals("0", picture.suggestFullFileName());
|
||||
assertEquals("image/unknown", picture.getMimeType());
|
||||
|
||||
picture = pictures.get(1);
|
||||
assertEquals("", picture.suggestFileExtension());
|
||||
assertEquals("469", picture.suggestFullFileName());
|
||||
assertEquals("image/unknown", picture.getMimeType());
|
||||
|
||||
picture = pictures.get(2);
|
||||
assertEquals("", picture.suggestFileExtension());
|
||||
assertEquals("8c7", picture.suggestFullFileName());
|
||||
assertEquals("image/unknown", picture.getMimeType());
|
||||
|
||||
picture = pictures.get(3);
|
||||
assertEquals("", picture.suggestFileExtension());
|
||||
assertEquals("10a8", picture.suggestFullFileName());
|
||||
assertEquals("image/unknown", picture.getMimeType());
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user