Improve documentation of some of the HWPF picture stuff, and add unit tests for images of embedded documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@995807 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2b9a63d38d
commit
96b0aea310
@ -87,9 +87,12 @@ public final class PicturesTable
|
|||||||
* @param run
|
* @param run
|
||||||
*/
|
*/
|
||||||
public boolean hasPicture(CharacterRun run) {
|
public boolean hasPicture(CharacterRun run) {
|
||||||
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
|
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData()) {
|
||||||
|
// Image should be in its own run, or in a run with the end-of-special marker
|
||||||
|
if("\u0001".equals(run.text()) || "\u0001\u0015".equals(run.text())) {
|
||||||
return isBlockContainsImage(run.getPicOffset());
|
return isBlockContainsImage(run.getPicOffset());
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -141,6 +141,7 @@ public final class CharacterSprmUncompressor
|
|||||||
// undocumented till 0x30
|
// undocumented till 0x30
|
||||||
|
|
||||||
case 0x11:
|
case 0x11:
|
||||||
|
// sprmCFWebHidden
|
||||||
break;
|
break;
|
||||||
case 0x12:
|
case 0x12:
|
||||||
break;
|
break;
|
||||||
@ -149,16 +150,21 @@ public final class CharacterSprmUncompressor
|
|||||||
case 0x14:
|
case 0x14:
|
||||||
break;
|
break;
|
||||||
case 0x15:
|
case 0x15:
|
||||||
|
// sprmCRsidProp
|
||||||
break;
|
break;
|
||||||
case 0x16:
|
case 0x16:
|
||||||
|
// sprmCRsidText
|
||||||
break;
|
break;
|
||||||
case 0x17:
|
case 0x17:
|
||||||
|
// sprmCRsidRMDel
|
||||||
break;
|
break;
|
||||||
case 0x18:
|
case 0x18:
|
||||||
|
// sprmCFSpecVanish
|
||||||
break;
|
break;
|
||||||
case 0x19:
|
case 0x19:
|
||||||
break;
|
break;
|
||||||
case 0x1a:
|
case 0x1a:
|
||||||
|
// sprmCFMathPr
|
||||||
break;
|
break;
|
||||||
case 0x1b:
|
case 0x1b:
|
||||||
break;
|
break;
|
||||||
@ -236,8 +242,7 @@ public final class CharacterSprmUncompressor
|
|||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
case 0x34:
|
case 0x34:
|
||||||
|
// sprmCKcd
|
||||||
// undocumented
|
|
||||||
break;
|
break;
|
||||||
case 0x35:
|
case 0x35:
|
||||||
newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ()));
|
newCHP.setFBold (getCHPFlag ((byte) sprm.getOperand(), oldCHP.isFBold ()));
|
||||||
@ -443,8 +448,7 @@ public final class CharacterSprmUncompressor
|
|||||||
newCHP.setFtcOther ((short) sprm.getOperand());
|
newCHP.setFtcOther ((short) sprm.getOperand());
|
||||||
break;
|
break;
|
||||||
case 0x52:
|
case 0x52:
|
||||||
|
// sprmCCharScale
|
||||||
// undocumented
|
|
||||||
break;
|
break;
|
||||||
case 0x53:
|
case 0x53:
|
||||||
newCHP.setFDStrike (getFlag (sprm.getOperand()));
|
newCHP.setFDStrike (getFlag (sprm.getOperand()));
|
||||||
@ -471,23 +475,28 @@ public final class CharacterSprmUncompressor
|
|||||||
case 0x59:
|
case 0x59:
|
||||||
newCHP.setSfxtText ((byte) sprm.getOperand());
|
newCHP.setSfxtText ((byte) sprm.getOperand());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// undocumented till 0x61
|
|
||||||
case 0x5a:
|
case 0x5a:
|
||||||
|
// sprmCFBiDi
|
||||||
break;
|
break;
|
||||||
case 0x5b:
|
case 0x5b:
|
||||||
break;
|
break;
|
||||||
case 0x5c:
|
case 0x5c:
|
||||||
|
// sprmCFBoldBi
|
||||||
break;
|
break;
|
||||||
case 0x5d:
|
case 0x5d:
|
||||||
|
// sprmCFItalicBi
|
||||||
break;
|
break;
|
||||||
case 0x5e:
|
case 0x5e:
|
||||||
|
// sprmCFtcBi
|
||||||
break;
|
break;
|
||||||
case 0x5f:
|
case 0x5f:
|
||||||
|
// sprmCLidBi
|
||||||
break;
|
break;
|
||||||
case 0x60:
|
case 0x60:
|
||||||
|
// sprmCIcoBi
|
||||||
break;
|
break;
|
||||||
case 0x61:
|
case 0x61:
|
||||||
|
// sprmCHpsBi
|
||||||
break;
|
break;
|
||||||
case 0x62:
|
case 0x62:
|
||||||
byte[] xstDispFldRMark = new byte[32];
|
byte[] xstDispFldRMark = new byte[32];
|
||||||
@ -512,14 +521,11 @@ public final class CharacterSprmUncompressor
|
|||||||
newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset()));
|
newCHP.setShd (new ShadingDescriptor(sprm.getGrpprl(), sprm.getGrpprlOffset()));
|
||||||
break;
|
break;
|
||||||
case 0x67:
|
case 0x67:
|
||||||
|
|
||||||
// Obsolete
|
// Obsolete
|
||||||
break;
|
break;
|
||||||
case 0x68:
|
case 0x68:
|
||||||
|
// sprmCFUsePgsuSettings
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// undocumented till 0x6c
|
|
||||||
|
|
||||||
case 0x69:
|
case 0x69:
|
||||||
break;
|
break;
|
||||||
case 0x6a:
|
case 0x6a:
|
||||||
@ -540,6 +546,18 @@ public final class CharacterSprmUncompressor
|
|||||||
case 0x70:
|
case 0x70:
|
||||||
newCHP.setIco24 (sprm.getOperand());
|
newCHP.setIco24 (sprm.getOperand());
|
||||||
break;
|
break;
|
||||||
|
case 0x71:
|
||||||
|
// sprmCShd
|
||||||
|
break;
|
||||||
|
case 0x72:
|
||||||
|
// sprmCBrc
|
||||||
|
break;
|
||||||
|
case 0x73:
|
||||||
|
// sprmCRgLid0
|
||||||
|
break;
|
||||||
|
case 0x74:
|
||||||
|
// sprmCRgLid1
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -465,7 +465,13 @@ public final class CharacterRun
|
|||||||
_chpx.updateSprm(SPRM_PICLOCATION, offset);
|
_chpx.updateSprm(SPRM_PICLOCATION, offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Does the picture offset represent a picture
|
||||||
|
* or binary data?
|
||||||
|
* If it's set, then the picture offset refers to
|
||||||
|
* a NilPICFAndBinData structure, otherwise to a
|
||||||
|
* PICFAndOfficeArtData
|
||||||
|
*/
|
||||||
public boolean isData()
|
public boolean isData()
|
||||||
{
|
{
|
||||||
return _props.isFData();
|
return _props.isFData();
|
||||||
|
@ -37,9 +37,11 @@ public final class Picture
|
|||||||
|
|
||||||
// public static final int FILENAME_OFFSET = 0x7C;
|
// public static final int FILENAME_OFFSET = 0x7C;
|
||||||
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||||
static final int MFPMM_OFFSET = 0x6;
|
static final int PICF_OFFSET = 0x0;
|
||||||
static final int BLOCK_TYPE_OFFSET = 0xE;
|
|
||||||
static final int PICT_HEADER_OFFSET = 0x4;
|
static final int PICT_HEADER_OFFSET = 0x4;
|
||||||
|
static final int MFPMM_OFFSET = 0x6;
|
||||||
|
static final int PICF_SHAPE_OFFSET = 0xE;
|
||||||
|
static final int PICMD_OFFSET = 0x1C;
|
||||||
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
||||||
|
|
||||||
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
|
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
|
||||||
@ -87,10 +89,6 @@ public final class Picture
|
|||||||
|
|
||||||
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
|
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
|
||||||
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
|
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
|
||||||
// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
|
|
||||||
// if (fileName==null || fileName.length()==0) {
|
|
||||||
// fileName = "clipboard";
|
|
||||||
// }
|
|
||||||
|
|
||||||
if (fillBytes)
|
if (fillBytes)
|
||||||
{
|
{
|
||||||
@ -353,11 +351,20 @@ public final class Picture
|
|||||||
|
|
||||||
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
||||||
{
|
{
|
||||||
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
|
||||||
int realPicoffset = dataBlockStartOffset;
|
int realPicoffset = dataBlockStartOffset;
|
||||||
|
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||||
|
|
||||||
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
|
// Skip over the PICT block
|
||||||
|
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
|
||||||
|
|
||||||
|
// Now the PICTF1
|
||||||
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
||||||
|
short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
|
||||||
|
if(MM_TYPE == 0x66) {
|
||||||
|
// Skip the stPicName
|
||||||
|
int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
|
||||||
|
PICTF1BlockOffset += 1 + cchPicName;
|
||||||
|
}
|
||||||
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
||||||
|
|
||||||
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
||||||
|
@ -21,10 +21,11 @@ import java.util.List;
|
|||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||||
|
import org.apache.poi.hwpf.model.PicturesTable;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.POIDataSamples;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the picture handling
|
* Test the picture handling
|
||||||
@ -169,4 +170,95 @@ public final class TestPictures extends TestCase {
|
|||||||
doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
|
doc.getPicturesTable().getAllPictures(); // just check that we do not throw Exception
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* When you embed another office document into Word, it stores
|
||||||
|
* a rendered "icon" picture of what that document looks like.
|
||||||
|
* This image is re-created when you edit the embedded document,
|
||||||
|
* then used as-is to speed things up.
|
||||||
|
* Check that we can properly read one of these
|
||||||
|
*/
|
||||||
|
public void testEmbededDocumentIcon() throws Exception {
|
||||||
|
// This file has two embedded excel files, an embedded powerpoint
|
||||||
|
// file and an embedded word file, in that order
|
||||||
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("word_with_embeded.doc");
|
||||||
|
|
||||||
|
// Check we don't break loading the pictures
|
||||||
|
doc.getPicturesTable().getAllPictures();
|
||||||
|
PicturesTable pictureTable = doc.getPicturesTable();
|
||||||
|
|
||||||
|
// Check the text, and its embedded images
|
||||||
|
Paragraph p;
|
||||||
|
Range r = doc.getRange();
|
||||||
|
assertEquals(1, r.numSections());
|
||||||
|
assertEquals(5, r.numParagraphs());
|
||||||
|
|
||||||
|
p = r.getParagraph(0);
|
||||||
|
assertEquals(2, p.numCharacterRuns());
|
||||||
|
assertEquals("I have lots of embedded files in me\r", p.text());
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||||
|
|
||||||
|
p = r.getParagraph(1);
|
||||||
|
assertEquals(5, p.numCharacterRuns());
|
||||||
|
assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||||
|
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||||
|
|
||||||
|
p = r.getParagraph(2);
|
||||||
|
assertEquals(6, p.numCharacterRuns());
|
||||||
|
assertEquals("\u0013 EMBED Excel.Sheet.8 \u0014\u0001\u0015\r", p.text());
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||||
|
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||||
|
|
||||||
|
p = r.getParagraph(3);
|
||||||
|
assertEquals(6, p.numCharacterRuns());
|
||||||
|
assertEquals("\u0013 EMBED PowerPoint.Show.8 \u0014\u0001\u0015\r", p.text());
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||||
|
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||||
|
|
||||||
|
p = r.getParagraph(4);
|
||||||
|
assertEquals(6, p.numCharacterRuns());
|
||||||
|
assertEquals("\u0013 EMBED Word.Document.8 \\s \u0014\u0001\u0015\r", p.text());
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(0)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(1)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(2)));
|
||||||
|
assertEquals(true, pictureTable.hasPicture(p.getCharacterRun(3)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(4)));
|
||||||
|
assertEquals(false, pictureTable.hasPicture(p.getCharacterRun(5)));
|
||||||
|
|
||||||
|
// Look at the pictures table
|
||||||
|
List<Picture> pictures = pictureTable.getAllPictures();
|
||||||
|
assertEquals(4, pictures.size());
|
||||||
|
|
||||||
|
Picture picture = pictures.get(0);
|
||||||
|
assertEquals("", picture.suggestFileExtension());
|
||||||
|
assertEquals("0", picture.suggestFullFileName());
|
||||||
|
assertEquals("image/unknown", picture.getMimeType());
|
||||||
|
|
||||||
|
picture = pictures.get(1);
|
||||||
|
assertEquals("", picture.suggestFileExtension());
|
||||||
|
assertEquals("469", picture.suggestFullFileName());
|
||||||
|
assertEquals("image/unknown", picture.getMimeType());
|
||||||
|
|
||||||
|
picture = pictures.get(2);
|
||||||
|
assertEquals("", picture.suggestFileExtension());
|
||||||
|
assertEquals("8c7", picture.suggestFullFileName());
|
||||||
|
assertEquals("image/unknown", picture.getMimeType());
|
||||||
|
|
||||||
|
picture = pictures.get(3);
|
||||||
|
assertEquals("", picture.suggestFileExtension());
|
||||||
|
assertEquals("10a8", picture.suggestFullFileName());
|
||||||
|
assertEquals("image/unknown", picture.getMimeType());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user