From bug #38616 - support for extracting images from word files, plus tests for this
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@450066 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
31e27577bd
commit
34878d8f01
@ -89,6 +89,9 @@ public class HWPFDocument extends POIDocument
|
|||||||
/** Holds the save history for this document. */
|
/** Holds the save history for this document. */
|
||||||
protected SavedByTable _sbt;
|
protected SavedByTable _sbt;
|
||||||
|
|
||||||
|
/** Holds pictures table */
|
||||||
|
protected PicturesTable _pictures;
|
||||||
|
|
||||||
protected HWPFDocument()
|
protected HWPFDocument()
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -185,6 +188,9 @@ public class HWPFDocument extends POIDocument
|
|||||||
_dataStream = new byte[0];
|
_dataStream = new byte[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// read in the pictures stream
|
||||||
|
_pictures = new PicturesTable(_dataStream);
|
||||||
|
|
||||||
// get the start of text in the main stream
|
// get the start of text in the main stream
|
||||||
int fcMin = _fib.getFcMin();
|
int fcMin = _fib.getFcMin();
|
||||||
|
|
||||||
@ -288,6 +294,13 @@ public class HWPFDocument extends POIDocument
|
|||||||
return _sbt;
|
return _sbt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the PicturesTable object, which is able to extract images from this document
|
||||||
|
*/
|
||||||
|
public PicturesTable getPicturesTable() {
|
||||||
|
return _pictures;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes out the word file that is represented by an instance of this class.
|
* Writes out the word file that is represented by an instance of this class.
|
||||||
*
|
*
|
||||||
|
152
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
Normal file
152
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Copyright 2002-2006 Apache Software Foundation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi.hwpf.model;
|
||||||
|
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via
|
||||||
|
* clipboard. Responsible for image extraction and for determining whether some piece of the document contains an embedded image.
|
||||||
|
* Analyzes the raw "Data" byte stream (where Word stores all embedded objects) provided by HWPFDocument.
|
||||||
|
*
|
||||||
|
* Word stores images as is within so called "Data stream" - the stream within a Word docfile containing various data
|
||||||
|
* that hang off of characters in the main stream. For example, binary data describing in-line pictures and/or
|
||||||
|
* form fields, and also the native data of embedded objects. Word picture structures are concatenated one after the other in
|
||||||
|
* the data stream if the document contains pictures.
|
||||||
|
* Data stream is easily reachable via HWPFDocument._dataStream property.
|
||||||
|
* A picture is represented in the document text stream as a special character, a Unicode \u0001 whose
|
||||||
|
* CharacterRun.isSpecial() returns true. The file location of the picture in the Word binary file is accessed
|
||||||
|
* via CharacterRun.getPicOffset(). The CharacterRun.getPicOffset() is a byte offset into the data stream.
|
||||||
|
* Beginning at the position recorded in picOffset, a header data structure is stored.
|
||||||
|
*
|
||||||
|
* @author Dmitry Romanov
|
||||||
|
*/
|
||||||
|
public class PicturesTable
|
||||||
|
{
|
||||||
|
static final int TYPE_IMAGE = 0x08;
|
||||||
|
static final int TYPE_IMAGE_WORD2000 = 0x00;
|
||||||
|
static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD = 0xA;
|
||||||
|
static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 = 0x2;
|
||||||
|
static final int TYPE_HORIZONTAL_LINE = 0xE;
|
||||||
|
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||||
|
static final int MM_MODE_TYPE_OFFSET = 0x6;
|
||||||
|
|
||||||
|
private byte[] _dataStream;
|
||||||
|
|
||||||
|
/** @link dependency
|
||||||
|
* @stereotype instantiate*/
|
||||||
|
/*# Picture lnkPicture; */
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @param _dataStream
|
||||||
|
*/
|
||||||
|
public PicturesTable(byte[] _dataStream)
|
||||||
|
{
|
||||||
|
this._dataStream = _dataStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* determines whether specified CharacterRun contains reference to a picture
|
||||||
|
* @param run
|
||||||
|
*/
|
||||||
|
public boolean hasPicture(CharacterRun run) {
|
||||||
|
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
|
||||||
|
return isBlockContainsImage(run.getPicOffset());
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* determines whether specified CharacterRun contains reference to a picture
|
||||||
|
* @param run
|
||||||
|
*/
|
||||||
|
public boolean hasHorizontalLine(CharacterRun run) {
|
||||||
|
if (run.isSpecialCharacter() && "\u0001".equals(run.text())) {
|
||||||
|
return isBlockContainsHorizontalLine(run.getPicOffset());
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isPictureRecognized(short blockType, short mappingModeOfMETAFILEPICT) {
|
||||||
|
return (blockType == TYPE_IMAGE || blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD || (blockType==TYPE_IMAGE_WORD2000 && mappingModeOfMETAFILEPICT==0x64) || (blockType==TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 && mappingModeOfMETAFILEPICT==0x64));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static short getBlockType(byte[] dataStream, int pictOffset) {
|
||||||
|
return LittleEndian.getShort(dataStream, pictOffset + BLOCK_TYPE_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static short getMmMode(byte[] dataStream, int pictOffset) {
|
||||||
|
return LittleEndian.getShort(dataStream, pictOffset + MM_MODE_TYPE_OFFSET);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns picture object tied to specified CharacterRun
|
||||||
|
* @param run
|
||||||
|
* @param fillBytes if true, Picture will be returned with filled byte array that represent picture's contents. If you don't want
|
||||||
|
* to have that byte array in memory but only write picture's contents to stream, pass false and then use Picture.writeImageContent
|
||||||
|
* @see Picture#writeImageContent(java.io.OutputStream)
|
||||||
|
* @return a Picture object if picture exists for specified CharacterRun, null otherwise. PicturesTable.hasPicture is used to determine this.
|
||||||
|
* @see #hasPicture(org.apache.poi.hwpf.usermodel.CharacterRun)
|
||||||
|
*/
|
||||||
|
public Picture extractPicture(CharacterRun run, boolean fillBytes) {
|
||||||
|
if (hasPicture(run)) {
|
||||||
|
return new Picture(run.getPicOffset(), _dataStream, fillBytes);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return a list of Picture objects found in current document
|
||||||
|
*/
|
||||||
|
public List getAllPictures() {
|
||||||
|
ArrayList pictures = new ArrayList();
|
||||||
|
|
||||||
|
int pos = 0;
|
||||||
|
boolean atEnd = false;
|
||||||
|
|
||||||
|
while(pos<_dataStream.length && !atEnd) {
|
||||||
|
if (isBlockContainsImage(pos)) {
|
||||||
|
pictures.add(new Picture(pos, _dataStream, false));
|
||||||
|
}
|
||||||
|
|
||||||
|
int skipOn = LittleEndian.getInt(_dataStream, pos);
|
||||||
|
if(skipOn <= 0) { atEnd = true; }
|
||||||
|
pos += skipOn;
|
||||||
|
}
|
||||||
|
|
||||||
|
return pictures;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isBlockContainsImage(int i)
|
||||||
|
{
|
||||||
|
return isPictureRecognized(getBlockType(_dataStream, i), getMmMode(_dataStream, i));
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isBlockContainsHorizontalLine(int i)
|
||||||
|
{
|
||||||
|
return getBlockType(_dataStream, i)==TYPE_HORIZONTAL_LINE && getMmMode(_dataStream, i)==0x64;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
350
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
Normal file
350
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
Normal file
@ -0,0 +1,350 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Copyright 2002-2006 Apache Software Foundation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
|
import org.apache.poi.util.LittleEndian;
|
||||||
|
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents embedded picture extracted from Word Document
|
||||||
|
* @author Dmitry Romanov
|
||||||
|
*/
|
||||||
|
public class Picture
|
||||||
|
{
|
||||||
|
// public static final int FILENAME_OFFSET = 0x7C;
|
||||||
|
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||||
|
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||||
|
static final int PICT_HEADER_OFFSET = 0x4;
|
||||||
|
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
||||||
|
|
||||||
|
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
|
||||||
|
public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A};
|
||||||
|
public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8};
|
||||||
|
public static final byte[] BMP = new byte[]{'B', 'M'};
|
||||||
|
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
|
||||||
|
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
|
||||||
|
|
||||||
|
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
|
||||||
|
|
||||||
|
private int dataBlockStartOfsset;
|
||||||
|
private int pictureBytesStartOffset;
|
||||||
|
private int dataBlockSize;
|
||||||
|
private int size;
|
||||||
|
// private String fileName;
|
||||||
|
private byte[] content;
|
||||||
|
private byte[] _dataStream;
|
||||||
|
private int aspectRatioX;
|
||||||
|
private int aspectRatioY;
|
||||||
|
private int height = -1;
|
||||||
|
private int width = -1;
|
||||||
|
|
||||||
|
|
||||||
|
public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
|
||||||
|
{
|
||||||
|
this._dataStream = _dataStream;
|
||||||
|
this.dataBlockStartOfsset = dataBlockStartOfsset;
|
||||||
|
this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
|
||||||
|
this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
|
||||||
|
this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
|
||||||
|
|
||||||
|
if (size<0) {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
|
||||||
|
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
|
||||||
|
// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
|
||||||
|
// if (fileName==null || fileName.length()==0) {
|
||||||
|
// fileName = "clipboard";
|
||||||
|
// }
|
||||||
|
|
||||||
|
if (fillBytes)
|
||||||
|
{
|
||||||
|
fillImageContent(_dataStream);
|
||||||
|
}
|
||||||
|
|
||||||
|
String ext = suggestFileExtension();
|
||||||
|
// trying to extract width and height from pictures content:
|
||||||
|
if ("jpg".equalsIgnoreCase(ext)) {
|
||||||
|
fillJPGWidthHeight();
|
||||||
|
} else if ("png".equalsIgnoreCase(ext)) {
|
||||||
|
fillPNGWidthHeight();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int extractAspectRatioX(byte[] _dataStream, int dataBlockStartOffset)
|
||||||
|
{
|
||||||
|
return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x20)/10;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int extractAspectRatioY(byte[] _dataStream, int dataBlockStartOffset)
|
||||||
|
{
|
||||||
|
return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x22)/10;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
|
||||||
|
* is tried to determine from first byte of picture's content.
|
||||||
|
*
|
||||||
|
* @return suggested file name
|
||||||
|
*/
|
||||||
|
public String suggestFullFileName()
|
||||||
|
{
|
||||||
|
String fileExt = suggestFileExtension();
|
||||||
|
return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes Picture's content bytes to specified OutputStream.
|
||||||
|
* Is useful when there is need to write picture bytes directly to stream, omitting its representation in
|
||||||
|
* memory as distinct byte array.
|
||||||
|
*
|
||||||
|
* @param out a stream to write to
|
||||||
|
* @throws IOException if some exception is occured while writing to specified out
|
||||||
|
*/
|
||||||
|
public void writeImageContent(OutputStream out) throws IOException
|
||||||
|
{
|
||||||
|
if (content!=null && content.length>0) {
|
||||||
|
out.write(content, 0, size);
|
||||||
|
} else {
|
||||||
|
out.write(_dataStream, pictureBytesStartOffset, size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return picture's content as byte array
|
||||||
|
*/
|
||||||
|
public byte[] getContent()
|
||||||
|
{
|
||||||
|
if (content == null || content.length<=0)
|
||||||
|
{
|
||||||
|
fillImageContent(this._dataStream);
|
||||||
|
}
|
||||||
|
return content;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
*
|
||||||
|
* @return size in bytes of the picture
|
||||||
|
*/
|
||||||
|
public int getSize()
|
||||||
|
{
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* returns horizontal aspect ratio for picture provided by user
|
||||||
|
*/
|
||||||
|
public int getAspectRatioX()
|
||||||
|
{
|
||||||
|
return aspectRatioX;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* returns vertical aspect ratio for picture provided by user
|
||||||
|
*/
|
||||||
|
public int getAspectRatioY()
|
||||||
|
{
|
||||||
|
return aspectRatioY;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes
|
||||||
|
* of picture's contents
|
||||||
|
* @return suggested file extension
|
||||||
|
*/
|
||||||
|
public String suggestFileExtension()
|
||||||
|
{
|
||||||
|
if (content!=null && content.length>0) {
|
||||||
|
return suggestFileExtension(content, 0);
|
||||||
|
}
|
||||||
|
return suggestFileExtension(_dataStream, pictureBytesStartOffset);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset)
|
||||||
|
{
|
||||||
|
if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) {
|
||||||
|
return "jpg";
|
||||||
|
} else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) {
|
||||||
|
return "png";
|
||||||
|
} else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) {
|
||||||
|
return "gif";
|
||||||
|
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
|
||||||
|
return "bmp";
|
||||||
|
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
|
||||||
|
return "tiff";
|
||||||
|
} else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
|
||||||
|
return "tiff";
|
||||||
|
}
|
||||||
|
return "";
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
|
||||||
|
{
|
||||||
|
boolean matched = pictureBytesOffset < dataStream.length;
|
||||||
|
for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
|
||||||
|
{
|
||||||
|
if (dataStream[i+pictureBytesOffset] != signature[i])
|
||||||
|
{
|
||||||
|
matched = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matched;
|
||||||
|
}
|
||||||
|
|
||||||
|
// public String getFileName()
|
||||||
|
// {
|
||||||
|
// return fileName;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// private static String extractFileName(int blockStartIndex, byte[] dataStream) {
|
||||||
|
// int fileNameStartOffset = blockStartIndex + 0x7C;
|
||||||
|
// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
|
||||||
|
// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
|
||||||
|
//
|
||||||
|
// int fileNameIndex = fileNameStartOffset;
|
||||||
|
// char[] fileNameChars = new char[(fileNameSize-1)/2];
|
||||||
|
// int charIndex = 0;
|
||||||
|
// while(charIndex<fileNameChars.length) {
|
||||||
|
// short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
|
||||||
|
// fileNameChars[charIndex] = (char)aChar;
|
||||||
|
// charIndex++;
|
||||||
|
// fileNameIndex += 2;
|
||||||
|
// }
|
||||||
|
// String fileName = new String(fileNameChars);
|
||||||
|
// return fileName.trim();
|
||||||
|
// }
|
||||||
|
|
||||||
|
private void fillImageContent(byte[] dataStream)
|
||||||
|
{
|
||||||
|
this.content = new byte[size];
|
||||||
|
System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
||||||
|
{
|
||||||
|
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||||
|
int realPicoffset = dataBlockStartOffset;
|
||||||
|
|
||||||
|
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
|
||||||
|
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
||||||
|
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
||||||
|
|
||||||
|
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
||||||
|
realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
|
||||||
|
if (realPicoffset>=dataBlockEndOffset) {
|
||||||
|
realPicoffset -= UNKNOWN_HEADER_SIZE;
|
||||||
|
}
|
||||||
|
return realPicoffset;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillJPGWidthHeight() {
|
||||||
|
/*
|
||||||
|
http://www.codecomments.com/archive281-2004-3-158083.html
|
||||||
|
|
||||||
|
Algorhitm proposed by Patrick TJ McPhee:
|
||||||
|
|
||||||
|
read 2 bytes
|
||||||
|
make sure they are 'ffd8'x
|
||||||
|
repeatedly:
|
||||||
|
read 2 bytes
|
||||||
|
make sure the first one is 'ff'x
|
||||||
|
if the second one is 'd9'x stop
|
||||||
|
else if the second one is c0 or c2 (or possibly other values ...)
|
||||||
|
skip 2 bytes
|
||||||
|
read one byte into depth
|
||||||
|
read two bytes into height
|
||||||
|
read two bytes into width
|
||||||
|
else
|
||||||
|
read two bytes into length
|
||||||
|
skip forward length-2 bytes
|
||||||
|
|
||||||
|
Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
|
||||||
|
*/
|
||||||
|
int pointer = pictureBytesStartOffset+2;
|
||||||
|
int firstByte = _dataStream[pointer];
|
||||||
|
int secondByte = _dataStream[pointer+1];
|
||||||
|
|
||||||
|
int endOfPicture = pictureBytesStartOffset + size;
|
||||||
|
while(pointer<endOfPicture-1) {
|
||||||
|
do {
|
||||||
|
firstByte = _dataStream[pointer];
|
||||||
|
secondByte = _dataStream[pointer+1];
|
||||||
|
} while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
|
||||||
|
|
||||||
|
if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
|
||||||
|
if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
|
||||||
|
break;
|
||||||
|
} else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
|
||||||
|
pointer += 5;
|
||||||
|
this.height = getBigEndianShort(_dataStream, pointer);
|
||||||
|
this.width = getBigEndianShort(_dataStream, pointer+2);
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
pointer++;
|
||||||
|
pointer++;
|
||||||
|
int length = getBigEndianShort(_dataStream, pointer);
|
||||||
|
pointer+=length;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pointer++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void fillPNGWidthHeight()
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Used PNG file format description from http://www.wotsit.org/download.asp?f=png
|
||||||
|
*/
|
||||||
|
int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
|
||||||
|
if (matchSignature(_dataStream, IHDR, HEADER_START)) {
|
||||||
|
int IHDR_CHUNK_WIDTH = HEADER_START + 4;
|
||||||
|
this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
|
||||||
|
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* returns pixel width of the picture or -1 if dimensions determining was failed
|
||||||
|
*/
|
||||||
|
public int getWidth()
|
||||||
|
{
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* returns pixel height of the picture or -1 if dimensions determining was failed
|
||||||
|
*/
|
||||||
|
public int getHeight()
|
||||||
|
{
|
||||||
|
return height;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getBigEndianInt(byte[] data, int offset)
|
||||||
|
{
|
||||||
|
return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getBigEndianShort(byte[] data, int offset)
|
||||||
|
{
|
||||||
|
return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,130 @@
|
|||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
Copyright 2002-2004 Apache Software Foundation
|
||||||
|
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.hwpf;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.poi.hwpf.model.PicturesTable;
|
||||||
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test picture support in HWPF
|
||||||
|
* @author nick
|
||||||
|
*/
|
||||||
|
public class TestHWPFPictures extends TestCase {
|
||||||
|
private HWPFDocument docA;
|
||||||
|
private HWPFDocument docB;
|
||||||
|
private String docAFile;
|
||||||
|
private String docBFile;
|
||||||
|
|
||||||
|
private String imgAFile;
|
||||||
|
private String imgBFile;
|
||||||
|
|
||||||
|
protected void setUp() throws Exception {
|
||||||
|
String dirname = System.getProperty("HWPF.testdata.path");
|
||||||
|
|
||||||
|
docAFile = dirname + "/testPictures.doc";
|
||||||
|
docBFile = dirname + "/two_images.doc";
|
||||||
|
|
||||||
|
imgAFile = dirname + "/simple_image.jpg";
|
||||||
|
imgBFile = dirname + "/simple_image.png";
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test just opening the files
|
||||||
|
*/
|
||||||
|
public void testOpen() throws Exception {
|
||||||
|
docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||||
|
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we have the right numbers of images in each file
|
||||||
|
*/
|
||||||
|
public void testImageCount() throws Exception {
|
||||||
|
docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||||
|
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
|
|
||||||
|
assertNotNull(docA.getPicturesTable());
|
||||||
|
assertNotNull(docB.getPicturesTable());
|
||||||
|
|
||||||
|
PicturesTable picA = docA.getPicturesTable();
|
||||||
|
PicturesTable picB = docB.getPicturesTable();
|
||||||
|
|
||||||
|
List picturesA = picA.getAllPictures();
|
||||||
|
List picturesB = picB.getAllPictures();
|
||||||
|
|
||||||
|
assertEquals(7, picturesA.size());
|
||||||
|
assertEquals(2, picturesB.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we have the right images in at least one file
|
||||||
|
*/
|
||||||
|
public void testImageData() throws Exception {
|
||||||
|
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||||
|
PicturesTable picB = docB.getPicturesTable();
|
||||||
|
List picturesB = picB.getAllPictures();
|
||||||
|
|
||||||
|
assertEquals(2, picturesB.size());
|
||||||
|
|
||||||
|
Picture pic1 = (Picture)picturesB.get(0);
|
||||||
|
Picture pic2 = (Picture)picturesB.get(1);
|
||||||
|
|
||||||
|
assertNotNull(pic1);
|
||||||
|
assertNotNull(pic2);
|
||||||
|
|
||||||
|
// Check the same
|
||||||
|
byte[] pic1B = readFile(imgAFile);
|
||||||
|
byte[] pic2B = readFile(imgBFile);
|
||||||
|
|
||||||
|
assertEquals(pic1B.length, pic1.getContent().length);
|
||||||
|
assertEquals(pic2B.length, pic2.getContent().length);
|
||||||
|
|
||||||
|
assertBytesSame(pic1B, pic1.getContent());
|
||||||
|
assertBytesSame(pic2B, pic2.getContent());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void assertBytesSame(byte[] a, byte[] b) {
|
||||||
|
assertEquals(a.length, b.length);
|
||||||
|
for(int i=0; i<a.length; i++) {
|
||||||
|
assertEquals(a[i],b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private byte[] readFile(String file) throws Exception {
|
||||||
|
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||||
|
FileInputStream fis = new FileInputStream(file);
|
||||||
|
byte[] buffer = new byte[1024];
|
||||||
|
|
||||||
|
int read = 0;
|
||||||
|
while(read > -1) {
|
||||||
|
read = fis.read(buffer);
|
||||||
|
if(read > 0) {
|
||||||
|
baos.write(buffer,0,read);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return baos.toByteArray();
|
||||||
|
}
|
||||||
|
}
|
Binary file not shown.
After Width: | Height: | Size: 643 B |
Binary file not shown.
After Width: | Height: | Size: 1.0 KiB |
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user