From bug #38616 - support for extracting images from word files, plus tests for this
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@450066 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
31e27577bd
commit
34878d8f01
@ -89,6 +89,9 @@ public class HWPFDocument extends POIDocument
|
||||
/** Holds the save history for this document. */
|
||||
protected SavedByTable _sbt;
|
||||
|
||||
/** Holds pictures table */
|
||||
protected PicturesTable _pictures;
|
||||
|
||||
protected HWPFDocument()
|
||||
{
|
||||
|
||||
@ -185,6 +188,9 @@ public class HWPFDocument extends POIDocument
|
||||
_dataStream = new byte[0];
|
||||
}
|
||||
|
||||
// read in the pictures stream
|
||||
_pictures = new PicturesTable(_dataStream);
|
||||
|
||||
// get the start of text in the main stream
|
||||
int fcMin = _fib.getFcMin();
|
||||
|
||||
@ -288,6 +294,13 @@ public class HWPFDocument extends POIDocument
|
||||
return _sbt;
|
||||
}
|
||||
|
||||
/**
* Returns the pictures table that was created from this document's data stream.
|
||||
* @return the PicturesTable object, which is able to extract images from this document
|
||||
*/
|
||||
public PicturesTable getPicturesTable() {
|
||||
return _pictures;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes out the word file that is represented by an instance of this class.
|
||||
*
|
||||
|
152
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
Normal file
152
src/scratchpad/src/org/apache/poi/hwpf/model/PicturesTable.java
Normal file
@ -0,0 +1,152 @@
|
||||
/* ====================================================================
|
||||
Copyright 2002-2006 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
|
||||
/**
|
||||
* Holds information about all pictures embedded in Word Document either via "Insert -> Picture -> From File" or via
|
||||
* clipboard. Responsible for images extraction and determining whether some document<EFBFBD>s piece contains embedded image.
|
||||
* Analyzes raw data bytestream <EFBFBD>Data<EFBFBD> (where Word stores all embedded objects) provided by HWPFDocument.
|
||||
*
|
||||
* Word stores images as is within so called "Data stream" - the stream within a Word docfile containing various data
|
||||
* that hang off of characters in the main stream. For example, binary data describing in-line pictures and/or
|
||||
* formfields an also embedded objects-native data. Word picture structures are concatenated one after the other in
|
||||
* the data stream if the document contains pictures.
|
||||
* Data stream is easily reachable via HWPFDocument._dataStream property.
|
||||
* A picture is represented in the document text stream as a special character, an Unicode \u0001 whose
|
||||
* CharacterRun.isSpecial() returns true. The file location of the picture in the Word binary file is accessed
|
||||
* via CharacterRun.getPicOffset(). The CharacterRun.getPicOffset() is a byte offset into the data stream.
|
||||
* Beginning at the position recorded in picOffset, a header data structure, will be stored.
|
||||
*
|
||||
* @author Dmitry Romanov
|
||||
*/
|
||||
public class PicturesTable
|
||||
{
|
||||
static final int TYPE_IMAGE = 0x08;
|
||||
static final int TYPE_IMAGE_WORD2000 = 0x00;
|
||||
static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD = 0xA;
|
||||
static final int TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 = 0x2;
|
||||
static final int TYPE_HORIZONTAL_LINE = 0xE;
|
||||
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||
static final int MM_MODE_TYPE_OFFSET = 0x6;
|
||||
|
||||
private byte[] _dataStream;
|
||||
|
||||
/** @link dependency
|
||||
* @stereotype instantiate*/
|
||||
/*# Picture lnkPicture; */
|
||||
|
||||
/**
|
||||
*
|
||||
* @param _dataStream
|
||||
*/
|
||||
public PicturesTable(byte[] _dataStream)
|
||||
{
|
||||
this._dataStream = _dataStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* determines whether specified CharacterRun contains reference to a picture
|
||||
* @param run
|
||||
*/
|
||||
public boolean hasPicture(CharacterRun run) {
|
||||
if (run.isSpecialCharacter() && !run.isObj() && !run.isOle2() && !run.isData() && "\u0001".equals(run.text())) {
|
||||
return isBlockContainsImage(run.getPicOffset());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* determines whether specified CharacterRun contains reference to a picture
|
||||
* @param run
|
||||
*/
|
||||
public boolean hasHorizontalLine(CharacterRun run) {
|
||||
if (run.isSpecialCharacter() && "\u0001".equals(run.text())) {
|
||||
return isBlockContainsHorizontalLine(run.getPicOffset());
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isPictureRecognized(short blockType, short mappingModeOfMETAFILEPICT) {
|
||||
return (blockType == TYPE_IMAGE || blockType == TYPE_IMAGE_PASTED_FROM_CLIPBOARD || (blockType==TYPE_IMAGE_WORD2000 && mappingModeOfMETAFILEPICT==0x64) || (blockType==TYPE_IMAGE_PASTED_FROM_CLIPBOARD_WORD2000 && mappingModeOfMETAFILEPICT==0x64));
|
||||
}
|
||||
|
||||
private static short getBlockType(byte[] dataStream, int pictOffset) {
|
||||
return LittleEndian.getShort(dataStream, pictOffset + BLOCK_TYPE_OFFSET);
|
||||
}
|
||||
|
||||
private static short getMmMode(byte[] dataStream, int pictOffset) {
|
||||
return LittleEndian.getShort(dataStream, pictOffset + MM_MODE_TYPE_OFFSET);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns picture object tied to specified CharacterRun
|
||||
* @param run
|
||||
* @param fillBytes if true, Picture will be returned with filled byte array that represent picture's contents. If you don't want
|
||||
* to have that byte array in memory but only write picture's contents to stream, pass false and then use Picture.writeImageContent
|
||||
* @see Picture#writeImageContent(java.io.OutputStream)
|
||||
* @return a Picture object if picture exists for specified CharacterRun, null otherwise. PicturesTable.hasPicture is used to determine this.
|
||||
* @see #hasPicture(org.apache.poi.hwpf.usermodel.CharacterRun)
|
||||
*/
|
||||
public Picture extractPicture(CharacterRun run, boolean fillBytes) {
|
||||
if (hasPicture(run)) {
|
||||
return new Picture(run.getPicOffset(), _dataStream, fillBytes);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a list of Picture objects found in current document
|
||||
*/
|
||||
public List getAllPictures() {
|
||||
ArrayList pictures = new ArrayList();
|
||||
|
||||
int pos = 0;
|
||||
boolean atEnd = false;
|
||||
|
||||
while(pos<_dataStream.length && !atEnd) {
|
||||
if (isBlockContainsImage(pos)) {
|
||||
pictures.add(new Picture(pos, _dataStream, false));
|
||||
}
|
||||
|
||||
int skipOn = LittleEndian.getInt(_dataStream, pos);
|
||||
if(skipOn <= 0) { atEnd = true; }
|
||||
pos += skipOn;
|
||||
}
|
||||
|
||||
return pictures;
|
||||
}
|
||||
|
||||
private boolean isBlockContainsImage(int i)
|
||||
{
|
||||
return isPictureRecognized(getBlockType(_dataStream, i), getMmMode(_dataStream, i));
|
||||
}
|
||||
|
||||
private boolean isBlockContainsHorizontalLine(int i)
|
||||
{
|
||||
return getBlockType(_dataStream, i)==TYPE_HORIZONTAL_LINE && getMmMode(_dataStream, i)==0x64;
|
||||
}
|
||||
|
||||
}
|
350
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
Normal file
350
src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java
Normal file
@ -0,0 +1,350 @@
|
||||
/* ====================================================================
|
||||
Copyright 2002-2006 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
import java.io.OutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Represents embedded picture extracted from Word Document
|
||||
* @author Dmitry Romanov
|
||||
*/
|
||||
public class Picture
|
||||
{
|
||||
// public static final int FILENAME_OFFSET = 0x7C;
|
||||
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||
static final int BLOCK_TYPE_OFFSET = 0xE;
|
||||
static final int PICT_HEADER_OFFSET = 0x4;
|
||||
static final int UNKNOWN_HEADER_SIZE = 0x49;
|
||||
|
||||
public static final byte[] GIF = new byte[]{'G', 'I', 'F'};
|
||||
public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A};
|
||||
public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8};
|
||||
public static final byte[] BMP = new byte[]{'B', 'M'};
|
||||
public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00};
|
||||
public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A};
|
||||
|
||||
public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'};
|
||||
|
||||
private int dataBlockStartOfsset;
|
||||
private int pictureBytesStartOffset;
|
||||
private int dataBlockSize;
|
||||
private int size;
|
||||
// private String fileName;
|
||||
private byte[] content;
|
||||
private byte[] _dataStream;
|
||||
private int aspectRatioX;
|
||||
private int aspectRatioY;
|
||||
private int height = -1;
|
||||
private int width = -1;
|
||||
|
||||
|
||||
public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
|
||||
{
|
||||
this._dataStream = _dataStream;
|
||||
this.dataBlockStartOfsset = dataBlockStartOfsset;
|
||||
this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
|
||||
this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
|
||||
this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
|
||||
|
||||
if (size<0) {
|
||||
|
||||
}
|
||||
|
||||
this.aspectRatioX = extractAspectRatioX(_dataStream, dataBlockStartOfsset);
|
||||
this.aspectRatioY = extractAspectRatioY(_dataStream, dataBlockStartOfsset);
|
||||
// this.fileName = extractFileName(dataBlockStartOfsset, _dataStream);
|
||||
// if (fileName==null || fileName.length()==0) {
|
||||
// fileName = "clipboard";
|
||||
// }
|
||||
|
||||
if (fillBytes)
|
||||
{
|
||||
fillImageContent(_dataStream);
|
||||
}
|
||||
|
||||
String ext = suggestFileExtension();
|
||||
// trying to extract width and height from pictures content:
|
||||
if ("jpg".equalsIgnoreCase(ext)) {
|
||||
fillJPGWidthHeight();
|
||||
} else if ("png".equalsIgnoreCase(ext)) {
|
||||
fillPNGWidthHeight();
|
||||
}
|
||||
}
|
||||
|
||||
private static int extractAspectRatioX(byte[] _dataStream, int dataBlockStartOffset)
|
||||
{
|
||||
return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x20)/10;
|
||||
}
|
||||
|
||||
private static int extractAspectRatioY(byte[] _dataStream, int dataBlockStartOffset)
|
||||
{
|
||||
return LittleEndian.getShort(_dataStream, dataBlockStartOffset+0x22)/10;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
|
||||
* is tried to determine from first byte of picture's content.
|
||||
*
|
||||
* @return suggested file name
|
||||
*/
|
||||
public String suggestFullFileName()
|
||||
{
|
||||
String fileExt = suggestFileExtension();
|
||||
return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes Picture's content bytes to specified OutputStream.
|
||||
* Is useful when there is need to write picture bytes directly to stream, omitting its representation in
|
||||
* memory as distinct byte array.
|
||||
*
|
||||
* @param out a stream to write to
|
||||
* @throws IOException if some exception is occured while writing to specified out
|
||||
*/
|
||||
public void writeImageContent(OutputStream out) throws IOException
|
||||
{
|
||||
if (content!=null && content.length>0) {
|
||||
out.write(content, 0, size);
|
||||
} else {
|
||||
out.write(_dataStream, pictureBytesStartOffset, size);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return picture's content as byte array
|
||||
*/
|
||||
public byte[] getContent()
|
||||
{
|
||||
if (content == null || content.length<=0)
|
||||
{
|
||||
fillImageContent(this._dataStream);
|
||||
}
|
||||
return content;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @return size in bytes of the picture
|
||||
*/
|
||||
public int getSize()
|
||||
{
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* returns horizontal aspect ratio for picture provided by user
|
||||
*/
|
||||
public int getAspectRatioX()
|
||||
{
|
||||
return aspectRatioX;
|
||||
}
|
||||
/**
|
||||
* returns vertical aspect ratio for picture provided by user
|
||||
*/
|
||||
public int getAspectRatioY()
|
||||
{
|
||||
return aspectRatioY;
|
||||
}
|
||||
|
||||
/**
|
||||
* tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes
|
||||
* of picture's contents
|
||||
* @return suggested file extension
|
||||
*/
|
||||
public String suggestFileExtension()
|
||||
{
|
||||
if (content!=null && content.length>0) {
|
||||
return suggestFileExtension(content, 0);
|
||||
}
|
||||
return suggestFileExtension(_dataStream, pictureBytesStartOffset);
|
||||
}
|
||||
|
||||
|
||||
private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset)
|
||||
{
|
||||
if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) {
|
||||
return "jpg";
|
||||
} else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) {
|
||||
return "png";
|
||||
} else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) {
|
||||
return "gif";
|
||||
} else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) {
|
||||
return "bmp";
|
||||
} else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset)) {
|
||||
return "tiff";
|
||||
} else if (matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) {
|
||||
return "tiff";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
|
||||
{
|
||||
boolean matched = pictureBytesOffset < dataStream.length;
|
||||
for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
|
||||
{
|
||||
if (dataStream[i+pictureBytesOffset] != signature[i])
|
||||
{
|
||||
matched = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return matched;
|
||||
}
|
||||
|
||||
// public String getFileName()
|
||||
// {
|
||||
// return fileName;
|
||||
// }
|
||||
|
||||
// private static String extractFileName(int blockStartIndex, byte[] dataStream) {
|
||||
// int fileNameStartOffset = blockStartIndex + 0x7C;
|
||||
// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
|
||||
// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
|
||||
//
|
||||
// int fileNameIndex = fileNameStartOffset;
|
||||
// char[] fileNameChars = new char[(fileNameSize-1)/2];
|
||||
// int charIndex = 0;
|
||||
// while(charIndex<fileNameChars.length) {
|
||||
// short aChar = LittleEndian.getShort(dataStream, fileNameIndex);
|
||||
// fileNameChars[charIndex] = (char)aChar;
|
||||
// charIndex++;
|
||||
// fileNameIndex += 2;
|
||||
// }
|
||||
// String fileName = new String(fileNameChars);
|
||||
// return fileName.trim();
|
||||
// }
|
||||
|
||||
private void fillImageContent(byte[] dataStream)
|
||||
{
|
||||
this.content = new byte[size];
|
||||
System.arraycopy(dataStream, pictureBytesStartOffset, content, 0, size);
|
||||
}
|
||||
|
||||
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
||||
{
|
||||
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||
int realPicoffset = dataBlockStartOffset;
|
||||
|
||||
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET);
|
||||
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
||||
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
||||
|
||||
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
||||
realPicoffset += (unknownHeaderOffset + UNKNOWN_HEADER_SIZE);
|
||||
if (realPicoffset>=dataBlockEndOffset) {
|
||||
realPicoffset -= UNKNOWN_HEADER_SIZE;
|
||||
}
|
||||
return realPicoffset;
|
||||
}
|
||||
|
||||
private void fillJPGWidthHeight() {
|
||||
/*
|
||||
http://www.codecomments.com/archive281-2004-3-158083.html
|
||||
|
||||
Algorhitm proposed by Patrick TJ McPhee:
|
||||
|
||||
read 2 bytes
|
||||
make sure they are 'ffd8'x
|
||||
repeatedly:
|
||||
read 2 bytes
|
||||
make sure the first one is 'ff'x
|
||||
if the second one is 'd9'x stop
|
||||
else if the second one is c0 or c2 (or possibly other values ...)
|
||||
skip 2 bytes
|
||||
read one byte into depth
|
||||
read two bytes into height
|
||||
read two bytes into width
|
||||
else
|
||||
read two bytes into length
|
||||
skip forward length-2 bytes
|
||||
|
||||
Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
|
||||
*/
|
||||
int pointer = pictureBytesStartOffset+2;
|
||||
int firstByte = _dataStream[pointer];
|
||||
int secondByte = _dataStream[pointer+1];
|
||||
|
||||
int endOfPicture = pictureBytesStartOffset + size;
|
||||
while(pointer<endOfPicture-1) {
|
||||
do {
|
||||
firstByte = _dataStream[pointer];
|
||||
secondByte = _dataStream[pointer+1];
|
||||
} while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
|
||||
|
||||
if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
|
||||
if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
|
||||
break;
|
||||
} else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
|
||||
pointer += 5;
|
||||
this.height = getBigEndianShort(_dataStream, pointer);
|
||||
this.width = getBigEndianShort(_dataStream, pointer+2);
|
||||
break;
|
||||
} else {
|
||||
pointer++;
|
||||
pointer++;
|
||||
int length = getBigEndianShort(_dataStream, pointer);
|
||||
pointer+=length;
|
||||
}
|
||||
} else {
|
||||
pointer++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void fillPNGWidthHeight()
|
||||
{
|
||||
/*
|
||||
Used PNG file format description from http://www.wotsit.org/download.asp?f=png
|
||||
*/
|
||||
int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
|
||||
if (matchSignature(_dataStream, IHDR, HEADER_START)) {
|
||||
int IHDR_CHUNK_WIDTH = HEADER_START + 4;
|
||||
this.width = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH);
|
||||
this.height = getBigEndianInt(_dataStream, IHDR_CHUNK_WIDTH + 4);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* returns pixel width of the picture or -1 if dimensions determining was failed
|
||||
*/
|
||||
public int getWidth()
|
||||
{
|
||||
return width;
|
||||
}
|
||||
/**
|
||||
* returns pixel height of the picture or -1 if dimensions determining was failed
|
||||
*/
|
||||
public int getHeight()
|
||||
{
|
||||
return height;
|
||||
}
|
||||
|
||||
private static int getBigEndianInt(byte[] data, int offset)
|
||||
{
|
||||
return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
|
||||
}
|
||||
|
||||
private static int getBigEndianShort(byte[] data, int offset)
|
||||
{
|
||||
return (((data[offset] & 0xFF)<< 8) + (data[offset +1] & 0xFF));
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,130 @@
|
||||
|
||||
/* ====================================================================
|
||||
Copyright 2002-2004 Apache Software Foundation
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.poi.hwpf.model.PicturesTable;
|
||||
import org.apache.poi.hwpf.usermodel.Picture;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Test picture support in HWPF
|
||||
* @author nick
|
||||
*/
|
||||
public class TestHWPFPictures extends TestCase {
|
||||
private HWPFDocument docA;
|
||||
private HWPFDocument docB;
|
||||
private String docAFile;
|
||||
private String docBFile;
|
||||
|
||||
private String imgAFile;
|
||||
private String imgBFile;
|
||||
|
||||
protected void setUp() throws Exception {
|
||||
String dirname = System.getProperty("HWPF.testdata.path");
|
||||
|
||||
docAFile = dirname + "/testPictures.doc";
|
||||
docBFile = dirname + "/two_images.doc";
|
||||
|
||||
imgAFile = dirname + "/simple_image.jpg";
|
||||
imgBFile = dirname + "/simple_image.png";
|
||||
}
|
||||
|
||||
/**
|
||||
* Test just opening the files
|
||||
*/
|
||||
public void testOpen() throws Exception {
|
||||
docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we have the right numbers of images in each file
|
||||
*/
|
||||
public void testImageCount() throws Exception {
|
||||
docA = new HWPFDocument(new FileInputStream(docAFile));
|
||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||
|
||||
assertNotNull(docA.getPicturesTable());
|
||||
assertNotNull(docB.getPicturesTable());
|
||||
|
||||
PicturesTable picA = docA.getPicturesTable();
|
||||
PicturesTable picB = docB.getPicturesTable();
|
||||
|
||||
List picturesA = picA.getAllPictures();
|
||||
List picturesB = picB.getAllPictures();
|
||||
|
||||
assertEquals(7, picturesA.size());
|
||||
assertEquals(2, picturesB.size());
|
||||
}
|
||||
|
||||
/**
|
||||
* Test that we have the right images in at least one file
|
||||
*/
|
||||
public void testImageData() throws Exception {
|
||||
docB = new HWPFDocument(new FileInputStream(docBFile));
|
||||
PicturesTable picB = docB.getPicturesTable();
|
||||
List picturesB = picB.getAllPictures();
|
||||
|
||||
assertEquals(2, picturesB.size());
|
||||
|
||||
Picture pic1 = (Picture)picturesB.get(0);
|
||||
Picture pic2 = (Picture)picturesB.get(1);
|
||||
|
||||
assertNotNull(pic1);
|
||||
assertNotNull(pic2);
|
||||
|
||||
// Check the same
|
||||
byte[] pic1B = readFile(imgAFile);
|
||||
byte[] pic2B = readFile(imgBFile);
|
||||
|
||||
assertEquals(pic1B.length, pic1.getContent().length);
|
||||
assertEquals(pic2B.length, pic2.getContent().length);
|
||||
|
||||
assertBytesSame(pic1B, pic1.getContent());
|
||||
assertBytesSame(pic2B, pic2.getContent());
|
||||
}
|
||||
|
||||
|
||||
private void assertBytesSame(byte[] a, byte[] b) {
|
||||
assertEquals(a.length, b.length);
|
||||
for(int i=0; i<a.length; i++) {
|
||||
assertEquals(a[i],b[i]);
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] readFile(String file) throws Exception {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
FileInputStream fis = new FileInputStream(file);
|
||||
byte[] buffer = new byte[1024];
|
||||
|
||||
int read = 0;
|
||||
while(read > -1) {
|
||||
read = fis.read(buffer);
|
||||
if(read > 0) {
|
||||
baos.write(buffer,0,read);
|
||||
}
|
||||
}
|
||||
|
||||
return baos.toByteArray();
|
||||
}
|
||||
}
|
Binary file not shown.
After Width: | Height: | Size: 643 B |
Binary file not shown.
After Width: | Height: | Size: 1.0 KiB |
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hwpf/data/two_images.doc
Executable file
Binary file not shown.
Loading…
Reference in New Issue
Block a user