Add simplest "escher" pictures support in Word-to-HTML and Word-to-FO converters
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1151888 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e3682b766f
commit
493aa8c30d
@ -310,8 +310,8 @@ public final class HWPFDocument extends HWPFDocumentCore
|
|||||||
_officeArts = new ShapesTable(_tableStream, _fib);
|
_officeArts = new ShapesTable(_tableStream, _fib);
|
||||||
|
|
||||||
// And escher pictures
|
// And escher pictures
|
||||||
_officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder );
|
_officeDrawingsHeaders = new OfficeDrawingsImpl( _fspaHeaders, _escherRecordHolder, _mainStream );
|
||||||
_officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder);
|
_officeDrawingsMain = new OfficeDrawingsImpl( _fspaMain , _escherRecordHolder, _mainStream);
|
||||||
|
|
||||||
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
|
_st = new SectionTable(_mainStream, _tableStream, _fib.getFcPlcfsed(), _fib.getLcbPlcfsed(), fcMin, _tpt, _cpSplit);
|
||||||
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
|
_ss = new StyleSheet(_tableStream, _fib.getFcStshf());
|
||||||
|
@ -41,6 +41,7 @@ import org.apache.poi.hwpf.usermodel.Notes;
|
|||||||
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
|
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Picture;
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
|
import org.apache.poi.hwpf.usermodel.PictureType;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
import org.apache.poi.hwpf.usermodel.Section;
|
import org.apache.poi.hwpf.usermodel.Section;
|
||||||
import org.apache.poi.hwpf.usermodel.Table;
|
import org.apache.poi.hwpf.usermodel.Table;
|
||||||
@ -578,7 +579,10 @@ public abstract class AbstractWordConverter
|
|||||||
protected void processDrawnObject( HWPFDocument doc,
|
protected void processDrawnObject( HWPFDocument doc,
|
||||||
CharacterRun characterRun, Element block )
|
CharacterRun characterRun, Element block )
|
||||||
{
|
{
|
||||||
// main?
|
if ( getPicturesManager() == null )
|
||||||
|
return;
|
||||||
|
|
||||||
|
// TODO: support headers
|
||||||
OfficeDrawing officeDrawing = doc.getOfficeDrawingsMain()
|
OfficeDrawing officeDrawing = doc.getOfficeDrawingsMain()
|
||||||
.getOfficeDrawingAt( characterRun.getStartOffset() );
|
.getOfficeDrawingAt( characterRun.getStartOffset() );
|
||||||
if ( officeDrawing == null )
|
if ( officeDrawing == null )
|
||||||
@ -588,10 +592,22 @@ public abstract class AbstractWordConverter
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: do something :)
|
byte[] pictureData = officeDrawing.getPictureData();
|
||||||
|
if ( pictureData == null )
|
||||||
|
// usual shape?
|
||||||
|
return;
|
||||||
|
|
||||||
|
final PictureType type = PictureType.findMatchingType( pictureData );
|
||||||
|
String path = getPicturesManager().savePicture( pictureData, type,
|
||||||
|
"s" + characterRun.getStartOffset() + "." + type );
|
||||||
|
|
||||||
|
processDrawnObject( doc, characterRun, officeDrawing, path, block );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected abstract void processDrawnObject( HWPFDocument doc,
|
||||||
|
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
||||||
|
String path, Element block );
|
||||||
|
|
||||||
protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
|
protected abstract void processEndnoteAutonumbered( HWPFDocument doc,
|
||||||
int noteIndex, Element block, Range endnoteTextRange );
|
int noteIndex, Element block, Range endnoteTextRange );
|
||||||
|
|
||||||
|
@ -113,6 +113,13 @@ public class HtmlDocumentFacade
|
|||||||
return basicLink;
|
return basicLink;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Element createImage( String src )
|
||||||
|
{
|
||||||
|
Element result = document.createElement( "img" );
|
||||||
|
result.setAttribute( "src", src );
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
public Element createLineBreak()
|
public Element createLineBreak()
|
||||||
{
|
{
|
||||||
return document.createElement( "br" );
|
return document.createElement( "br" );
|
||||||
|
@ -16,7 +16,6 @@
|
|||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.converter;
|
package org.apache.poi.hwpf.converter;
|
||||||
|
|
||||||
import org.apache.poi.hwpf.usermodel.Picture;
|
|
||||||
import org.apache.poi.hwpf.usermodel.PictureType;
|
import org.apache.poi.hwpf.usermodel.PictureType;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -35,11 +34,11 @@ public interface PicturesManager
|
|||||||
* {@link PictureType#WMF}. FO (Apache FOP) supports at least PNG and SVG
|
* {@link PictureType#WMF}. FO (Apache FOP) supports at least PNG and SVG
|
||||||
* types.
|
* types.
|
||||||
*
|
*
|
||||||
* @param picture
|
* @param content
|
||||||
* Word picture
|
* picture content
|
||||||
* @return path to file that can be used as reference in HTML (img's src) of
|
* @return path to file that can be used as reference in HTML (img's src) of
|
||||||
* XLS FO (fo:external-graphic's src) or <tt>null</tt> if image were
|
* XSL FO (fo:external-graphic's src) or <tt>null</tt> if the image was
|
||||||
* not saved and should not be referenced from result HTML / FO.
|
* not saved and should not be referenced from result HTML / FO.
|
||||||
*/
|
*/
|
||||||
String savePicture( Picture picture );
|
String savePicture( byte[] content, PictureType pictureType, String suggestedName );
|
||||||
}
|
}
|
||||||
|
@ -37,6 +37,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
|
|||||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||||
import org.apache.poi.hwpf.usermodel.Bookmark;
|
import org.apache.poi.hwpf.usermodel.Bookmark;
|
||||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||||
|
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Picture;
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
@ -262,6 +263,16 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
foDocumentFacade.setDescription( summaryInformation.getComments() );
|
foDocumentFacade.setDescription( summaryInformation.getComments() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void processDrawnObject( HWPFDocument doc,
|
||||||
|
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
||||||
|
String path, Element block )
|
||||||
|
{
|
||||||
|
final Element externalGraphic = foDocumentFacade
|
||||||
|
.createExternalGraphic( path );
|
||||||
|
block.appendChild( externalGraphic );
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
|
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
|
||||||
Element block, Range endnoteTextRange )
|
Element block, Range endnoteTextRange )
|
||||||
@ -364,7 +375,10 @@ public class WordToFoConverter extends AbstractWordConverter
|
|||||||
PicturesManager fileManager = getPicturesManager();
|
PicturesManager fileManager = getPicturesManager();
|
||||||
if ( fileManager != null )
|
if ( fileManager != null )
|
||||||
{
|
{
|
||||||
String url = fileManager.savePicture( picture );
|
String url = fileManager
|
||||||
|
.savePicture( picture.getContent(),
|
||||||
|
picture.suggestPictureType(),
|
||||||
|
picture.suggestFullFileName() );
|
||||||
|
|
||||||
if ( WordToFoUtils.isNotEmpty( url ) )
|
if ( WordToFoUtils.isNotEmpty( url ) )
|
||||||
{
|
{
|
||||||
|
@ -34,6 +34,7 @@ import org.apache.poi.hwpf.HWPFDocumentCore;
|
|||||||
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
|
||||||
import org.apache.poi.hwpf.usermodel.Bookmark;
|
import org.apache.poi.hwpf.usermodel.Bookmark;
|
||||||
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
import org.apache.poi.hwpf.usermodel.CharacterRun;
|
||||||
|
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
|
||||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||||
import org.apache.poi.hwpf.usermodel.Picture;
|
import org.apache.poi.hwpf.usermodel.Picture;
|
||||||
import org.apache.poi.hwpf.usermodel.Range;
|
import org.apache.poi.hwpf.usermodel.Range;
|
||||||
@ -61,6 +62,22 @@ import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
|
|||||||
public class WordToHtmlConverter extends AbstractWordConverter
|
public class WordToHtmlConverter extends AbstractWordConverter
|
||||||
{
|
{
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Holds properties values, applied to current <tt>p</tt> element. Those
|
||||||
|
* properties shall not be doubled in children <tt>span</tt> elements.
|
||||||
|
*/
|
||||||
|
private static class BlockProperies
|
||||||
|
{
|
||||||
|
final String pFontName;
|
||||||
|
final int pFontSize;
|
||||||
|
|
||||||
|
public BlockProperies( String pFontName, int pFontSize )
|
||||||
|
{
|
||||||
|
this.pFontName = pFontName;
|
||||||
|
this.pFontSize = pFontSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private static final POILogger logger = POILogFactory
|
private static final POILogger logger = POILogFactory
|
||||||
.getLogger( WordToHtmlConverter.class );
|
.getLogger( WordToHtmlConverter.class );
|
||||||
|
|
||||||
@ -253,6 +270,15 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
.addDescription( summaryInformation.getComments() );
|
.addDescription( summaryInformation.getComments() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void processDrawnObject( HWPFDocument doc,
|
||||||
|
CharacterRun characterRun, OfficeDrawing officeDrawing,
|
||||||
|
String path, Element block )
|
||||||
|
{
|
||||||
|
Element img = htmlDocumentFacade.createImage( path );
|
||||||
|
block.appendChild( img );
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
|
protected void processEndnoteAutonumbered( HWPFDocument doc, int noteIndex,
|
||||||
Element block, Range endnoteTextRange )
|
Element block, Range endnoteTextRange )
|
||||||
@ -302,7 +328,10 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
PicturesManager fileManager = getPicturesManager();
|
PicturesManager fileManager = getPicturesManager();
|
||||||
if ( fileManager != null )
|
if ( fileManager != null )
|
||||||
{
|
{
|
||||||
String url = fileManager.savePicture( picture );
|
String url = fileManager
|
||||||
|
.savePicture( picture.getContent(),
|
||||||
|
picture.suggestPictureType(),
|
||||||
|
picture.suggestFullFileName() );
|
||||||
|
|
||||||
if ( WordToHtmlUtils.isNotEmpty( url ) )
|
if ( WordToHtmlUtils.isNotEmpty( url ) )
|
||||||
{
|
{
|
||||||
@ -388,8 +417,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
+ "in;overflow:hidden;" ) );
|
+ "in;overflow:hidden;" ) );
|
||||||
root.appendChild( inner );
|
root.appendChild( inner );
|
||||||
|
|
||||||
Element image = htmlDocumentFacade.document.createElement( "img" );
|
Element image = htmlDocumentFacade.createImage( imageSourcePath );
|
||||||
image.setAttribute( "src", imageSourcePath );
|
|
||||||
image.setAttribute( "class", htmlDocumentFacade
|
image.setAttribute( "class", htmlDocumentFacade
|
||||||
.getOrCreateCssClass( image.getTagName(), "i",
|
.getOrCreateCssClass( image.getTagName(), "i",
|
||||||
"position:absolute;left:-" + cropLeft + ";top:-"
|
"position:absolute;left:-" + cropLeft + ";top:-"
|
||||||
@ -401,8 +429,7 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
root = htmlDocumentFacade.document.createElement( "img" );
|
root = htmlDocumentFacade.createImage( imageSourcePath );
|
||||||
root.setAttribute( "src", imageSourcePath );
|
|
||||||
root.setAttribute( "style", "width:" + imageWidth + "in;height:"
|
root.setAttribute( "style", "width:" + imageWidth + "in;height:"
|
||||||
+ imageHeight + "in;vertical-align:text-bottom;" );
|
+ imageHeight + "in;vertical-align:text-bottom;" );
|
||||||
}
|
}
|
||||||
@ -691,20 +718,4 @@ public class WordToHtmlConverter extends AbstractWordConverter
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Holds properties values, applied to current <tt>p</tt> element. Those
|
|
||||||
* properties shall not be doubled in children <tt>span</tt> elements.
|
|
||||||
*/
|
|
||||||
private static class BlockProperies
|
|
||||||
{
|
|
||||||
final String pFontName;
|
|
||||||
final int pFontSize;
|
|
||||||
|
|
||||||
public BlockProperies( String pFontName, int pFontSize )
|
|
||||||
{
|
|
||||||
this.pFontName = pFontName;
|
|
||||||
this.pFontSize = pFontSize;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -20,6 +20,7 @@ package org.apache.poi.hwpf.model;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
||||||
import org.apache.poi.ddf.EscherContainerRecord;
|
import org.apache.poi.ddf.EscherContainerRecord;
|
||||||
import org.apache.poi.ddf.EscherRecord;
|
import org.apache.poi.ddf.EscherRecord;
|
||||||
@ -119,4 +120,83 @@ public final class EscherRecordHolder {
|
|||||||
// Not found in this lot
|
// Not found in this lot
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<? extends EscherContainerRecord> getDgContainers()
|
||||||
|
{
|
||||||
|
List<EscherContainerRecord> dgContainers = new ArrayList<EscherContainerRecord>(
|
||||||
|
1 );
|
||||||
|
for ( EscherRecord escherRecord : getEscherRecords() )
|
||||||
|
{
|
||||||
|
if ( escherRecord.getRecordId() == (short) 0xF002 )
|
||||||
|
{
|
||||||
|
dgContainers.add( (EscherContainerRecord) escherRecord );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dgContainers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<? extends EscherContainerRecord> getDggContainers()
|
||||||
|
{
|
||||||
|
List<EscherContainerRecord> dggContainers = new ArrayList<EscherContainerRecord>(
|
||||||
|
1 );
|
||||||
|
for ( EscherRecord escherRecord : getEscherRecords() )
|
||||||
|
{
|
||||||
|
if ( escherRecord.getRecordId() == (short) 0xF000 )
|
||||||
|
{
|
||||||
|
dggContainers.add( (EscherContainerRecord) escherRecord );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return dggContainers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<? extends EscherContainerRecord> getBStoreContainers()
|
||||||
|
{
|
||||||
|
List<EscherContainerRecord> bStoreContainers = new ArrayList<EscherContainerRecord>(
|
||||||
|
1 );
|
||||||
|
for ( EscherContainerRecord dggContainer : getDggContainers() )
|
||||||
|
{
|
||||||
|
for ( EscherRecord escherRecord : dggContainer.getChildRecords() )
|
||||||
|
{
|
||||||
|
if ( escherRecord.getRecordId() == (short) 0xF001 )
|
||||||
|
{
|
||||||
|
bStoreContainers.add( (EscherContainerRecord) escherRecord );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return bStoreContainers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<? extends EscherContainerRecord> getSpgrContainers()
|
||||||
|
{
|
||||||
|
List<EscherContainerRecord> spgrContainers = new ArrayList<EscherContainerRecord>(
|
||||||
|
1 );
|
||||||
|
for ( EscherContainerRecord dgContainer : getDgContainers() )
|
||||||
|
{
|
||||||
|
for ( EscherRecord escherRecord : dgContainer.getChildRecords() )
|
||||||
|
{
|
||||||
|
if ( escherRecord.getRecordId() == (short) 0xF003 )
|
||||||
|
{
|
||||||
|
spgrContainers.add( (EscherContainerRecord) escherRecord );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return spgrContainers;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<? extends EscherContainerRecord> getSpContainers()
|
||||||
|
{
|
||||||
|
List<EscherContainerRecord> spContainers = new ArrayList<EscherContainerRecord>(
|
||||||
|
1 );
|
||||||
|
for ( EscherContainerRecord spgrContainer : getSpgrContainers() )
|
||||||
|
{
|
||||||
|
for ( EscherRecord escherRecord : spgrContainer.getChildRecords() )
|
||||||
|
{
|
||||||
|
if ( escherRecord.getRecordId() == (short) 0xF004 )
|
||||||
|
{
|
||||||
|
spContainers.add( (EscherContainerRecord) escherRecord );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return spContainers;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,26 +1,32 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User-friendly interface to office drawing objects
|
||||||
|
*
|
||||||
|
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||||
|
*/
|
||||||
public interface OfficeDrawing
|
public interface OfficeDrawing
|
||||||
{
|
{
|
||||||
/**
|
/**
|
||||||
* Shape Identifier
|
* Returns picture data if this shape has (single?) associated picture data
|
||||||
*/
|
*/
|
||||||
int getShapeId();
|
byte[] getPictureData();
|
||||||
|
|
||||||
/**
|
|
||||||
* Left of rectangle enclosing shape relative to the origin of the shape
|
|
||||||
*/
|
|
||||||
int getRectangleLeft();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Top of rectangle enclosing shape relative to the origin of the shape
|
|
||||||
*/
|
|
||||||
int getRectangleTop();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Right of rectangle enclosing shape relative to the origin of the shape
|
|
||||||
*/
|
|
||||||
int getRectangleRight();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Bottom of the rectangle enclosing shape relative to the origin of the
|
* Bottom of the rectangle enclosing shape relative to the origin of the
|
||||||
@ -28,4 +34,24 @@ public interface OfficeDrawing
|
|||||||
*/
|
*/
|
||||||
int getRectangleBottom();
|
int getRectangleBottom();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Left of rectangle enclosing shape relative to the origin of the shape
|
||||||
|
*/
|
||||||
|
int getRectangleLeft();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Right of rectangle enclosing shape relative to the origin of the shape
|
||||||
|
*/
|
||||||
|
int getRectangleRight();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Top of rectangle enclosing shape relative to the origin of the shape
|
||||||
|
*/
|
||||||
|
int getRectangleTop();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Shape Identifier
|
||||||
|
*/
|
||||||
|
int getShapeId();
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,28 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* User-friendly interface to access document part's office drawings
|
||||||
|
*
|
||||||
|
* @author Sergey Vladimirov (vlsergey {at} gmail {dot} com)
|
||||||
|
*/
|
||||||
public interface OfficeDrawings
|
public interface OfficeDrawings
|
||||||
{
|
{
|
||||||
OfficeDrawing getOfficeDrawingAt( int characterPosition );
|
OfficeDrawing getOfficeDrawingAt( int characterPosition );
|
||||||
|
@ -1,3 +1,19 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
package org.apache.poi.hwpf.usermodel;
|
package org.apache.poi.hwpf.usermodel;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
@ -5,6 +21,16 @@ import java.util.Collection;
|
|||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
||||||
|
import org.apache.poi.ddf.EscherBSERecord;
|
||||||
|
import org.apache.poi.ddf.EscherBlipRecord;
|
||||||
|
import org.apache.poi.ddf.EscherContainerRecord;
|
||||||
|
import org.apache.poi.ddf.EscherOptRecord;
|
||||||
|
import org.apache.poi.ddf.EscherProperties;
|
||||||
|
import org.apache.poi.ddf.EscherRecord;
|
||||||
|
import org.apache.poi.ddf.EscherRecordFactory;
|
||||||
|
import org.apache.poi.ddf.EscherSimpleProperty;
|
||||||
|
import org.apache.poi.ddf.EscherSpRecord;
|
||||||
import org.apache.poi.hwpf.model.EscherRecordHolder;
|
import org.apache.poi.hwpf.model.EscherRecordHolder;
|
||||||
import org.apache.poi.hwpf.model.FSPA;
|
import org.apache.poi.hwpf.model.FSPA;
|
||||||
import org.apache.poi.hwpf.model.FSPATable;
|
import org.apache.poi.hwpf.model.FSPATable;
|
||||||
@ -13,12 +39,82 @@ public class OfficeDrawingsImpl implements OfficeDrawings
|
|||||||
{
|
{
|
||||||
private final EscherRecordHolder _escherRecordHolder;
|
private final EscherRecordHolder _escherRecordHolder;
|
||||||
private final FSPATable _fspaTable;
|
private final FSPATable _fspaTable;
|
||||||
|
private final byte[] _mainStream;
|
||||||
|
|
||||||
public OfficeDrawingsImpl( FSPATable fspaTable,
|
public OfficeDrawingsImpl( FSPATable fspaTable,
|
||||||
EscherRecordHolder escherRecordHolder )
|
EscherRecordHolder escherRecordHolder, byte[] mainStream )
|
||||||
{
|
{
|
||||||
this._fspaTable = fspaTable;
|
this._fspaTable = fspaTable;
|
||||||
this._escherRecordHolder = escherRecordHolder;
|
this._escherRecordHolder = escherRecordHolder;
|
||||||
|
this._mainStream = mainStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
private EscherContainerRecord getEscherShapeRecordContainer(
|
||||||
|
final int shapeId )
|
||||||
|
{
|
||||||
|
for ( EscherContainerRecord spContainer : _escherRecordHolder
|
||||||
|
.getSpContainers() )
|
||||||
|
{
|
||||||
|
EscherSpRecord escherSpRecord = spContainer
|
||||||
|
.getChildById( (short) 0xF00A );
|
||||||
|
if ( escherSpRecord != null
|
||||||
|
&& escherSpRecord.getShapeId() == shapeId )
|
||||||
|
return spContainer;
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private EscherBlipRecord getBitmapRecord( int bitmapIndex )
|
||||||
|
{
|
||||||
|
List<? extends EscherContainerRecord> bContainers = _escherRecordHolder
|
||||||
|
.getBStoreContainers();
|
||||||
|
if ( bContainers == null || bContainers.size() != 1 )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
EscherContainerRecord bContainer = bContainers.get( 0 );
|
||||||
|
final List<EscherRecord> bitmapRecords = bContainer.getChildRecords();
|
||||||
|
|
||||||
|
if ( bitmapRecords.size() < bitmapIndex )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
EscherRecord imageRecord = bitmapRecords.get( bitmapIndex - 1 );
|
||||||
|
|
||||||
|
if ( imageRecord instanceof EscherBlipRecord )
|
||||||
|
{
|
||||||
|
return (EscherBlipRecord) imageRecord;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( imageRecord instanceof EscherBSERecord )
|
||||||
|
{
|
||||||
|
EscherBSERecord bseRecord = (EscherBSERecord) imageRecord;
|
||||||
|
|
||||||
|
EscherBlipRecord blip = bseRecord.getBlipRecord();
|
||||||
|
if ( blip != null )
|
||||||
|
{
|
||||||
|
return blip;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( bseRecord.getOffset() > 0 )
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Blip stored in delay stream, which in a word doc, is the main
|
||||||
|
* stream
|
||||||
|
*/
|
||||||
|
EscherRecordFactory recordFactory = new DefaultEscherRecordFactory();
|
||||||
|
EscherRecord record = recordFactory.createRecord( _mainStream,
|
||||||
|
bseRecord.getOffset() );
|
||||||
|
|
||||||
|
if ( record instanceof EscherBlipRecord )
|
||||||
|
{
|
||||||
|
record.fillFields( _mainStream, bseRecord.getOffset(),
|
||||||
|
recordFactory );
|
||||||
|
return (EscherBlipRecord) record;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private OfficeDrawing getOfficeDrawing( final FSPA fspa )
|
private OfficeDrawing getOfficeDrawing( final FSPA fspa )
|
||||||
@ -50,6 +146,30 @@ public class OfficeDrawingsImpl implements OfficeDrawings
|
|||||||
return fspa.getSpid();
|
return fspa.getSpid();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public byte[] getPictureData()
|
||||||
|
{
|
||||||
|
EscherContainerRecord shapeDescription = getEscherShapeRecordContainer( getShapeId() );
|
||||||
|
if ( shapeDescription == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
EscherOptRecord escherOptRecord = shapeDescription
|
||||||
|
.getChildById( (short) 0xF00B );
|
||||||
|
if ( escherOptRecord == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
EscherSimpleProperty escherProperty = escherOptRecord
|
||||||
|
.lookup( EscherProperties.BLIP__BLIPTODISPLAY );
|
||||||
|
if ( escherProperty == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
int bitmapIndex = escherProperty.getPropertyValue();
|
||||||
|
EscherBlipRecord escherBlipRecord = getBitmapRecord( bitmapIndex );
|
||||||
|
if ( escherBlipRecord == null )
|
||||||
|
return null;
|
||||||
|
|
||||||
|
return escherBlipRecord.getPicturedata();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString()
|
public String toString()
|
||||||
{
|
{
|
||||||
|
@ -30,11 +30,13 @@ import org.apache.poi.util.POILogger;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Represents embedded picture extracted from Word Document
|
* Represents embedded picture extracted from Word Document
|
||||||
|
*
|
||||||
* @author Dmitry Romanov
|
* @author Dmitry Romanov
|
||||||
*/
|
*/
|
||||||
public final class Picture extends PictureDescriptor
|
public final class Picture extends PictureDescriptor
|
||||||
{
|
{
|
||||||
private static final POILogger log = POILogFactory.getLogger(Picture.class);
|
private static final POILogger log = POILogFactory
|
||||||
|
.getLogger( Picture.class );
|
||||||
|
|
||||||
// public static final int FILENAME_OFFSET = 0x7C;
|
// public static final int FILENAME_OFFSET = 0x7C;
|
||||||
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
// public static final int FILENAME_SIZE_OFFSET = 0x6C;
|
||||||
@ -82,17 +84,22 @@ public final class Picture extends PictureDescriptor
|
|||||||
private int height = -1;
|
private int height = -1;
|
||||||
private int width = -1;
|
private int width = -1;
|
||||||
|
|
||||||
public Picture(int dataBlockStartOfsset, byte[] _dataStream, boolean fillBytes)
|
public Picture( int dataBlockStartOfsset, byte[] _dataStream,
|
||||||
|
boolean fillBytes )
|
||||||
{
|
{
|
||||||
super( _dataStream, dataBlockStartOfsset );
|
super( _dataStream, dataBlockStartOfsset );
|
||||||
|
|
||||||
this._dataStream = _dataStream;
|
this._dataStream = _dataStream;
|
||||||
this.dataBlockStartOfsset = dataBlockStartOfsset;
|
this.dataBlockStartOfsset = dataBlockStartOfsset;
|
||||||
this.dataBlockSize = LittleEndian.getInt(_dataStream, dataBlockStartOfsset);
|
this.dataBlockSize = LittleEndian.getInt( _dataStream,
|
||||||
this.pictureBytesStartOffset = getPictureBytesStartOffset(dataBlockStartOfsset, _dataStream, dataBlockSize);
|
dataBlockStartOfsset );
|
||||||
this.size = dataBlockSize - (pictureBytesStartOffset - dataBlockStartOfsset);
|
this.pictureBytesStartOffset = getPictureBytesStartOffset(
|
||||||
|
dataBlockStartOfsset, _dataStream, dataBlockSize );
|
||||||
|
this.size = dataBlockSize
|
||||||
|
- ( pictureBytesStartOffset - dataBlockStartOfsset );
|
||||||
|
|
||||||
if (size<0) {
|
if ( size < 0 )
|
||||||
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -132,39 +139,47 @@ public final class Picture extends PictureDescriptor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that
|
* Tries to suggest a filename: hex representation of picture structure
|
||||||
* is tried to determine from first byte of picture's content.
|
* offset in "Data" stream plus extension that is tried to determine from
|
||||||
|
* first byte of picture's content.
|
||||||
*
|
*
|
||||||
* @return suggested file name
|
* @return suggested file name
|
||||||
*/
|
*/
|
||||||
public String suggestFullFileName()
|
public String suggestFullFileName()
|
||||||
{
|
{
|
||||||
String fileExt = suggestFileExtension();
|
String fileExt = suggestFileExtension();
|
||||||
return Integer.toHexString(dataBlockStartOfsset) + (fileExt.length()>0 ? "."+fileExt : "");
|
return Integer.toHexString( dataBlockStartOfsset )
|
||||||
|
+ ( fileExt.length() > 0 ? "." + fileExt : "" );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Writes Picture's content bytes to specified OutputStream.
|
* Writes Picture's content bytes to specified OutputStream. Is useful when
|
||||||
* Is useful when there is need to write picture bytes directly to stream, omitting its representation in
|
* there is need to write picture bytes directly to stream, omitting its
|
||||||
* memory as distinct byte array.
|
* representation in memory as distinct byte array.
|
||||||
*
|
*
|
||||||
* @param out a stream to write to
|
* @param out
|
||||||
* @throws IOException if some exception is occured while writing to specified out
|
* a stream to write to
|
||||||
|
* @throws IOException
|
||||||
|
* if an exception occurs while writing to the specified stream
|
||||||
*/
|
*/
|
||||||
public void writeImageContent( OutputStream out ) throws IOException
|
public void writeImageContent( OutputStream out ) throws IOException
|
||||||
{
|
{
|
||||||
if (rawContent!=null && rawContent.length>0) {
|
if ( rawContent != null && rawContent.length > 0 )
|
||||||
|
{
|
||||||
out.write( rawContent, 0, size );
|
out.write( rawContent, 0, size );
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
out.write( _dataStream, pictureBytesStartOffset, size );
|
out.write( _dataStream, pictureBytesStartOffset, size );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return The offset of this picture in the picture bytes, used
|
* @return The offset of this picture in the picture bytes, used when
|
||||||
* when matching up with {@link CharacterRun#getPicOffset()}
|
* matching up with {@link CharacterRun#getPicOffset()}
|
||||||
*/
|
*/
|
||||||
public int getStartOffset() {
|
public int getStartOffset()
|
||||||
|
{
|
||||||
return dataBlockStartOfsset;
|
return dataBlockStartOfsset;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,48 +251,56 @@ public final class Picture extends PictureDescriptor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the initial width of the picture, in twips, prior to cropping or scaling.
|
* Gets the initial width of the picture, in twips, prior to cropping or
|
||||||
|
* scaling.
|
||||||
*
|
*
|
||||||
* @return the initial width of the picture in twips
|
* @return the initial width of the picture in twips
|
||||||
*/
|
*/
|
||||||
public int getDxaGoal() {
|
public int getDxaGoal()
|
||||||
|
{
|
||||||
return dxaGoal;
|
return dxaGoal;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the initial height of the picture, in twips, prior to cropping or scaling.
|
* Gets the initial height of the picture, in twips, prior to cropping or
|
||||||
|
* scaling.
|
||||||
*
|
*
|
||||||
* @return the initial width of the picture in twips
|
* @return the initial height of the picture in twips
|
||||||
*/
|
*/
|
||||||
public int getDyaGoal() {
|
public int getDyaGoal()
|
||||||
|
{
|
||||||
return dyaGoal;
|
return dyaGoal;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return The amount the picture has been cropped on the left in twips
|
* @return The amount the picture has been cropped on the left in twips
|
||||||
*/
|
*/
|
||||||
public int getDxaCropLeft() {
|
public int getDxaCropLeft()
|
||||||
|
{
|
||||||
return dxaCropLeft;
|
return dxaCropLeft;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return The amount the picture has been cropped on the top in twips
|
* @return The amount the picture has been cropped on the top in twips
|
||||||
*/
|
*/
|
||||||
public int getDyaCropTop() {
|
public int getDyaCropTop()
|
||||||
|
{
|
||||||
return dyaCropTop;
|
return dyaCropTop;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return The amount the picture has been cropped on the right in twips
|
* @return The amount the picture has been cropped on the right in twips
|
||||||
*/
|
*/
|
||||||
public int getDxaCropRight() {
|
public int getDxaCropRight()
|
||||||
|
{
|
||||||
return dxaCropRight;
|
return dxaCropRight;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return The amount the picture has been cropped on the bottom in twips
|
* @return The amount the picture has been cropped on the bottom in twips
|
||||||
*/
|
*/
|
||||||
public int getDyaCropBottom() {
|
public int getDyaCropBottom()
|
||||||
|
{
|
||||||
return dyaCropBottom;
|
return dyaCropBottom;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -304,28 +327,7 @@ public final class Picture extends PictureDescriptor
|
|||||||
|
|
||||||
public PictureType suggestPictureType()
|
public PictureType suggestPictureType()
|
||||||
{
|
{
|
||||||
final byte[] imageContent = getContent();
|
return PictureType.findMatchingType( getContent() );
|
||||||
for ( PictureType pictureType : PictureType.values() )
|
|
||||||
for ( byte[] signature : pictureType.getSignatures() )
|
|
||||||
if ( matchSignature( imageContent, signature, 0 ) )
|
|
||||||
return pictureType;
|
|
||||||
|
|
||||||
// TODO: DIB, PICT
|
|
||||||
return PictureType.UNKNOWN;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset)
|
|
||||||
{
|
|
||||||
boolean matched = pictureBytesOffset < dataStream.length;
|
|
||||||
for (int i = 0; (i+pictureBytesOffset) < dataStream.length && i < signature.length; i++)
|
|
||||||
{
|
|
||||||
if (dataStream[i+pictureBytesOffset] != signature[i])
|
|
||||||
{
|
|
||||||
matched = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return matched;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// public String getFileName()
|
// public String getFileName()
|
||||||
@ -333,7 +335,8 @@ public final class Picture extends PictureDescriptor
|
|||||||
// return fileName;
|
// return fileName;
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// private static String extractFileName(int blockStartIndex, byte[] dataStream) {
|
// private static String extractFileName(int blockStartIndex, byte[]
|
||||||
|
// dataStream) {
|
||||||
// int fileNameStartOffset = blockStartIndex + 0x7C;
|
// int fileNameStartOffset = blockStartIndex + 0x7C;
|
||||||
// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
|
// int fileNameSizeOffset = blockStartIndex + FILENAME_SIZE_OFFSET;
|
||||||
// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
|
// int fileNameSize = LittleEndian.getShort(dataStream, fileNameSizeOffset);
|
||||||
@ -357,7 +360,8 @@ public final class Picture extends PictureDescriptor
|
|||||||
return;
|
return;
|
||||||
|
|
||||||
this.rawContent = new byte[size];
|
this.rawContent = new byte[size];
|
||||||
System.arraycopy(_dataStream, pictureBytesStartOffset, rawContent, 0, size);
|
System.arraycopy( _dataStream, pictureBytesStartOffset, rawContent, 0,
|
||||||
|
size );
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillImageContent()
|
private void fillImageContent()
|
||||||
@ -367,15 +371,20 @@ public final class Picture extends PictureDescriptor
|
|||||||
|
|
||||||
byte[] rawContent = getRawContent();
|
byte[] rawContent = getRawContent();
|
||||||
|
|
||||||
// HACK: Detect compressed images. In reality there should be some way to determine
|
// HACK: Detect compressed images. In reality there should be some way
|
||||||
// this from the first 32 bytes, but I can't see any similarity between all the
|
// to determine
|
||||||
// samples I have obtained, nor any similarity in the data block contents.
|
// this from the first 32 bytes, but I can't see any similarity between
|
||||||
if (matchSignature(rawContent, COMPRESSED1, 32) || matchSignature(rawContent, COMPRESSED2, 32))
|
// all the
|
||||||
|
// samples I have obtained, nor any similarity in the data block
|
||||||
|
// contents.
|
||||||
|
if ( matchSignature( rawContent, COMPRESSED1, 32 )
|
||||||
|
|| matchSignature( rawContent, COMPRESSED2, 32 ) )
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
InflaterInputStream in = new InflaterInputStream(
|
InflaterInputStream in = new InflaterInputStream(
|
||||||
new ByteArrayInputStream(rawContent, 33, rawContent.length - 33));
|
new ByteArrayInputStream( rawContent, 33,
|
||||||
|
rawContent.length - 33 ) );
|
||||||
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||||
byte[] buf = new byte[4096];
|
byte[] buf = new byte[4096];
|
||||||
int readBytes;
|
int readBytes;
|
||||||
@ -387,92 +396,130 @@ public final class Picture extends PictureDescriptor
|
|||||||
}
|
}
|
||||||
catch ( IOException e )
|
catch ( IOException e )
|
||||||
{
|
{
|
||||||
// Problems reading from the actual ByteArrayInputStream should never happen
|
// Problems reading from the actual ByteArrayInputStream should
|
||||||
|
// never happen
|
||||||
// so this will only ever be a ZipException.
|
// so this will only ever be a ZipException.
|
||||||
log.log(POILogger.INFO, "Possibly corrupt compression or non-compressed data", e);
|
log.log( POILogger.INFO,
|
||||||
|
"Possibly corrupt compression or non-compressed data",
|
||||||
|
e );
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// Raw data is not compressed.
|
// Raw data is not compressed.
|
||||||
content = rawContent;
|
content = rawContent;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getPictureBytesStartOffset(int dataBlockStartOffset, byte[] _dataStream, int dataBlockSize)
|
private static boolean matchSignature( byte[] pictureData,
|
||||||
|
byte[] signature, int offset )
|
||||||
|
{
|
||||||
|
boolean matched = offset < pictureData.length;
|
||||||
|
for ( int i = 0; ( i + offset ) < pictureData.length
|
||||||
|
&& i < signature.length; i++ )
|
||||||
|
{
|
||||||
|
if ( pictureData[i + offset] != signature[i] )
|
||||||
|
{
|
||||||
|
matched = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return matched;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int getPictureBytesStartOffset( int dataBlockStartOffset,
|
||||||
|
byte[] _dataStream, int dataBlockSize )
|
||||||
{
|
{
|
||||||
int realPicoffset = dataBlockStartOffset;
|
int realPicoffset = dataBlockStartOffset;
|
||||||
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
final int dataBlockEndOffset = dataBlockSize + dataBlockStartOffset;
|
||||||
|
|
||||||
// Skip over the PICT block
|
// Skip over the PICT block
|
||||||
int PICTFBlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICT_HEADER_OFFSET); // Should be 68 bytes
|
int PICTFBlockSize = LittleEndian.getShort( _dataStream,
|
||||||
|
dataBlockStartOffset + PICT_HEADER_OFFSET ); // Should be 68
|
||||||
|
// bytes
|
||||||
|
|
||||||
// Now the PICTF1
|
// Now the PICTF1
|
||||||
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
int PICTF1BlockOffset = PICTFBlockSize + PICT_HEADER_OFFSET;
|
||||||
short MM_TYPE = LittleEndian.getShort(_dataStream, dataBlockStartOffset + PICT_HEADER_OFFSET + 2);
|
short MM_TYPE = LittleEndian.getShort( _dataStream,
|
||||||
if(MM_TYPE == 0x66) {
|
dataBlockStartOffset + PICT_HEADER_OFFSET + 2 );
|
||||||
|
if ( MM_TYPE == 0x66 )
|
||||||
|
{
|
||||||
// Skip the stPicName
|
// Skip the stPicName
|
||||||
int cchPicName = LittleEndian.getUnsignedByte(_dataStream, PICTF1BlockOffset);
|
int cchPicName = LittleEndian.getUnsignedByte( _dataStream,
|
||||||
|
PICTF1BlockOffset );
|
||||||
PICTF1BlockOffset += 1 + cchPicName;
|
PICTF1BlockOffset += 1 + cchPicName;
|
||||||
}
|
}
|
||||||
int PICTF1BlockSize = LittleEndian.getShort(_dataStream, dataBlockStartOffset +PICTF1BlockOffset);
|
int PICTF1BlockSize = LittleEndian.getShort( _dataStream,
|
||||||
|
dataBlockStartOffset + PICTF1BlockOffset );
|
||||||
|
|
||||||
int unknownHeaderOffset = (PICTF1BlockSize + PICTF1BlockOffset) < dataBlockEndOffset ? (PICTF1BlockSize + PICTF1BlockOffset) : PICTF1BlockOffset;
|
int unknownHeaderOffset = ( PICTF1BlockSize + PICTF1BlockOffset ) < dataBlockEndOffset ? ( PICTF1BlockSize + PICTF1BlockOffset )
|
||||||
|
: PICTF1BlockOffset;
|
||||||
realPicoffset += ( unknownHeaderOffset + UNKNOWN_HEADER_SIZE );
|
realPicoffset += ( unknownHeaderOffset + UNKNOWN_HEADER_SIZE );
|
||||||
if (realPicoffset>=dataBlockEndOffset) {
|
if ( realPicoffset >= dataBlockEndOffset )
|
||||||
|
{
|
||||||
realPicoffset -= UNKNOWN_HEADER_SIZE;
|
realPicoffset -= UNKNOWN_HEADER_SIZE;
|
||||||
}
|
}
|
||||||
return realPicoffset;
|
return realPicoffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void fillJPGWidthHeight() {
|
private void fillJPGWidthHeight()
|
||||||
|
{
|
||||||
/*
|
/*
|
||||||
http://www.codecomments.com/archive281-2004-3-158083.html
|
* http://www.codecomments.com/archive281-2004-3-158083.html
|
||||||
|
*
|
||||||
Algorhitm proposed by Patrick TJ McPhee:
|
* Algorithm proposed by Patrick TJ McPhee:
|
||||||
|
*
|
||||||
read 2 bytes
|
* read 2 bytes make sure they are 'ffd8'x repeatedly: read 2 bytes make
|
||||||
make sure they are 'ffd8'x
|
* sure the first one is 'ff'x if the second one is 'd9'x stop else if
|
||||||
repeatedly:
|
* the second one is c0 or c2 (or possibly other values ...) skip 2
|
||||||
read 2 bytes
|
* bytes read one byte into depth read two bytes into height read two
|
||||||
make sure the first one is 'ff'x
|
* bytes into width else read two bytes into length skip forward
|
||||||
if the second one is 'd9'x stop
|
* length-2 bytes
|
||||||
else if the second one is c0 or c2 (or possibly other values ...)
|
*
|
||||||
skip 2 bytes
|
* Also used Ruby code snippet from:
|
||||||
read one byte into depth
|
* http://www.bigbold.com/snippets/posts/show/805 for reference
|
||||||
read two bytes into height
|
|
||||||
read two bytes into width
|
|
||||||
else
|
|
||||||
read two bytes into length
|
|
||||||
skip forward length-2 bytes
|
|
||||||
|
|
||||||
Also used Ruby code snippet from: http://www.bigbold.com/snippets/posts/show/805 for reference
|
|
||||||
*/
|
*/
|
||||||
int pointer = pictureBytesStartOffset + 2;
|
int pointer = pictureBytesStartOffset + 2;
|
||||||
int firstByte = _dataStream[pointer];
|
int firstByte = _dataStream[pointer];
|
||||||
int secondByte = _dataStream[pointer + 1];
|
int secondByte = _dataStream[pointer + 1];
|
||||||
|
|
||||||
int endOfPicture = pictureBytesStartOffset + size;
|
int endOfPicture = pictureBytesStartOffset + size;
|
||||||
while(pointer<endOfPicture-1) {
|
while ( pointer < endOfPicture - 1 )
|
||||||
do {
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
firstByte = _dataStream[pointer];
|
firstByte = _dataStream[pointer];
|
||||||
secondByte = _dataStream[pointer + 1];
|
secondByte = _dataStream[pointer + 1];
|
||||||
pointer += 2;
|
pointer += 2;
|
||||||
} while (!(firstByte==(byte)0xFF) && pointer<endOfPicture-1);
|
}
|
||||||
|
while ( !( firstByte == (byte) 0xFF ) && pointer < endOfPicture - 1 );
|
||||||
|
|
||||||
if (firstByte==((byte)0xFF) && pointer<endOfPicture-1) {
|
if ( firstByte == ( (byte) 0xFF ) && pointer < endOfPicture - 1 )
|
||||||
if (secondByte==(byte)0xD9 || secondByte==(byte)0xDA) {
|
{
|
||||||
|
if ( secondByte == (byte) 0xD9 || secondByte == (byte) 0xDA )
|
||||||
|
{
|
||||||
break;
|
break;
|
||||||
} else if ( (secondByte & 0xF0) == 0xC0 && secondByte!=(byte)0xC4 && secondByte!=(byte)0xC8 && secondByte!=(byte)0xCC) {
|
}
|
||||||
|
else if ( ( secondByte & 0xF0 ) == 0xC0
|
||||||
|
&& secondByte != (byte) 0xC4
|
||||||
|
&& secondByte != (byte) 0xC8
|
||||||
|
&& secondByte != (byte) 0xCC )
|
||||||
|
{
|
||||||
pointer += 5;
|
pointer += 5;
|
||||||
this.height = getBigEndianShort( _dataStream, pointer );
|
this.height = getBigEndianShort( _dataStream, pointer );
|
||||||
this.width = getBigEndianShort( _dataStream, pointer + 2 );
|
this.width = getBigEndianShort( _dataStream, pointer + 2 );
|
||||||
break;
|
break;
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
pointer++;
|
pointer++;
|
||||||
pointer++;
|
pointer++;
|
||||||
int length = getBigEndianShort( _dataStream, pointer );
|
int length = getBigEndianShort( _dataStream, pointer );
|
||||||
pointer += length;
|
pointer += length;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
pointer++;
|
pointer++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -481,10 +528,12 @@ public final class Picture extends PictureDescriptor
|
|||||||
private void fillPNGWidthHeight()
|
private void fillPNGWidthHeight()
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
Used PNG file format description from http://www.wotsit.org/download.asp?f=png
|
* Used PNG file format description from
|
||||||
|
* http://www.wotsit.org/download.asp?f=png
|
||||||
*/
|
*/
|
||||||
int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
|
int HEADER_START = pictureBytesStartOffset + PNG.length + 4;
|
||||||
if (matchSignature(_dataStream, IHDR, HEADER_START)) {
|
if ( matchSignature( _dataStream, IHDR, HEADER_START ) )
|
||||||
|
{
|
||||||
int IHDR_CHUNK_WIDTH = HEADER_START + 4;
|
int IHDR_CHUNK_WIDTH = HEADER_START + 4;
|
||||||
this.width = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH );
|
this.width = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH );
|
||||||
this.height = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH + 4 );
|
this.height = getBigEndianInt( _dataStream, IHDR_CHUNK_WIDTH + 4 );
|
||||||
@ -492,7 +541,8 @@ public final class Picture extends PictureDescriptor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns pixel width of the picture or -1 if dimensions determining was failed
|
* returns pixel width of the picture or -1 if dimensions determining was
|
||||||
|
* failed
|
||||||
*/
|
*/
|
||||||
public int getWidth()
|
public int getWidth()
|
||||||
{
|
{
|
||||||
@ -504,7 +554,8 @@ public final class Picture extends PictureDescriptor
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* returns pixel height of the picture or -1 if dimensions determining was failed
|
* returns pixel height of the picture or -1 if dimensions determining was
|
||||||
|
* failed
|
||||||
*/
|
*/
|
||||||
public int getHeight()
|
public int getHeight()
|
||||||
{
|
{
|
||||||
@ -517,7 +568,9 @@ public final class Picture extends PictureDescriptor
|
|||||||
|
|
||||||
private static int getBigEndianInt( byte[] data, int offset )
|
private static int getBigEndianInt( byte[] data, int offset )
|
||||||
{
|
{
|
||||||
return (((data[offset] & 0xFF)<< 24) + ((data[offset +1] & 0xFF) << 16) + ((data[offset + 2] & 0xFF) << 8) + (data[offset +3] & 0xFF));
|
return ( ( ( data[offset] & 0xFF ) << 24 )
|
||||||
|
+ ( ( data[offset + 1] & 0xFF ) << 16 )
|
||||||
|
+ ( ( data[offset + 2] & 0xFF ) << 8 ) + ( data[offset + 3] & 0xFF ) );
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int getBigEndianShort( byte[] data, int offset )
|
private static int getBigEndianShort( byte[] data, int offset )
|
||||||
|
@ -36,11 +36,34 @@ public enum PictureType {
|
|||||||
TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 },
|
TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 },
|
||||||
{ 0x4D, 0x4D, 0x00, 0x2A } } ),
|
{ 0x4D, 0x4D, 0x00, 0x2A } } ),
|
||||||
|
|
||||||
|
UNKNOWN( "image/unknown", "", new byte[][] {} ),
|
||||||
|
|
||||||
WMF( "image/x-wmf", "wmf", new byte[][] {
|
WMF( "image/x-wmf", "wmf", new byte[][] {
|
||||||
{ (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 },
|
{ (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 },
|
||||||
{ 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } ),
|
{ 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } );
|
||||||
|
|
||||||
UNKNOWN( "image/unknown", "", new byte[][] {} );
|
public static PictureType findMatchingType( byte[] pictureContent )
|
||||||
|
{
|
||||||
|
for ( PictureType pictureType : PictureType.values() )
|
||||||
|
for ( byte[] signature : pictureType.getSignatures() )
|
||||||
|
if ( matchSignature( pictureContent, signature ) )
|
||||||
|
return pictureType;
|
||||||
|
|
||||||
|
// TODO: DIB, PICT
|
||||||
|
return PictureType.UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean matchSignature( byte[] pictureData, byte[] signature )
|
||||||
|
{
|
||||||
|
if ( pictureData.length < signature.length )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
for ( int i = 0; i < signature.length; i++ )
|
||||||
|
if ( pictureData[i] != signature[i] )
|
||||||
|
return false;
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
private String _extension;
|
private String _extension;
|
||||||
|
|
||||||
@ -69,4 +92,12 @@ public enum PictureType {
|
|||||||
{
|
{
|
||||||
return _signatures;
|
return _signatures;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean matchSignature( byte[] pictureData )
|
||||||
|
{
|
||||||
|
for ( byte[] signature : getSignatures() )
|
||||||
|
if ( matchSignature( signature, pictureData ) )
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -29,9 +29,8 @@ import junit.framework.TestCase;
|
|||||||
|
|
||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.hwpf.HWPFDocument;
|
import org.apache.poi.hwpf.HWPFDocument;
|
||||||
import org.apache.poi.hwpf.usermodel.Picture;
|
import org.apache.poi.hwpf.usermodel.PictureType;
|
||||||
import org.w3c.dom.Document;
|
import org.w3c.dom.Document;
|
||||||
import org.w3c.dom.Element;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test cases for {@link WordToHtmlConverter}
|
* Test cases for {@link WordToHtmlConverter}
|
||||||
@ -62,16 +61,21 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
|
|
||||||
Document newDocument = DocumentBuilderFactory.newInstance()
|
Document newDocument = DocumentBuilderFactory.newInstance()
|
||||||
.newDocumentBuilder().newDocument();
|
.newDocumentBuilder().newDocument();
|
||||||
WordToHtmlConverter wordToHtmlConverter = !emulatePictureStorage ? new WordToHtmlConverter(
|
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
|
||||||
newDocument ) : new WordToHtmlConverter( newDocument )
|
newDocument );
|
||||||
|
|
||||||
|
if ( emulatePictureStorage )
|
||||||
{
|
{
|
||||||
@Override
|
wordToHtmlConverter.setPicturesManager( new PicturesManager()
|
||||||
protected void processImage( Element currentBlock, boolean inlined,
|
|
||||||
Picture picture )
|
|
||||||
{
|
{
|
||||||
processImage( currentBlock, inlined, picture, "picture.bin" );
|
public String savePicture( byte[] content,
|
||||||
|
PictureType pictureType, String suggestedName )
|
||||||
|
{
|
||||||
|
return suggestedName;
|
||||||
}
|
}
|
||||||
};
|
} );
|
||||||
|
}
|
||||||
|
|
||||||
wordToHtmlConverter.processDocument( hwpfDocument );
|
wordToHtmlConverter.processDocument( hwpfDocument );
|
||||||
|
|
||||||
StringWriter stringWriter = new StringWriter();
|
StringWriter stringWriter = new StringWriter();
|
||||||
@ -172,20 +176,6 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
assertContains( result, "<!--Image link to '0.emf' can be here-->" );
|
assertContains( result, "<!--Image link to '0.emf' can be here-->" );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testPicture() throws Exception
|
|
||||||
{
|
|
||||||
String result = getHtmlText( "picture.doc", true );
|
|
||||||
|
|
||||||
// picture
|
|
||||||
assertContains( result, "src=\"picture.bin\"" );
|
|
||||||
// visible size
|
|
||||||
assertContains( result, "width:3.1305554in;height:1.7250001in;" );
|
|
||||||
// shift due to crop
|
|
||||||
assertContains( result, "left:-0.09375;top:-0.25694445;" );
|
|
||||||
// size without crop
|
|
||||||
assertContains( result, "width:3.4125in;height:2.325in;" );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testHyperlink() throws Exception
|
public void testHyperlink() throws Exception
|
||||||
{
|
{
|
||||||
String result = getHtmlText( "hyperlink.doc" );
|
String result = getHtmlText( "hyperlink.doc" );
|
||||||
@ -201,14 +191,6 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
getHtmlText( "innertable.doc" );
|
getHtmlText( "innertable.doc" );
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTableMerges() throws Exception
|
|
||||||
{
|
|
||||||
String result = getHtmlText( "table-merges.doc" );
|
|
||||||
|
|
||||||
assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
|
|
||||||
assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
|
|
||||||
}
|
|
||||||
|
|
||||||
public void testO_kurs_doc() throws Exception
|
public void testO_kurs_doc() throws Exception
|
||||||
{
|
{
|
||||||
getHtmlText( "o_kurs.doc" );
|
getHtmlText( "o_kurs.doc" );
|
||||||
@ -222,4 +204,33 @@ public class TestWordToHtmlConverter extends TestCase
|
|||||||
assertContains( result, "<a name=\"userref\">" );
|
assertContains( result, "<a name=\"userref\">" );
|
||||||
assertContains( result, "1" );
|
assertContains( result, "1" );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testPicture() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "picture.doc", true );
|
||||||
|
|
||||||
|
// picture
|
||||||
|
assertContains( result, "src=\"0.emf\"" );
|
||||||
|
// visible size
|
||||||
|
assertContains( result, "width:3.1305554in;height:1.7250001in;" );
|
||||||
|
// shift due to crop
|
||||||
|
assertContains( result, "left:-0.09375;top:-0.25694445;" );
|
||||||
|
// size without crop
|
||||||
|
assertContains( result, "width:3.4125in;height:2.325in;" );
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPicturesEscher() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "pictures_escher.doc", true );
|
||||||
|
assertContains( result, "<img src=\"s0.PNG\">" );
|
||||||
|
assertContains( result, "<img src=\"s808.PNG\">" );
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testTableMerges() throws Exception
|
||||||
|
{
|
||||||
|
String result = getHtmlText( "table-merges.doc" );
|
||||||
|
|
||||||
|
assertContains( result, "<td class=\"td1\" colspan=\"3\">" );
|
||||||
|
assertContains( result, "<td class=\"td2\" colspan=\"2\">" );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/pictures_escher.doc
Normal file
BIN
test-data/document/pictures_escher.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user