From 5895434e97705999758282054f3fec013dc3a894 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 28 Jul 2011 08:26:59 +0000 Subject: [PATCH] simplify image processing interface for WordConverters, add PictureManager interface git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1151760 13f79535-47bb-0310-9956-ffa450edef68 --- .../hwpf/converter/AbstractWordConverter.java | 50 +++--- .../poi/hwpf/converter/PicturesManager.java | 45 ++++++ .../poi/hwpf/converter/WordToFoConverter.java | 30 +++- .../poi/hwpf/converter/WordToFoUtils.java | 14 +- .../hwpf/converter/WordToHtmlConverter.java | 12 ++ .../apache/poi/hwpf/usermodel/Picture.java | 146 ++++++++---------- .../poi/hwpf/usermodel/PictureType.java | 72 +++++++++ 7 files changed, 255 insertions(+), 114 deletions(-) create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/converter/PicturesManager.java create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java index 1609dcb5b..d27b86715 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/AbstractWordConverter.java @@ -95,6 +95,8 @@ public abstract class AbstractWordConverter private final Set bookmarkStack = new LinkedHashSet(); + private PicturesManager fileManager; + private FontReplacer fontReplacer = new DefaultFontReplacer(); protected Triplet getCharacterRunTriplet( CharacterRun characterRun ) @@ -109,6 +111,11 @@ public abstract class AbstractWordConverter public abstract Document getDocument(); + public PicturesManager getFileManager() + { + return fileManager; + } + public FontReplacer getFontReplacer() { return fontReplacer; @@ -509,6 +516,25 @@ public abstract class AbstractWordConverter return; } + protected Field processDeadField( HWPFDocumentCore wordDocument, 
+ Range charactersRange, int currentTableLevel, int startOffset, + Element currentBlock ) + { + if ( !( wordDocument instanceof HWPFDocument ) ) + return null; + + HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; + Field field = hwpfDocument.getFields().getFieldByStartOffset( + FieldsDocumentPart.MAIN, startOffset ); + if ( field == null ) + return null; + + processField( hwpfDocument, charactersRange, currentTableLevel, field, + currentBlock ); + + return field; + } + public void processDocument( HWPFDocumentCore wordDocument ) { final SummaryInformation summaryInformation = wordDocument @@ -595,25 +621,6 @@ public abstract class AbstractWordConverter field.secondSubrange( parentRange ), currentBlock ); } - protected Field processDeadField( HWPFDocumentCore wordDocument, - Range charactersRange, int currentTableLevel, int startOffset, - Element currentBlock ) - { - if ( !( wordDocument instanceof HWPFDocument ) ) - return null; - - HWPFDocument hwpfDocument = (HWPFDocument) wordDocument; - Field field = hwpfDocument.getFields().getFieldByStartOffset( - FieldsDocumentPart.MAIN, startOffset ); - if ( field == null ) - return null; - - processField( hwpfDocument, charactersRange, currentTableLevel, field, - currentBlock ); - - return field; - } - protected abstract void processFootnoteAutonumbered( HWPFDocument doc, int noteIndex, Element block, Range footnoteTextRange ); @@ -767,6 +774,11 @@ public abstract class AbstractWordConverter protected abstract void processTable( HWPFDocumentCore wordDocument, Element flow, Table table ); + public void setFileManager( PicturesManager fileManager ) + { + this.fileManager = fileManager; + } + public void setFontReplacer( FontReplacer fontReplacer ) { this.fontReplacer = fontReplacer; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/PicturesManager.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/PicturesManager.java new file mode 100644 index 000000000..ac408139c --- /dev/null +++ 
b/src/scratchpad/src/org/apache/poi/hwpf/converter/PicturesManager.java @@ -0,0 +1,45 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf.converter; + +import org.apache.poi.hwpf.usermodel.Picture; +import org.apache.poi.hwpf.usermodel.PictureType; + +/** + * User-implemented pictures manager to store images on-disk + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public interface PicturesManager +{ + /** + * Stores image (probably on disk). Please note that different output + * formats support different file types, so image conversion may be required. + * For example, HTML browsers usually support {@link PictureType#GIF}, + * {@link PictureType#JPEG}, {@link PictureType#PNG}, + * {@link PictureType#TIFF}, but rarely {@link PictureType#EMF} or + * {@link PictureType#WMF}. FO (Apache FOP) supports at least PNG and SVG + * types. 
+ * + * @param picture + * Word picture + * @return path to file that can be used as reference in HTML (img's src) or + * XSL FO (fo:external-graphic's src) or null if image was + * not saved and should not be referenced from result HTML / FO. + */ + String savePicture( Picture picture ); +} diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java index 08bdcdaeb..d566e9aff 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoConverter.java @@ -359,12 +359,32 @@ public class WordToFoConverter extends AbstractWordConverter * HWPF object, contained picture data and properties */ protected void processImage( Element currentBlock, boolean inlined, - Picture picture ) - { + Picture picture ) { + PicturesManager fileManager = getFileManager(); + if ( fileManager != null ) + { + String url = fileManager.savePicture( picture ); + + if ( WordToFoUtils.isNotEmpty( url ) ) + { + processImage( currentBlock, inlined, picture, url ); + return; + } + } + // no default implementation -- skip - currentBlock.appendChild( foDocumentFacade.getDocument().createComment( - "Image link to '" + picture.suggestFullFileName() - + "' can be here" ) ); + currentBlock.appendChild( foDocumentFacade.document + .createComment( "Image link to '" + + picture.suggestFullFileName() + "' can be here" ) ); + } + + protected void processImage( Element currentBlock, boolean inlined, + Picture picture, String url ) + { + final Element externalGraphic = foDocumentFacade + .createExternalGraphic( url ); + WordToFoUtils.setPictureProperties( picture, externalGraphic ); + currentBlock.appendChild( externalGraphic ); } @Override diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java index 202341fce..b7b0680fd 100644 --- 
a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToFoUtils.java @@ -234,30 +234,30 @@ public class WordToFoUtils extends AbstractWordUtils public static void setPictureProperties( Picture picture, Element graphicElement ) { - final int aspectRatioX = picture.getAspectRatioX(); - final int aspectRatioY = picture.getAspectRatioY(); + final int horizontalScale = picture.getHorizontalScalingFactor(); + final int verticalScale = picture.getVerticalScalingFactor(); - if ( aspectRatioX > 0 ) + if ( horizontalScale > 0 ) { graphicElement .setAttribute( "content-width", ( ( picture.getDxaGoal() - * aspectRatioX / 100 ) / TWIPS_PER_PT ) + * horizontalScale / 1000 ) / TWIPS_PER_PT ) + "pt" ); } else graphicElement.setAttribute( "content-width", ( picture.getDxaGoal() / TWIPS_PER_PT ) + "pt" ); - if ( aspectRatioY > 0 ) + if ( verticalScale > 0 ) graphicElement .setAttribute( "content-height", ( ( picture.getDyaGoal() - * aspectRatioY / 100 ) / TWIPS_PER_PT ) + * verticalScale / 1000 ) / TWIPS_PER_PT ) + "pt" ); else graphicElement.setAttribute( "content-height", ( picture.getDyaGoal() / TWIPS_PER_PT ) + "pt" ); - if ( aspectRatioX <= 0 || aspectRatioY <= 0 ) + if ( horizontalScale <= 0 || verticalScale <= 0 ) { graphicElement.setAttribute( "scaling", "uniform" ); } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java index 75701ba3b..ebc1a673f 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/converter/WordToHtmlConverter.java @@ -315,6 +315,18 @@ public class WordToHtmlConverter extends AbstractWordConverter protected void processImage( Element currentBlock, boolean inlined, Picture picture ) { + PicturesManager fileManager = getFileManager(); + if ( fileManager != null ) + { + String url = 
fileManager.savePicture( picture ); + + if ( WordToHtmlUtils.isNotEmpty( url ) ) + { + processImage( currentBlock, inlined, picture, url ); + return; + } + } + // no default implementation -- skip currentBlock.appendChild( htmlDocumentFacade.document .createComment( "Image link to '" diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java index b9de16f8d..926946d92 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/Picture.java @@ -44,17 +44,27 @@ public final class Picture extends PictureDescriptor static final int PICF_SHAPE_OFFSET = 0xE; static final int UNKNOWN_HEADER_SIZE = 0x49; - public static final byte[] GIF = new byte[]{'G', 'I', 'F'}; - public static final byte[] PNG = new byte[]{ (byte)0x89, 0x50, 0x4E, 0x47,0x0D,0x0A,0x1A,0x0A}; - public static final byte[] JPG = new byte[]{(byte)0xFF, (byte)0xD8}; - public static final byte[] BMP = new byte[]{'B', 'M'}; - public static final byte[] TIFF = new byte[]{0x49, 0x49, 0x2A, 0x00}; - public static final byte[] TIFF1 = new byte[]{0x4D, 0x4D, 0x00, 0x2A}; + @Deprecated + public static final byte[] GIF = PictureType.GIF.getSignatures()[0]; + @Deprecated + public static final byte[] PNG = PictureType.PNG.getSignatures()[0]; + @Deprecated + public static final byte[] JPG = PictureType.JPEG.getSignatures()[0]; + @Deprecated + public static final byte[] BMP = PictureType.BMP.getSignatures()[0]; + @Deprecated + public static final byte[] TIFF = PictureType.TIFF.getSignatures()[0]; + @Deprecated + public static final byte[] TIFF1 = PictureType.TIFF.getSignatures()[1]; - public static final byte[] EMF = { 0x01, 0x00, 0x00, 0x00 }; - public static final byte[] WMF1 = { (byte)0xD7, (byte)0xCD, (byte)0xC6, (byte)0x9A, 0x00, 0x00 }; - public static final byte[] WMF2 = { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 }; // Windows 3.x - // TODO: DIB, PICT + @Deprecated + 
public static final byte[] EMF = PictureType.EMF.getSignatures()[0]; + @Deprecated + public static final byte[] WMF1 = PictureType.WMF.getSignatures()[0]; + // Windows 3.x + @Deprecated + public static final byte[] WMF2 = PictureType.WMF.getSignatures()[1]; + // TODO: DIB, PICT public static final byte[] IHDR = new byte[]{'I', 'H', 'D', 'R'}; @@ -103,16 +113,23 @@ public final class Picture extends PictureDescriptor this.size = _dataStream.length; } - private void fillWidthHeight() - { - String ext = suggestFileExtension(); - // trying to extract width and height from pictures content: - if ("jpg".equalsIgnoreCase(ext)) { - fillJPGWidthHeight(); - } else if ("png".equalsIgnoreCase(ext)) { - fillPNGWidthHeight(); + private void fillWidthHeight() + { + PictureType pictureType = suggestPictureType(); + // trying to extract width and height from pictures content: + switch ( pictureType ) + { + case JPEG: + fillJPGWidthHeight(); + break; + case PNG: + fillPNGWidthHeight(); + break; + default: + // unsupported; + break; + } } - } /** * Tries to suggest a filename: hex representation of picture structure offset in "Data" stream plus extension that @@ -265,76 +282,39 @@ public final class Picture extends PictureDescriptor } /** - * tries to suggest extension for picture's file by matching signatures of popular image formats to first bytes - * of picture's contents - * @return suggested file extension - */ - public String suggestFileExtension() - { - String extension = suggestFileExtension(_dataStream, pictureBytesStartOffset); - if ("".equals(extension)) { - // May be compressed. Get the uncompressed content and inspect that. 
- extension = suggestFileExtension(getContent(), 0); + * tries to suggest extension for picture's file by matching signatures of + * popular image formats to first bytes of picture's contents + * + * @return suggested file extension + */ + public String suggestFileExtension() + { + return suggestPictureType().getExtension(); } - return extension; - } - /** - * Returns the mime type for the image - */ - public String getMimeType() { - String extension = suggestFileExtension(); - if("jpg".equals(extension)) { - return "image/jpeg"; - } - if("png".equals(extension)) { - return "image/png"; - } - if("gif".equals(extension)) { - return "image/gif"; - } - if("bmp".equals(extension)) { - return "image/bmp"; - } - if("tiff".equals(extension)) { - return "image/tiff"; - } - if("wmf".equals(extension)) { - return "image/x-wmf"; - } - if("emf".equals(extension)) { - return "image/x-emf"; - } - return "image/unknown"; - } + /** + * Returns the MIME type for the image + * + * @return MIME-type for known types of image or "image/unknown" if unknown + */ + public String getMimeType() + { + return suggestPictureType().getMime(); + } - - private String suggestFileExtension(byte[] _dataStream, int pictureBytesStartOffset) - { - if (matchSignature(_dataStream, JPG, pictureBytesStartOffset)) { - return "jpg"; - } else if (matchSignature(_dataStream, PNG, pictureBytesStartOffset)) { - return "png"; - } else if (matchSignature(_dataStream, GIF, pictureBytesStartOffset)) { - return "gif"; - } else if (matchSignature(_dataStream, BMP, pictureBytesStartOffset)) { - return "bmp"; - } else if (matchSignature(_dataStream, TIFF, pictureBytesStartOffset) || - matchSignature(_dataStream, TIFF1, pictureBytesStartOffset)) { - return "tiff"; - } else { - // Need to load the image content before we can try the following tests + public PictureType suggestPictureType() + { fillImageContent(); - if (matchSignature(content, WMF1, 0) || matchSignature(content, WMF2, 0)) { - return "wmf"; - } else if 
(matchSignature(content, EMF, 0)) { - return "emf"; - } + for ( PictureType pictureType : PictureType.values() ) + for ( byte[] signature : pictureType.getSignatures() ) + if ( matchSignature( _dataStream, signature, + pictureBytesStartOffset ) ) + return pictureType; + + // TODO: DIB, PICT + return PictureType.UNKNOWN; } - // TODO: DIB, PICT - return ""; - } private static boolean matchSignature(byte[] dataStream, byte[] signature, int pictureBytesOffset) { diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java new file mode 100644 index 000000000..d89f0e7bd --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureType.java @@ -0,0 +1,72 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hwpf.usermodel; + +/** + * Picture types supported by MS Word format + * + * @author Sergey Vladimirov (vlsergey {at} gmail {dot} com) + */ +public enum PictureType { + BMP( "image/bmp", "bmp", new byte[][] { { 'B', 'M' } } ), + + EMF( "image/x-emf", "emf", new byte[][] { { 0x01, 0x00, 0x00, 0x00 } } ), + + GIF( "image/gif", "gif", new byte[][] { { 'G', 'I', 'F' } } ), + + JPEG( "image/jpeg", "jpg", new byte[][] { { (byte) 0xFF, (byte) 0xD8 } } ), + + PNG( "image/png", "png", new byte[][] { { (byte) 0x89, 0x50, 0x4E, 0x47, + 0x0D, 0x0A, 0x1A, 0x0A } } ), + + TIFF( "image/tiff", "tiff", new byte[][] { { 0x49, 0x49, 0x2A, 0x00 }, + { 0x4D, 0x4D, 0x00, 0x2A } } ), + + WMF( "image/x-wmf", "wmf", new byte[][] { + { (byte) 0xD7, (byte) 0xCD, (byte) 0xC6, (byte) 0x9A, 0x00, 0x00 }, + { 0x01, 0x00, 0x09, 0x00, 0x00, 0x03 } } ), + + UNKNOWN( "image/unknown", "", new byte[][] {} ); + + private String _extension; + + private String _mime; + + private byte[][] _signatures; + + private PictureType( String mime, String extension, byte[][] signatures ) + { + this._mime = mime; + this._extension = extension; + this._signatures = signatures; + } + + public String getExtension() + { + return _extension; + } + + public String getMime() + { + return _mime; + } + + public byte[][] getSignatures() + { + return _signatures; + } +}