From 526dc01a50c4b8b753f83517d807feed5d9ca72d Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 7 Sep 2017 21:36:15 +0000 Subject: [PATCH] Copy over and tweak PicturesSource from Apache Tika git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1807651 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/hwpf/usermodel/PictureRunMapper.java | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureRunMapper.java diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureRunMapper.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureRunMapper.java new file mode 100644 index 000000000..928dd188f --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/PictureRunMapper.java @@ -0,0 +1,136 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
==================================================================== */

package org.apache.poi.hwpf.usermodel;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.PicturesTable;

/**
 * Helper class for mapping Pictures to Runs within
 *  a document.
 *
 * This allows for easy access to Pictures by Run,
 *  as well as a way to find "Escher Floating"
 *  Pictures which don't have the regular \u0001
 *  references in the main text.
 *
 * Provides access to the pictures by offset, iteration
 *  over the un-claimed, and peeking forward.
 *
 * An instance keeps mutable state (the set of claimed
 *  pictures and the cursor used by {@link #nextUnclaimed()}),
 *  so the results of the query methods depend on the
 *  calls made before them.
 */
public class PictureRunMapper {
    /** Pictures table of the document this mapper was built from. */
    private final PicturesTable picturesTable;
    /** Pictures handed out by {@link #nextUnclaimed()} or passed to {@link #markAsClaimed(Picture)}. */
    private final Set<Picture> claimed = new HashSet<Picture>();
    /** Picture start offset to Picture. */
    private final Map<Integer, Picture> lookup;
    /**
     * Same length and order as {@link #all}; the slot of every picture
     *  that a character run of the main range refers to is set to null.
     */
    private final List<Picture> nonU1based;
    /** Every picture reported by the pictures table, in table order. */
    private final List<Picture> all;
    /** Index into {@link #nonU1based} of the next slot {@link #nextUnclaimed()} will inspect. */
    private int pn = 0;

    /**
     * Builds the offset lookup and works out which pictures are
     *  not referenced from a character run of the document's range.
     *
     * @param doc the document whose pictures table and range are scanned
     */
    public PictureRunMapper(HWPFDocument doc) {
        picturesTable = doc.getPicturesTable();
        all = picturesTable.getAllPictures();

        // Build the Offset-Picture lookup map
        lookup = new HashMap<Integer, Picture>();
        for (Picture p : all) {
            lookup.put(p.getStartOffset(), p);
        }

        // Work out which Pictures aren't referenced by
        //  a \u0001 in the main text
        // These are \u0008 escher floating ones, ones
        //  found outside the normal text, and who
        //  knows what else...
        nonU1based = new ArrayList<Picture>(all);
        Range r = doc.getRange();
        for (int i = 0; i < r.numCharacterRuns(); i++) {
            CharacterRun cr = r.getCharacterRun(i);
            if (!picturesTable.hasPicture(cr)) {
                continue;
            }

            // Read the map directly instead of calling the overridable
            //  getFor(..) while the object is still being constructed
            Picture p = lookup.get(cr.getPicOffset());
            if (p == null) {
                // The run claims a picture, but nothing starts at its offset
                continue;
            }

            // A picture referenced by more than one run has already been
            //  blanked out, in which case indexOf gives -1; calling
            //  set(-1, ..) would throw IndexOutOfBoundsException
            int at = nonU1based.indexOf(p);
            if (at >= 0) {
                nonU1based.set(at, null);
            }
        }
    }

    /**
     * Does this run have a Picture in it?
     * Delegates to the document's pictures table.
     *
     * @param cr the run to check
     * @return whether the pictures table reports a picture for the run
     * @see #getFor(CharacterRun)
     */
    public boolean hasPicture(CharacterRun cr) {
        return picturesTable.hasPicture(cr);
    }

    /**
     * Get the Picture for this run, if any
     *
     * @param cr the run whose picture offset is looked up
     * @return the picture starting at the run's picture offset,
     *  or null if no known picture starts there
     */
    public Picture getFor(CharacterRun cr) {
        return lookup.get(cr.getPicOffset());
    }

    /**
     * Mark a Picture as claimed.
     * Used when trying to match up non-Run based pictures
     *
     * @param picture the picture to record as claimed
     */
    public void markAsClaimed(Picture picture) {
        claimed.add(picture);
    }

    /**
     * Has the given Picture been claimed by a non-Run yet?
     *
     * @param picture the picture to check
     * @return true if the picture was passed to
     *  {@link #markAsClaimed(Picture)} or was returned
     *  by {@link #nextUnclaimed()}
     */
    public boolean hasBeenClaimed(Picture picture) {
        return claimed.contains(picture);
    }

    /**
     * Which Picture is this one of all the Pictures in
     *  the Document?
     *
     * Useful when trying to extract all Pictures with
     *  unique numbers or references
     *
     * @param picture the picture to locate
     * @return the 1-based position of the picture in the list
     *  of all pictures, or 0 if it is not in that list
     */
    public int pictureNumber(Picture picture) {
        return all.indexOf(picture) + 1;
    }

    /**
     * Return the next unclaimed one, used towards
     *  the end.
     *
     * Walks forward through the pictures that no character run
     *  refers to, starting after the one returned by the previous
     *  call. The picture returned is recorded as claimed.
     *
     * @return the next such picture, or null once the list is exhausted
     */
    public Picture nextUnclaimed() {
        while (pn < nonU1based.size()) {
            Picture p = nonU1based.get(pn);
            pn++;
            // null slots belong to run-referenced pictures; skip them
            if (p != null) {
                claimed.add(p);
                return p;
            }
        }
        return null;
    }
}