Copy over and tweak PicturesSource from Apache Tika
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1807651 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8d17a69005
commit
526dc01a50
@ -0,0 +1,136 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.model.PicturesTable;
|
||||
|
||||
/**
|
||||
* Helper class for mapping Pictures to Runs within
|
||||
* a document.
|
||||
*
|
||||
* This allows for easy access to Pictures by Run,
|
||||
* as well as a way to find "Escher Floating"
|
||||
* Pictures which don't have the regular \u0001
|
||||
* references in the main text.
|
||||
*
|
||||
* Provides access to the pictures by offset, iteration
|
||||
* over the un-claimed, and peeking forward.
|
||||
*/
|
||||
public class PictureRunMapper {
|
||||
private PicturesTable picturesTable;
|
||||
private Set<Picture> claimed = new HashSet<Picture>();
|
||||
private Map<Integer, Picture> lookup;
|
||||
private List<Picture> nonU1based;
|
||||
private List<Picture> all;
|
||||
private int pn = 0;
|
||||
|
||||
public PictureRunMapper(HWPFDocument doc) {
|
||||
picturesTable = doc.getPicturesTable();
|
||||
all = picturesTable.getAllPictures();
|
||||
|
||||
// Build the Offset-Picture lookup map
|
||||
lookup = new HashMap<Integer, Picture>();
|
||||
for (Picture p : all) {
|
||||
lookup.put(p.getStartOffset(), p);
|
||||
}
|
||||
|
||||
// Work out which Pictures aren't referenced by
|
||||
// a \u0001 in the main text
|
||||
// These are \u0008 escher floating ones, ones
|
||||
// found outside the normal text, and who
|
||||
// knows what else...
|
||||
nonU1based = new ArrayList<Picture>();
|
||||
nonU1based.addAll(all);
|
||||
Range r = doc.getRange();
|
||||
for (int i = 0; i < r.numCharacterRuns(); i++) {
|
||||
CharacterRun cr = r.getCharacterRun(i);
|
||||
if (picturesTable.hasPicture(cr)) {
|
||||
Picture p = getFor(cr);
|
||||
int at = nonU1based.indexOf(p);
|
||||
nonU1based.set(at, null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this run have a Picture in it?
|
||||
*
|
||||
* @see #getFor(CharacterRun)
|
||||
*/
|
||||
public boolean hasPicture(CharacterRun cr) {
|
||||
return picturesTable.hasPicture(cr);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Picture for this run, if any
|
||||
*/
|
||||
public Picture getFor(CharacterRun cr) {
|
||||
return lookup.get(cr.getPicOffset());
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a Picture as claimed.
|
||||
* Used when trying to match up non-Run based pictures
|
||||
*/
|
||||
public void markAsClaimed(Picture picture) {
|
||||
claimed.add(picture);
|
||||
}
|
||||
|
||||
/**
|
||||
* Has the given Picture been claimed by a non-Run yet?
|
||||
*/
|
||||
public boolean hasBeenClaimed(Picture picture) {
|
||||
return claimed.contains(picture);
|
||||
}
|
||||
|
||||
/**
|
||||
* Which Picture is this one of all the Pictures in
|
||||
* the Document?
|
||||
*
|
||||
* Useful when trying to extract all Pictures with
|
||||
* unique numbers or references
|
||||
*/
|
||||
public int pictureNumber(Picture picture) {
|
||||
return all.indexOf(picture) + 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the next unclaimed one, used towards
|
||||
* the end
|
||||
*/
|
||||
public Picture nextUnclaimed() {
|
||||
Picture p = null;
|
||||
while (pn < nonU1based.size()) {
|
||||
p = nonU1based.get(pn);
|
||||
pn++;
|
||||
if (p != null) {
|
||||
claimed.add(p);
|
||||
return p;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user