github-54: when adding a picture to an XSSFWorkbook, reduce memory consumption by 100x and increase speed by 10x based on OpenJDK JMH benchmarking. Thanks to Tim Helmstedt! This closes #54.

https://github.com/apache/poi/pull/54

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1795252 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Javen O'Neal 2017-05-15 23:27:21 +00:00
parent 8c1346b138
commit 5c62492ffb
10 changed files with 264 additions and 109 deletions

View File

@ -199,7 +199,7 @@ project('ooxml') {
compile 'org.apache.santuario:xmlsec:2.0.6'
compile 'org.bouncycastle:bcpkix-jdk15on:1.54'
compile 'com.github.virtuald:curvesapi:1.04'
// for ooxml-lite, should we move this somewhere else?
compile 'junit:junit:4.12'
@ -210,6 +210,8 @@ project('ooxml') {
testCompile 'junit:junit:4.12'
testCompile project(path: ':main', configuration: 'tests')
testCompile 'org.openjdk.jmh:jmh-core:1.15'
testCompile 'org.openjdk.jmh:jmh-generator-annprocess:1.15'
}
// TOOD: we should not duplicate this task in each project, but I did not figure out how to inject the artifactId for each project

View File

@ -163,6 +163,11 @@ under the License.
<property name="main.log4j.url" value="${repository.m2}/maven2/log4j/log4j/1.2.17/log4j-1.2.17.jar"/>
<property name="main.junit.jar" location="${main.lib}/junit-4.12.jar"/>
<property name="main.junit.url" value="${repository.m2}/maven2/junit/junit/4.12/junit-4.12.jar"/>
<property name="main.jmh.jar" location="${main.lib}/jmh-core-1.15.jar"/>
<property name="main.jmh.url" value="${repository.m2}/maven2/org/openjdk/jmh/jmh-core/1.15/jmh-core-1.15.jar"/>
<property name="main.jmhAnnotation.jar" location="${main.lib}/jmh-generator-annprocess-1.15.jar"/>
<property name="main.jmhAnnotation.url" value="${repository.m2}/maven2/org/openjdk/jmh/jmh-generator-annprocess/1.15/jmh-generator-annprocess-1.15.jar"/>
<property name="main.hamcrest.jar" location="${main.lib}/hamcrest-core-1.3.jar"/>
<property name="main.hamcrest.url" value="${repository.m2}/maven2/org/hamcrest/hamcrest-core/1.3/hamcrest-core-1.3.jar"/>
<property name="main.ant.jar" location="${main.lib}/ant-1.9.4.jar"/>
@ -321,6 +326,8 @@ under the License.
<pathelement location="${main.commons-codec.jar}"/>
<pathelement location="${main.log4j.jar}"/>
<pathelement location="${main.junit.jar}"/>
<pathelement location="${main.jmh.jar}"/>
<pathelement location="${main.jmhAnnotation.jar}"/>
<pathelement location="${main.hamcrest.jar}"/>
<pathelement location="${main.commons-collections4.jar}"/>
</path>
@ -599,6 +606,8 @@ under the License.
<available file="${main.commons-codec.jar}"/>
<available file="${main.log4j.jar}"/>
<available file="${main.junit.jar}"/>
<available file="${main.jmh.jar}"/>
<available file="${main.jmhAnnotation.jar}"/>
<available file="${main.hamcrest.jar}"/>
<available file="${main.ant.jar}"/>
<available file="${main.antlauncher.jar}"/>
@ -624,6 +633,8 @@ under the License.
<downloadfile src="${main.commons-codec.url}" dest="${main.commons-codec.jar}"/>
<downloadfile src="${main.log4j.url}" dest="${main.log4j.jar}"/>
<downloadfile src="${main.junit.url}" dest="${main.junit.jar}"/>
<downloadfile src="${main.jmh.url}" dest="${main.jmh.jar}"/>
<downloadfile src="${main.jmhAnnotation.url}" dest="${main.jmhAnnotation.jar}"/>
<downloadfile src="${main.hamcrest.url}" dest="${main.hamcrest.jar}"/>
<downloadfile src="${main.ant.url}" dest="${main.ant.jar}"/>
<downloadfile src="${main.antlauncher.url}" dest="${main.antlauncher.jar}"/>
@ -2265,6 +2276,8 @@ under the License.
<auxClasspath path="${main.commons-codec.jar}" />
<auxClasspath path="${main.commons-logging.jar}" />
<auxClasspath path="${main.junit.jar}" />
<auxClasspath path="${main.jmh.jar}"/>
<auxClasspath path="${main.jmhAnnotation.jar}"/>
<auxClasspath path="${main.ant.jar}" />
<sourcePath path="src/java" />
<sourcePath path="src/ooxml/java" />

View File

@ -268,7 +268,7 @@ public class POIXMLDocumentPart {
* @since 3.14-Beta1
*/
public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) {
PackageRelationship pr = findExistingRelation(part);
PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart());
if (pr == null) {
PackagePartName ppn = part.getPackagePart().getPartName();
String relType = relationshipType.getRelation();
@ -290,30 +290,6 @@ public class POIXMLDocumentPart {
}
/**
* Check if the new part was already added before via PackagePart.addRelationship()
*
* @param part to find the relationship for
* @return The existing relationship, or null if there isn't yet one
*/
private PackageRelationship findExistingRelation(POIXMLDocumentPart part) {
String ppn = part.getPackagePart().getPartName().getName();
try {
for (PackageRelationship pr : packagePart.getRelationships()) {
if (pr.getTargetMode() == TargetMode.EXTERNAL) {
continue;
}
PackagePart pp = packagePart.getRelatedPart(pr);
if (ppn.equals(pp.getPartName().getName())) {
return pr;
}
}
} catch (InvalidFormatException e) {
throw new POIXMLException("invalid package relationships", e);
}
return null;
}
/**
* Remove the relation to the specified part in this package and remove the
* part, if it is no longer needed.

View File

@ -649,12 +649,11 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
*/
public ArrayList<PackagePart> getPartsByContentType(String contentType) {
ArrayList<PackagePart> retArr = new ArrayList<PackagePart>();
for (PackagePart part : partList.values()) {
for (PackagePart part : partList.sortedValues()) {
if (part.getContentType().equals(contentType)) {
retArr.add(part);
}
}
Collections.sort(retArr);
return retArr;
}
@ -697,13 +696,12 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
}
Matcher matcher = namePattern.matcher("");
ArrayList<PackagePart> result = new ArrayList<PackagePart>();
for (PackagePart part : partList.values()) {
for (PackagePart part : partList.sortedValues()) {
PackagePartName partName = part.getPartName();
if (matcher.reset(partName.getName()).matches()) {
result.add(part);
}
}
Collections.sort(result);
return result;
}
@ -813,9 +811,7 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
}
}
}
ArrayList<PackagePart> result = new ArrayList<PackagePart>(partList.values());
java.util.Collections.sort(result);
return result;
return new ArrayList<PackagePart>(partList.sortedValues());
}
/**

View File

@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -63,6 +64,7 @@ public abstract class PackagePart implements RelationshipSource, Comparable<Pack
*/
private PackageRelationshipCollection _relationships;
/**
* Constructor.
*
@ -125,6 +127,16 @@ public abstract class PackagePart implements RelationshipSource, Comparable<Pack
this(pack, partName, new ContentType(contentType));
}
/**
* Check if the new part was already added before via PackagePart.addRelationship()
*
* @param packagePart to find the relationship for
* @return The existing relationship, or null if there isn't yet one
*/
public PackageRelationship findExistingRelation(PackagePart packagePart) {
return _relationships.findExistingInternalRelation(packagePart);
}
/**
* Adds an external relationship to a part (except relationships part).
*
@ -446,11 +458,7 @@ public abstract class PackagePart implements RelationshipSource, Comparable<Pack
* @see org.apache.poi.openxml4j.opc.RelationshipSource#isRelationshipExists(org.apache.poi.openxml4j.opc.PackageRelationship)
*/
public boolean isRelationshipExists(PackageRelationship rel) {
for (PackageRelationship r : _relationships) {
if (r == rel)
return true;
}
return false;
return _relationships.getRelationshipByID(rel.getId()) != null;
}
/**
@ -459,32 +467,31 @@ public abstract class PackagePart implements RelationshipSource, Comparable<Pack
* @param rel A relationship from this part to another one
* @return The target part of the relationship
*/
public PackagePart getRelatedPart(PackageRelationship rel) throws InvalidFormatException {
// Ensure this is one of ours
if(! isRelationshipExists(rel)) {
throw new IllegalArgumentException("Relationship " + rel + " doesn't start with this part " + _partName);
}
// Get the target URI, excluding any relative fragments
URI target = rel.getTargetURI();
if(target.getFragment() != null) {
String t = target.toString();
try {
target = new URI( t.substring(0, t.indexOf('#')) );
} catch(URISyntaxException e) {
throw new InvalidFormatException("Invalid target URI: " + target);
}
}
// Turn that into a name, and fetch
PackagePartName relName = PackagingURIHelper.createPartName(target);
PackagePart part = _container.getPart(relName);
if (part == null) {
throw new IllegalArgumentException("No part found for relationship " + rel);
}
return part;
}
public PackagePart getRelatedPart(PackageRelationship rel) throws InvalidFormatException {
// Ensure this is one of ours
if(! isRelationshipExists(rel)) {
throw new IllegalArgumentException("Relationship " + rel + " doesn't start with this part " + _partName);
}
// Get the target URI, excluding any relative fragments
URI target = rel.getTargetURI();
if(target.getFragment() != null) {
String t = target.toString();
try {
target = new URI( t.substring(0, t.indexOf('#')) );
} catch(URISyntaxException e) {
throw new InvalidFormatException("Invalid target URI: " + target);
}
}
// Turn that into a name, and fetch
PackagePartName relName = PackagingURIHelper.createPartName(target);
PackagePart part = _container.getPart(relName);
if (part == null) {
throw new IllegalArgumentException("No part found for relationship " + rel);
}
return part;
}
/**
* Get the input stream of this part to read its content.
*

View File

@ -17,8 +17,8 @@
package org.apache.poi.openxml4j.opc;
import java.util.ArrayList;
import java.util.TreeMap;
import java.io.Serializable;
import java.util.*;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -28,21 +28,19 @@ import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
* @author Julien Chable
* @version 0.1
*/
public final class PackagePartCollection extends
TreeMap<PackagePartName, PackagePart> {
public final class PackagePartCollection implements Serializable {
private static final long serialVersionUID = 2515031135957635515L;
private static final long serialVersionUID = 2515031135957635517L;
/**
* Arraylist use to store this collection part names as string for rule
* HashSet use to store this collection part names as string for rule
* M1.11 optimized checking.
*/
private ArrayList<String> registerPartNameStr = new ArrayList<String>();
private HashSet<String> registerPartNameStr = new HashSet<String>();
private final HashMap<PackagePartName, PackagePart> packagePartLookup = new HashMap<PackagePartName, PackagePart>();
@Override
public Object clone() {
return super.clone();
}
/**
* Check rule [M1.11]: a package implementer shall neither create nor
@ -53,11 +51,10 @@ public final class PackagePartCollection extends
* Throws if you try to add a part with a name derived from
* another part name.
*/
@Override
public PackagePart put(PackagePartName partName, PackagePart part) {
String[] segments = partName.getURI().toASCIIString().split(
PackagingURIHelper.FORWARD_SLASH_STRING);
StringBuffer concatSeg = new StringBuffer();
StringBuilder concatSeg = new StringBuilder();
for (String seg : segments) {
if (!seg.equals(""))
concatSeg.append(PackagingURIHelper.FORWARD_SLASH_CHAR);
@ -68,14 +65,35 @@ public final class PackagePartCollection extends
}
}
this.registerPartNameStr.add(partName.getName());
return super.put(partName, part);
return packagePartLookup.put(partName, part);
}
@Override
public PackagePart remove(Object key) {
if (key instanceof PackagePartName) {
this.registerPartNameStr.remove(((PackagePartName) key).getName());
}
return super.remove(key);
public PackagePart remove(PackagePartName key) {
this.registerPartNameStr.remove(key.getName());
return packagePartLookup.remove(key);
}
/**
* The values themselves should be returned in sorted order. Doing it here
* avoids paying the high cost of Natural Ordering per insertion.
*/
public Collection<PackagePart> sortedValues() {
ArrayList<PackagePart> packageParts = new ArrayList<PackagePart>(packagePartLookup.values());
Collections.sort(packageParts);
return packageParts;
}
public boolean containsKey(PackagePartName partName) {
return packagePartLookup.containsKey(partName);
}
public PackagePart get(PackagePartName partName) {
return packagePartLookup.get(partName);
}
public int size() {
return packagePartLookup.size();
}
}

View File

@ -18,10 +18,7 @@ package org.apache.poi.openxml4j.opc;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Locale;
import java.util.TreeMap;
import java.util.*;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@ -55,6 +52,12 @@ public final class PackageRelationshipCollection implements
*/
private TreeMap<String, PackageRelationship> relationshipsByType;
/**
* A lookup of internal relationships to avoid
*/
private HashMap<String, PackageRelationship> internalRelationshipsByTargetName = new HashMap<String, PackageRelationship>();
/**
* This relationshipPart.
*/
@ -74,7 +77,7 @@ public final class PackageRelationshipCollection implements
* Reference to the package.
*/
private OPCPackage container;
/**
* The ID number of the next rID# to generate, or -1
* if that is still to be determined.
@ -230,6 +233,9 @@ public final class PackageRelationshipCollection implements
sourcePart, targetUri, targetMode, relationshipType, id);
relationshipsByID.put(rel.getId(), rel);
relationshipsByType.put(rel.getRelationshipType(), rel);
if (targetMode == TargetMode.INTERNAL){
internalRelationshipsByTargetName.put(targetUri.toASCIIString(), rel);
}
return rel;
}
@ -245,24 +251,11 @@ public final class PackageRelationshipCollection implements
if (rel != null) {
relationshipsByID.remove(rel.getId());
relationshipsByType.values().remove(rel);
internalRelationshipsByTargetName.values().remove(rel);
}
}
}
/**
* Remove a relationship by its reference.
*
* @param rel
* The relationship to delete.
*/
public void removeRelationship(PackageRelationship rel) {
if (rel == null)
throw new IllegalArgumentException("rel");
relationshipsByID.values().remove(rel);
relationshipsByType.values().remove(rel);
}
/**
* Retrieves a relationship by its index in the collection.
*
@ -413,6 +406,11 @@ public final class PackageRelationshipCollection implements
public void clear() {
relationshipsByID.clear();
relationshipsByType.clear();
internalRelationshipsByTargetName.clear();
}
public PackageRelationship findExistingInternalRelation(PackagePart packagePart) {
return internalRelationshipsByTargetName.get(packagePart.getPartName().getName());
}
@Override

View File

@ -239,8 +239,8 @@ public final class ZipPackage extends OPCPackage {
}
if (this.zipArchive == null) {
return this.partList.values().toArray(
new PackagePart[this.partList.values().size()]);
return this.partList.sortedValues().toArray(
new PackagePart[this.partList.size()]);
}
// First we need to parse the content type part
@ -344,7 +344,7 @@ public final class ZipPackage extends OPCPackage {
}
}
return partList.values().toArray(new ZipPackagePart[partList.size()]);
return partList.sortedValues().toArray(new PackagePart[partList.size()]);
}
/**

View File

@ -0,0 +1,79 @@
package org.apache.poi.benchmark;/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.util.IOUtils;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.profile.GCProfiler;
import org.openjdk.jmh.profile.StackProfiler;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.concurrent.TimeUnit;
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Warmup(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS )
@Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
public class AddImageBench {
private Workbook wb;
private CreationHelper helper;
private byte[] bytes;
private Sheet sheet;
@Setup(Level.Iteration)
public void doit() {
wb = new XSSFWorkbook();
helper = wb.getCreationHelper();
//add a picture in this workbook.
bytes = HSSFTestDataSamples.getTestDataFileContent("45829.png");
sheet = wb.createSheet();
}
@Benchmark
public void benchCreatePicture() throws Exception {
Drawing drawing = sheet.createDrawingPatriarch();
ClientAnchor anchor = helper.createClientAnchor();
anchor.setCol1(1);
anchor.setRow1(1);
drawing.createPicture(anchor, wb.addPicture(bytes, Workbook.PICTURE_TYPE_JPEG));
}
public static void main(String[] args) throws RunnerException {
Options opt = new OptionsBuilder()
.include(".*" + AddImageBench.class.getSimpleName() + ".*")
.addProfiler(StackProfiler.class)
.addProfiler(GCProfiler.class)
.forks(1)
.build();
new Runner(opt).run();
}
}

View File

@ -18,10 +18,12 @@ package org.apache.poi.xssf.usermodel;
import org.apache.poi.POIXMLDocumentPart;
import org.apache.poi.POIXMLDocumentPart.RelationPart;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.ClientAnchor;
import org.apache.poi.ss.usermodel.FontUnderline;
import org.apache.poi.ss.usermodel.ShapeTypes;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.util.Units;
import org.apache.poi.xssf.XSSFTestDataSamples;
import org.junit.Test;
@ -31,7 +33,7 @@ import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
import org.openxmlformats.schemas.drawingml.x2006.main.STTextUnderlineType;
import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing;
import java.awt.Color;
import java.awt.*;
import java.io.IOException;
import java.util.List;
@ -755,7 +757,71 @@ public class TestXSSFDrawing {
shapes = drawing.getShapes();
assertEquals(4, shapes.size());
wb2.close();
}
@Test
public void testXSSFSAddPicture() throws Exception {
XSSFWorkbook wb1 = new XSSFWorkbook();
XSSFSheet sheet = wb1.createSheet();
//multiple calls of createDrawingPatriarch should return the same instance of XSSFDrawing
XSSFDrawing dr1 = sheet.createDrawingPatriarch();
XSSFDrawing dr2 = sheet.createDrawingPatriarch();
assertSame(dr1, dr2);
List<RelationPart> rels = sheet.getRelationParts();
assertEquals(1, rels.size());
RelationPart rp = rels.get(0);
assertTrue(rp.getDocumentPart() instanceof XSSFDrawing);
assertEquals(0, rp.getDocumentPart().getRelations().size());
XSSFDrawing drawing = rp.getDocumentPart();
String drawingId = rp.getRelationship().getId();
//there should be a relation to this drawing in the worksheet
assertTrue(sheet.getCTWorksheet().isSetDrawing());
assertEquals(drawingId, sheet.getCTWorksheet().getDrawing().getId());
byte[] pictureData = HSSFTestDataSamples.getTestDataFileContent("45829.png");
ClientAnchor anchor = wb1.getCreationHelper().createClientAnchor();
anchor.setCol1(1);
anchor.setRow1(1);
drawing.createPicture(anchor, wb1.addPicture(pictureData, Workbook.PICTURE_TYPE_JPEG));
final int pictureIndex = wb1.addPicture(pictureData, Workbook.PICTURE_TYPE_JPEG);
drawing.createPicture(anchor, pictureIndex);
drawing.createPicture(anchor, pictureIndex);
// repeated additions of same share package relationship
assertEquals(2, rp.getDocumentPart().getPackagePart().getRelationships().size());
List<XSSFShape> shapes = drawing.getShapes();
assertEquals(3, shapes.size());
assertTrue(shapes.get(0) instanceof XSSFPicture);
assertTrue(shapes.get(1) instanceof XSSFPicture);
// Save and re-load it
XSSFWorkbook wb2 = XSSFTestDataSamples.writeOutAndReadBack(wb1);
wb1.close();
sheet = wb2.getSheetAt(0);
// Check
dr1 = sheet.createDrawingPatriarch();
CTDrawing ctDrawing = dr1.getCTDrawing();
// Connector, shapes and text boxes are all two cell anchors
assertEquals(0, ctDrawing.sizeOfAbsoluteAnchorArray());
assertEquals(0, ctDrawing.sizeOfOneCellAnchorArray());
assertEquals(3, ctDrawing.sizeOfTwoCellAnchorArray());
shapes = dr1.getShapes();
assertEquals(3, shapes.size());
assertTrue(shapes.get(0) instanceof XSSFPicture);
assertTrue(shapes.get(1) instanceof XSSFPicture);
checkRewrite(wb2);
wb2.close();
}
@Test(expected=IllegalArgumentException.class)
@ -806,7 +872,7 @@ public class TestXSSFDrawing {
(int)(xfrmG1.getChExt().getCy()*0.8)
));
CTGroupTransform2D xfrmG2 = g2.getCTGroupShape().getGrpSpPr().getXfrm();
XSSFSimpleShape s2 = g2.createSimpleShape(new XSSFChildAnchor(
(int)(xfrmG2.getChExt().getCx()*0.1),
(int)(xfrmG2.getChExt().getCy()*0.1),
@ -818,7 +884,7 @@ public class TestXSSFDrawing {
XSSFWorkbook wb2 = XSSFTestDataSamples.writeOutAndReadBack(wb1);
wb1.close();
XSSFDrawing draw = wb2.getSheetAt(0).getDrawingPatriarch();
List<XSSFShape> shapes = draw.getShapes();
assertEquals(2, shapes.size());