From 5c62492ffb832d1085ab7f411e4afda171ee7268 Mon Sep 17 00:00:00 2001 From: Javen O'Neal Date: Mon, 15 May 2017 23:27:21 +0000 Subject: [PATCH] github-54: when adding a picture to an XSSFWorkbook, reduce memory consumption by 100x and increase speed by 10x based on OpenJDK JMH benchmarking. Thanks to Tim Helmstedt! This closes #54. https://github.com/apache/poi/pull/54 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1795252 13f79535-47bb-0310-9956-ffa450edef68 --- build.gradle | 4 +- build.xml | 13 +++ .../org/apache/poi/POIXMLDocumentPart.java | 26 +----- .../apache/poi/openxml4j/opc/OPCPackage.java | 10 +-- .../apache/poi/openxml4j/opc/PackagePart.java | 69 ++++++++-------- .../openxml4j/opc/PackagePartCollection.java | 58 +++++++++----- .../opc/PackageRelationshipCollection.java | 36 ++++----- .../apache/poi/openxml4j/opc/ZipPackage.java | 6 +- .../apache/poi/benchmark/AddImageBench.java | 79 +++++++++++++++++++ .../poi/xssf/usermodel/TestXSSFDrawing.java | 72 ++++++++++++++++- 10 files changed, 264 insertions(+), 109 deletions(-) create mode 100644 src/ooxml/testcases/org/apache/poi/benchmark/AddImageBench.java diff --git a/build.gradle b/build.gradle index 4acabe0f1..526cff040 100644 --- a/build.gradle +++ b/build.gradle @@ -199,7 +199,7 @@ project('ooxml') { compile 'org.apache.santuario:xmlsec:2.0.6' compile 'org.bouncycastle:bcpkix-jdk15on:1.54' compile 'com.github.virtuald:curvesapi:1.04' - + // for ooxml-lite, should we move this somewhere else? compile 'junit:junit:4.12' @@ -210,6 +210,8 @@ project('ooxml') { testCompile 'junit:junit:4.12' testCompile project(path: ':main', configuration: 'tests') + testCompile 'org.openjdk.jmh:jmh-core:1.15' + testCompile 'org.openjdk.jmh:jmh-generator-annprocess:1.15' } // TOOD: we should not duplicate this task in each project, but I did not figure out how to inject the artifactId for each project diff --git a/build.xml b/build.xml index 8aa626731..4df378eb8 100644 --- a/build.xml +++ b/build.xml @@ -163,6 +163,11 @@ under the License. + + + + + @@ -321,6 +326,8 @@ under the License. + + @@ -599,6 +606,8 @@ under the License. + + @@ -624,6 +633,8 @@ under the License. + + @@ -2265,6 +2276,8 @@ under the License. + + diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java index 163ace54a..41435ec4a 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java @@ -268,7 +268,7 @@ public class POIXMLDocumentPart { * @since 3.14-Beta1 */ public final RelationPart addRelation(String relId, POIXMLRelation relationshipType, POIXMLDocumentPart part) { - PackageRelationship pr = findExistingRelation(part); + PackageRelationship pr = this.packagePart.findExistingRelation(part.getPackagePart()); if (pr == null) { PackagePartName ppn = part.getPackagePart().getPartName(); String relType = relationshipType.getRelation(); @@ -290,30 +290,6 @@ public class POIXMLDocumentPart { } - /** - * Check if the new part was already added before via PackagePart.addRelationship() - * - * @param part to find the relationship for - * @return The existing relationship, or null if there isn't yet one - */ - private PackageRelationship findExistingRelation(POIXMLDocumentPart part) { - String ppn = part.getPackagePart().getPartName().getName(); - try { - for (PackageRelationship pr : packagePart.getRelationships()) { - if (pr.getTargetMode() == TargetMode.EXTERNAL) { - continue; - } - PackagePart pp = packagePart.getRelatedPart(pr); - if (ppn.equals(pp.getPartName().getName())) { - return pr; - } - } - } catch (InvalidFormatException e) { - throw new POIXMLException("invalid package relationships", e); - } - return null; - } - /** * Remove the relation to the specified part in this package and remove the * part, if it is no longer needed. diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java index 6b6c5e85c..1389fb742 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java @@ -649,12 +649,11 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { */ public ArrayList getPartsByContentType(String contentType) { ArrayList retArr = new ArrayList(); - for (PackagePart part : partList.values()) { + for (PackagePart part : partList.sortedValues()) { if (part.getContentType().equals(contentType)) { retArr.add(part); } } - Collections.sort(retArr); return retArr; } @@ -697,13 +696,12 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { } Matcher matcher = namePattern.matcher(""); ArrayList result = new ArrayList(); - for (PackagePart part : partList.values()) { + for (PackagePart part : partList.sortedValues()) { PackagePartName partName = part.getPartName(); if (matcher.reset(partName.getName()).matches()) { result.add(part); } } - Collections.sort(result); return result; } @@ -813,9 +811,7 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { } } } - ArrayList result = new ArrayList(partList.values()); - java.util.Collections.sort(result); - return result; + return new ArrayList(partList.sortedValues()); } /** diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePart.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePart.java index 641e1bc19..8cd1c9f5d 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePart.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagePart.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.URI; import java.net.URISyntaxException; +import java.util.HashMap; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -63,6 +64,7 @@ public abstract class PackagePart implements RelationshipSource, Comparable { +public final class PackagePartCollection implements Serializable { - private static final long serialVersionUID = 2515031135957635515L; + private static final long serialVersionUID = 2515031135957635517L; /** - * Arraylist use to store this collection part names as string for rule + * HashSet use to store this collection part names as string for rule * M1.11 optimized checking. */ - private ArrayList registerPartNameStr = new ArrayList(); + private HashSet registerPartNameStr = new HashSet(); + + + private final HashMap packagePartLookup = new HashMap(); - @Override - public Object clone() { - return super.clone(); - } /** * Check rule [M1.11]: a package implementer shall neither create nor @@ -53,11 +51,10 @@ public final class PackagePartCollection extends * Throws if you try to add a part with a name derived from * another part name. */ - @Override public PackagePart put(PackagePartName partName, PackagePart part) { String[] segments = partName.getURI().toASCIIString().split( PackagingURIHelper.FORWARD_SLASH_STRING); - StringBuffer concatSeg = new StringBuffer(); + StringBuilder concatSeg = new StringBuilder(); for (String seg : segments) { if (!seg.equals("")) concatSeg.append(PackagingURIHelper.FORWARD_SLASH_CHAR); @@ -68,14 +65,35 @@ public final class PackagePartCollection extends } } this.registerPartNameStr.add(partName.getName()); - return super.put(partName, part); + return packagePartLookup.put(partName, part); } - @Override - public PackagePart remove(Object key) { - if (key instanceof PackagePartName) { - this.registerPartNameStr.remove(((PackagePartName) key).getName()); - } - return super.remove(key); + public PackagePart remove(PackagePartName key) { + this.registerPartNameStr.remove(key.getName()); + return packagePartLookup.remove(key); + } + + + /** + * The values themselves should be returned in sorted order. Doing it here + * avoids paying the high cost of Natural Ordering per insertion. + */ + public Collection sortedValues() { + ArrayList packageParts = new ArrayList(packagePartLookup.values()); + Collections.sort(packageParts); + return packageParts; + + } + + public boolean containsKey(PackagePartName partName) { + return packagePartLookup.containsKey(partName); + } + + public PackagePart get(PackagePartName partName) { + return packagePartLookup.get(partName); + } + + public int size() { + return packagePartLookup.size(); } } diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java index 640063802..912a3d5b8 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java @@ -18,10 +18,7 @@ package org.apache.poi.openxml4j.opc; import java.net.URI; import java.net.URISyntaxException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.Locale; -import java.util.TreeMap; +import java.util.*; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -55,6 +52,12 @@ public final class PackageRelationshipCollection implements */ private TreeMap relationshipsByType; + /** + * A lookup of internal relationships to avoid + */ + private HashMap internalRelationshipsByTargetName = new HashMap(); + + /** * This relationshipPart. */ @@ -74,7 +77,7 @@ public final class PackageRelationshipCollection implements * Reference to the package. */ private OPCPackage container; - + /** * The ID number of the next rID# to generate, or -1 * if that is still to be determined. @@ -230,6 +233,9 @@ public final class PackageRelationshipCollection implements sourcePart, targetUri, targetMode, relationshipType, id); relationshipsByID.put(rel.getId(), rel); relationshipsByType.put(rel.getRelationshipType(), rel); + if (targetMode == TargetMode.INTERNAL){ + internalRelationshipsByTargetName.put(targetUri.toASCIIString(), rel); + } return rel; } @@ -245,24 +251,11 @@ public final class PackageRelationshipCollection implements if (rel != null) { relationshipsByID.remove(rel.getId()); relationshipsByType.values().remove(rel); + internalRelationshipsByTargetName.values().remove(rel); } } } - /** - * Remove a relationship by its reference. - * - * @param rel - * The relationship to delete. - */ - public void removeRelationship(PackageRelationship rel) { - if (rel == null) - throw new IllegalArgumentException("rel"); - - relationshipsByID.values().remove(rel); - relationshipsByType.values().remove(rel); - } - /** * Retrieves a relationship by its index in the collection. * @@ -413,6 +406,11 @@ public final class PackageRelationshipCollection implements public void clear() { relationshipsByID.clear(); relationshipsByType.clear(); + internalRelationshipsByTargetName.clear(); + } + + public PackageRelationship findExistingInternalRelation(PackagePart packagePart) { + return internalRelationshipsByTargetName.get(packagePart.getPartName().getName()); } @Override diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java index 5b9e2141e..17046da35 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java @@ -239,8 +239,8 @@ public final class ZipPackage extends OPCPackage { } if (this.zipArchive == null) { - return this.partList.values().toArray( - new PackagePart[this.partList.values().size()]); + return this.partList.sortedValues().toArray( + new PackagePart[this.partList.size()]); } // First we need to parse the content type part @@ -344,7 +344,7 @@ public final class ZipPackage extends OPCPackage { } } - return partList.values().toArray(new ZipPackagePart[partList.size()]); + return partList.sortedValues().toArray(new PackagePart[partList.size()]); } /** diff --git a/src/ooxml/testcases/org/apache/poi/benchmark/AddImageBench.java b/src/ooxml/testcases/org/apache/poi/benchmark/AddImageBench.java new file mode 100644 index 000000000..c910a7fe9 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/benchmark/AddImageBench.java @@ -0,0 +1,79 @@ +package org.apache.poi.benchmark;/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.ss.usermodel.*; +import org.apache.poi.util.IOUtils; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.openjdk.jmh.annotations.*; +import org.openjdk.jmh.profile.GCProfiler; +import org.openjdk.jmh.profile.StackProfiler; +import org.openjdk.jmh.runner.Runner; +import org.openjdk.jmh.runner.RunnerException; +import org.openjdk.jmh.runner.options.Options; +import org.openjdk.jmh.runner.options.OptionsBuilder; + +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.concurrent.TimeUnit; + + +@BenchmarkMode(Mode.AverageTime) +@OutputTimeUnit(TimeUnit.MICROSECONDS) +@Warmup(iterations = 10, time = 1, timeUnit = TimeUnit.SECONDS ) +@Measurement(iterations = 10, time = 2, timeUnit = TimeUnit.SECONDS) +@State(Scope.Benchmark) +public class AddImageBench { + + private Workbook wb; + private CreationHelper helper; + private byte[] bytes; + private Sheet sheet; + + @Setup(Level.Iteration) + public void doit() { + wb = new XSSFWorkbook(); + helper = wb.getCreationHelper(); + //add a picture in this workbook. + + bytes = HSSFTestDataSamples.getTestDataFileContent("45829.png"); + sheet = wb.createSheet(); + } + + @Benchmark + public void benchCreatePicture() throws Exception { + Drawing drawing = sheet.createDrawingPatriarch(); + + ClientAnchor anchor = helper.createClientAnchor(); + anchor.setCol1(1); + anchor.setRow1(1); + drawing.createPicture(anchor, wb.addPicture(bytes, Workbook.PICTURE_TYPE_JPEG)); + + } + + public static void main(String[] args) throws RunnerException { + Options opt = new OptionsBuilder() + .include(".*" + AddImageBench.class.getSimpleName() + ".*") + .addProfiler(StackProfiler.class) + .addProfiler(GCProfiler.class) + .forks(1) + .build(); + + new Runner(opt).run(); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFDrawing.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFDrawing.java index c5b5d6fa4..51c079619 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFDrawing.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFDrawing.java @@ -18,10 +18,12 @@ package org.apache.poi.xssf.usermodel; import org.apache.poi.POIXMLDocumentPart; import org.apache.poi.POIXMLDocumentPart.RelationPart; +import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.ss.usermodel.ClientAnchor; import org.apache.poi.ss.usermodel.FontUnderline; import org.apache.poi.ss.usermodel.ShapeTypes; +import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.util.Units; import org.apache.poi.xssf.XSSFTestDataSamples; import org.junit.Test; @@ -31,7 +33,7 @@ import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph; import org.openxmlformats.schemas.drawingml.x2006.main.STTextUnderlineType; import org.openxmlformats.schemas.drawingml.x2006.spreadsheetDrawing.CTDrawing; -import java.awt.Color; +import java.awt.*; import java.io.IOException; import java.util.List; @@ -755,7 +757,71 @@ public class TestXSSFDrawing { shapes = drawing.getShapes(); assertEquals(4, shapes.size()); wb2.close(); + } + @Test + public void testXSSFSAddPicture() throws Exception { + XSSFWorkbook wb1 = new XSSFWorkbook(); + XSSFSheet sheet = wb1.createSheet(); + //multiple calls of createDrawingPatriarch should return the same instance of XSSFDrawing + XSSFDrawing dr1 = sheet.createDrawingPatriarch(); + XSSFDrawing dr2 = sheet.createDrawingPatriarch(); + assertSame(dr1, dr2); + + List rels = sheet.getRelationParts(); + assertEquals(1, rels.size()); + RelationPart rp = rels.get(0); + assertTrue(rp.getDocumentPart() instanceof XSSFDrawing); + + assertEquals(0, rp.getDocumentPart().getRelations().size()); + + XSSFDrawing drawing = rp.getDocumentPart(); + String drawingId = rp.getRelationship().getId(); + + //there should be a relation to this drawing in the worksheet + assertTrue(sheet.getCTWorksheet().isSetDrawing()); + assertEquals(drawingId, sheet.getCTWorksheet().getDrawing().getId()); + + byte[] pictureData = HSSFTestDataSamples.getTestDataFileContent("45829.png"); + + ClientAnchor anchor = wb1.getCreationHelper().createClientAnchor(); + anchor.setCol1(1); + anchor.setRow1(1); + + drawing.createPicture(anchor, wb1.addPicture(pictureData, Workbook.PICTURE_TYPE_JPEG)); + final int pictureIndex = wb1.addPicture(pictureData, Workbook.PICTURE_TYPE_JPEG); + drawing.createPicture(anchor, pictureIndex); + drawing.createPicture(anchor, pictureIndex); + + // repeated additions of same share package relationship + assertEquals(2, rp.getDocumentPart().getPackagePart().getRelationships().size()); + + List shapes = drawing.getShapes(); + assertEquals(3, shapes.size()); + assertTrue(shapes.get(0) instanceof XSSFPicture); + assertTrue(shapes.get(1) instanceof XSSFPicture); + + // Save and re-load it + XSSFWorkbook wb2 = XSSFTestDataSamples.writeOutAndReadBack(wb1); + wb1.close(); + sheet = wb2.getSheetAt(0); + + // Check + dr1 = sheet.createDrawingPatriarch(); + CTDrawing ctDrawing = dr1.getCTDrawing(); + + // Connector, shapes and text boxes are all two cell anchors + assertEquals(0, ctDrawing.sizeOfAbsoluteAnchorArray()); + assertEquals(0, ctDrawing.sizeOfOneCellAnchorArray()); + assertEquals(3, ctDrawing.sizeOfTwoCellAnchorArray()); + + shapes = dr1.getShapes(); + assertEquals(3, shapes.size()); + assertTrue(shapes.get(0) instanceof XSSFPicture); + assertTrue(shapes.get(1) instanceof XSSFPicture); + + checkRewrite(wb2); + wb2.close(); } @Test(expected=IllegalArgumentException.class) @@ -806,7 +872,7 @@ public class TestXSSFDrawing { (int)(xfrmG1.getChExt().getCy()*0.8) )); CTGroupTransform2D xfrmG2 = g2.getCTGroupShape().getGrpSpPr().getXfrm(); - + XSSFSimpleShape s2 = g2.createSimpleShape(new XSSFChildAnchor( (int)(xfrmG2.getChExt().getCx()*0.1), (int)(xfrmG2.getChExt().getCy()*0.1), @@ -818,7 +884,7 @@ public class TestXSSFDrawing { XSSFWorkbook wb2 = XSSFTestDataSamples.writeOutAndReadBack(wb1); wb1.close(); - + XSSFDrawing draw = wb2.getSheetAt(0).getDrawingPatriarch(); List shapes = draw.getShapes(); assertEquals(2, shapes.size());