#61942 - Refactor PackagePartName handling and add getUnusedPartIndex method

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1819708 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2017-12-31 00:56:07 +00:00
parent 2dc2933f9a
commit 2a18d2d4db
4 changed files with 221 additions and 215 deletions

View File

@ -571,7 +571,7 @@ public class POIXMLDocumentPart {
* equivalent part names and package implementers shall neither
* create nor recognize packages with equivalent part names.
*/
protected final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation){
public final RelationPart createRelationship(POIXMLRelation descriptor, POIXMLFactory factory, int idx, boolean noRelation){
try {
PackagePartName ppName = PackagingURIHelper.createPartName(descriptor.getFileName(idx));
PackageRelationship rel = null;

View File

@ -1670,4 +1670,19 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
this.isDirty = true;
}
/**
 * Get an unused part index for the given name template, i.e. the lowest
 * positive index for which no part exists yet. The lookup itself is
 * delegated to the part collection of this package ({@code partList}).
 *
 * @param nameTemplate
 *      The template for new part names containing a {@code '#'} for the index,
 *      e.g. "/ppt/slides/slide#.xml"
 * @return the next available part name index
 * @throws InvalidFormatException if the nameTemplate is rejected by the part
 *      collection - presumably when it is null or doesn't contain the index
 *      char (#); NOTE(review): confirm against the part collection implementation
 */
public int getUnusedPartIndex(final String nameTemplate) throws InvalidFormatException {
return partList.getUnusedPartIndex(nameTemplate);
}
}

View File

@ -18,28 +18,34 @@
package org.apache.poi.openxml4j.opc;
import java.io.Serializable;
import java.util.*;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeMap;
import java.util.function.ToIntFunction;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
/**
* A package part collection.
*
* @author Julien Chable
* @version 0.1
*/
public final class PackagePartCollection implements Serializable {
private static final long serialVersionUID = 2515031135957635517L;
/**
* HashSet use to store this collection part names as string for rule
* M1.11 optimized checking.
*/
private HashSet<String> registerPartNameStr = new HashSet<>();
/**
* Set used to store the part names of this collection as strings, for
* optimized checking of rule M1.11.
*/
private final Set<String> registerPartNameStr = new HashSet<>();
private final HashMap<PackagePartName, PackagePart> packagePartLookup = new HashMap<>();
private final TreeMap<String, PackagePart> packagePartLookup =
new TreeMap<>(PackagePartName::compare);
/**
@ -51,26 +57,32 @@ public final class PackagePartCollection implements Serializable {
* Throws if you try to add a part with a name derived from
* another part name.
*/
public PackagePart put(PackagePartName partName, PackagePart part) {
String[] segments = partName.getURI().toASCIIString().split(
PackagingURIHelper.FORWARD_SLASH_STRING);
StringBuilder concatSeg = new StringBuilder();
for (String seg : segments) {
if (!seg.isEmpty())
concatSeg.append(PackagingURIHelper.FORWARD_SLASH_CHAR);
public PackagePart put(final PackagePartName partName, final PackagePart part) {
final String ppName = partName.getName();
final StringBuilder concatSeg = new StringBuilder();
// split at slash, but keep leading slash
final String delim = "(?=["+PackagingURIHelper.FORWARD_SLASH_STRING+".])";
for (String seg : ppName.split(delim)) {
concatSeg.append(seg);
if (this.registerPartNameStr.contains(concatSeg.toString())) {
if (registerPartNameStr.contains(concatSeg.toString())) {
throw new InvalidOperationException(
"You can't add a part with a part name derived from another part ! [M1.11]");
"You can't add a part with a part name derived from another part ! [M1.11]");
}
}
this.registerPartNameStr.add(partName.getName());
return packagePartLookup.put(partName, part);
registerPartNameStr.add(ppName);
return packagePartLookup.put(ppName, part);
}
public PackagePart remove(PackagePartName key) {
this.registerPartNameStr.remove(key.getName());
return packagePartLookup.remove(key);
if (key == null) {
return null;
}
final String ppName = key.getName();
PackagePart pp = packagePartLookup.remove(ppName);
if (pp != null) {
this.registerPartNameStr.remove(ppName);
}
return pp;
}
@ -79,21 +91,49 @@ public final class PackagePartCollection implements Serializable {
* avoids paying the high cost of Natural Ordering per insertion.
*/
public Collection<PackagePart> sortedValues() {
ArrayList<PackagePart> packageParts = new ArrayList<>(packagePartLookup.values());
Collections.sort(packageParts);
return packageParts;
return Collections.unmodifiableCollection(packagePartLookup.values());
}
public boolean containsKey(PackagePartName partName) {
return packagePartLookup.containsKey(partName);
return partName != null && packagePartLookup.containsKey(partName.getName());
}
public PackagePart get(PackagePartName partName) {
return packagePartLookup.get(partName);
return partName == null ? null : packagePartLookup.get(partName.getName());
}
public int size() {
return packagePartLookup.size();
}
/**
 * Get the lowest positive index which is not yet used by any registered part
 * whose name matches the given name template.
 *
 * @param nameTemplate
 *      The template for new part names containing a {@code '#'} for the index,
 *      e.g. "/ppt/slides/slide#.xml". All characters other than {@code '#'}
 *      are matched literally and case-insensitively.
 * @return the next available part name index (always 1 or greater)
 * @throws InvalidFormatException if the nameTemplate is null or doesn't contain
 *      the index char (#)
 */
public int getUnusedPartIndex(final String nameTemplate) throws InvalidFormatException {
    if (nameTemplate == null || !nameTemplate.contains("#")) {
        throw new InvalidFormatException("name template must not be null and contain an index char (#)");
    }

    // Quote the template so regex meta characters in it (e.g. the '.' of ".xml")
    // are matched literally; only the '#' placeholders are turned into digit groups.
    // Part names which differ only in case are equivalent [M1.12], therefore the
    // match is case-insensitive, in line with the comparator of the lookup map.
    final Pattern pattern = Pattern.compile(
        Pattern.quote(nameTemplate).replace("#", "\\E([0-9]+)\\Q"),
        Pattern.CASE_INSENSITIVE);

    // Maps a part name to its index; 0 means "doesn't occupy any candidate index"
    final ToIntFunction<String> indexFromName = name -> {
        final Matcher m = pattern.matcher(name);
        if (!m.matches()) {
            return 0;
        }
        try {
            return Integer.parseInt(m.group(1));
        } catch (NumberFormatException e) {
            // the index doesn't fit into an int, hence it can never clash
            // with the int index returned by this method
            return 0;
        }
    };

    // Mark all used indices in a bit set and pick the first free one starting at 1
    return packagePartLookup.keySet().stream()
        .mapToInt(indexFromName)
        .collect(BitSet::new, BitSet::set, BitSet::or).nextClearBit(1);
}
}

View File

@ -28,8 +28,6 @@ import org.apache.poi.openxml4j.exceptions.OpenXML4JRuntimeException;
/**
* An immutable Open Packaging Convention compliant part name.
*
* @author Julien Chable
*
* @see <a href="http://www.ietf.org/rfc/rfc3986.txt">http://www.ietf.org/rfc/rfc3986.txt</a>
*/
public final class PackagePartName implements Comparable<PackagePartName> {
@ -37,32 +35,31 @@ public final class PackagePartName implements Comparable<PackagePartName> {
/**
* Part name stored as an URI.
*/
private URI partNameURI;
private final URI partNameURI;
/*
* URI Characters definition (RFC 3986)
*/
/**
* Reserved characters for sub delimitations.
* Reserved characters for sub delimiters.
*/
private static String[] RFC3986_PCHAR_SUB_DELIMS = { "!", "$", "&", "'",
"(", ")", "*", "+", ",", ";", "=" };
private static final String RFC3986_PCHAR_SUB_DELIMS = "!$&'()*+,;=";
/**
* Unreserved character (+ ALPHA & DIGIT).
*/
private static String[] RFC3986_PCHAR_UNRESERVED_SUP = { "-", ".", "_", "~" };
private static final String RFC3986_PCHAR_UNRESERVED_SUP = "-._~";
/**
* Authorized reserved characters for pChar.
*/
private static String[] RFC3986_PCHAR_AUTHORIZED_SUP = { ":", "@" };
private static final String RFC3986_PCHAR_AUTHORIZED_SUP = ":@";
/**
* Flag to know if this part name is from a relationship part name.
*/
private boolean isRelationship;
private final boolean isRelationship;
/**
* Constructor. Makes a ValidPartName object from a java.net.URI
@ -70,7 +67,7 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* @param uri
* The URI to validate and to transform into ValidPartName.
* @param checkConformance
* Flag to specify if the contructor have to validate the OPC
* Flag to specify if the constructor has to validate the OPC
* conformance. Must be always <code>true</code> except for
* special URI like '/' which is needed for internal use by
* OpenXML4J but is not valid.
@ -99,7 +96,7 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* @param partName
* Part name to valid and to create.
* @param checkConformance
* Flag to specify if the contructor have to validate the OPC
* Flag to specify if the constructor has to validate the OPC
* conformance. Must be always <code>true</code> except for
* special URI like '/' which is needed for internal use by
* OpenXML4J but is not valid.
@ -138,8 +135,9 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* part naming convention else <code>false</code>.
*/
private boolean isRelationshipPartURI(URI partUri) {
if (partUri == null)
throw new IllegalArgumentException("partUri");
if (partUri == null) {
throw new IllegalArgumentException("partUri");
}
return partUri.getPath().matches(
"^.*/" + PackagingURIHelper.RELATIONSHIP_PART_SEGMENT_NAME + "/.*\\"
@ -168,8 +166,9 @@ public final class PackagePartName implements Comparable<PackagePartName> {
*/
private static void throwExceptionIfInvalidPartUri(URI partUri)
throws InvalidFormatException {
if (partUri == null)
throw new IllegalArgumentException("partUri");
if (partUri == null) {
throw new IllegalArgumentException("partUri");
}
// Check if the part name URI is empty [M1.1]
throwExceptionIfEmptyURI(partUri);
@ -197,15 +196,17 @@ public final class PackagePartName implements Comparable<PackagePartName> {
*/
private static void throwExceptionIfEmptyURI(URI partURI)
throws InvalidFormatException {
if (partURI == null)
throw new IllegalArgumentException("partURI");
if (partURI == null) {
throw new IllegalArgumentException("partURI");
}
String uriPath = partURI.getPath();
if (uriPath.length() == 0
|| ((uriPath.length() == 1) && (uriPath.charAt(0) == PackagingURIHelper.FORWARD_SLASH_CHAR)))
throw new InvalidFormatException(
|| ((uriPath.length() == 1) && (uriPath.charAt(0) == PackagingURIHelper.FORWARD_SLASH_CHAR))) {
throw new InvalidFormatException(
"A part name shall not be empty [M1.1]: "
+ partURI.getPath());
}
}
/**
@ -240,32 +241,31 @@ public final class PackagePartName implements Comparable<PackagePartName> {
}
// Split the URI into several part and analyze each
String[] segments = partUri.toASCIIString().split("/");
if (segments.length <= 1 || !segments[0].isEmpty())
throw new InvalidFormatException(
"A part name shall not have empty segments [M1.3]: "
+ partUri.getPath());
String[] segments = partUri.toASCIIString()
.replaceFirst("^"+PackagingURIHelper.FORWARD_SLASH_CHAR,"")
.split(PackagingURIHelper.FORWARD_SLASH_STRING);
if (segments.length < 1) {
throw new InvalidFormatException(
"A part name shall not have empty segments [M1.3]: " + partUri.getPath());
}
for (int i = 1; i < segments.length; ++i) {
String seg = segments[i];
for (final String seg : segments) {
if (seg == null || seg.isEmpty()) {
throw new InvalidFormatException(
"A part name shall not have empty segments [M1.3]: "
+ partUri.getPath());
"A part name shall not have empty segments [M1.3]: " + partUri.getPath());
}
if (seg.endsWith(".")) {
throw new InvalidFormatException(
"A segment shall not end with a dot ('.') character [M1.9]: "
+ partUri.getPath());
"A segment shall not end with a dot ('.') character [M1.9]: " + partUri.getPath());
}
if (seg.replaceAll("\\\\.", "").isEmpty()) {
// Normally will never been invoked with the previous
// implementation rule [M1.9]
throw new InvalidFormatException(
"A segment shall include at least one non-dot character. [M1.10]: "
+ partUri.getPath());
"A segment shall include at least one non-dot character. [M1.10]: " + partUri.getPath());
}
// Check for rule M1.6, M1.7, M1.8
@ -288,93 +288,60 @@ public final class PackagePartName implements Comparable<PackagePartName> {
*/
private static void checkPCharCompliance(String segment)
throws InvalidFormatException {
boolean errorFlag;
final int length = segment.length();
for (int i = 0; i < length; ++i) {
char c = segment.charAt(i);
errorFlag = true;
final char c = segment.charAt(i);
/* Check rule M1.6 */
// Check for digit or letter
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|| (c >= '0' && c <= '9')) {
errorFlag = false;
} else {
// Check "-", ".", "_", "~"
for (int j = 0; j < RFC3986_PCHAR_UNRESERVED_SUP.length; ++j) {
if (c == RFC3986_PCHAR_UNRESERVED_SUP[j].charAt(0)) {
errorFlag = false;
break;
}
}
if (
// Check for digit or letter
isDigitOrLetter(c) ||
// Check "-", ".", "_", "~"
RFC3986_PCHAR_UNRESERVED_SUP.indexOf(c) > -1 ||
// Check ":", "@"
RFC3986_PCHAR_AUTHORIZED_SUP.indexOf(c) > -1 ||
// Check "!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="
RFC3986_PCHAR_SUB_DELIMS.indexOf(c) > -1
) {
continue;
}
// Check ":", "@"
for (int j = 0; errorFlag
&& j < RFC3986_PCHAR_AUTHORIZED_SUP.length; ++j) {
if (c == RFC3986_PCHAR_AUTHORIZED_SUP[j].charAt(0)) {
errorFlag = false;
}
}
// Check "!", "$", "&", "'", "(", ")", "*", "+", ",", ";", "="
for (int j = 0; errorFlag
&& j < RFC3986_PCHAR_SUB_DELIMS.length; ++j) {
if (c == RFC3986_PCHAR_SUB_DELIMS[j].charAt(0)) {
errorFlag = false;
}
}
if (c != '%') {
throw new InvalidFormatException(
"A segment shall not hold any characters other than pchar characters. [M1.6]");
}
// We certainly found an encoded character, check for length
// now ( '%' HEXDIGIT HEXDIGIT)
if ((length - i) < 2 || !isHexDigit(segment.charAt(i+1)) || !isHexDigit(segment.charAt(i+2))) {
throw new InvalidFormatException("The segment " + segment + " contain invalid encoded character !");
}
if (errorFlag && c == '%') {
// We certainly found an encoded character, check for length
// now ( '%' HEXDIGIT HEXDIGIT)
if (((length - i) < 2)) {
throw new InvalidFormatException("The segment " + segment
+ " contain invalid encoded character !");
}
// Decode the encoded character
final char decodedChar = (char) Integer.parseInt(segment.substring(i + 1, i + 3), 16);
i += 2;
// If not percent encoded character error occur then reset the
// flag -> the character is valid
errorFlag = false;
// Decode the encoded character
char decodedChar = (char) Integer.parseInt(segment.substring(
i + 1, i + 3), 16);
i += 2;
/* Check rule M1.7 */
if (decodedChar == '/' || decodedChar == '\\')
throw new InvalidFormatException(
"A segment shall not contain percent-encoded forward slash ('/'), or backward slash ('\') characters. [M1.7]");
/* Check rule M1.8 */
// Check for unreserved character like define in RFC3986
if ((decodedChar >= 'A' && decodedChar <= 'Z')
|| (decodedChar >= 'a' && decodedChar <= 'z')
|| (decodedChar >= '0' && decodedChar <= '9'))
errorFlag = true;
// Check for unreserved character "-", ".", "_", "~"
for (int j = 0; !errorFlag
&& j < RFC3986_PCHAR_UNRESERVED_SUP.length; ++j) {
if (c == RFC3986_PCHAR_UNRESERVED_SUP[j].charAt(0)) {
errorFlag = true;
break;
}
}
if (errorFlag)
throw new InvalidFormatException(
"A segment shall not contain percent-encoded unreserved characters. [M1.8]");
}
if (errorFlag)
/* Check rule M1.7 */
if (decodedChar == '/' || decodedChar == '\\') {
throw new InvalidFormatException(
"A segment shall not hold any characters other than pchar characters. [M1.6]");
"A segment shall not contain percent-encoded forward slash ('/'), or backward slash ('\') characters. [M1.7]");
}
/* Check rule M1.8 */
if (
// Check for unreserved character like define in RFC3986
isDigitOrLetter(decodedChar) ||
// Check for unreserved character "-", ".", "_", "~"
RFC3986_PCHAR_UNRESERVED_SUP.indexOf(decodedChar) > -1
) {
throw new InvalidFormatException(
"A segment shall not contain percent-encoded unreserved characters. [M1.8]");
}
}
}
/**
* Throws an exception if the specified part name doesn't start with a
* forward slash character '/'. [M1.4]
@ -389,10 +356,11 @@ public final class PackagePartName implements Comparable<PackagePartName> {
URI partUri) throws InvalidFormatException {
String uriPath = partUri.getPath();
if (uriPath.length() > 0
&& uriPath.charAt(0) != PackagingURIHelper.FORWARD_SLASH_CHAR)
throw new InvalidFormatException(
&& uriPath.charAt(0) != PackagingURIHelper.FORWARD_SLASH_CHAR) {
throw new InvalidFormatException(
"A part name shall start with a forward slash ('/') character [M1.4]: "
+ partUri.getPath());
}
}
/**
@ -409,10 +377,11 @@ public final class PackagePartName implements Comparable<PackagePartName> {
URI partUri) throws InvalidFormatException {
String uriPath = partUri.getPath();
if (uriPath.length() > 0
&& uriPath.charAt(uriPath.length() - 1) == PackagingURIHelper.FORWARD_SLASH_CHAR)
throw new InvalidFormatException(
&& uriPath.charAt(uriPath.length() - 1) == PackagingURIHelper.FORWARD_SLASH_CHAR) {
throw new InvalidFormatException(
"A part name shall not have a forward slash as the last character [M1.5]: "
+ partUri.getPath());
}
}
/**
@ -423,11 +392,10 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* @throws InvalidFormatException
* Throws if the specified URI is absolute.
*/
private static void throwExceptionIfAbsoluteUri(URI partUri)
throws InvalidFormatException {
if (partUri.isAbsolute())
throw new InvalidFormatException("Absolute URI forbidden: "
+ partUri);
private static void throwExceptionIfAbsoluteUri(URI partUri) throws InvalidFormatException {
if (partUri.isAbsolute()) {
throw new InvalidFormatException("Absolute URI forbidden: " + partUri);
}
}
/**
@ -438,12 +406,11 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* part names and package implementers shall neither create nor recognize
* packages with equivalent part names. [M1.12]
*/
@Override
public int compareTo(PackagePartName other)
{
// compare with natural sort order
return compare(this, other);
}
@Override
public int compareTo(PackagePartName other) {
// compare with natural sort order
return compare(this, other);
}
/**
@ -456,8 +423,9 @@ public final class PackagePartName implements Comparable<PackagePartName> {
String fragment = this.partNameURI.getPath();
if (fragment.length() > 0) {
int i = fragment.lastIndexOf(".");
if (i > -1)
return fragment.substring(i + 1);
if (i > -1) {
return fragment.substring(i + 1);
}
}
return "";
}
@ -468,7 +436,7 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* @return The name of this part name.
*/
public String getName() {
return this.partNameURI.toASCIIString();
return getURI().toASCIIString();
}
/**
@ -479,20 +447,13 @@ public final class PackagePartName implements Comparable<PackagePartName> {
*/
@Override
public boolean equals(Object other) {
if (other instanceof PackagePartName) {
// String.equals() is compatible with our compareTo(), but cheaper
return this.partNameURI.toASCIIString().toLowerCase(Locale.ROOT).equals
(
((PackagePartName) other).partNameURI.toASCIIString().toLowerCase(Locale.ROOT)
);
} else {
return false;
}
}
return (other instanceof PackagePartName) &&
compare(this.getName(), ((PackagePartName)other).getName()) == 0;
}
@Override
public int hashCode() {
return this.partNameURI.toASCIIString().toLowerCase(Locale.ROOT).hashCode();
return getName().toLowerCase(Locale.ROOT).hashCode();
}
@Override
@ -529,24 +490,10 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* part names and package implementers shall neither create nor recognize
* packages with equivalent part names. [M1.12]
*/
public static int compare(PackagePartName obj1, PackagePartName obj2)
{
// NOTE could also throw a NullPointerException() if desired
if (obj1 == null)
{
// (null) == (null), (null) < (non-null)
return (obj2 == null ? 0 : -1);
}
else if (obj2 == null)
{
// (non-null) > (null)
return 1;
}
return compare
(
obj1.getURI().toASCIIString().toLowerCase(Locale.ROOT),
obj2.getURI().toASCIIString().toLowerCase(Locale.ROOT)
public static int compare(PackagePartName obj1, PackagePartName obj2) {
return compare (
obj1 == null ? null : obj1.getName(),
obj2 == null ? null : obj2.getName()
);
}
@ -560,49 +507,48 @@ public final class PackagePartName implements Comparable<PackagePartName> {
* numerical portion), but sorts "File10.png" before "file2.png"
* (lexigraphical sort)
*/
public static int compare(String str1, String str2)
public static int compare(final String str1, final String str2)
{
if (str1 == null)
{
if (str1 == null) {
// (null) == (null), (null) < (non-null)
return (str2 == null ? 0 : -1);
}
else if (str2 == null)
{
} else if (str2 == null) {
// (non-null) > (null)
return 1;
}
if (str1.equalsIgnoreCase(str2)) {
return 0;
}
final String name1 = str1.toLowerCase(Locale.ROOT);
final String name2 = str2.toLowerCase(Locale.ROOT);
int len1 = str1.length();
int len2 = str2.length();
for (int idx1 = 0, idx2 = 0; idx1 < len1 && idx2 < len2; /*nil*/)
{
char c1 = str1.charAt(idx1++);
char c2 = str2.charAt(idx2++);
final int len1 = name1.length();
final int len2 = name2.length();
for (int idx1 = 0, idx2 = 0; idx1 < len1 && idx2 < len2; /*nil*/) {
final char c1 = name1.charAt(idx1++);
final char c2 = name2.charAt(idx2++);
if (Character.isDigit(c1) && Character.isDigit(c2))
{
int beg1 = idx1 - 1; // undo previous increment
while (idx1 < len1 && Character.isDigit(str1.charAt(idx1)))
{
++idx1;
if (Character.isDigit(c1) && Character.isDigit(c2)) {
final int beg1 = idx1 - 1; // undo previous increment
while (idx1 < len1 && Character.isDigit(name1.charAt(idx1))) {
idx1++;
}
int beg2 = idx2 - 1; // undo previous increment
while (idx2 < len2 && Character.isDigit(str2.charAt(idx2)))
{
++idx2;
final int beg2 = idx2 - 1; // undo previous increment
while (idx2 < len2 && Character.isDigit(name2.charAt(idx2))) {
idx2++;
}
// note: BigInteger for extra safety
int cmp = new BigInteger(str1.substring(beg1, idx1)).compareTo
(
new BigInteger(str2.substring(beg2, idx2))
);
if (cmp != 0) return cmp;
final BigInteger b1 = new BigInteger(name1.substring(beg1, idx1));
final BigInteger b2 = new BigInteger(name2.substring(beg2, idx2));
final int cmp = b1.compareTo(b2);
if (cmp != 0) {
return cmp;
}
}
else if (c1 != c2)
{
else if (c1 != c2) {
return (c1 - c2);
}
}
@ -610,6 +556,11 @@ public final class PackagePartName implements Comparable<PackagePartName> {
return (len1 - len2);
}
/**
 * Checks if the given character is an ASCII digit or an ASCII letter.
 *
 * @param c the character to check
 * @return {@code true} if c is in the range 0-9, A-Z or a-z
 */
private static boolean isDigitOrLetter(char c) {
    final boolean digit = '0' <= c && c <= '9';
    final boolean upperCase = 'A' <= c && c <= 'Z';
    final boolean lowerCase = 'a' <= c && c <= 'z';
    return digit || upperCase || lowerCase;
}
/**
 * Checks if the given character is an ASCII hexadecimal digit.
 *
 * @param c the character to check
 * @return {@code true} if c is in the range 0-9, A-F or a-f
 */
private static boolean isHexDigit(char c) {
    return "0123456789ABCDEFabcdef".indexOf(c) > -1;
}
}
/* ************************************************************************** */