diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 74eeae2ee..be82f7f14 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 50154 - Allow white spaces and unicode in OPC relationship targets 50113 - Remove cell from Calculation Chain after setting cell type to blank 49966 - Ensure that XSSFRow#removeCell cleares calculation chain entries 50096 - Fixed evaluation of cell references with column index greater than 255 diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java index 990a3bf4c..a5a40990c 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java @@ -351,16 +351,8 @@ public final class PackageRelationshipCollection implements PackageRelationship.TARGET_ATTRIBUTE_NAME) .getValue(); - if (value.indexOf("\\") != -1) { - logger - .log(POILogger.INFO, "target contains \\ therefore not a valid URI" - + value + " replaced by /"); - value = value.replaceAll("\\\\", "/"); - // word can save external relationship with a \ instead - // of / - } + target = PackagingURIHelper.toURI(value); - target = new URI(value); } catch (URISyntaxException e) { logger.log(POILogger.ERROR, "Cannot convert " + value + " in a valid relationship URI-> ignored", e); diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java index 337dacb4e..01ed54c96 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java @@ -75,6 +75,11 @@ public interface PackageRelationshipTypes { */ String IMAGE_PART = 
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image"; + /** + * Hyperlink type. + */ + String HYPERLINK_PART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink"; + /** * Style type. */ diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java index c4169d9db..4d48d2bd7 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java @@ -19,6 +19,8 @@ package org.apache.poi.openxml4j.opc; import java.net.URI; import java.net.URISyntaxException; +import java.nio.ByteBuffer; +import java.io.UnsupportedEncodingException; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -287,7 +289,7 @@ public final class PackagingURIHelper { // form must actually be an absolute URI if(sourceURI.toString().equals("/")) { String path = targetURI.getPath(); - if(msCompatible && path.charAt(0) == '/') { + if(msCompatible && path.length() > 0 && path.charAt(0) == '/') { try { targetURI = new URI(path.substring(1)); } catch (Exception e) { @@ -362,6 +364,12 @@ public final class PackagingURIHelper { } } + // if the target had a fragment then append it to the result + String fragment = targetURI.getRawFragment(); + if (fragment != null) { + retVal.append("#").append(fragment); + } + try { return new URI(retVal.toString()); } catch (Exception e) { @@ -412,9 +420,9 @@ public final class PackagingURIHelper { * Get URI from a string path. 
*/ public static URI getURIFromPath(String path) { - URI retUri = null; + URI retUri; try { - retUri = new URI(path); + retUri = toURI(path); } catch (URISyntaxException e) { throw new IllegalArgumentException("path"); } @@ -484,7 +492,7 @@ public final class PackagingURIHelper { throws InvalidFormatException { URI partNameURI; try { - partNameURI = new URI(resolvePartName(partName)); + partNameURI = toURI(partName); } catch (URISyntaxException e) { throw new InvalidFormatException(e.getMessage()); } @@ -648,7 +656,9 @@ public final class PackagingURIHelper { } /** - * If part name is not a valid URI, it is resolved as follows: + * Convert a string to {@link java.net.URI} + * + * If part name is not a valid URI, it is resolved as follows: *

* 1. Percent-encode each open bracket ([) and close bracket (]). * 2. Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value. @@ -663,12 +673,72 @@ public final class PackagingURIHelper { * in ?5.2 of RFC 3986. The path component of the resulting absolute URI is the part name. *

* - * @param partName the name to resolve + * @param value the string to be parsed into a URI * @return the resolved part name that should be OK to construct a URI * * TODO YK: for now this method does only (5). Finish the rest. */ - public static String resolvePartName(String partName){ - return partName.replace('\\', '/'); + public static URI toURI(String value) throws URISyntaxException { + //5. Convert all back slashes to forward slashes + if (value.indexOf("\\") != -1) { + value = value.replace('\\', '/'); + } + + // URI fragments (those starting with '#') are not encoded + // and may contain white spaces and raw unicode characters + int fragmentIdx = value.indexOf('#'); + if(fragmentIdx != -1){ + String path = value.substring(0, fragmentIdx); + String fragment = value.substring(fragmentIdx + 1); + + value = path + "#" + encode(fragment); + } + + return new URI(value); } + + /** + * percent-encode white spaces and non-ASCII characters (UTF-8 bytes 0x80 and above). + *

+ * Examples: + * 'Apache POI' --> 'Apache%20POI' + * 'Apache\u0410POI' --> 'Apache%D0%90POI' + * + * @param s the string to encode + * @return the encoded string + */ + public static String encode(String s) { + int n = s.length(); + if (n == 0) return s; + + ByteBuffer bb; + try { + bb = ByteBuffer.wrap(s.getBytes("UTF-8")); + } catch (UnsupportedEncodingException e){ + // should not happen + throw new RuntimeException(e); + } + StringBuilder sb = new StringBuilder(); + while (bb.hasRemaining()) { + int b = bb.get() & 0xff; + if (isUnsafe(b)) { + sb.append('%'); + sb.append(hexDigits[(b >> 4) & 0x0F]); + sb.append(hexDigits[(b >> 0) & 0x0F]); + } else { + sb.append((char)b); + } + } + return sb.toString(); + } + + private final static char[] hexDigits = { + '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' + }; + + private static boolean isUnsafe(int ch) { + return ch >= 0x80 || " ".indexOf(ch) >= 0; // every non-ASCII UTF-8 byte (0x80-0xFF) must be escaped, including 0x80 itself + } + } diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java index c8bbb96af..4a9fec855 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java @@ -163,10 +163,7 @@ public final class ZipPartMarshaller implements PartMarshaller { } else { URI targetURI = rel.getTargetURI(); targetValue = PackagingURIHelper.relativizeURI( - sourcePartURI, targetURI, true).getPath(); - if (targetURI.getRawFragment() != null) { - targetValue += "#" + targetURI.getRawFragment(); - } + sourcePartURI, targetURI, true).toString(); } relElem.addAttribute(PackageRelationship.TARGET_ATTRIBUTE_NAME, targetValue); diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackagingURIHelper.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackagingURIHelper.java index 
7064fc48d..9e2297d53 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackagingURIHelper.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackagingURIHelper.java @@ -17,6 +17,7 @@ package org.apache.poi.openxml4j.opc; import java.net.URI; +import java.net.URISyntaxException; import junit.framework.TestCase; @@ -35,7 +36,9 @@ public class TestPackagingURIHelper extends TestCase { public void testRelativizeURI() throws Exception { URI uri1 = new URI("/word/document.xml"); URI uri2 = new URI("/word/media/image1.gif"); - + URI uri3 = new URI("/word/media/image1.gif#Sheet1!A1"); + URI uri4 = new URI("#'My%20Sheet1'!A1"); + // Document to image is down a directory URI retURI1to2 = PackagingURIHelper.relativizeURI(uri1, uri2); assertEquals("media/image1.gif", retURI1to2.getPath()); @@ -60,6 +63,12 @@ public class TestPackagingURIHelper extends TestCase { //URI compatible with MS Office and OpenOffice: leading slash is removed uriRes = PackagingURIHelper.relativizeURI(root, uri1, true); assertEquals("word/document.xml", uriRes.toString()); + + //preserve URI fragments + uriRes = PackagingURIHelper.relativizeURI(uri1, uri3, true); + assertEquals("media/image1.gif#Sheet1!A1", uriRes.toString()); + uriRes = PackagingURIHelper.relativizeURI(root, uri4, true); + assertEquals("#'My%20Sheet1'!A1", uriRes.toString()); } /** @@ -104,4 +113,22 @@ public class TestPackagingURIHelper extends TestCase { .equals(relativeName)); pkg.revert(); } + + public void testCreateURIFromString() throws Exception { + String[] href = { + "..\\\\\\cygwin\\home\\yegor\\.vim\\filetype.vim", + "..\\Program%20Files\\AGEIA%20Technologies\\v2.3.3\\NxCooking.dll", + "file:///D:\\seva\\1981\\r810102ns.mp3", + "..\\cygwin\\home\\yegor\\dinom\\%5baccess%5d.2010-10-26.log", + "#'Instructions (Text)'!B21" + }; + for(String s : href){ + try { + URI uri = PackagingURIHelper.toURI(s); + } catch (URISyntaxException e){ + fail("Failed to create URI from " + s); + } + } + } + } diff 
--git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestRelationships.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestRelationships.java index 2b3b28138..7711b1d0c 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestRelationships.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestRelationships.java @@ -18,6 +18,7 @@ package org.apache.poi.openxml4j.opc; import java.io.*; +import java.net.URI; import junit.framework.TestCase; @@ -254,4 +255,62 @@ public class TestRelationships extends TestCase { pkg.getRelationshipsByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties").getRelationship(0).getTargetURI().toString()); } + + public void testTargetWithSpecialChars() throws Exception{ + + OPCPackage pkg; + + String filepath = OpenXML4JTestDataSamples.getSampleFileName("50154.xlsx"); + pkg = OPCPackage.open(filepath); + assert_50154(pkg); + + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + pkg.save(baos); + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + pkg = OPCPackage.open(bais); + + assert_50154(pkg); + } + + public void assert_50154(OPCPackage pkg) throws Exception { + URI drawingURI = new URI("/xl/drawings/drawing1.xml"); + PackagePart drawingPart = pkg.getPart(PackagingURIHelper.createPartName(drawingURI)); + PackageRelationshipCollection drawingRels = drawingPart.getRelationships(); + + assertEquals(6, drawingRels.size()); + + // expected one image + assertEquals(1, drawingPart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/image").size()); + // and five hyperlinks + assertEquals(5, drawingPart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink").size()); + + PackageRelationship rId1 = drawingPart.getRelationship("rId1"); + URI parent = drawingPart.getPartName().getURI(); + URI rel1 = parent.relativize(rId1.getTargetURI()); + URI rel11 = 
PackagingURIHelper.relativizeURI(drawingPart.getPartName().getURI(), rId1.getTargetURI()); + assertEquals("'Another Sheet'!A1", rel1.getFragment()); + + PackageRelationship rId2 = drawingPart.getRelationship("rId2"); + URI rel2 = PackagingURIHelper.relativizeURI(drawingPart.getPartName().getURI(), rId2.getTargetURI()); + assertEquals("../media/image1.png", rel2.getPath()); + + PackageRelationship rId3 = drawingPart.getRelationship("rId3"); + URI rel3 = parent.relativize(rId3.getTargetURI()); + assertEquals("ThirdSheet!A1", rel3.getFragment()); + + PackageRelationship rId4 = drawingPart.getRelationship("rId4"); + URI rel4 = parent.relativize(rId4.getTargetURI()); + assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A1", rel4.getFragment()); + + PackageRelationship rId5 = drawingPart.getRelationship("rId5"); + URI rel5 = parent.relativize(rId5.getTargetURI()); + // backslashes have been replaced with forward slashes + assertEquals("file:///D:/chan-chan.mp3", rel5.toString()); + + PackageRelationship rId6 = drawingPart.getRelationship("rId6"); + URI rel6 = parent.relativize(rId6.getTargetURI()); + assertEquals("../../../../../../../cygwin/home/yegor/dinom/&&&[access].2010-10-26.log", rel6.getPath()); + assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A5", rel6.getFragment()); + } + } diff --git a/test-data/openxml4j/50154.xlsx b/test-data/openxml4j/50154.xlsx new file mode 100644 index 000000000..7637fb37b Binary files /dev/null and b/test-data/openxml4j/50154.xlsx differ