#54570 - InvalidFormatException because of Absolute URI forbidden

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1744137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2016-05-16 21:04:00 +00:00
parent 33241e821f
commit 61321b4b37
4 changed files with 240 additions and 251 deletions

View File

@ -191,7 +191,7 @@ implements SlideShow<XSLFShape,XSLFTextParagraph> {
List<PackagePart> mediaParts = getPackage().getPartsByName(Pattern.compile("/ppt/media/.*?"));
_pictures = new ArrayList<XSLFPictureData>(mediaParts.size());
for(PackagePart part : mediaParts){
XSLFPictureData pd = new XSLFPictureData(part, null);
XSLFPictureData pd = new XSLFPictureData(part);
pd.setIndex(_pictures.size());
_pictures.add(pd);
}

View File

@ -16,104 +16,107 @@
==================================================================== */
package org.apache.poi.xslf.extractor;
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertNotContained;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.usermodel.XSLFSlideShow;
import junit.framework.TestCase;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.xmlbeans.XmlException;
import org.junit.Test;
/**
* Tests for HXFPowerPointExtractor
* Tests for XSLFPowerPointExtractor
*/
public class TestXSLFPowerPointExtractor extends TestCase {
/**
* A simple file
*/
private XSLFSlideShow xmlA;
private OPCPackage pkg;
private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
protected void setUp() throws Exception {
slTests = POIDataSamples.getSlideShowInstance();
pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
xmlA = new XSLFSlideShow(pkg);
}
public class TestXSLFPowerPointExtractor {
private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
/**
* Get text out of the simple file
* @throws XmlException
* @throws OpenXML4JException
*/
public void testGetSimpleText() throws Exception {
new XSLFPowerPointExtractor(xmlA).close();
@Test
public void testGetSimpleText()
throws IOException, XmlException, OpenXML4JException {
XMLSlideShow xmlA = openPPTX("sample.pptx");
@SuppressWarnings("resource")
OPCPackage pkg = xmlA.getPackage();
new XSLFPowerPointExtractor(xmlA).close();
new XSLFPowerPointExtractor(pkg).close();
XSLFPowerPointExtractor extractor =
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
extractor.getText();
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check Basics
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.contains("amet\n\n"));
assertContains(text, "amet\n\n");
// Our placeholder master text
// This shouldn't show up in the output
String masterText =
"Click to edit Master title style\n" +
"Click to edit Master subtitle style\n" +
"\n\n\n\n\n\n" +
"Click to edit Master title style\n" +
"Click to edit Master text styles\n" +
"Second level\n" +
"Third level\n" +
"Fourth level\n" +
"Fifth level\n";
// String masterText =
// "Click to edit Master title style\n" +
// "Click to edit Master subtitle style\n" +
// "\n\n\n\n\n\n" +
// "Click to edit Master title style\n" +
// "Click to edit Master text styles\n" +
// "Second level\n" +
// "Third level\n" +
// "Fourth level\n" +
// "Fifth level\n";
// Just slides, no notes
text = extractor.getText(true, false, false);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n"
, text
);
String slideText =
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n";
assertEquals(slideText, text);
// Just notes, no slides
text = extractor.getText(false, true);
assertEquals(
"\n\n\n\n", text
);
assertEquals("\n\n\n\n", text);
// Both
text = extractor.getText(true, true, false);
assertEquals(
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
String bothText =
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n\n\n" +
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n"
, text
);
"Lorem ipsum dolor sit amet\n" +
"Lorem\n" +
"ipsum\n" +
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n";
assertEquals(bothText, text);
// With Slides and Master Text
text = extractor.getText(true, false, true);
assertEquals(
text = extractor.getText(true, false, true);
String smText =
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
@ -123,13 +126,12 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"dolor\n" +
"sit\n" +
"amet\n" +
"\n"
, text
);
"\n";
assertEquals(smText, text);
// With Slides, Notes and Master Text
text = extractor.getText(true, true, true);
assertEquals(
text = extractor.getText(true, true, true);
String snmText =
"Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" +
@ -140,67 +142,61 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"dolor\n" +
"sit\n" +
"amet\n" +
"\n\n\n"
, text
);
"\n\n\n";
assertEquals(snmText, text);
// Via set defaults
extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(true);
text = extractor.getText();
assertEquals(
"\n\n\n\n", text
);
assertEquals("\n\n\n\n", text);
extractor.close();
xmlA.close();
}
public void testGetComments() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
assertTrue(text.length() > 0);
public void testGetComments() throws IOException {
XMLSlideShow xml = openPPTX("45545_Comment.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check comments are there
assertContains(text, "testdoc");
assertContains(text, "test phrase");
// Check the authors came through too
assertContains(text, "XPVMWARE01");
extractor.close();
xml.close();
}
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
extractor.close();
}
public void testGetMasterText() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(true);
String text = extractor.getText();
assertTrue(text.length() > 0);
XMLSlideShow xml = openPPTX("WithMaster.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
extractor.setSlidesByDefault(true);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(true);
// Check master text is there
assertTrue("Unable to find expected word in text\n" + text,
text.contains("Footer from the master slide"));
String text = extractor.getText();
assertTrue(text.length() > 0);
// Theme text shouldn't show up
String themeText =
"Theme Master Title\n" +
"Theme Master first level\n" +
"And the 2nd level\n" +
"Our 3rd level goes here\n" +
"And onto the 4th, such fun....\n" +
"Finally is the Fifth level\n";
// Check the whole text
assertEquals(
// Check master text is there
assertContains(text, "Footer from the master slide");
// Theme text shouldn't show up
// String themeText =
// "Theme Master Title\n" +
// "Theme Master first level\n" +
// "And the 2nd level\n" +
// "Our 3rd level goes here\n" +
// "And onto the 4th, such fun....\n" +
// "Finally is the Fifth level\n";
// Check the whole text
String wholeText =
"First page title\n" +
"First page subtitle\n" +
"This is the Master Title\n" +
@ -210,108 +206,124 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"2nd page subtitle\n" +
"Footer from the master slide\n" +
"This is the Master Title\n" +
"This text comes from the Master Slide\n"
, text
);
"This text comes from the Master Slide\n";
assertEquals(wholeText, text);
extractor.close();
xml.close();
}
public void testTable() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
/**
 * Extracts the text of {@code present1.pptx} and checks that it is
 * non-empty and contains the word {@code TEST}.
 *
 * @throws Exception if the sample cannot be opened or closed
 */
@Test
public void testTable() throws Exception {
    XMLSlideShow xml = openPPTX("present1.pptx");
    try {
        XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
        try {
            String text = extractor.getText();
            assertTrue(text.length() > 0);

            // Check the expected text is there
            // (presumably table content, going by the test name - TODO confirm against the sample file)
            assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
        } finally {
            // release the extractor even when an assertion above fails
            extractor.close();
        }
    } finally {
        // same close order as before: extractor first, then the slideshow
        xml.close();
    }
}
/**
* Test that we can get the text from macro enabled,
* template, theme, slide enabled etc formats, as
* template, theme, slide enabled etc formats, as
* well as from the normal file
*/
@Test
public void testDifferentSubformats() throws Exception {
String[] extensions = new String[] {
"pptx", "pptm", "ppsm", "ppsx",
"thmx",
//"xps" // Doesn't have a core document
};
for(String extension : extensions) {
String filename = "testPPT." + extension;
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
String[] extensions = new String[] {
"pptx", "pptm", "ppsm", "ppsx", "thmx",
// "xps" - Doesn't have a core document
};
for(String extension : extensions) {
String filename = "testPPT." + extension;
XMLSlideShow xml = openPPTX(filename);
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
if (extension.equals("thmx")) {
// Theme file doesn't have any textual content
assertEquals(0, text.length());
continue;
}
String text = extractor.getText();
if(extension.equals("thmx")) {
// Theme file doesn't have any textual content
assertEquals(0, text.length());
continue;
}
assertTrue(text.length() > 0);
assertTrue(
"Text missing for " + filename + "\n" + text,
"Text missing for " + filename + "\n" + text,
text.contains("Attachment Test")
);
assertTrue(
"Text missing for " + filename + "\n" + text,
"Text missing for " + filename + "\n" + text,
text.contains("This is a test file data with the same content")
);
assertTrue(
"Text missing for " + filename + "\n" + text,
"Text missing for " + filename + "\n" + text,
text.contains("content parsing")
);
assertTrue(
"Text missing for " + filename + "\n" + text,
"Text missing for " + filename + "\n" + text,
text.contains("Different words to test against")
);
assertTrue(
"Text missing for " + filename + "\n" + text,
"Text missing for " + filename + "\n" + text,
text.contains("Mystery")
);
extractor.close();
xml.close();
}
}
public void test45541() throws Exception {
// extract text from a powerpoint that has a header in the notes-element
POITextExtractor extr = ExtractorFactory.createExtractor(slTests
.openResourceAsStream("45541_Header.pptx"));
String text = extr.getText();
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotNull(text);
assertTrue("Had: " + text, text.contains("testdoc"));
extr.close();
@Test
public void test45541() throws Exception {
// extract text from a powerpoint that has a header in the notes-element
POITextExtractor extr = ExtractorFactory.createExtractor(
slTests.getFile("45541_Header.pptx"));
String text = extr.getText();
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertContains(text, "testdoc");
extr.close();
assertNotNull(text);
// extract text from a powerpoint that has a footer in the master-slide
extr = ExtractorFactory.createExtractor(slTests
.openResourceAsStream("45541_Footer.pptx"));
text = extr.getText();
assertNotNull(text);
assertFalse("Had " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
// extract text from a powerpoint that has a footer in the master-slide
extr = ExtractorFactory.createExtractor(
slTests.getFile("45541_Footer.pptx"));
text = extr.getText();
assertNotContained(text, "testdoc");
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotContained(text, "testdoc");
extr.close();
}
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
assertNotContained(text, "testdoc");
extr.close();
}
/**
 * Regression test for bug #54570 (InvalidFormatException because of
 * "Absolute URI forbidden"): opening {@code bug54570.pptx} and extracting
 * its text must complete and yield a non-null result.
 *
 * @throws IOException if the sample cannot be opened or closed
 */
@Test
public void bug54570() throws IOException {
    XMLSlideShow xml = openPPTX("bug54570.pptx");
    try {
        XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
        try {
            String text = extractor.getText();
            assertNotNull(text);
        } finally {
            // release the extractor even when extraction or the assertion fails
            extractor.close();
        }
    } finally {
        // same close order as before: extractor first, then the slideshow
        xml.close();
    }
}
/**
 * Opens one of the slideshow sample files as an {@link XMLSlideShow}.
 *
 * @param file name of the sample resource to load
 * @return the slideshow built from the sample's input stream
 * @throws IOException propagated from reading the stream or closing it
 */
private XMLSlideShow openPPTX(String file) throws IOException {
    final InputStream sample = slTests.openResourceAsStream(file);
    XMLSlideShow slideShow;
    try {
        slideShow = new XMLSlideShow(sample);
    } finally {
        // the stream is closed whether or not construction succeeded
        sample.close();
    }
    return slideShow;
}
}

View File

@ -18,6 +18,7 @@
package org.apache.poi;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
@ -26,73 +27,48 @@ import java.lang.reflect.Method;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Map;
import org.apache.poi.util.SuppressForbidden;
/**
* Parent class for POI JUnit TestCases, which provide additional
* features
* Utility class for POI JUnit test cases, providing additional assertions and reflection helpers
*/
public class POITestCase {
public final class POITestCase {
/**
 * Asserts that {@code haystack} is non-null and contains {@code needle}.
 *
 * @param haystack the text to search in
 * @param needle the text expected to be found
 */
public static void assertContains(String haystack, String needle) {
    assertNotNull(haystack);
    if (haystack.contains(needle)) {
        return;
    }
    fail("Unable to find expected text '" + needle + "' in text:\n" + haystack);
}
}
/**
 * Asserts that {@code haystack} is non-null and does not contain
 * {@code needle}.
 *
 * @param haystack the text to search in
 * @param needle the text that must be absent
 */
public static void assertNotContained(String haystack, String needle) {
    assertNotNull(haystack);
    if (haystack.contains(needle)) {
        fail("Unexpectedly found text '" + needle + "' in text:\n" + haystack);
    }
}
}
/**
 * Asserts that the array {@code haystack} holds an element equal to
 * {@code needle}; otherwise fails with a message listing every element.
 * A {@code null} element matches only a {@code null} needle.
 *
 * @param needle the element expected to be present
 * @param haystack the array to search
 */
public static <T> void assertContains(T needle, T[] haystack)
{
    // Check
    for (T thing : haystack) {
        // null-safe comparison: a null element must not abort the search with an NPE
        boolean matches = (thing != null) ? thing.equals(needle) : (needle == null);
        if (matches) {
            return;
        }
    }

    // Failed, try to build a nice error
    StringBuilder sb = new StringBuilder();
    sb.append("Unable to find ").append(needle).append(" in [");
    for (T thing : haystack) {
        // append(Object) renders null as "null" instead of throwing
        sb.append(" ").append(thing).append(" ,");
    }
    if (haystack.length > 0) {
        // turn the trailing ',' into the closing bracket
        sb.setCharAt(sb.length()-1, ']');
    } else {
        // nothing was appended, so the last char is the opening '[' - keep it
        sb.append(']');
    }
    fail(sb.toString());
}
/**
 * Asserts that the collection {@code haystack} contains {@code needle},
 * failing with a message that shows both otherwise.
 *
 * @param needle the element expected to be present
 * @param haystack the collection to search
 */
public static <T> void assertContains(T needle, Collection<T> haystack) {
    final boolean present = haystack.contains(needle);
    if (!present) {
        fail("Unable to find " + needle + " in " + haystack);
    }
}
/**
* @param map haystack
* @param key needle
*/
public static <T> void assertContains(Map<T, ?> map, T key) {
if (map.containsKey(key)) {
/**
* @param map haystack
* @param key needle
*/
public static <T> void assertContains(Map<T, ?> map, T key) {
if (map.containsKey(key)) {
return;
}
fail("Unable to find " + key + " in " + map);
}
}
fail("Unable to find " + key + " in " + map);
}
/** Utility method to get the value of a private/protected field.
* Only use this method in test cases!!!
*/
public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
/**
* Utility method to get the value of a private/protected field.
* Only use this method in test cases!!!
*/
public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override
@SuppressWarnings("unchecked")
@ -103,18 +79,19 @@ public class POITestCase {
return (R) f.get(instance);
}
});
} catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
}
}
} catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
}
}
/** Utility method to call a private/protected method.
* Only use this method in test cases!!!
*/
public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
final Class<?>[] parameterTypes, final Object[] parameters) {
assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
/**
* Utility method to call a private/protected method.
* Only use this method in test cases!!!
*/
public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
final Class<?>[] parameterTypes, final Object[] parameters) {
assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override
@SuppressWarnings("unchecked")
@ -125,8 +102,8 @@ public class POITestCase {
return (R) m.invoke(instance, parameters);
}
});
} catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
}
}
} catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
}
}
}

Binary file not shown.