#54570 - InvalidFormatException because of Absolute URI forbidden

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1744137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andreas Beeker 2016-05-16 21:04:00 +00:00
parent 33241e821f
commit 61321b4b37
4 changed files with 240 additions and 251 deletions

View File

@ -191,7 +191,7 @@ implements SlideShow<XSLFShape,XSLFTextParagraph> {
List<PackagePart> mediaParts = getPackage().getPartsByName(Pattern.compile("/ppt/media/.*?")); List<PackagePart> mediaParts = getPackage().getPartsByName(Pattern.compile("/ppt/media/.*?"));
_pictures = new ArrayList<XSLFPictureData>(mediaParts.size()); _pictures = new ArrayList<XSLFPictureData>(mediaParts.size());
for(PackagePart part : mediaParts){ for(PackagePart part : mediaParts){
XSLFPictureData pd = new XSLFPictureData(part, null); XSLFPictureData pd = new XSLFPictureData(part);
pd.setIndex(_pictures.size()); pd.setIndex(_pictures.size());
_pictures.add(pd); _pictures.add(pd);
} }

View File

@ -16,104 +16,107 @@
==================================================================== */ ==================================================================== */
package org.apache.poi.xslf.extractor; package org.apache.poi.xslf.extractor;
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertNotContained;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xslf.usermodel.XSLFSlideShow; import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.xmlbeans.XmlException;
import junit.framework.TestCase; import org.junit.Test;
/** /**
* Tests for HXFPowerPointExtractor * Tests for XSLFPowerPointExtractor
*/ */
public class TestXSLFPowerPointExtractor extends TestCase { public class TestXSLFPowerPointExtractor {
/** private static POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
* A simple file
*/
private XSLFSlideShow xmlA;
private OPCPackage pkg;
private POIDataSamples slTests = POIDataSamples.getSlideShowInstance();
protected void setUp() throws Exception {
slTests = POIDataSamples.getSlideShowInstance();
pkg = OPCPackage.open(slTests.openResourceAsStream("sample.pptx"));
xmlA = new XSLFSlideShow(pkg);
}
/** /**
* Get text out of the simple file * Get text out of the simple file
* @throws XmlException
* @throws OpenXML4JException
*/ */
public void testGetSimpleText() throws Exception { @Test
new XSLFPowerPointExtractor(xmlA).close(); public void testGetSimpleText()
throws IOException, XmlException, OpenXML4JException {
XMLSlideShow xmlA = openPPTX("sample.pptx");
@SuppressWarnings("resource")
OPCPackage pkg = xmlA.getPackage();
new XSLFPowerPointExtractor(xmlA).close();
new XSLFPowerPointExtractor(pkg).close(); new XSLFPowerPointExtractor(pkg).close();
XSLFPowerPointExtractor extractor = XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA); new XSLFPowerPointExtractor(xmlA);
extractor.getText(); extractor.getText();
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
// Check Basics // Check Basics
assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n")); assertTrue(text.startsWith("Lorem ipsum dolor sit amet\n"));
assertTrue(text.contains("amet\n\n")); assertContains(text, "amet\n\n");
// Our placeholder master text // Our placeholder master text
// This shouldn't show up in the output // This shouldn't show up in the output
String masterText = // String masterText =
"Click to edit Master title style\n" + // "Click to edit Master title style\n" +
"Click to edit Master subtitle style\n" + // "Click to edit Master subtitle style\n" +
"\n\n\n\n\n\n" + // "\n\n\n\n\n\n" +
"Click to edit Master title style\n" + // "Click to edit Master title style\n" +
"Click to edit Master text styles\n" + // "Click to edit Master text styles\n" +
"Second level\n" + // "Second level\n" +
"Third level\n" + // "Third level\n" +
"Fourth level\n" + // "Fourth level\n" +
"Fifth level\n"; // "Fifth level\n";
// Just slides, no notes // Just slides, no notes
text = extractor.getText(true, false, false); text = extractor.getText(true, false, false);
assertEquals( String slideText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" + "\n" +
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Lorem\n" +
"ipsum\n" + "ipsum\n" +
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n" "\n";
, text assertEquals(slideText, text);
);
// Just notes, no slides // Just notes, no slides
text = extractor.getText(false, true); text = extractor.getText(false, true);
assertEquals( assertEquals("\n\n\n\n", text);
"\n\n\n\n", text
);
// Both // Both
text = extractor.getText(true, true, false); text = extractor.getText(true, true, false);
assertEquals( String bothText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n\n\n" + "\n\n\n" +
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Lorem\n" + "Lorem\n" +
"ipsum\n" + "ipsum\n" +
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n\n\n" "\n\n\n";
, text assertEquals(bothText, text);
);
// With Slides and Master Text // With Slides and Master Text
text = extractor.getText(true, false, true); text = extractor.getText(true, false, true);
assertEquals( String smText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" + "\n" +
@ -123,13 +126,12 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n" "\n";
, text assertEquals(smText, text);
);
// With Slides, Notes and Master Text // With Slides, Notes and Master Text
text = extractor.getText(true, true, true); text = extractor.getText(true, true, true);
assertEquals( String snmText =
"Lorem ipsum dolor sit amet\n" + "Lorem ipsum dolor sit amet\n" +
"Nunc at risus vel erat tempus posuere. Aenean non ante.\n" + "Nunc at risus vel erat tempus posuere. Aenean non ante.\n" +
"\n" + "\n" +
@ -140,67 +142,61 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"dolor\n" + "dolor\n" +
"sit\n" + "sit\n" +
"amet\n" + "amet\n" +
"\n\n\n" "\n\n\n";
, text assertEquals(snmText, text);
);
// Via set defaults // Via set defaults
extractor.setSlidesByDefault(false); extractor.setSlidesByDefault(false);
extractor.setNotesByDefault(true); extractor.setNotesByDefault(true);
text = extractor.getText(); text = extractor.getText();
assertEquals( assertEquals("\n\n\n\n", text);
"\n\n\n\n", text
);
extractor.close(); extractor.close();
xmlA.close();
} }
public void testGetComments() throws Exception {
XSLFSlideShow xml =
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("45545_Comment.pptx")));
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); public void testGetComments() throws IOException {
assertTrue(text.length() > 0); XMLSlideShow xml = openPPTX("45545_Comment.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
// Check comments are there String text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue(text.length() > 0);
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
// Check comments are there
assertContains(text, "testdoc");
assertContains(text, "test phrase");
// Check the authors came through too
assertContains(text, "XPVMWARE01");
extractor.close();
xml.close();
}
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
extractor.close();
}
public void testGetMasterText() throws Exception { public void testGetMasterText() throws Exception {
XSLFSlideShow xml = XMLSlideShow xml = openPPTX("WithMaster.pptx");
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("WithMaster.pptx"))); XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
XSLFPowerPointExtractor extractor = extractor.setSlidesByDefault(true);
new XSLFPowerPointExtractor(xml); extractor.setNotesByDefault(false);
extractor.setSlidesByDefault(true); extractor.setMasterByDefault(true);
extractor.setNotesByDefault(false);
extractor.setMasterByDefault(true);
String text = extractor.getText();
assertTrue(text.length() > 0);
// Check master text is there String text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, assertTrue(text.length() > 0);
text.contains("Footer from the master slide"));
// Theme text shouldn't show up // Check master text is there
String themeText = assertContains(text, "Footer from the master slide");
"Theme Master Title\n" +
"Theme Master first level\n" + // Theme text shouldn't show up
"And the 2nd level\n" + // String themeText =
"Our 3rd level goes here\n" + // "Theme Master Title\n" +
"And onto the 4th, such fun....\n" + // "Theme Master first level\n" +
"Finally is the Fifth level\n"; // "And the 2nd level\n" +
// "Our 3rd level goes here\n" +
// Check the whole text // "And onto the 4th, such fun....\n" +
assertEquals( // "Finally is the Fifth level\n";
// Check the whole text
String wholeText =
"First page title\n" + "First page title\n" +
"First page subtitle\n" + "First page subtitle\n" +
"This is the Master Title\n" + "This is the Master Title\n" +
@ -210,108 +206,124 @@ public class TestXSLFPowerPointExtractor extends TestCase {
"2nd page subtitle\n" + "2nd page subtitle\n" +
"Footer from the master slide\n" + "Footer from the master slide\n" +
"This is the Master Title\n" + "This is the Master Title\n" +
"This text comes from the Master Slide\n" "This text comes from the Master Slide\n";
, text assertEquals(wholeText, text);
);
extractor.close(); extractor.close();
xml.close();
} }
public void testTable() throws Exception { @Test
XSLFSlideShow xml = public void testTable() throws Exception {
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream("present1.pptx"))); XMLSlideShow xml = openPPTX("present1.pptx");
XSLFPowerPointExtractor extractor = XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
new XSLFPowerPointExtractor(xml);
String text = extractor.getText(); String text = extractor.getText();
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
// Check the expected text was extracted // Check the expected text was extracted
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
extractor.close(); extractor.close();
xml.close();
} }
/** /**
* Test that we can get the text from macro enabled, * Test that we can get the text from macro enabled,
* template, theme, slide enabled etc formats, as * template, theme, slide enabled etc formats, as
* well as from the normal file * well as from the normal file
*/ */
@Test
public void testDifferentSubformats() throws Exception { public void testDifferentSubformats() throws Exception {
String[] extensions = new String[] { String[] extensions = new String[] {
"pptx", "pptm", "ppsm", "ppsx", "pptx", "pptm", "ppsm", "ppsx", "thmx",
"thmx", // "xps" - Doesn't have a core document
//"xps" // Doesn't have a core document };
}; for(String extension : extensions) {
for(String extension : extensions) { String filename = "testPPT." + extension;
String filename = "testPPT." + extension; XMLSlideShow xml = openPPTX(filename);
XSLFSlideShow xml = XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
new XSLFSlideShow(OPCPackage.open(slTests.openResourceAsStream(filename)));
XSLFPowerPointExtractor extractor = String text = extractor.getText();
new XSLFPowerPointExtractor(xml); if (extension.equals("thmx")) {
// Theme file doesn't have any textual content
assertEquals(0, text.length());
continue;
}
String text = extractor.getText();
if(extension.equals("thmx")) {
// Theme file doesn't have any textual content
assertEquals(0, text.length());
continue;
}
assertTrue(text.length() > 0); assertTrue(text.length() > 0);
assertTrue( assertTrue(
"Text missing for " + filename + "\n" + text, "Text missing for " + filename + "\n" + text,
text.contains("Attachment Test") text.contains("Attachment Test")
); );
assertTrue( assertTrue(
"Text missing for " + filename + "\n" + text, "Text missing for " + filename + "\n" + text,
text.contains("This is a test file data with the same content") text.contains("This is a test file data with the same content")
); );
assertTrue( assertTrue(
"Text missing for " + filename + "\n" + text, "Text missing for " + filename + "\n" + text,
text.contains("content parsing") text.contains("content parsing")
); );
assertTrue( assertTrue(
"Text missing for " + filename + "\n" + text, "Text missing for " + filename + "\n" + text,
text.contains("Different words to test against") text.contains("Different words to test against")
); );
assertTrue( assertTrue(
"Text missing for " + filename + "\n" + text, "Text missing for " + filename + "\n" + text,
text.contains("Mystery") text.contains("Mystery")
); );
extractor.close(); extractor.close();
xml.close();
} }
} }
public void test45541() throws Exception { @Test
// extract text from a powerpoint that has a header in the notes-element public void test45541() throws Exception {
POITextExtractor extr = ExtractorFactory.createExtractor(slTests // extract text from a powerpoint that has a header in the notes-element
.openResourceAsStream("45541_Header.pptx")); POITextExtractor extr = ExtractorFactory.createExtractor(
String text = extr.getText(); slTests.getFile("45541_Header.pptx"));
assertNotNull(text); String text = extr.getText();
assertFalse("Had: " + text, text.contains("testdoc")); assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotNull(text); text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertTrue("Had: " + text, text.contains("testdoc")); assertContains(text, "testdoc");
extr.close(); extr.close();
assertNotNull(text); assertNotNull(text);
// extract text from a powerpoint that has a footer in the master-slide // extract text from a powerpoint that has a footer in the master-slide
extr = ExtractorFactory.createExtractor(slTests extr = ExtractorFactory.createExtractor(
.openResourceAsStream("45541_Footer.pptx")); slTests.getFile("45541_Footer.pptx"));
text = extr.getText(); text = extr.getText();
assertNotNull(text); assertNotContained(text, "testdoc");
assertFalse("Had " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotNull(text);
assertFalse("Had: " + text, text.contains("testdoc"));
text = ((XSLFPowerPointExtractor)extr).getText(false, false, true); text = ((XSLFPowerPointExtractor)extr).getText(false, true);
assertNotNull(text); assertNotContained(text, "testdoc");
assertFalse("Had: " + text, text.contains("testdoc"));
extr.close(); text = ((XSLFPowerPointExtractor)extr).getText(false, false, true);
} assertNotContained(text, "testdoc");
extr.close();
}
@Test
public void bug54570() throws IOException {
XMLSlideShow xml = openPPTX("bug54570.pptx");
XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xml);
String text = extractor.getText();
assertNotNull(text);
extractor.close();
xml.close();
}
private XMLSlideShow openPPTX(String file) throws IOException {
InputStream is = slTests.openResourceAsStream(file);
try {
return new XMLSlideShow(is);
} finally {
is.close();
}
}
} }

View File

@ -18,6 +18,7 @@
package org.apache.poi; package org.apache.poi;
import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue; import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail; import static org.junit.Assert.fail;
@ -26,73 +27,48 @@ import java.lang.reflect.Method;
import java.security.AccessController; import java.security.AccessController;
import java.security.PrivilegedActionException; import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction; import java.security.PrivilegedExceptionAction;
import java.util.Collection;
import java.util.Map; import java.util.Map;
import org.apache.poi.util.SuppressForbidden; import org.apache.poi.util.SuppressForbidden;
/** /**
* Parent class for POI JUnit TestCases, which provide additional * Util class for POI JUnit TestCases, which provide additional features
* features
*/ */
public class POITestCase { public final class POITestCase {
public static void assertContains(String haystack, String needle) { public static void assertContains(String haystack, String needle) {
assertNotNull(haystack);
assertTrue( assertTrue(
"Unable to find expected text '" + needle + "' in text:\n" + haystack, "Unable to find expected text '" + needle + "' in text:\n" + haystack,
haystack.contains(needle) haystack.contains(needle)
); );
} }
public static void assertNotContained(String haystack, String needle) { public static void assertNotContained(String haystack, String needle) {
assertNotNull(haystack);
assertFalse( assertFalse(
"Unexpectedly found text '" + needle + "' in text:\n" + haystack, "Unexpectedly found text '" + needle + "' in text:\n" + haystack,
haystack.contains(needle) haystack.contains(needle)
); );
} }
public static <T> void assertContains(T needle, T[] haystack) /**
{ * @param map haystack
// Check * @param key needle
for (T thing : haystack) { */
if (thing.equals(needle)) { public static <T> void assertContains(Map<T, ?> map, T key) {
return; if (map.containsKey(key)) {
}
}
// Failed, try to build a nice error
StringBuilder sb = new StringBuilder();
sb.append("Unable to find ").append(needle).append(" in [");
for (T thing : haystack) {
sb.append(" ").append(thing.toString()).append(" ,");
}
sb.setCharAt(sb.length()-1, ']');
fail(sb.toString());
}
public static <T> void assertContains(T needle, Collection<T> haystack) {
if (haystack.contains(needle)) {
return;
}
fail("Unable to find " + needle + " in " + haystack);
}
/**
* @param map haystack
* @param key needle
*/
public static <T> void assertContains(Map<T, ?> map, T key) {
if (map.containsKey(key)) {
return; return;
} }
fail("Unable to find " + key + " in " + map); fail("Unable to find " + key + " in " + map);
} }
/** Utility method to get the value of a private/protected field. /**
* Only use this method in test cases!!! * Utility method to get the value of a private/protected field.
*/ * Only use this method in test cases!!!
public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) { */
assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi.")); public static <R,T> R getFieldValue(final Class<? super T> clazz, final T instance, final Class<R> fieldType, final String fieldName) {
try { assertTrue("Reflection of private fields is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() { return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -103,18 +79,19 @@ public class POITestCase {
return (R) f.get(instance); return (R) f.get(instance);
} }
}); });
} catch (PrivilegedActionException pae) { } catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException()); throw new RuntimeException("Cannot access field '" + fieldName + "' of class " + clazz, pae.getException());
} }
} }
/** Utility method to call a private/protected method. /**
* Only use this method in test cases!!! * Utility method to call a private/protected method.
*/ * Only use this method in test cases!!!
public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName, */
final Class<?>[] parameterTypes, final Object[] parameters) { public static <R,T> R callMethod(final Class<? super T> clazz, final T instance, final Class<R> returnType, final String methodName,
assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi.")); final Class<?>[] parameterTypes, final Object[] parameters) {
try { assertTrue("Reflection of private methods is only allowed for POI classes.", clazz.getName().startsWith("org.apache.poi."));
try {
return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() { return AccessController.doPrivileged(new PrivilegedExceptionAction<R>() {
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -125,8 +102,8 @@ public class POITestCase {
return (R) m.invoke(instance, parameters); return (R) m.invoke(instance, parameters);
} }
}); });
} catch (PrivilegedActionException pae) { } catch (PrivilegedActionException pae) {
throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException()); throw new RuntimeException("Cannot access method '" + methodName + "' of class " + clazz, pae.getException());
} }
} }
} }

Binary file not shown.