Give a more helpful exception if a Visio VSDX OOXML file is passed to ExtractorFactory

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665929 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2015-03-11 16:17:41 +00:00
parent aff5642202
commit 555e42f4de
3 changed files with 34 additions and 3 deletions

View File

@ -68,6 +68,8 @@ import org.apache.xmlbeans.XmlException;
public class ExtractorFactory { public class ExtractorFactory {
public static final String CORE_DOCUMENT_REL = public static final String CORE_DOCUMENT_REL =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"; "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
public static final String VISIO_DOCUMENT_REL =
"http://schemas.microsoft.com/visio/2010/relationships/document";
/** Should this thread prefer event based over usermodel based extractors? */ /** Should this thread prefer event based over usermodel based extractors? */
@ -158,12 +160,25 @@ public class ExtractorFactory {
} }
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
// Check for the normal Office core document
PackageRelationshipCollection core = PackageRelationshipCollection core =
pkg.getRelationshipsByType(CORE_DOCUMENT_REL); pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
// If nothing was found, try some of the other OOXML-based core types
if (core.size() == 0) {
// Could it be a visio one?
PackageRelationshipCollection visio =
pkg.getRelationshipsByType(VISIO_DOCUMENT_REL);
if (visio.size() == 1) {
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files");
}
}
// Should just be a single core document, complain if not
if (core.size() != 1) { if (core.size() != 1) {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
} }
// Grab the core document part, and try to identify from that
PackagePart corePart = pkg.getPart(core.getRelationship(0)); PackagePart corePart = pkg.getPart(core.getRelationship(0));
// Is it XSSF? // Is it XSSF?

View File

@ -71,6 +71,7 @@ public class TestExtractorFactory extends TestCase {
private File msgEmbMsg; private File msgEmbMsg;
private File vsd; private File vsd;
private File vsdx;
private File pub; private File pub;
@ -109,6 +110,7 @@ public class TestExtractorFactory extends TestCase {
POIDataSamples dgTests = POIDataSamples.getDiagramInstance(); POIDataSamples dgTests = POIDataSamples.getDiagramInstance();
vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd"); vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd");
vsdx = getFileAndCheck(dgTests, "test.vsdx");
POIDataSamples pubTests = POIDataSamples.getPublisherInstance(); POIDataSamples pubTests = POIDataSamples.getPublisherInstance();
pub = getFileAndCheck(pubTests, "Simple.pub"); pub = getFileAndCheck(pubTests, "Simple.pub");
@ -230,7 +232,7 @@ public class TestExtractorFactory extends TestCase {
); );
extractor.close(); extractor.close();
// Visio // Visio - binary
assertTrue( assertTrue(
ExtractorFactory.createExtractor(vsd) ExtractorFactory.createExtractor(vsd)
instanceof VisioTextExtractor instanceof VisioTextExtractor
@ -238,6 +240,13 @@ public class TestExtractorFactory extends TestCase {
assertTrue( assertTrue(
ExtractorFactory.createExtractor(vsd).getText().length() > 50 ExtractorFactory.createExtractor(vsd).getText().length() > 50
); );
// Visio - vsdx
try {
ExtractorFactory.createExtractor(vsdx);
fail();
} catch(IllegalArgumentException e) {
// Good
}
// Publisher // Publisher
assertTrue( assertTrue(
@ -342,6 +351,13 @@ public class TestExtractorFactory extends TestCase {
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50 ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
); );
// Visio - vsdx
try {
ExtractorFactory.createExtractor(new FileInputStream(vsdx));
fail();
} catch(IllegalArgumentException e) {
// Good
}
// Publisher // Publisher
assertTrue( assertTrue(

BIN
test-data/diagram/test.vsdx Normal file

Binary file not shown.