Detect OOXML-Strict files, and give more helpful exceptions for them

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1666525 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2015-03-13 17:39:47 +00:00
parent 48e3a2e56a
commit ff737c5df4
4 changed files with 42 additions and 7 deletions

View File

@ -94,6 +94,15 @@ public class POIXMLDocumentPart {
*/ */
public POIXMLDocumentPart(OPCPackage pkg) { public POIXMLDocumentPart(OPCPackage pkg) {
PackageRelationship coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); PackageRelationship coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
if (coreRel == null) {
coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
if (coreRel != null) {
throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
}
}
if (coreRel == null) {
throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
}
this.packagePart = pkg.getPart(coreRel); this.packagePart = pkg.getPart(coreRel);
this.packageRel = coreRel; this.packageRel = coreRel;

View File

@ -47,6 +47,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.Entry;
@ -66,10 +67,9 @@ import org.apache.xmlbeans.XmlException;
* document, and returns it. * document, and returns it.
*/ */
public class ExtractorFactory { public class ExtractorFactory {
public static final String CORE_DOCUMENT_REL = public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"; protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
public static final String VISIO_DOCUMENT_REL = protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
"http://schemas.microsoft.com/visio/2010/relationships/document";
/** Should this thread prefer event based over usermodel based extractors? */ /** Should this thread prefer event based over usermodel based extractors? */
@ -165,6 +165,10 @@ public class ExtractorFactory {
pkg.getRelationshipsByType(CORE_DOCUMENT_REL); pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
// If nothing was found, try some of the other OOXML-based core types // If nothing was found, try some of the other OOXML-based core types
if (core.size() == 0) {
// Could it be an OOXML-Strict one?
core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
}
if (core.size() == 0) { if (core.size() == 0) {
// Could it be a visio one? // Could it be a visio one?
PackageRelationshipCollection visio = PackageRelationshipCollection visio =
@ -173,6 +177,7 @@ public class ExtractorFactory {
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files"); throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files");
} }
} }
// Should just be a single core document, complain if not // Should just be a single core document, complain if not
if (core.size() != 1) { if (core.size() != 1) {
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());

View File

@ -25,6 +25,7 @@ import junit.framework.TestCase;
import org.apache.poi.POIDataSamples; import org.apache.poi.POIDataSamples;
import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor; import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLException;
import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
@ -162,6 +163,12 @@ public class TestExtractorFactory extends TestCase {
extractor.close(); extractor.close();
// TODO Support OOXML-Strict, see bug #57699 // TODO Support OOXML-Strict, see bug #57699
try {
extractor = ExtractorFactory.createExtractor(xlsxStrict);
fail("OOXML-Strict isn't yet supported");
} catch (POIXMLException e) {
// Expected, for now
}
// extractor = ExtractorFactory.createExtractor(xlsxStrict); // extractor = ExtractorFactory.createExtractor(xlsxStrict);
// assertTrue( // assertTrue(
// extractor // extractor
@ -307,6 +314,14 @@ public class TestExtractorFactory extends TestCase {
assertTrue( assertTrue(
ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200 ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
); );
// TODO Support OOXML-Strict, see bug #57699
// assertTrue(
// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict))
// instanceof XSSFExcelExtractor
// );
// assertTrue(
// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200
// );
// Word // Word
assertTrue( assertTrue(

View File

@ -2293,13 +2293,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
/** /**
* OOXML-Strict files * OOXML-Strict files
* Not currently working - namespace mis-match from XMLBeans
*/ */
@Test @Test
@Ignore @Ignore("XMLBeans namespace mis-match on ooxml-strict files")
public void test57699() throws Exception { public void test57699() throws Exception {
Workbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx"); XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx");
assertEquals(3, wb.getNumberOfSheets()); assertEquals(3, wb.getNumberOfSheets());
// TODO Check sheet contents
// TODO Check formula evaluation
// TODO Check the rest XSSFWorkbook wbBack = XSSFTestDataSamples.writeOutAndReadBack(wb);
assertEquals(3, wbBack.getNumberOfSheets());
// TODO Re-check sheet contents
// TODO Re-check formula evaluation
} }
} }