Detect OOXML-Strict files, and give more helpful exceptions for them
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1666525 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
48e3a2e56a
commit
ff737c5df4
@ -94,6 +94,15 @@ public class POIXMLDocumentPart {
|
|||||||
*/
|
*/
|
||||||
public POIXMLDocumentPart(OPCPackage pkg) {
|
public POIXMLDocumentPart(OPCPackage pkg) {
|
||||||
PackageRelationship coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
|
PackageRelationship coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
|
||||||
|
if (coreRel == null) {
|
||||||
|
coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0);
|
||||||
|
if (coreRel != null) {
|
||||||
|
throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (coreRel == null) {
|
||||||
|
throw new POIXMLException("OOXML file structure broken/invalid - no core document found!");
|
||||||
|
}
|
||||||
|
|
||||||
this.packagePart = pkg.getPart(coreRel);
|
this.packagePart = pkg.getPart(coreRel);
|
||||||
this.packageRel = coreRel;
|
this.packageRel = coreRel;
|
||||||
|
@ -47,6 +47,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
|
|||||||
import org.apache.poi.openxml4j.opc.PackageAccess;
|
import org.apache.poi.openxml4j.opc.PackageAccess;
|
||||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||||
|
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
import org.apache.poi.poifs.filesystem.DirectoryEntry;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
import org.apache.poi.poifs.filesystem.Entry;
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
@ -66,10 +67,9 @@ import org.apache.xmlbeans.XmlException;
|
|||||||
* document, and returns it.
|
* document, and returns it.
|
||||||
*/
|
*/
|
||||||
public class ExtractorFactory {
|
public class ExtractorFactory {
|
||||||
public static final String CORE_DOCUMENT_REL =
|
public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT;
|
||||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
|
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
|
||||||
public static final String VISIO_DOCUMENT_REL =
|
protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
|
||||||
"http://schemas.microsoft.com/visio/2010/relationships/document";
|
|
||||||
|
|
||||||
|
|
||||||
/** Should this thread prefer event based over usermodel based extractors? */
|
/** Should this thread prefer event based over usermodel based extractors? */
|
||||||
@ -165,6 +165,10 @@ public class ExtractorFactory {
|
|||||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||||
|
|
||||||
// If nothing was found, try some of the other OOXML-based core types
|
// If nothing was found, try some of the other OOXML-based core types
|
||||||
|
if (core.size() == 0) {
|
||||||
|
// Could it be an OOXML-Strict one?
|
||||||
|
core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL);
|
||||||
|
}
|
||||||
if (core.size() == 0) {
|
if (core.size() == 0) {
|
||||||
// Could it be a visio one?
|
// Could it be a visio one?
|
||||||
PackageRelationshipCollection visio =
|
PackageRelationshipCollection visio =
|
||||||
@ -173,6 +177,7 @@ public class ExtractorFactory {
|
|||||||
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files");
|
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Should just be a single core document, complain if not
|
// Should just be a single core document, complain if not
|
||||||
if (core.size() != 1) {
|
if (core.size() != 1) {
|
||||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||||
|
@ -25,6 +25,7 @@ import junit.framework.TestCase;
|
|||||||
import org.apache.poi.POIDataSamples;
|
import org.apache.poi.POIDataSamples;
|
||||||
import org.apache.poi.POIOLE2TextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.POITextExtractor;
|
import org.apache.poi.POITextExtractor;
|
||||||
|
import org.apache.poi.POIXMLException;
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
|
||||||
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
|
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
|
||||||
@ -162,6 +163,12 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
extractor.close();
|
extractor.close();
|
||||||
|
|
||||||
// TODO Support OOXML-Strict, see bug #57699
|
// TODO Support OOXML-Strict, see bug #57699
|
||||||
|
try {
|
||||||
|
extractor = ExtractorFactory.createExtractor(xlsxStrict);
|
||||||
|
fail("OOXML-Strict isn't yet supported");
|
||||||
|
} catch (POIXMLException e) {
|
||||||
|
// Expected, for now
|
||||||
|
}
|
||||||
// extractor = ExtractorFactory.createExtractor(xlsxStrict);
|
// extractor = ExtractorFactory.createExtractor(xlsxStrict);
|
||||||
// assertTrue(
|
// assertTrue(
|
||||||
// extractor
|
// extractor
|
||||||
@ -307,6 +314,14 @@ public class TestExtractorFactory extends TestCase {
|
|||||||
assertTrue(
|
assertTrue(
|
||||||
ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
|
ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
|
||||||
);
|
);
|
||||||
|
// TODO Support OOXML-Strict, see bug #57699
|
||||||
|
// assertTrue(
|
||||||
|
// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict))
|
||||||
|
// instanceof XSSFExcelExtractor
|
||||||
|
// );
|
||||||
|
// assertTrue(
|
||||||
|
// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200
|
||||||
|
// );
|
||||||
|
|
||||||
// Word
|
// Word
|
||||||
assertTrue(
|
assertTrue(
|
||||||
|
@ -2293,13 +2293,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* OOXML-Strict files
|
* OOXML-Strict files
|
||||||
|
* Not currently working - namespace mis-match from XMLBeans
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
@Ignore
|
@Ignore("XMLBeans namespace mis-match on ooxml-strict files")
|
||||||
public void test57699() throws Exception {
|
public void test57699() throws Exception {
|
||||||
Workbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx");
|
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx");
|
||||||
assertEquals(3, wb.getNumberOfSheets());
|
assertEquals(3, wb.getNumberOfSheets());
|
||||||
|
// TODO Check sheet contents
|
||||||
|
// TODO Check formula evaluation
|
||||||
|
|
||||||
// TODO Check the rest
|
XSSFWorkbook wbBack = XSSFTestDataSamples.writeOutAndReadBack(wb);
|
||||||
|
assertEquals(3, wbBack.getNumberOfSheets());
|
||||||
|
// TODO Re-check sheet contents
|
||||||
|
// TODO Re-check formula evaluation
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user