Refactor to make it easier to tell which content types each POIXMLTextExtractor handles
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@980414 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
507d4dd3a5
commit
443dd75e04
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.7-beta2" date="2010-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">Make it easier to tell which content types each POIXMLTextExtractor handles</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49649 - Added clone support for UserSView* and Feat* families of records</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49653 - Support for escaped unicode characters in Shared String Table</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49579 - prevent ArrayIndexOutOfBoundsException in UnknownEscherRecord</action>
|
||||
|
@ -52,6 +52,7 @@ import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
|
||||
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
@ -155,42 +156,40 @@ public class ExtractorFactory {
|
||||
}
|
||||
|
||||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException {
|
||||
PackageRelationshipCollection core =
|
||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||
if(core.size() != 1) {
|
||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||
}
|
||||
PackageRelationshipCollection core =
|
||||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
|
||||
if(core.size() != 1) {
|
||||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
|
||||
}
|
||||
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
if (corePart.getContentType().equals(XSSFRelation.WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACRO_ADDIN_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.TEMPLATE_WORKBOOK.getContentType()) ||
|
||||
corePart.getContentType().equals(XSSFRelation.MACROS_WORKBOOK.getContentType())) {
|
||||
if(getPreferEventExtractor()) {
|
||||
return new XSSFEventBasedExcelExtractor(pkg);
|
||||
} else {
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
}
|
||||
PackagePart corePart = pkg.getPart(core.getRelationship(0));
|
||||
|
||||
if(corePart.getContentType().equals(XWPFRelation.DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.TEMPLATE.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_DOCUMENT.getContentType()) ||
|
||||
corePart.getContentType().equals(XWPFRelation.MACRO_TEMPLATE_DOCUMENT.getContentType()) ) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
// Is it XSSF?
|
||||
for(XSSFRelation rel : XSSFExcelExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
if(getPreferEventExtractor()) {
|
||||
return new XSSFEventBasedExcelExtractor(pkg);
|
||||
} else {
|
||||
return new XSSFExcelExtractor(pkg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE) ||
|
||||
corePart.getContentType().equals(XSLFSlideShow.MACRO_CONTENT_TYPE) ||
|
||||
corePart.getContentType().equals(XSLFSlideShow.MACRO_TEMPLATE_CONTENT_TYPE) ||
|
||||
corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_CONTENT_TYPE) ||
|
||||
corePart.getContentType().equals(XSLFSlideShow.PRESENTATIONML_TEMPLATE_CONTENT_TYPE) ||
|
||||
corePart.getContentType().equals(XSLFSlideShow.PRESENTATION_MACRO_CONTENT_TYPE)) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
}
|
||||
// Is it XWPF?
|
||||
for(XWPFRelation rel : XWPFWordExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
return new XWPFWordExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||
// Is it XSLF?
|
||||
for(XSLFRelation rel : XSLFPowerPointExtractor.SUPPORTED_TYPES) {
|
||||
if(corePart.getContentType().equals(rel.getContentType())) {
|
||||
return new XSLFPowerPointExtractor(pkg);
|
||||
}
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")");
|
||||
}
|
||||
|
||||
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
|
||||
|
@ -22,6 +22,7 @@ import java.util.List;
|
||||
|
||||
import org.apache.poi.POIXMLDocument;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
|
||||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||
@ -29,7 +30,6 @@ import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxmlformats.schemas.drawingml.x2006.main.ThemeDocument;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTCommentList;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTNotesSlide;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.CTPresentation;
|
||||
@ -57,18 +57,6 @@ import org.openxmlformats.schemas.presentationml.x2006.main.SldMasterDocument;
|
||||
* WARNING - APIs expected to change rapidly
|
||||
*/
|
||||
public class XSLFSlideShow extends POIXMLDocument {
|
||||
public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml";
|
||||
public static final String MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml";
|
||||
public static final String MACRO_TEMPLATE_CONTENT_TYPE = "application/vnd.ms-powerpoint.template.macroEnabled.main+xml";
|
||||
public static final String PRESENTATIONML_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml";
|
||||
public static final String PRESENTATIONML_TEMPLATE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.template.main+xml";
|
||||
public static final String PRESENTATION_MACRO_CONTENT_TYPE = "application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml";
|
||||
public static final String THEME_MANAGER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.themeManager+xml";
|
||||
public static final String NOTES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml";
|
||||
public static final String SLIDE_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.presentationml.slide+xml";
|
||||
public static final String SLIDE_LAYOUT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout";
|
||||
public static final String NOTES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide";
|
||||
public static final String COMMENT_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments";
|
||||
|
||||
private PresentationDocument presentationDoc;
|
||||
/**
|
||||
@ -79,7 +67,7 @@ public class XSLFSlideShow extends POIXMLDocument {
|
||||
public XSLFSlideShow(OPCPackage container) throws OpenXML4JException, IOException, XmlException {
|
||||
super(container);
|
||||
|
||||
if(getCorePart().getContentType().equals(THEME_MANAGER_CONTENT_TYPE)) {
|
||||
if(getCorePart().getContentType().equals(XSLFRelation.THEME_MANAGER.getContentType())) {
|
||||
rebase(getPackage());
|
||||
}
|
||||
|
||||
@ -187,7 +175,7 @@ public class XSLFSlideShow extends POIXMLDocument {
|
||||
PackagePart slidePart = getSlidePart(parentSlide);
|
||||
|
||||
try {
|
||||
notes = slidePart.getRelationshipsByType(NOTES_RELATION_TYPE);
|
||||
notes = slidePart.getRelationshipsByType(XSLFRelation.NOTES.getRelation());
|
||||
} catch(InvalidFormatException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
@ -231,7 +219,7 @@ public class XSLFSlideShow extends POIXMLDocument {
|
||||
PackagePart slidePart = getSlidePart(slide);
|
||||
|
||||
try {
|
||||
commentRels = slidePart.getRelationshipsByType(COMMENT_RELATION_TYPE);
|
||||
commentRels = slidePart.getRelationshipsByType(XSLFRelation.COMMENTS.getRelation());
|
||||
} catch(InvalidFormatException e) {
|
||||
throw new IllegalStateException(e);
|
||||
}
|
||||
|
@ -23,6 +23,7 @@ import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.DrawingParagraph;
|
||||
import org.apache.poi.xslf.usermodel.XMLSlideShow;
|
||||
import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
|
||||
import org.apache.poi.xslf.usermodel.XSLFRelation;
|
||||
import org.apache.poi.xslf.usermodel.XSLFSlide;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxmlformats.schemas.presentationml.x2006.main.*;
|
||||
@ -30,6 +31,12 @@ import org.openxmlformats.schemas.presentationml.x2006.main.*;
|
||||
import java.io.IOException;
|
||||
|
||||
public class XSLFPowerPointExtractor extends POIXMLTextExtractor {
|
||||
public static final XSLFRelation[] SUPPORTED_TYPES = new XSLFRelation[] {
|
||||
XSLFRelation.MAIN, XSLFRelation.MACRO, XSLFRelation.MACRO_TEMPLATE,
|
||||
XSLFRelation.PRESENTATIONML, XSLFRelation.PRESENTATIONML_TEMPLATE,
|
||||
XSLFRelation.PRESENTATION_MACRO
|
||||
};
|
||||
|
||||
private XMLSlideShow slideshow;
|
||||
private boolean slidesByDefault = true;
|
||||
private boolean notesByDefault = false;
|
||||
|
111
src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java
Normal file
111
src/ooxml/java/org/apache/poi/xslf/usermodel/XSLFRelation.java
Normal file
@ -0,0 +1,111 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xslf.usermodel;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.apache.poi.POIXMLRelation;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
public class XSLFRelation extends POIXMLRelation {
|
||||
|
||||
private static POILogger log = POILogFactory.getLogger(XSLFRelation.class);
|
||||
|
||||
/**
|
||||
* A map to lookup POIXMLRelation by its relation type
|
||||
*/
|
||||
protected static Map<String, XSLFRelation> _table = new HashMap<String, XSLFRelation>();
|
||||
|
||||
public static final XSLFRelation MAIN = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation MACRO = new XSLFRelation(
|
||||
"application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation MACRO_TEMPLATE = new XSLFRelation(
|
||||
"application/vnd.ms-powerpoint.template.macroEnabled.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation PRESENTATIONML = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation PRESENTATIONML_TEMPLATE = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.template.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation PRESENTATION_MACRO = new XSLFRelation(
|
||||
"application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation THEME_MANAGER = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.themeManager+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation NOTES = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide",
|
||||
null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation SLIDE = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slide+xml",
|
||||
null, null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation SLIDE_LAYOUT = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.slideLayout+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/slideLayout",
|
||||
null, null
|
||||
);
|
||||
|
||||
public static final XSLFRelation COMMENTS = new XSLFRelation(
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.comments+xml",
|
||||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments",
|
||||
null, null
|
||||
);
|
||||
|
||||
|
||||
private XSLFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
|
||||
super(type, rel, defaultName, cls);
|
||||
|
||||
if(cls != null && !_table.containsKey(rel)) _table.put(rel, this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get POIXMLRelation by relation type
|
||||
*
|
||||
* @param rel relation type, for example,
|
||||
* <code>http://schemas.openxmlformats.org/officeDocument/2006/relationships/image</code>
|
||||
* @return registered POIXMLRelation or null if not found
|
||||
*/
|
||||
public static XSLFRelation getInstance(String rel){
|
||||
return _table.get(rel);
|
||||
}
|
||||
}
|
@ -28,6 +28,7 @@ import org.apache.poi.ss.usermodel.Comment;
|
||||
import org.apache.poi.ss.usermodel.HeaderFooter;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCell;
|
||||
import org.apache.poi.xssf.usermodel.XSSFRelation;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
@ -36,6 +37,12 @@ import org.apache.xmlbeans.XmlException;
|
||||
* Helper class to extract text from an OOXML Excel file
|
||||
*/
|
||||
public class XSSFExcelExtractor extends POIXMLTextExtractor implements org.apache.poi.ss.extractor.ExcelExtractor {
|
||||
public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
|
||||
XSSFRelation.WORKBOOK, XSSFRelation.MACRO_TEMPLATE_WORKBOOK,
|
||||
XSSFRelation.MACRO_ADDIN_WORKBOOK, XSSFRelation.TEMPLATE_WORKBOOK,
|
||||
XSSFRelation.MACROS_WORKBOOK
|
||||
};
|
||||
|
||||
private XSSFWorkbook workbook;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
|
@ -30,6 +30,7 @@ import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
|
||||
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFRelation;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFTable;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
|
||||
@ -38,6 +39,12 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
|
||||
* Helper class to extract text from an OOXML Word file
|
||||
*/
|
||||
public class XWPFWordExtractor extends POIXMLTextExtractor {
|
||||
public static final XWPFRelation[] SUPPORTED_TYPES = new XWPFRelation[] {
|
||||
XWPFRelation.DOCUMENT, XWPFRelation.TEMPLATE,
|
||||
XWPFRelation.MACRO_DOCUMENT,
|
||||
XWPFRelation.MACRO_TEMPLATE_DOCUMENT
|
||||
};
|
||||
|
||||
private XWPFDocument document;
|
||||
private boolean fetchHyperlinks = false;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user