diff --git a/build.xml b/build.xml index 5dd4a52fb..64845c4d6 100644 --- a/build.xml +++ b/build.xml @@ -220,6 +220,7 @@ under the License. + diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java new file mode 100644 index 000000000..f5aee4cc6 --- /dev/null +++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java @@ -0,0 +1,53 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi; + +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.SummaryInformation; + +/** + * Common Parent for OLE2 based Text Extractors + * of POI Documents, such as .doc, .xls + * You will typically find the implementation of + * a given format's text extractor under + * org.apache.poi.[format].extractor . + * @see org.apache.poi.hssf.extractor.ExcelExtractor + * @see org.apache.poi.hslf.extractor.PowerPointExtractor + * @see org.apache.poi.hdgf.extractor.VisioTextExtractor + * @see org.apache.poi.hwpf.extractor.WordExtractor + */ +public abstract class POIOLE2TextExtractor extends POITextExtractor { + /** + * Creates a new text extractor for the given document + */ + public POIOLE2TextExtractor(POIDocument document) { + super(document); + } + + /** + * Returns the document information metadata for the document + */ + public DocumentSummaryInformation getDocSummaryInformation() { + return document.getDocumentSummaryInformation(); + } + /** + * Returns the summary information metadata for the document + */ + public SummaryInformation getSummaryInformation() { + return document.getSummaryInformation(); + } +} diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index f45f54dff..2a9c455ca 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor; import java.io.IOException; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * the XLS2CSVmra example * @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra */ -public class ExcelExtractor extends POITextExtractor{ +public class ExcelExtractor extends POIOLE2TextExtractor { private HSSFWorkbook wb; private boolean includeSheetNames = true; private boolean formulasNotResults = false; diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 318b68d8f..12321bfac 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -29,6 +29,7 @@ import org.openxml4j.opc.Package; import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; @@ -104,7 +105,7 @@ public class ExtractorFactory { throw new IllegalArgumentException("No supported documents found in the OOXML package"); } - public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException { + public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException { // Look for certain entries in the stream, to figure it // out from for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) { diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java index 034714c7b..9b1307cee 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hdgf.HDGFDiagram; import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.Chunk.Command; @@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * Can opperate on the command line (outputs to stdout), or * can return the text for you (eg for use with Lucene). */ -public class VisioTextExtractor extends POITextExtractor { +public class VisioTextExtractor extends POIOLE2TextExtractor { private HDGFDiagram hdgf; private POIFSFileSystem fs; diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index f24722700..cd9fa2825 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor; import java.io.*; import java.util.HashSet; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hslf.*; import org.apache.poi.hslf.model.*; @@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*; * @author Nick Burch */ -public class PowerPointExtractor extends POITextExtractor +public class PowerPointExtractor extends POIOLE2TextExtractor { private HSLFSlideShow _hslfshow; private SlideShow _show; diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index 6f15ee1f9..85009459d 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -22,7 +22,7 @@ import java.io.FileInputStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; -import org.apache.poi.POITextExtractor; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.model.TextPiece; import org.apache.poi.hwpf.usermodel.Paragraph; @@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; * * @author Nick Burch (nick at torchbox dot com) */ -public class WordExtractor extends POITextExtractor { +public class WordExtractor extends POIOLE2TextExtractor { private POIFSFileSystem fs; private HWPFDocument doc;