Provide a common OLE2 implementation of POITextExtractor, which gives access to the document metadata
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@646312 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
08802e667e
commit
3a7871a2dd
53
src/java/org/apache/poi/POIOLE2TextExtractor.java
Normal file
53
src/java/org/apache/poi/POIOLE2TextExtractor.java
Normal file
@ -0,0 +1,53 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi;
|
||||
|
||||
import org.apache.poi.hpsf.DocumentSummaryInformation;
|
||||
import org.apache.poi.hpsf.SummaryInformation;
|
||||
|
||||
/**
|
||||
* Common Parent for OLE2 based Text Extractors
|
||||
* of POI Documents, such as .doc, .xls
|
||||
* You will typically find the implementation of
|
||||
* a given format's text extractor under
|
||||
* org.apache.poi.[format].extractor .
|
||||
* @see org.apache.poi.hssf.extractor.ExcelExtractor
|
||||
* @see org.apache.poi.hslf.extractor.PowerPointExtractor
|
||||
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
|
||||
* @see org.apache.poi.hwpf.extractor.WordExtractor
|
||||
*/
|
||||
public abstract class POIOLE2TextExtractor extends POITextExtractor {
|
||||
/**
|
||||
* Creates a new text extractor for the given document
|
||||
*/
|
||||
public POIOLE2TextExtractor(POIDocument document) {
|
||||
super(document);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the document information metadata for the document
|
||||
*/
|
||||
public DocumentSummaryInformation getDocSummaryInformation() {
|
||||
return document.getDocumentSummaryInformation();
|
||||
}
|
||||
/**
|
||||
* Returns the summary information metadata for the document
|
||||
*/
|
||||
public SummaryInformation getSummaryInformation() {
|
||||
return document.getSummaryInformation();
|
||||
}
|
||||
}
|
@ -18,7 +18,7 @@ package org.apache.poi.hssf.extractor;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
* the XLS2CSVmra example
|
||||
* @see org.apache.poi.hssf.eventusermodel.examples.XLS2CSVmra
|
||||
*/
|
||||
public class ExcelExtractor extends POITextExtractor{
|
||||
public class ExcelExtractor extends POIOLE2TextExtractor {
|
||||
private HSSFWorkbook wb;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
|
@ -21,7 +21,7 @@ import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hdgf.HDGFDiagram;
|
||||
import org.apache.poi.hdgf.chunks.Chunk;
|
||||
import org.apache.poi.hdgf.chunks.Chunk.Command;
|
||||
@ -35,7 +35,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
* Can operate on the command line (outputs to stdout), or
|
||||
* can return the text for you (eg for use with Lucene).
|
||||
*/
|
||||
public class VisioTextExtractor extends POITextExtractor {
|
||||
public class VisioTextExtractor extends POIOLE2TextExtractor {
|
||||
private HDGFDiagram hdgf;
|
||||
private POIFSFileSystem fs;
|
||||
|
||||
|
@ -23,7 +23,7 @@ package org.apache.poi.hslf.extractor;
|
||||
import java.io.*;
|
||||
import java.util.HashSet;
|
||||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.hslf.*;
|
||||
import org.apache.poi.hslf.model.*;
|
||||
@ -36,7 +36,7 @@ import org.apache.poi.hslf.usermodel.*;
|
||||
* @author Nick Burch
|
||||
*/
|
||||
|
||||
public class PowerPointExtractor extends POITextExtractor
|
||||
public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
{
|
||||
private HSLFSlideShow _hslfshow;
|
||||
private SlideShow _show;
|
||||
|
@ -22,7 +22,7 @@ import java.io.FileInputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POITextExtractor;
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.model.TextPiece;
|
||||
import org.apache.poi.hwpf.usermodel.Paragraph;
|
||||
@ -37,7 +37,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
*
|
||||
* @author Nick Burch (nick at torchbox dot com)
|
||||
*/
|
||||
public class WordExtractor extends POITextExtractor {
|
||||
public class WordExtractor extends POIOLE2TextExtractor {
|
||||
private POIFSFileSystem fs;
|
||||
private HWPFDocument doc;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user