From d60c98c37bcf5ed449e22855bfd5ffad6918942d Mon Sep 17 00:00:00 2001
From: Nick Burch <nick@apache.org>
Date: Tue, 12 Aug 2008 19:02:41 +0000
Subject: [PATCH] Fix a typo in the file name, and add a generic method to
 POITextExtractor to get the appropriate metadata text extractor

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@685267 13f79535-47bb-0310-9956-ffa450edef68
---
 src/documentation/content/xdocs/changes.xml       |  2 +-
 src/documentation/content/xdocs/hpsf/how-to.xml   |  2 +-
 src/documentation/content/xdocs/status.xml        |  2 +-
 src/java/org/apache/poi/POIOLE2TextExtractor.java |  9 +++++++++
 src/java/org/apache/poi/POITextExtractor.java     |  7 +++++++
 ...xtractor.java => HPSFPropertiesExtractor.java} | 15 +++++++++++----
 ...ctor.java => TestHPSFPropertiesExtractor.java} | 14 +++++++-------
 7 files changed, 37 insertions(+), 14 deletions(-)
 rename src/java/org/apache/poi/hpsf/extractor/{HPFSPropertiesExtractor.java => HPSFPropertiesExtractor.java} (90%)
 rename src/testcases/org/apache/poi/hpsf/extractor/{TestHPFSPropertiesExtractor.java => TestHPSFPropertiesExtractor.java} (90%)
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index 6a0cae267..b1cfff6b9 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,7 +37,7 @@
 
 		<!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
-           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
            <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
            <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
diff --git a/src/documentation/content/xdocs/hpsf/how-to.xml b/src/documentation/content/xdocs/hpsf/how-to.xml
index aadf753a4..964005bf2 100644
--- a/src/documentation/content/xdocs/hpsf/how-to.xml
+++ b/src/documentation/content/xdocs/hpsf/how-to.xml
@@ -95,7 +95,7 @@
     <p>If all you are interested in is getting the textual content of
      all the document properties, such as for full text indexing, then
      take a look at 
-     <code>org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</code>. However,
+     <code>org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</code>. However,
      if you want full access to the properties, please read on!</p>
 
     <p>The first thing you should understand is that a Microsoft Office file is
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 0146f55e7..fbe242aa2 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,7 +34,7 @@
 	<!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
-           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
            <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
            <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java
index f5aee4cc6..d46c7e4aa 100644
--- a/src/java/org/apache/poi/POIOLE2TextExtractor.java
+++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java
@@ -18,6 +18,7 @@ package org.apache.poi;
 
 import org.apache.poi.hpsf.DocumentSummaryInformation;
 import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 
 /**
  * Common Parent for OLE2 based Text Extractors
@@ -50,4 +51,12 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
 	public SummaryInformation getSummaryInformation() {
 		return document.getSummaryInformation();
 	}
+	
+	/**
+	 * Returns a new HPSF powered text extractor, built from this one,
+	 *  for the document properties metadata, such as title and author.
+	 */
+	public POITextExtractor getMetadataTextExtractor() {
+		return new HPSFPropertiesExtractor(this);
+	}
 }
diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java
index a7ffd4419..0b69894d0 100644
--- a/src/java/org/apache/poi/POITextExtractor.java
+++ b/src/java/org/apache/poi/POITextExtractor.java
@@ -54,4 +54,11 @@ public abstract class POITextExtractor {
 	 * @return All the text from the document
 	 */
 	public abstract String getText();
+	
+	/**
+	 * Returns another text extractor, which is able to
+	 *  output the textual content of the document
+	 *  metadata / properties, such as author and title.
+	 */
+	public abstract POITextExtractor getMetadataTextExtractor();
 }
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
similarity index 90%
rename from src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
rename to src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
index c85f1bb04..ecad5c05b 100644
--- a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
+++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
@@ -36,14 +36,14 @@ import org.apache.poi.util.LittleEndian;
  *  build in and custom, returning them in 
  *  textual form.
  */
-public class HPFSPropertiesExtractor extends POITextExtractor {
-	public HPFSPropertiesExtractor(POITextExtractor mainExtractor) {
+public class HPSFPropertiesExtractor extends POITextExtractor {
+	public HPSFPropertiesExtractor(POITextExtractor mainExtractor) {
 		super(mainExtractor);
 	}
-	public HPFSPropertiesExtractor(POIDocument doc) {
+	public HPSFPropertiesExtractor(POIDocument doc) {
 		super(doc);
 	}
-	public HPFSPropertiesExtractor(POIFSFileSystem fs) {
+	public HPSFPropertiesExtractor(POIFSFileSystem fs) {
 		super(new PropertiesOnlyDocument(fs));
 	}
 	
@@ -127,6 +127,13 @@ public class HPFSPropertiesExtractor extends POITextExtractor {
 	public String getText() {
 		return getSummaryInformationText() + getDocumentSummaryInformationText();
 	}
+	
+	/**
+	 * Always throws, as this already is the metadata extractor. Prevents recursion!
+	 */
+	public POITextExtractor getMetadataTextExtractor() {
+		throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+	}
 
 	/**
 	 * So we can get at the properties of any 
diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
similarity index 90%
rename from src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java
rename to src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
index 7d967874a..3a189353d 100644
--- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java
+++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
@@ -25,7 +25,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 
 import junit.framework.TestCase;
 
-public class TestHPFSPropertiesExtractor extends TestCase {
+public class TestHPSFPropertiesExtractor extends TestCase {
 	private String dir;
 	
     protected void setUp() throws Exception {
@@ -37,7 +37,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
 		POIFSFileSystem fs = new POIFSFileSystem(
 				new FileInputStream(new File(dir, "TestMickey.doc"))
 		);
-		HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+		HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
 		ext.getText();
 		
 		// Check each bit in turn
@@ -60,7 +60,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
 		POIFSFileSystem fs = new POIFSFileSystem(
 				new FileInputStream(new File(dir, "TestUnicode.xls"))
 		);
-		HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+		HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
 		ext.getText();
 		
 		// Check each bit in turn
@@ -83,7 +83,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
 		POIFSFileSystem fs = new POIFSFileSystem(
 				new FileInputStream(new File(dir, "TestMickey.doc"))
 		);
-		HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+		HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
 
 		// Custom properties are part of the document info stream
 		String dinfText = ext.getDocumentSummaryInformationText();
@@ -102,9 +102,9 @@ public class TestHPFSPropertiesExtractor extends TestCase {
 		HSSFWorkbook wb = new HSSFWorkbook(fs);
 		ExcelExtractor excelExt = new ExcelExtractor(wb);
 		
-		String fsText = (new HPFSPropertiesExtractor(fs)).getText();
-		String hwText = (new HPFSPropertiesExtractor(wb)).getText();
-		String eeText = (new HPFSPropertiesExtractor(excelExt)).getText();
+		String fsText = (new HPSFPropertiesExtractor(fs)).getText();
+		String hwText = (new HPSFPropertiesExtractor(wb)).getText();
+		String eeText = (new HPSFPropertiesExtractor(excelExt)).getText();
 		
 		assertEquals(fsText, hwText);
 		assertEquals(fsText, eeText);