diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 34fe02d10..35554d114 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,7 @@ + 47400 - Support for text extraction of footnotes, endnotes and comments in HWPF 47415 - Fixed PageSettingsBlock to allow multiple PLS records 47412 - Fixed concurrency issue with EscherProperties.initProps() 47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index a43852be8..8b6d2fdae 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -344,6 +344,28 @@ public final class HWPFDocument extends POIDocument ); } + /** + * Returns the range which covers all the Endnotes. + */ + public Range getEndnoteRange() { + return new Range( + _cpSplit.getEndNoteStart(), + _cpSplit.getEndNoteEnd(), + this + ); + } + + /** + * Returns the range which covers all the Comments. + */ + public Range getCommentsRange() { + return new Range( + _cpSplit.getCommentsStart(), + _cpSplit.getCommentsEnd(), + this + ); + } + /** * Returns the range which covers all "Header Stories". 
* A header story contains a header, footer, end note diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index e63ad4f5a..10ac95426 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -22,6 +22,7 @@ import java.io.InputStream; import java.io.FileInputStream; import java.io.UnsupportedEncodingException; import java.util.Iterator; +import java.util.Arrays; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hwpf.HWPFDocument; @@ -95,34 +96,58 @@ public final class WordExtractor extends POIOLE2TextExtractor { * Get the text from the word file, as an array with one String * per paragraph */ - public String[] getParagraphText() { - String[] ret; + public String[] getParagraphText() { + String[] ret; - // Extract using the model code - try { - Range r = doc.getRange(); + // Extract using the model code + try { + Range r = doc.getRange(); - ret = new String[r.numParagraphs()]; - for(int i=0; i -1 ); } + + public void testFootnote() throws Exception { + HWPFDocument doc = new HWPFDocument( + new FileInputStream(filename6) + ); + extractor = new WordExtractor(doc); + + String[] text = extractor.getFootnoteText(); + StringBuffer b = new StringBuffer(); + for (int i=0; i