From 3c2f15b9ffb705c079b98c1400bd1136d148b560 Mon Sep 17 00:00:00 2001 From: Sergey Vladimirov Date: Thu, 7 Jul 2011 11:38:59 +0000 Subject: [PATCH] add content of main text box to document text git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1143769 13f79535-47bb-0310-9956-ffa450edef68 --- .../src/org/apache/poi/hwpf/HWPFDocument.java | 11 ++++++ .../poi/hwpf/extractor/WordExtractor.java | 7 ++++ .../apache/poi/hwpf/model/FIBLongHandler.java | 36 +++++++++++++++---- 3 files changed, 47 insertions(+), 7 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index db9dcde34..847a15abc 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -363,6 +363,17 @@ public final class HWPFDocument extends HWPFDocumentCore ); } + /** + * Returns the range which covers the text of the main textbox. + */ + public Range getMainTextboxRange() { + return new Range( + _cpSplit.getMainTextboxStart(), + _cpSplit.getMainTextboxEnd(), + this + ); + } + /** * Returns the range which covers all "Header Stories". 
 * A header story contains a header, footer, end note diff --git a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java index 1c6ed2b1d..b5dcc78a8 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/extractor/WordExtractor.java @@ -120,6 +120,12 @@ public final class WordExtractor extends POIOLE2TextExtractor { return getParagraphText(r); } + public String[] getMainTextboxText() { + Range r = doc.getMainTextboxRange(); + + return getParagraphText(r); + } + public String[] getEndnoteText() { Range r = doc.getEndnoteRange(); @@ -251,6 +257,7 @@ public final class WordExtractor extends POIOLE2TextExtractor { ArrayList text = new ArrayList(); text.addAll(Arrays.asList(getParagraphText())); + text.addAll(Arrays.asList(getMainTextboxText())); text.addAll(Arrays.asList(getFootnoteText())); text.addAll(Arrays.asList(getEndnoteText())); diff --git a/src/scratchpad/src/org/apache/poi/hwpf/model/FIBLongHandler.java b/src/scratchpad/src/org/apache/poi/hwpf/model/FIBLongHandler.java index d5e55b1a2..ef51961c8 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/model/FIBLongHandler.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/model/FIBLongHandler.java @@ -27,13 +27,35 @@ public final class FIBLongHandler { public static final int CBMAC = 0; public static final int PRODUCTCREATED = 1; public static final int PRODUCTREVISED = 2; - public static final int CCPTEXT = 3; - public static final int CCPFTN = 4; - public static final int CCPHDD = 5; - public static final int CCPMCR = 6; - public static final int CCPATN = 7; - public static final int CCPEDN = 8; - public static final int CCPTXBX = 9; + /** + * Pointer to length of main document text stream + */ + public static final int CCPTEXT = 3; + /** + * Pointer to length of footnote subdocument text stream + */ + public static final int CCPFTN = 4; + /** + * Pointer to 
length of header subdocument text stream + */ + public static final int CCPHDD = 5; + /** + * Pointer to length of macro subdocument text stream, which should now + * always be 0 + */ + public static final int CCPMCR = 6; + /** + * Pointer to length of annotation subdocument text stream + */ + public static final int CCPATN = 7; + /** + * Pointer to length of endnote subdocument text stream + */ + public static final int CCPEDN = 8; + /** + * Pointer to length of textbox subdocument text stream + */ + public static final int CCPTXBX = 9; public static final int CCPHDRTXBX = 10; public static final int PNFBPCHPFIRST = 11; public static final int PNCHPFIRST = 12;