diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 0da406cdc..5038f8314 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 49936 - Handle HWPF documents with problematic HeaderStories better 49933 - Support sections in Word 6 and Word 95 files (HWPFOldDocument) 49941 - Correctly handle space preservation of XSSFRichTextRuns when applying fonts to parts of the string Correct XWPFRun detection of bold/italic in a paragraph with multiple runs of different styles diff --git a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java index 4afaba9d1..c0322115b 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/usermodel/HeaderStories.java @@ -157,10 +157,18 @@ public final class HeaderStories { // Empty story return ""; } + if(prop.getEnd() < prop.getStart()) { + // Broken properties? + return ""; + } + + // Ensure we're getting a sensible length + String rawText = headerStories.text(); + int start = Math.min(prop.getStart(), rawText.length()); + int end = Math.min(prop.getEnd(), rawText.length()); // Grab the contents - String text = - headerStories.text().substring(prop.getStart(), prop.getEnd()); + String text = rawText.substring(start, end); // Strip off fields and macros if requested if(stripFields) { diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index 3ca917514..19e67a46d 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -21,6 +21,7 @@ import org.apache.poi.EncryptedDocumentException; import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.HWPFTestCase; import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.extractor.WordExtractor; import org.apache.poi.hwpf.model.StyleSheet; /** @@ -232,6 +233,27 @@ public final class TestProblems extends HWPFTestCase { } } } + + /** + * Bug #49936 - Problems with reading the header out of + * the Header Stories + */ + public void testProblemHeaderStories49936() throws Exception { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("HeaderFooterProblematic.doc"); + HeaderStories hs = new HeaderStories(doc); + + assertEquals("", hs.getFirstHeader()); + assertEquals("\r", hs.getEvenHeader()); + assertEquals("", hs.getOddHeader()); + + assertEquals("", hs.getFirstFooter()); + assertEquals("", hs.getEvenFooter()); + assertEquals("", hs.getOddFooter()); + + WordExtractor ext = new WordExtractor(doc); + assertEquals("\n", ext.getHeaderText()); + assertEquals("", ext.getFooterText()); + } /** * Bug #48245 - don't include the text from the diff --git a/test-data/document/HeaderFooterProblematic.doc b/test-data/document/HeaderFooterProblematic.doc new file mode 100644 index 000000000..a4d9d303d Binary files /dev/null and b/test-data/document/HeaderFooterProblematic.doc differ