diff --git a/src/java/org/apache/poi/POIDocument.java b/src/java/org/apache/poi/POIDocument.java
index ece7a3f13..8d91c06e7 100644
--- a/src/java/org/apache/poi/POIDocument.java
+++ b/src/java/org/apache/poi/POIDocument.java
@@ -67,14 +67,30 @@ public abstract class POIDocument {
 
 	/**
 	 * Find, and create objects for, the standard
-	 * Documment Information Properties (HPSF)
+	 * Document Information Properties (HPSF).
+	 * If a given property set is missing or corrupt,
+	 * it will remain null.
 	 */
 	protected void readProperties() {
+		// instanceof is false for null, so a missing property set
+		// skips both branches below and leaves the field untouched
+		PropertySet ps;
+
 		// DocumentSummaryInformation
-		dsInf = (DocumentSummaryInformation)getPropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
+		ps = getPropertySet(DocumentSummaryInformation.DEFAULT_STREAM_NAME);
+		if(ps instanceof DocumentSummaryInformation) {
+			dsInf = (DocumentSummaryInformation)ps;
+		} else if(ps != null) {
+			logger.log(POILogger.WARN, "DocumentSummaryInformation property set came back with wrong class - ", ps.getClass());
+		}
 
 		// SummaryInformation
-		sInf = (SummaryInformation)getPropertySet(SummaryInformation.DEFAULT_STREAM_NAME);
+		ps = getPropertySet(SummaryInformation.DEFAULT_STREAM_NAME);
+		if(ps instanceof SummaryInformation) {
+			sInf = (SummaryInformation)ps;
+		} else if(ps != null) {
+			logger.log(POILogger.WARN, "SummaryInformation property set came back with wrong class - ", ps.getClass());
+		}
 	}
 
 	/** 
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc b/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc
new file mode 100644
index 000000000..b98008943
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/ProblemExtracting.doc differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
new file mode 100644
index 000000000..b87f586d3
--- /dev/null
+++ 
b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
@@ -0,0 +1,58 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements.  See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License.  You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+package org.apache.poi.hwpf.extractor;
+
+import java.io.FileInputStream;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for bugs with the WordExtractor
+ *
+ * @author Nick Burch (nick at torchbox dot com)
+ */
+public class TestWordExtractorBugs extends TestCase {
+	private String dirname;
+
+	/**
+	 * Reads the test data directory from the
+	 * HWPF.testdata.path system property.
+	 */
+	protected void setUp() throws Exception {
+		dirname = System.getProperty("HWPF.testdata.path");
+	}
+
+	/**
+	 * Checks that text can be extracted from ProblemExtracting.doc
+	 * without an error being raised.
+	 */
+	public void testProblemMetadata() throws Exception {
+		String filename = dirname + "/ProblemExtracting.doc";
+		FileInputStream in = new FileInputStream(filename);
+		try {
+			WordExtractor extractor = new WordExtractor(in);
+
+			// Check it gives text without error
+			extractor.getText();
+			extractor.getParagraphText();
+			extractor.getTextFromPieces();
+		} finally {
+			// Always release the file handle, even if extraction throws
+			in.close();
+		}
+	}
+}