bug#51686 - ConcurrentModificationException in Tika's OfficeParser

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Maxim Valyanskiy 2011-08-22 07:56:43 +00:00
parent 750f848de7
commit 7a008634fa
3 changed files with 33 additions and 21 deletions

View File

@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument
* @throws IOException If there is an unexpected IOException from the passed * @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem. * in POIFSFileSystem.
*/ */
public HWPFDocumentCore(DirectoryNode directory) throws IOException public HWPFDocumentCore(DirectoryNode directory) throws IOException {
{
// Sort out the hpsf properties // Sort out the hpsf properties
super(directory); super(directory);
// read in the main stream. // read in the main stream.
DocumentEntry documentProps = (DocumentEntry) DocumentEntry documentProps = (DocumentEntry)
directory.getEntry("WordDocument"); directory.getEntry("WordDocument");
_mainStream = new byte[documentProps.getSize()]; _mainStream = new byte[documentProps.getSize()];
directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream); directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream);
// Create our FIB, and check for the doc being encrypted // Create our FIB, and check for the doc being encrypted
_fib = new FileInformationBlock(_mainStream); _fib = new FileInformationBlock(_mainStream);
if(_fib.isFEncrypted()) { if (_fib.isFEncrypted()) {
throw new EncryptedDocumentException("Cannot process encrypted word files!"); throw new EncryptedDocumentException("Cannot process encrypted word files!");
} }
{ try {
DirectoryEntry objectPoolEntry; DirectoryEntry objectPoolEntry = (DirectoryEntry) directory
try .getEntry(STREAM_OBJECT_POOL);
{ _objectPool = new ObjectPoolImpl(objectPoolEntry);
objectPoolEntry = (DirectoryEntry) directory } catch (FileNotFoundException exc) {
.getEntry( STREAM_OBJECT_POOL );
}
catch ( FileNotFoundException exc )
{
objectPoolEntry = directory
.createDirectory( STREAM_OBJECT_POOL );
}
_objectPool = new ObjectPoolImpl( objectPoolEntry );
}
} }
}
/** /**
* Returns the range which covers the whole of the document, but excludes * Returns the range which covers the whole of the document, but excludes
* any headers and footers. * any headers and footers.
*/ */

View File

@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFTestDataSamples; import org.apache.poi.hwpf.HWPFTestDataSamples;
import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import java.io.IOException;
import java.io.InputStream;
/** /**
* Test the different routes to extracting text * Test the different routes to extracting text
* *
@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase {
assertEquals(p_text1_block, extractor.getText()); assertEquals(p_text1_block, extractor.getText());
} }
} }
/**
 * Walks the entries of the root directory of "testWORD.doc" and, on
 * reaching the "WordDocument" entry, builds a {@link WordExtractor} over
 * the same file system and pulls its text.
 *
 * The extractor is deliberately created in the middle of the iteration and
 * the loop is allowed to run on afterwards: the accompanying commit is
 * filed against bug 51686 (ConcurrentModificationException), so the walk
 * must be able to continue once the document has been opened.
 *
 * @throws IOException if the sample document cannot be read
 */
public void testRootEntiesNavigation() throws IOException {
    InputStream sampleStream = POIDataSamples.getDocumentInstance()
            .openResourceAsStream("testWORD.doc");
    POIFSFileSystem fileSystem = new POIFSFileSystem(sampleStream);

    String extractedText = null;

    for (Entry rootChild : fileSystem.getRoot()) {
        if (!"WordDocument".equals(rootChild.getName())) {
            continue;
        }
        // No early exit here: the iteration has to keep going after the
        // extractor has been constructed.
        extractedText = new WordExtractor(fileSystem).getText();
    }

    assertNotNull(extractedText);
}
} }

Binary file not shown.