bug#51686 - ConcurrentModificationException in Tika's OfficeParser
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
750f848de7
commit
7a008634fa
@ -141,41 +141,32 @@ public abstract class HWPFDocumentCore extends POIDocument
|
||||
* @throws IOException If there is an unexpected IOException from the passed
|
||||
* in POIFSFileSystem.
|
||||
*/
|
||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException
|
||||
{
|
||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
|
||||
// Sort out the hpsf properties
|
||||
super(directory);
|
||||
super(directory);
|
||||
|
||||
// read in the main stream.
|
||||
DocumentEntry documentProps = (DocumentEntry)
|
||||
directory.getEntry("WordDocument");
|
||||
directory.getEntry("WordDocument");
|
||||
_mainStream = new byte[documentProps.getSize()];
|
||||
|
||||
directory.createDocumentInputStream(STREAM_WORD_DOCUMENT).read(_mainStream);
|
||||
|
||||
// Create our FIB, and check for the doc being encrypted
|
||||
_fib = new FileInformationBlock(_mainStream);
|
||||
if(_fib.isFEncrypted()) {
|
||||
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
||||
if (_fib.isFEncrypted()) {
|
||||
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
||||
}
|
||||
|
||||
{
|
||||
DirectoryEntry objectPoolEntry;
|
||||
try
|
||||
{
|
||||
objectPoolEntry = (DirectoryEntry) directory
|
||||
.getEntry( STREAM_OBJECT_POOL );
|
||||
}
|
||||
catch ( FileNotFoundException exc )
|
||||
{
|
||||
objectPoolEntry = directory
|
||||
.createDirectory( STREAM_OBJECT_POOL );
|
||||
}
|
||||
_objectPool = new ObjectPoolImpl( objectPoolEntry );
|
||||
}
|
||||
try {
|
||||
DirectoryEntry objectPoolEntry = (DirectoryEntry) directory
|
||||
.getEntry(STREAM_OBJECT_POOL);
|
||||
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
||||
} catch (FileNotFoundException exc) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
/**
|
||||
* Returns the range which covers the whole of the document, but excludes
|
||||
* any headers and footers.
|
||||
*/
|
||||
|
@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||
import org.apache.poi.poifs.filesystem.Entry;
|
||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
/**
|
||||
* Test the different routes to extracting text
|
||||
*
|
||||
@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase {
|
||||
assertEquals(p_text1_block, extractor.getText());
|
||||
}
|
||||
}
|
||||
|
||||
public void testRootEntiesNavigation() throws IOException {
|
||||
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc");
|
||||
|
||||
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||
|
||||
String text = null;
|
||||
|
||||
for (Entry entry : fs.getRoot()) {
|
||||
if ("WordDocument".equals(entry.getName())) {
|
||||
WordExtractor ex = new WordExtractor(fs);
|
||||
text = ex.getText();
|
||||
}
|
||||
}
|
||||
|
||||
assertNotNull(text);
|
||||
}
|
||||
}
|
||||
|
BIN
test-data/document/testWORD.doc
Normal file
BIN
test-data/document/testWORD.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user