Bug #51686 - Fix ConcurrentModificationException in Tika's OfficeParser
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1160137 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
750f848de7
commit
7a008634fa
@ -141,8 +141,7 @@ public abstract class HWPFDocumentCore extends POIDocument
|
|||||||
* @throws IOException If there is an unexpected IOException from the passed
|
* @throws IOException If there is an unexpected IOException from the passed
|
||||||
* in POIFSFileSystem.
|
* in POIFSFileSystem.
|
||||||
*/
|
*/
|
||||||
public HWPFDocumentCore(DirectoryNode directory) throws IOException
|
public HWPFDocumentCore(DirectoryNode directory) throws IOException {
|
||||||
{
|
|
||||||
// Sort out the hpsf properties
|
// Sort out the hpsf properties
|
||||||
super(directory);
|
super(directory);
|
||||||
|
|
||||||
@ -159,19 +158,11 @@ public abstract class HWPFDocumentCore extends POIDocument
|
|||||||
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
throw new EncryptedDocumentException("Cannot process encrypted word files!");
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
try {
|
||||||
DirectoryEntry objectPoolEntry;
|
DirectoryEntry objectPoolEntry = (DirectoryEntry) directory
|
||||||
try
|
|
||||||
{
|
|
||||||
objectPoolEntry = (DirectoryEntry) directory
|
|
||||||
.getEntry(STREAM_OBJECT_POOL);
|
.getEntry(STREAM_OBJECT_POOL);
|
||||||
}
|
|
||||||
catch ( FileNotFoundException exc )
|
|
||||||
{
|
|
||||||
objectPoolEntry = directory
|
|
||||||
.createDirectory( STREAM_OBJECT_POOL );
|
|
||||||
}
|
|
||||||
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
_objectPool = new ObjectPoolImpl(objectPoolEntry);
|
||||||
|
} catch (FileNotFoundException exc) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,9 +24,13 @@ import org.apache.poi.hwpf.HWPFDocument;
|
|||||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||||
import org.apache.poi.hwpf.OldWordFileFormatException;
|
import org.apache.poi.hwpf.OldWordFileFormatException;
|
||||||
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
import org.apache.poi.poifs.filesystem.DirectoryNode;
|
||||||
|
import org.apache.poi.poifs.filesystem.Entry;
|
||||||
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
||||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test the different routes to extracting text
|
* Test the different routes to extracting text
|
||||||
*
|
*
|
||||||
@ -353,4 +357,21 @@ public final class TestWordExtractor extends TestCase {
|
|||||||
assertEquals(p_text1_block, extractor.getText());
|
assertEquals(p_text1_block, extractor.getText());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRootEntiesNavigation() throws IOException {
|
||||||
|
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("testWORD.doc");
|
||||||
|
|
||||||
|
POIFSFileSystem fs = new POIFSFileSystem(is);
|
||||||
|
|
||||||
|
String text = null;
|
||||||
|
|
||||||
|
for (Entry entry : fs.getRoot()) {
|
||||||
|
if ("WordDocument".equals(entry.getName())) {
|
||||||
|
WordExtractor ex = new WordExtractor(fs);
|
||||||
|
text = ex.getText();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assertNotNull(text);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/testWORD.doc
Normal file
BIN
test-data/document/testWORD.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user