diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java index a93db00ec..fae45758a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/usermodel/TestProblems.java @@ -708,6 +708,28 @@ public final class TestProblems extends HWPFTestCase { insertTable( 3, 3 ); } + /** + * [FAILING] Bug 47731 - WordExtractor considers text copied from some website as an embedded object. + * Asserts that the text extracted from Bug47731.doc contains the expected sentence; while that assertion fails, the AssertionFailedError is caught and ignored, so the test does not fail on it and fixed( "47731" ) is not reached (presumably fixed() signals that this test needs updating - confirm against its definition). + */ + public void test47731() throws Exception + { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47731.doc" ); + String foundText = new WordExtractor( doc ).getText(); + + try + { + assertTrue( foundText + .contains( "Soak the rice in water for three to four hours" ) ); + + fixed( "47731" ); + } + catch ( AssertionFailedError exc ) + { + // expected while the test is marked [FAILING]: the assertion above does not hold yet + } + } + /** * Bug 4774 - text extracted by WordExtractor is broken */ diff --git a/test-data/document/Bug47731.doc b/test-data/document/Bug47731.doc new file mode 100644 index 000000000..c8cd88d25 Binary files /dev/null and b/test-data/document/Bug47731.doc differ