763 lines
26 KiB
Java
763 lines
26 KiB
Java
/* ====================================================================
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
this work for additional information regarding copyright ownership.
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
(the "License"); you may not use this file except in compliance with
|
|
the License. You may obtain a copy of the License at
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
See the License for the specific language governing permissions and
|
|
limitations under the License.
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.hwpf.usermodel;
|
|
|
|
import java.io.InputStream;
|
|
import java.util.List;
|
|
|
|
import junit.framework.AssertionFailedError;
|
|
import org.apache.commons.codec.digest.DigestUtils;
|
|
import org.apache.poi.EncryptedDocumentException;
|
|
import org.apache.poi.POIDataSamples;
|
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
import org.apache.poi.hwpf.HWPFOldDocument;
|
|
import org.apache.poi.hwpf.HWPFTestCase;
|
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
|
import org.apache.poi.hwpf.extractor.Word6Extractor;
|
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
import org.apache.poi.hwpf.model.StyleSheet;
|
|
import org.apache.poi.util.IOUtils;
|
|
|
|
/**
|
|
* Test various problem documents
|
|
*
|
|
* @author Nick Burch (nick at torchbox dot com)
|
|
*/
|
|
public final class TestProblems extends HWPFTestCase {
|
|
|
|
/**
|
|
* ListEntry passed no ListTable
|
|
*/
|
|
public void testListEntryNoListTable() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("ListEntryNoListTable.doc");
|
|
|
|
Range r = doc.getRange();
|
|
StyleSheet styleSheet = doc.getStyleSheet();
|
|
for (int x = 0; x < r.numSections(); x++) {
|
|
Section s = r.getSection(x);
|
|
for (int y = 0; y < s.numParagraphs(); y++) {
|
|
Paragraph paragraph = s.getParagraph(y);
|
|
// System.out.println(paragraph.getCharacterRun(0).text());
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* AIOOB for TableSprmUncompressor.unCompressTAPOperation
|
|
*/
|
|
public void testSprmAIOOB() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("AIOOB-Tap.doc");
|
|
|
|
Range r = doc.getRange();
|
|
StyleSheet styleSheet = doc.getStyleSheet();
|
|
for (int x = 0; x < r.numSections(); x++) {
|
|
Section s = r.getSection(x);
|
|
for (int y = 0; y < s.numParagraphs(); y++) {
|
|
Paragraph paragraph = s.getParagraph(y);
|
|
// System.out.println(paragraph.getCharacterRun(0).text());
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Test for TableCell not skipping the last paragraph. Bugs #45062 and
|
|
* #44292
|
|
*/
|
|
public void testTableCellLastParagraph() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug44292.doc");
|
|
Range r = doc.getRange();
|
|
assertEquals(6, r.numParagraphs());
|
|
assertEquals(0, r.getStartOffset());
|
|
assertEquals(87, r.getEndOffset());
|
|
|
|
// Paragraph with table
|
|
Paragraph p = r.getParagraph(0);
|
|
assertEquals(0, p.getStartOffset());
|
|
assertEquals(20, p.getEndOffset());
|
|
|
|
// Check a few bits of the table directly
|
|
assertEquals("One paragraph is ok\7", r.getParagraph(0).text());
|
|
assertEquals("First para is ok\r", r.getParagraph(1).text());
|
|
assertEquals("Second paragraph is skipped\7", r.getParagraph(2).text());
|
|
assertEquals("One paragraph is ok\7", r.getParagraph(3).text());
|
|
assertEquals("\7", r.getParagraph(4).text());
|
|
assertEquals("\r", r.getParagraph(5).text());
|
|
for(int i=0; i<=5; i++) {
|
|
assertFalse(r.getParagraph(i).usesUnicode());
|
|
}
|
|
|
|
|
|
// Get the table
|
|
Table t = r.getTable(p);
|
|
|
|
// get the only row
|
|
assertEquals(1, t.numRows());
|
|
TableRow row = t.getRow(0);
|
|
|
|
// sanity check our row
|
|
assertEquals(5, row.numParagraphs());
|
|
assertEquals(0, row._parStart);
|
|
assertEquals(5, row._parEnd);
|
|
assertEquals(0, row.getStartOffset());
|
|
assertEquals(86, row.getEndOffset());
|
|
|
|
|
|
// get the first cell
|
|
TableCell cell = row.getCell(0);
|
|
// First cell should have one paragraph
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
|
|
assertEquals(0, cell._parStart);
|
|
assertEquals(1, cell._parEnd);
|
|
assertEquals(0, cell.getStartOffset());
|
|
assertEquals(20, cell.getEndOffset());
|
|
|
|
|
|
// get the second
|
|
cell = row.getCell(1);
|
|
// Second cell should be detected as having two paragraphs
|
|
assertEquals(2, cell.numParagraphs());
|
|
assertEquals("First para is ok\r", cell.getParagraph(0).text());
|
|
assertEquals("Second paragraph is skipped\7", cell.getParagraph(1).text());
|
|
assertEquals(1, cell._parStart);
|
|
assertEquals(3, cell._parEnd);
|
|
assertEquals(20, cell.getStartOffset());
|
|
assertEquals(65, cell.getEndOffset());
|
|
|
|
|
|
// get the last cell
|
|
cell = row.getCell(2);
|
|
// Last cell should have one paragraph
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals("One paragraph is ok\7", cell.getParagraph(0).text());
|
|
assertEquals(3, cell._parStart);
|
|
assertEquals(4, cell._parEnd);
|
|
assertEquals(65, cell.getStartOffset());
|
|
assertEquals(85, cell.getEndOffset());
|
|
}
|
|
|
|
public void testRangeDelete() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug28627.doc");
|
|
|
|
Range range = doc.getRange();
|
|
int numParagraphs = range.numParagraphs();
|
|
|
|
int totalLength = 0, deletedLength = 0;
|
|
|
|
for (int i = 0; i < numParagraphs; i++) {
|
|
Paragraph para = range.getParagraph(i);
|
|
String text = para.text();
|
|
|
|
totalLength += text.length();
|
|
if (text.indexOf("{delete me}") > -1) {
|
|
para.delete();
|
|
deletedLength = text.length();
|
|
}
|
|
}
|
|
|
|
// check the text length after deletion
|
|
int newLength = 0;
|
|
range = doc.getRange();
|
|
numParagraphs = range.numParagraphs();
|
|
|
|
for (int i = 0; i < numParagraphs; i++) {
|
|
Paragraph para = range.getParagraph(i);
|
|
String text = para.text();
|
|
|
|
newLength += text.length();
|
|
}
|
|
|
|
assertEquals(newLength, totalLength - deletedLength);
|
|
}
|
|
|
|
/**
|
|
* With an encrypted file, we should give a suitable exception, and not OOM
|
|
*/
|
|
public void testEncryptedFile() {
|
|
try {
|
|
HWPFTestDataSamples.openSampleFile("PasswordProtected.doc");
|
|
fail();
|
|
} catch (EncryptedDocumentException e) {
|
|
// Good
|
|
}
|
|
}
|
|
|
|
public void testWriteProperties() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("SampleDoc.doc");
|
|
assertEquals("Nick Burch", doc.getSummaryInformation().getAuthor());
|
|
|
|
// Write and read
|
|
HWPFDocument doc2 = writeOutAndRead(doc);
|
|
assertEquals("Nick Burch", doc2.getSummaryInformation().getAuthor());
|
|
}
|
|
|
|
/**
|
|
* Test for reading paragraphs from Range after replacing some
|
|
* text in this Range.
|
|
* Bug #45269
|
|
*/
|
|
public void testReadParagraphsAfterReplaceText()throws Exception{
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
|
|
Range range = doc.getRange();
|
|
|
|
String toFind = "campo1";
|
|
String longer = " foi porraaaaa ";
|
|
String shorter = " foi ";
|
|
|
|
//check replace with longer text
|
|
for (int x = 0; x < range.numParagraphs(); x++) {
|
|
Paragraph para = range.getParagraph(x);
|
|
int offset = para.text().indexOf(toFind);
|
|
if (offset >= 0) {
|
|
para.replaceText(toFind, longer, offset);
|
|
assertEquals(offset, para.text().indexOf(longer));
|
|
}
|
|
}
|
|
|
|
doc = HWPFTestDataSamples.openSampleFile("Bug45269.doc");
|
|
range = doc.getRange();
|
|
|
|
//check replace with shorter text
|
|
for (int x = 0; x < range.numParagraphs(); x++) {
|
|
Paragraph para = range.getParagraph(x);
|
|
int offset = para.text().indexOf(toFind);
|
|
if (offset >= 0) {
|
|
para.replaceText(toFind, shorter, offset);
|
|
assertEquals(offset, para.text().indexOf(shorter));
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Bug #49936 - Problems with reading the header out of
|
|
* the Header Stories
|
|
*/
|
|
public void testProblemHeaderStories49936() throws Exception {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("HeaderFooterProblematic.doc");
|
|
HeaderStories hs = new HeaderStories(doc);
|
|
|
|
assertEquals("", hs.getFirstHeader());
|
|
assertEquals("\r", hs.getEvenHeader());
|
|
assertEquals("", hs.getOddHeader());
|
|
|
|
assertEquals("", hs.getFirstFooter());
|
|
assertEquals("", hs.getEvenFooter());
|
|
assertEquals("", hs.getOddFooter());
|
|
|
|
WordExtractor ext = new WordExtractor(doc);
|
|
assertEquals("\n", ext.getHeaderText());
|
|
assertEquals("", ext.getFooterText());
|
|
}
|
|
|
|
/**
|
|
* Bug #45877 - problematic PAPX with no parent set
|
|
*/
|
|
public void testParagraphPAPXNoParent45877() throws Exception {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug45877.doc");
|
|
assertEquals(17, doc.getRange().numParagraphs());
|
|
|
|
assertEquals("First paragraph\r", doc.getRange().getParagraph(0).text());
|
|
assertEquals("After Crashing Part\r", doc.getRange().getParagraph(13).text());
|
|
}
|
|
|
|
/**
|
|
* Bug #48245 - don't include the text from the
|
|
* next cell in the current one
|
|
*/
|
|
public void testTableIterator() throws Exception {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("simple-table2.doc");
|
|
Range r = doc.getRange();
|
|
|
|
// Check the text is as we'd expect
|
|
assertEquals(13, r.numParagraphs());
|
|
assertEquals("Row 1/Cell 1\u0007", r.getParagraph(0).text());
|
|
assertEquals("Row 1/Cell 2\u0007", r.getParagraph(1).text());
|
|
assertEquals("Row 1/Cell 3\u0007", r.getParagraph(2).text());
|
|
assertEquals("\u0007", r.getParagraph(3).text());
|
|
assertEquals("Row 2/Cell 1\u0007", r.getParagraph(4).text());
|
|
assertEquals("Row 2/Cell 2\u0007", r.getParagraph(5).text());
|
|
assertEquals("Row 2/Cell 3\u0007", r.getParagraph(6).text());
|
|
assertEquals("\u0007", r.getParagraph(7).text());
|
|
assertEquals("Row 3/Cell 1\u0007", r.getParagraph(8).text());
|
|
assertEquals("Row 3/Cell 2\u0007", r.getParagraph(9).text());
|
|
assertEquals("Row 3/Cell 3\u0007", r.getParagraph(10).text());
|
|
assertEquals("\u0007", r.getParagraph(11).text());
|
|
assertEquals("\r", r.getParagraph(12).text());
|
|
for(int i=0; i<=12; i++) {
|
|
assertFalse(r.getParagraph(i).usesUnicode());
|
|
}
|
|
|
|
Paragraph p;
|
|
|
|
// Take a look in detail at the first couple of
|
|
// paragraphs
|
|
p = r.getParagraph(0);
|
|
assertEquals(1, p.numParagraphs());
|
|
assertEquals(0, p.getStartOffset());
|
|
assertEquals(13, p.getEndOffset());
|
|
assertEquals(0, p._parStart);
|
|
assertEquals(1, p._parEnd);
|
|
|
|
p = r.getParagraph(1);
|
|
assertEquals(1, p.numParagraphs());
|
|
assertEquals(13, p.getStartOffset());
|
|
assertEquals(26, p.getEndOffset());
|
|
assertEquals(1, p._parStart);
|
|
assertEquals(2, p._parEnd);
|
|
|
|
p = r.getParagraph(2);
|
|
assertEquals(1, p.numParagraphs());
|
|
assertEquals(26, p.getStartOffset());
|
|
assertEquals(39, p.getEndOffset());
|
|
assertEquals(2, p._parStart);
|
|
assertEquals(3, p._parEnd);
|
|
|
|
|
|
// Now look at the table
|
|
Table table = r.getTable(r.getParagraph(0));
|
|
assertEquals(3, table.numRows());
|
|
|
|
TableRow row;
|
|
TableCell cell;
|
|
|
|
|
|
row = table.getRow(0);
|
|
assertEquals(0, row._parStart);
|
|
assertEquals(4, row._parEnd);
|
|
|
|
cell = row.getCell(0);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(0, cell._parStart);
|
|
assertEquals(1, cell._parEnd);
|
|
assertEquals(0, cell.getStartOffset());
|
|
assertEquals(13, cell.getEndOffset());
|
|
assertEquals("Row 1/Cell 1\u0007", cell.text());
|
|
assertEquals("Row 1/Cell 1\u0007", cell.getParagraph(0).text());
|
|
|
|
cell = row.getCell(1);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(1, cell._parStart);
|
|
assertEquals(2, cell._parEnd);
|
|
assertEquals(13, cell.getStartOffset());
|
|
assertEquals(26, cell.getEndOffset());
|
|
assertEquals("Row 1/Cell 2\u0007", cell.text());
|
|
assertEquals("Row 1/Cell 2\u0007", cell.getParagraph(0).text());
|
|
|
|
cell = row.getCell(2);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(2, cell._parStart);
|
|
assertEquals(3, cell._parEnd);
|
|
assertEquals(26, cell.getStartOffset());
|
|
assertEquals(39, cell.getEndOffset());
|
|
assertEquals("Row 1/Cell 3\u0007", cell.text());
|
|
assertEquals("Row 1/Cell 3\u0007", cell.getParagraph(0).text());
|
|
|
|
|
|
// Onto row #2
|
|
row = table.getRow(1);
|
|
assertEquals(4, row._parStart);
|
|
assertEquals(8, row._parEnd);
|
|
|
|
cell = row.getCell(0);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(4, cell._parStart);
|
|
assertEquals(5, cell._parEnd);
|
|
assertEquals(40, cell.getStartOffset());
|
|
assertEquals(53, cell.getEndOffset());
|
|
assertEquals("Row 2/Cell 1\u0007", cell.text());
|
|
|
|
cell = row.getCell(1);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(5, cell._parStart);
|
|
assertEquals(6, cell._parEnd);
|
|
assertEquals(53, cell.getStartOffset());
|
|
assertEquals(66, cell.getEndOffset());
|
|
assertEquals("Row 2/Cell 2\u0007", cell.text());
|
|
|
|
cell = row.getCell(2);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(6, cell._parStart);
|
|
assertEquals(7, cell._parEnd);
|
|
assertEquals(66, cell.getStartOffset());
|
|
assertEquals(79, cell.getEndOffset());
|
|
assertEquals("Row 2/Cell 3\u0007", cell.text());
|
|
|
|
|
|
// Finally row 3
|
|
row = table.getRow(2);
|
|
assertEquals(8, row._parStart);
|
|
assertEquals(12, row._parEnd);
|
|
|
|
cell = row.getCell(0);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(8, cell._parStart);
|
|
assertEquals(9, cell._parEnd);
|
|
assertEquals(80, cell.getStartOffset());
|
|
assertEquals(93, cell.getEndOffset());
|
|
assertEquals("Row 3/Cell 1\u0007", cell.text());
|
|
|
|
cell = row.getCell(1);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(9, cell._parStart);
|
|
assertEquals(10, cell._parEnd);
|
|
assertEquals(93, cell.getStartOffset());
|
|
assertEquals(106, cell.getEndOffset());
|
|
assertEquals("Row 3/Cell 2\u0007", cell.text());
|
|
|
|
cell = row.getCell(2);
|
|
assertEquals(1, cell.numParagraphs());
|
|
assertEquals(10, cell._parStart);
|
|
assertEquals(11, cell._parEnd);
|
|
assertEquals(106, cell.getStartOffset());
|
|
assertEquals(119, cell.getEndOffset());
|
|
assertEquals("Row 3/Cell 3\u0007", cell.text());
|
|
}
|
|
|
|
static void fixed(String bugzillaId) {
|
|
throw new Error("Bug " + bugzillaId + " seems to be fixed. " +
|
|
"Please resolve the issue in Bugzilla and remove fail() from the test");
|
|
}
|
|
|
|
/**
|
|
* Bug 33519 - HWPF fails to read a file
|
|
*/
|
|
public void test33519() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug33519.doc");
|
|
WordExtractor extractor = new WordExtractor(doc);
|
|
String text = extractor.getText();
|
|
}
|
|
|
|
/**
|
|
* Bug 34898 - WordExtractor doesn't read the whole string from the file
|
|
*/
|
|
public void test34898() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug34898.doc");
|
|
WordExtractor extractor = new WordExtractor(doc);
|
|
assertEquals("\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", extractor.getText().trim());
|
|
}
|
|
|
|
/**
|
|
* [FAILING] Bug 44331 - Output is corrupted
|
|
*/
|
|
public void test44431() {
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug44431.doc");
|
|
WordExtractor extractor1 = new WordExtractor(doc1);
|
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
|
WordExtractor extractor2 = new WordExtractor(doc2);
|
|
try {
|
|
assertEquals(extractor1.getFooterText(), extractor2.getFooterText());
|
|
assertEquals(extractor1.getHeaderText(), extractor2.getHeaderText());
|
|
assertEquals(extractor1.getParagraphText(), extractor2.getParagraphText());
|
|
|
|
assertEquals(extractor1.getText(), extractor2.getText());
|
|
|
|
fixed("44431");
|
|
} catch (AssertionFailedError e) {
|
|
// expected exception
|
|
}
|
|
}
|
|
|
|
/**
|
|
* [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
|
|
* missing
|
|
*/
|
|
public void test46817()
|
|
{
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug46817.doc" );
|
|
WordExtractor extractor = new WordExtractor( doc );
|
|
String text = extractor.getText().trim();
|
|
|
|
assertTrue( text.contains( "Nazwa wykonawcy" ) );
|
|
assertTrue( text.contains( "kujawsko-pomorskie" ) );
|
|
assertTrue( text.contains( "ekomel@ekomel.com.pl" ) );
|
|
}
|
|
|
|
/**
|
|
* Bug 46220 - images are not properly extracted
|
|
*/
|
|
public void test46220() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
|
|
// reference checksums as in Bugzilla
|
|
String[] md5 = {
|
|
"851be142bce6d01848e730cb6903f39e",
|
|
"7fc6d8fb58b09ababd036d10a0e8c039",
|
|
"a7dc644c40bc2fbf17b2b62d07f99248",
|
|
"72d07b8db5fad7099d90bc4c304b4666"
|
|
};
|
|
List<Picture> pics = doc.getPicturesTable().getAllPictures();
|
|
assertEquals(4, pics.size());
|
|
for (int i = 0; i < pics.size(); i++) {
|
|
Picture pic = pics.get(i);
|
|
byte[] data = pic.getRawContent();
|
|
// use Apache Commons Codec utils to compute md5
|
|
assertEquals(md5[i], DigestUtils.md5Hex(data));
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Bug 45473 - HWPF cannot read file after save
|
|
*/
|
|
public void test45473() {
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
|
|
String text1 = new WordExtractor(doc1).getText().trim();
|
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
|
String text2 = new WordExtractor(doc2).getText().trim();
|
|
|
|
// the text in the saved document has some differences in line separators but we tolerate that
|
|
assertEquals(text1.replaceAll("\n", ""), text2.replaceAll("\n", ""));
|
|
}
|
|
|
|
/**
|
|
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
|
|
* CharacterRun.replaceText()
|
|
*/
|
|
public void test47287()
|
|
{
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "Bug47287.doc" );
|
|
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
|
|
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
|
|
int usedVal = 0;
|
|
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
|
|
Range r = doc.getRange();
|
|
for ( int x = 0; x < r.numSections(); x++ )
|
|
{
|
|
Section s = r.getSection( x );
|
|
for ( int y = 0; y < s.numParagraphs(); y++ )
|
|
{
|
|
Paragraph p = s.getParagraph( y );
|
|
|
|
for ( int z = 0; z < p.numCharacterRuns(); z++ )
|
|
{
|
|
boolean isFound = false;
|
|
|
|
// character run
|
|
CharacterRun run = p.getCharacterRun( z );
|
|
// character run text
|
|
String text = run.text();
|
|
String oldText = text;
|
|
int c = text.indexOf( "FORMTEXT " );
|
|
if ( c < 0 )
|
|
{
|
|
int k = text.indexOf( PLACEHOLDER );
|
|
if ( k >= 0 )
|
|
{
|
|
text = text.substring( 0, k ) + values[usedVal]
|
|
+ text.substring( k + PLACEHOLDER.length() );
|
|
usedVal++;
|
|
isFound = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for ( ; c >= 0; c = text.indexOf( "FORMTEXT ", c
|
|
+ "FORMTEXT ".length() ) )
|
|
{
|
|
int k = text.indexOf( PLACEHOLDER, c );
|
|
if ( k >= 0 )
|
|
{
|
|
text = text.substring( 0, k )
|
|
+ values[usedVal]
|
|
+ text.substring( k
|
|
+ PLACEHOLDER.length() );
|
|
usedVal++;
|
|
isFound = true;
|
|
}
|
|
}
|
|
}
|
|
if ( isFound )
|
|
{
|
|
run.replaceText( oldText, text, 0 );
|
|
}
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
String docText = r.text();
|
|
|
|
assertTrue( docText.contains( "1-1" ) );
|
|
assertTrue( docText.contains( "1-12" ) );
|
|
|
|
assertFalse( docText.contains( "1-13" ) );
|
|
assertFalse( docText.contains( "1-15" ) );
|
|
}
|
|
|
|
private static void insertTable( int rows, int columns )
|
|
{
|
|
// POI apparently can't create a document from scratch,
|
|
// so we need an existing empty dummy document
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile( "empty.doc" );
|
|
|
|
Range range = doc.getRange();
|
|
Table table = range.insertBefore(
|
|
new TableProperties( (short) columns ), rows );
|
|
table.sanityCheck();
|
|
range.sanityCheck();
|
|
|
|
for ( int rowIdx = 0; rowIdx < table.numRows(); rowIdx++ )
|
|
{
|
|
TableRow row = table.getRow( rowIdx );
|
|
row.sanityCheck();
|
|
for ( int colIdx = 0; colIdx < row.numCells(); colIdx++ )
|
|
{
|
|
TableCell cell = row.getCell( colIdx );
|
|
cell.sanityCheck();
|
|
|
|
Paragraph par = cell.getParagraph( 0 );
|
|
par.sanityCheck();
|
|
|
|
par.insertBefore( "" + ( rowIdx * row.numCells() + colIdx ) );
|
|
|
|
par.sanityCheck();
|
|
cell.sanityCheck();
|
|
row.sanityCheck();
|
|
table.sanityCheck();
|
|
range.sanityCheck();
|
|
}
|
|
}
|
|
|
|
String text = range.text();
|
|
int mustBeAfter = 0;
|
|
for ( int i = 0; i < rows * columns; i++ )
|
|
{
|
|
int next = text.indexOf( Integer.toString( i ), mustBeAfter );
|
|
assertFalse( next == -1 );
|
|
mustBeAfter = next;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* [RESOLVED FIXED] Bug 47563 - Exception when working with table
|
|
*/
|
|
public void test47563()
|
|
{
|
|
insertTable( 1, 5 );
|
|
insertTable( 1, 6 );
|
|
insertTable( 5, 1 );
|
|
insertTable( 6, 1 );
|
|
insertTable( 2, 2 );
|
|
insertTable( 3, 2 );
|
|
insertTable( 2, 3 );
|
|
insertTable( 3, 3 );
|
|
}
|
|
|
|
/**
|
|
* Bug 4774 - text extracted by WordExtractor is broken
|
|
*/
|
|
public void test47742() throws Exception {
|
|
|
|
// (1) extract text from MS Word document via POI
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47742.doc");
|
|
String foundText = new WordExtractor(doc).getText();
|
|
|
|
// (2) read text from text document (retrieved by saving the word
|
|
// document as text file using encoding UTF-8)
|
|
InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("Bug47742-text.txt");
|
|
byte[] expectedBytes = IOUtils.toByteArray(is);
|
|
String expectedText = new String(expectedBytes, "utf-8").substring(1); // strip-off the unicode marker
|
|
|
|
assertEquals(expectedText, foundText);
|
|
}
|
|
|
|
/**
|
|
* [FAILING] Bug 47958 - Exception during Escher walk of pictures
|
|
*/
|
|
public void test47958() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
|
|
try {
|
|
for (Picture pic : doc.getPicturesTable().getAllPictures()) {
|
|
System.out.println(pic.suggestFullFileName());
|
|
}
|
|
fixed("47958");
|
|
} catch (Exception e) {
|
|
// expected exception
|
|
}
|
|
}
|
|
|
|
/**
|
|
* [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
|
|
* formatting)
|
|
*/
|
|
public void test48065()
|
|
{
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile( "Bug48065.doc" );
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack( doc1 );
|
|
|
|
Range expected = doc1.getRange();
|
|
Range actual = doc2.getRange();
|
|
|
|
assertEquals(
|
|
expected.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ),
|
|
actual.text().replace( "\r", "\n" ).replaceAll( "\n\n", "\n" ) );
|
|
|
|
assertEquals( expected.numParagraphs(), actual.numParagraphs() );
|
|
for ( int p = 0; p < expected.numParagraphs(); p++ )
|
|
{
|
|
Paragraph expParagraph = expected.getParagraph( p );
|
|
Paragraph actParagraph = actual.getParagraph( p );
|
|
|
|
assertEquals( expParagraph.text(), actParagraph.text() );
|
|
assertEquals( expParagraph.isInTable(), actParagraph.isInTable() );
|
|
assertEquals( expParagraph.isTableRowEnd(),
|
|
actParagraph.isTableRowEnd() );
|
|
|
|
if ( expParagraph.isInTable() && actParagraph.isInTable() )
|
|
{
|
|
Table expTable, actTable;
|
|
try
|
|
{
|
|
expTable = expected.getTable( expParagraph );
|
|
actTable = actual.getTable( actParagraph );
|
|
}
|
|
catch ( Exception exc )
|
|
{
|
|
continue;
|
|
}
|
|
|
|
assertEquals( expTable.numRows(), actTable.numRows() );
|
|
assertEquals( expTable.numParagraphs(),
|
|
actTable.numParagraphs() );
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Bug 50936 - HWPF fails to read a file
|
|
*/
|
|
public void test50936() {
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug50936.doc");
|
|
}
|
|
|
|
/**
|
|
* [FAILING] Bug 50955 - error while retrieving the text file
|
|
*/
|
|
public void test50955() {
|
|
try {
|
|
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug50955.doc");
|
|
Word6Extractor extractor = new Word6Extractor(doc);
|
|
String text = extractor.getText();
|
|
fixed("50955");
|
|
} catch (Exception e) {
|
|
// expected exception
|
|
}
|
|
}
|
|
|
|
}
|