2011-08-09 00:46:36 -04:00
|
|
|
/* ====================================================================
|
|
|
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
|
|
|
contributor license agreements. See the NOTICE file distributed with
|
|
|
|
this work for additional information regarding copyright ownership.
|
|
|
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
|
|
|
(the "License"); you may not use this file except in compliance with
|
|
|
|
the License. You may obtain a copy of the License at
|
|
|
|
|
|
|
|
http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
|
|
|
|
Unless required by applicable law or agreed to in writing, software
|
|
|
|
distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
See the License for the specific language governing permissions and
|
|
|
|
limitations under the License.
|
|
|
|
==================================================================== */
|
|
|
|
package org.apache.poi.hwpf.usermodel;
|
|
|
|
|
2011-08-17 10:53:28 -04:00
|
|
|
import java.io.ByteArrayOutputStream;
|
2011-08-11 12:37:31 -04:00
|
|
|
import java.io.FileNotFoundException;
|
2016-01-25 15:21:30 -05:00
|
|
|
import java.io.FileOutputStream;
|
2011-08-09 00:46:36 -04:00
|
|
|
import java.io.IOException;
|
|
|
|
import java.io.InputStream;
|
2016-01-25 15:21:30 -05:00
|
|
|
import java.io.OutputStream;
|
2011-08-09 00:46:36 -04:00
|
|
|
import java.util.Arrays;
|
2011-08-09 09:04:28 -04:00
|
|
|
import java.util.Collection;
|
2011-08-09 00:46:36 -04:00
|
|
|
import java.util.List;
|
|
|
|
|
|
|
|
import org.apache.commons.codec.digest.DigestUtils;
|
|
|
|
import org.apache.poi.POIDataSamples;
|
|
|
|
import org.apache.poi.hwpf.HWPFDocument;
|
|
|
|
import org.apache.poi.hwpf.HWPFOldDocument;
|
|
|
|
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
2015-01-03 04:34:07 -05:00
|
|
|
import org.apache.poi.hwpf.converter.AbstractWordUtils;
|
2014-10-09 13:58:47 -04:00
|
|
|
import org.apache.poi.hwpf.converter.WordToTextConverter;
|
2011-08-09 00:46:36 -04:00
|
|
|
import org.apache.poi.hwpf.extractor.Word6Extractor;
|
|
|
|
import org.apache.poi.hwpf.extractor.WordExtractor;
|
|
|
|
import org.apache.poi.hwpf.model.FieldsDocumentPart;
|
2011-08-11 12:37:31 -04:00
|
|
|
import org.apache.poi.hwpf.model.FileInformationBlock;
|
2016-01-25 15:21:30 -05:00
|
|
|
import org.apache.poi.hwpf.model.PicturesTable;
|
2011-08-09 00:46:36 -04:00
|
|
|
import org.apache.poi.hwpf.model.PlexOfField;
|
2011-08-11 12:37:31 -04:00
|
|
|
import org.apache.poi.hwpf.model.SubdocumentType;
|
|
|
|
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
2011-09-30 11:49:19 -04:00
|
|
|
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
|
2011-08-09 00:46:36 -04:00
|
|
|
import org.apache.poi.util.IOUtils;
|
2011-09-30 11:49:19 -04:00
|
|
|
import org.apache.poi.util.POILogFactory;
|
|
|
|
import org.apache.poi.util.POILogger;
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2016-01-25 15:21:30 -05:00
|
|
|
import junit.framework.TestCase;
|
|
|
|
|
2011-08-09 00:46:36 -04:00
|
|
|
/**
|
2016-06-19 18:13:41 -04:00
|
|
|
* Test different problems reported in the Apache Bugzilla
|
|
|
|
* against HWPF
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
|
|
|
public class TestBugs extends TestCase
|
|
|
|
{
|
2011-09-30 11:49:19 -04:00
|
|
|
private static final POILogger logger = POILogFactory
|
2015-01-03 04:34:07 -05:00
|
|
|
.getLogger(TestBugs.class);
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
public static void assertEqualsIgnoreNewline(String expected, String actual )
|
2011-08-09 08:38:52 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
String newExpected = expected.replaceAll("\r\n", "\n" )
|
|
|
|
.replaceAll("\r", "\n").trim();
|
|
|
|
String newActual = actual.replaceAll("\r\n", "\n" )
|
|
|
|
.replaceAll("\r", "\n").trim();
|
|
|
|
TestCase.assertEquals(newExpected, newActual);
|
2011-08-09 08:38:52 -04:00
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
private static void assertTableStructures(Range expected, Range actual )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEquals(expected.numParagraphs(), actual.numParagraphs());
|
|
|
|
for (int p = 0; p < expected.numParagraphs(); p++ )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Paragraph expParagraph = expected.getParagraph(p);
|
|
|
|
Paragraph actParagraph = actual.getParagraph(p);
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(expParagraph.text(), actParagraph.text());
|
|
|
|
assertEquals("Diffent isInTable flags for paragraphs #" + p
|
2011-08-09 00:46:36 -04:00
|
|
|
+ " -- " + expParagraph + " -- " + actParagraph + ".",
|
2015-01-03 04:34:07 -05:00
|
|
|
expParagraph.isInTable(), actParagraph.isInTable());
|
|
|
|
assertEquals(expParagraph.isTableRowEnd(),
|
|
|
|
actParagraph.isTableRowEnd());
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
if (expParagraph.isInTable() && actParagraph.isInTable() )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
|
|
|
Table expTable, actTable;
|
|
|
|
try
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
expTable = expected.getTable(expParagraph);
|
|
|
|
actTable = actual.getTable(actParagraph);
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
2015-01-03 04:34:07 -05:00
|
|
|
catch (Exception exc )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEquals(expTable.numRows(), actTable.numRows());
|
|
|
|
assertEquals(expTable.numParagraphs(),
|
|
|
|
actTable.numParagraphs());
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
private static void fixed(String bugzillaId )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
|
|
|
throw new Error(
|
|
|
|
"Bug "
|
|
|
|
+ bugzillaId
|
|
|
|
+ " seems to be fixed. "
|
2015-01-03 04:34:07 -05:00
|
|
|
+ "Please resolve the issue in Bugzilla and remove fail() from the test");
|
|
|
|
}
|
|
|
|
|
|
|
|
private String getText(String samplefile) throws IOException {
|
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile(samplefile);
|
|
|
|
WordExtractor extractor = new WordExtractor(doc);
|
|
|
|
try {
|
|
|
|
return extractor.getText();
|
|
|
|
} finally {
|
|
|
|
extractor.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
private String getTextOldFile(String samplefile) throws IOException {
|
|
|
|
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile(samplefile);
|
|
|
|
Word6Extractor extractor = new Word6Extractor(doc);
|
|
|
|
try {
|
|
|
|
return extractor.getText();
|
|
|
|
} finally {
|
|
|
|
extractor.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 33519 - HWPF fails to read a file
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test33519() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug33519.doc"));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
2011-08-09 08:50:15 -04:00
|
|
|
/**
|
2011-08-09 09:04:28 -04:00
|
|
|
* Bug 34898 - WordExtractor doesn't read the whole string from the file
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 08:50:15 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test34898() throws IOException
|
2011-08-09 09:04:28 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline("\u30c7\u30a3\u30ec\u30af\u30c8\u30ea", getText("Bug34898.doc").trim());
|
2011-08-09 09:04:28 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED INVALID] 41898 - Word 2003 pictures cannot be extracted
|
|
|
|
*/
|
|
|
|
public void test41898()
|
2011-08-09 08:50:15 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug41898.doc");
|
2011-08-09 08:50:15 -04:00
|
|
|
List<Picture> pics = doc.getPicturesTable().getAllPictures();
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(pics);
|
|
|
|
assertEquals(1, pics.size());
|
2011-08-09 08:50:15 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
Picture pic = pics.get(0);
|
|
|
|
assertNotNull(pic.suggestFileExtension());
|
|
|
|
assertNotNull(pic.suggestFullFileName());
|
2011-08-09 08:50:15 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(pic.getContent());
|
|
|
|
assertNotNull(pic.getRawContent());
|
2011-08-09 08:50:15 -04:00
|
|
|
|
2011-08-09 09:04:28 -04:00
|
|
|
/*
|
|
|
|
* This is a file with empty EMF image, but present Office Drawing
|
|
|
|
* --sergey
|
|
|
|
*/
|
|
|
|
final Collection<OfficeDrawing> officeDrawings = doc
|
|
|
|
.getOfficeDrawingsMain().getOfficeDrawings();
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(officeDrawings);
|
|
|
|
assertEquals(1, officeDrawings.size());
|
2011-08-09 09:04:28 -04:00
|
|
|
|
|
|
|
OfficeDrawing officeDrawing = officeDrawings.iterator().next();
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(officeDrawing);
|
|
|
|
assertEquals(1044, officeDrawing.getShapeId());
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 44331 - HWPFDocument.write destroys fields
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2016-01-25 15:21:30 -05:00
|
|
|
@SuppressWarnings("deprecation")
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test44431() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug44431.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
WordExtractor extractor1 = new WordExtractor(doc1);
|
|
|
|
try {
|
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
|
|
|
|
|
|
|
WordExtractor extractor2 = new WordExtractor(doc2);
|
|
|
|
try {
|
|
|
|
assertEqualsIgnoreNewline(extractor1.getFooterText(), extractor2.getFooterText());
|
|
|
|
assertEqualsIgnoreNewline(extractor1.getHeaderText(), extractor2.getHeaderText());
|
|
|
|
assertEqualsIgnoreNewline(Arrays.toString(extractor1.getParagraphText() ),
|
|
|
|
Arrays.toString(extractor2.getParagraphText()));
|
|
|
|
|
|
|
|
assertEqualsIgnoreNewline(extractor1.getText(), extractor2.getText());
|
|
|
|
} finally {
|
|
|
|
extractor2.close();
|
|
|
|
}
|
|
|
|
} finally {
|
|
|
|
extractor1.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
2011-09-07 08:12:17 -04:00
|
|
|
/**
|
|
|
|
* Bug 44331 - HWPFDocument.write destroys fields
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-09-07 08:12:17 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test44431_2() throws IOException
|
2011-09-07 08:12:17 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline("File name=FieldsTest.doc\n" +
|
2011-09-07 08:12:17 -04:00
|
|
|
"\n" +
|
|
|
|
"\n" +
|
|
|
|
"STYLEREF test\n" +
|
|
|
|
"\n" +
|
|
|
|
"\n" +
|
|
|
|
"\n" +
|
|
|
|
"TEST TABLE OF CONTENTS\n" +
|
|
|
|
"\n" +
|
|
|
|
"Heading paragraph in next page\t2\n" +
|
|
|
|
"Another heading paragraph in further page\t3\n" +
|
|
|
|
"Another heading paragraph in further page\t3\n" +
|
|
|
|
"\n" +
|
|
|
|
"\n" +
|
|
|
|
"Heading paragraph in next page\n" +
|
|
|
|
"Another heading paragraph in further page\n" +
|
|
|
|
"\n" +
|
|
|
|
"\n" +
|
|
|
|
"\n" +
|
2015-01-03 04:34:07 -05:00
|
|
|
"Page 3 of 3", getText("Bug44431.doc"));
|
2011-09-07 08:12:17 -04:00
|
|
|
}
|
|
|
|
|
2011-08-09 00:46:36 -04:00
|
|
|
/**
|
|
|
|
* Bug 45473 - HWPF cannot read file after save
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test45473() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2016-06-19 18:13:41 -04:00
|
|
|
// Fetch the current text
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
|
|
|
|
WordExtractor wordExtractor = new WordExtractor(doc1);
|
|
|
|
final String text1;
|
|
|
|
try {
|
|
|
|
text1 = wordExtractor.getText().trim();
|
|
|
|
} finally {
|
|
|
|
wordExtractor.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2016-06-19 18:13:41 -04:00
|
|
|
// Re-load, then re-save and re-check
|
|
|
|
doc1 = HWPFTestDataSamples.openSampleFile("Bug45473.doc");
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
|
|
|
WordExtractor wordExtractor2 = new WordExtractor(doc2);
|
|
|
|
final String text2;
|
|
|
|
try {
|
|
|
|
text2 = wordExtractor2.getText().trim();
|
|
|
|
} finally {
|
|
|
|
wordExtractor2.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
// the text in the saved document has some differences in line
|
|
|
|
// separators but we tolerate that
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(text1.replaceAll("\n", "" ), text2.replaceAll("\n", ""));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 46220 - images are not properly extracted
|
|
|
|
*/
|
|
|
|
public void test46220()
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug46220.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
// reference checksums as in Bugzilla
|
|
|
|
String[] md5 = { "851be142bce6d01848e730cb6903f39e",
|
|
|
|
"7fc6d8fb58b09ababd036d10a0e8c039",
|
|
|
|
"a7dc644c40bc2fbf17b2b62d07f99248",
|
|
|
|
"72d07b8db5fad7099d90bc4c304b4666" };
|
|
|
|
List<Picture> pics = doc.getPicturesTable().getAllPictures();
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEquals(4, pics.size());
|
|
|
|
for (int i = 0; i < pics.size(); i++ )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Picture pic = pics.get(i);
|
2011-08-09 00:46:36 -04:00
|
|
|
byte[] data = pic.getRawContent();
|
|
|
|
// use Apache Commons Codec utils to compute md5
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(md5[i], DigestUtils.md5Hex(data));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED FIXED] Bug 46817 - Regression: Text from some table cells
|
|
|
|
* missing
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test46817() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
String text = getText("Bug46817.doc").trim();
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertTrue(text.contains("Nazwa wykonawcy"));
|
|
|
|
assertTrue(text.contains("kujawsko-pomorskie"));
|
|
|
|
assertTrue(text.contains("ekomel@ekomel.com.pl"));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [FAILING] Bug 47286 - Word documents saves in wrong format if source
|
|
|
|
* contains form elements
|
|
|
|
*
|
|
|
|
* @throws IOException
|
|
|
|
*/
|
2016-01-25 15:21:30 -05:00
|
|
|
@SuppressWarnings("deprecation")
|
2011-08-09 00:46:36 -04:00
|
|
|
public void test47286() throws IOException
|
|
|
|
{
|
2016-06-19 18:13:41 -04:00
|
|
|
// Fetch the current text
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug47286.doc");
|
|
|
|
WordExtractor wordExtractor = new WordExtractor(doc1);
|
|
|
|
final String text1;
|
|
|
|
try {
|
|
|
|
text1 = wordExtractor.getText().trim();
|
|
|
|
} finally {
|
|
|
|
wordExtractor.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2016-06-19 18:13:41 -04:00
|
|
|
// Re-load, then re-save and re-check
|
|
|
|
doc1 = HWPFTestDataSamples.openSampleFile("Bug47286.doc");
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
|
|
|
WordExtractor wordExtractor2 = new WordExtractor(doc2);
|
|
|
|
final String text2;
|
|
|
|
try {
|
|
|
|
text2 = wordExtractor2.getText().trim();
|
|
|
|
} finally {
|
|
|
|
wordExtractor2.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
// the text in the saved document has some differences in line
|
|
|
|
// separators but we tolerate that
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(text1.replaceAll("\n", "" ), text2.replaceAll("\n", ""));
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEquals(doc1.getCharacterTable().getTextRuns().size(), doc2
|
|
|
|
.getCharacterTable().getTextRuns().size());
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
List<PlexOfField> expectedFields = doc1.getFieldsTables()
|
2015-01-03 04:34:07 -05:00
|
|
|
.getFieldsPLCF(FieldsDocumentPart.MAIN);
|
2011-08-09 00:46:36 -04:00
|
|
|
List<PlexOfField> actualFields = doc2.getFieldsTables().getFieldsPLCF(
|
2015-01-03 04:34:07 -05:00
|
|
|
FieldsDocumentPart.MAIN);
|
|
|
|
assertEquals(expectedFields.size(), actualFields.size());
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertTableStructures(doc1.getRange(), doc2.getRange());
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED FIXED] Bug 47287 - StringIndexOutOfBoundsException in
|
|
|
|
* CharacterRun.replaceText()
|
|
|
|
*/
|
|
|
|
public void test47287()
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47287.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
String[] values = { "1-1", "1-2", "1-3", "1-4", "1-5", "1-6", "1-7",
|
|
|
|
"1-8", "1-9", "1-10", "1-11", "1-12", "1-13", "1-14", "1-15", };
|
|
|
|
int usedVal = 0;
|
|
|
|
String PLACEHOLDER = "\u2002\u2002\u2002\u2002\u2002";
|
|
|
|
Range r = doc.getRange();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int x = 0; x < r.numSections(); x++ )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Section s = r.getSection(x);
|
|
|
|
for (int y = 0; y < s.numParagraphs(); y++ )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Paragraph p = s.getParagraph(y);
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int z = 0; z < p.numCharacterRuns(); z++ )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
|
|
|
boolean isFound = false;
|
|
|
|
|
|
|
|
// character run
|
2015-01-03 04:34:07 -05:00
|
|
|
CharacterRun run = p.getCharacterRun(z);
|
2011-08-09 00:46:36 -04:00
|
|
|
// character run text
|
|
|
|
String text = run.text();
|
|
|
|
String oldText = text;
|
2015-01-03 04:34:07 -05:00
|
|
|
int c = text.indexOf("FORMTEXT ");
|
|
|
|
if (c < 0 )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
int k = text.indexOf(PLACEHOLDER);
|
|
|
|
if (k >= 0 )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
text = text.substring(0, k ) + values[usedVal]
|
|
|
|
+ text.substring(k + PLACEHOLDER.length());
|
2011-08-09 00:46:36 -04:00
|
|
|
usedVal++;
|
|
|
|
isFound = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
for (; c >= 0; c = text.indexOf("FORMTEXT ", c
|
2011-08-09 00:46:36 -04:00
|
|
|
+ "FORMTEXT ".length() ) )
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
int k = text.indexOf(PLACEHOLDER, c);
|
|
|
|
if (k >= 0 )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
text = text.substring(0, k )
|
2011-08-09 00:46:36 -04:00
|
|
|
+ values[usedVal]
|
2015-01-03 04:34:07 -05:00
|
|
|
+ text.substring(k
|
|
|
|
+ PLACEHOLDER.length());
|
2011-08-09 00:46:36 -04:00
|
|
|
usedVal++;
|
|
|
|
isFound = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-01-03 04:34:07 -05:00
|
|
|
if (isFound )
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
run.replaceText(oldText, text, 0);
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
String docText = r.text();
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertTrue(docText.contains("1-1"));
|
|
|
|
assertTrue(docText.contains("1-12"));
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertFalse(docText.contains("1-13"));
|
|
|
|
assertFalse(docText.contains("1-15"));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-08-09 08:42:34 -04:00
|
|
|
* [RESOLVED FIXED] Bug 47731 - Word Extractor considers text copied from
|
|
|
|
* some website as an embedded object
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
|
|
|
public void test47731() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
String foundText = getText("Bug47731.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertTrue(foundText
|
|
|
|
.contains("Soak the rice in water for three to four hours"));
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 4774 - text extracted by WordExtractor is broken
|
|
|
|
*/
|
|
|
|
public void test47742() throws Exception
|
|
|
|
{
|
|
|
|
// (1) extract text from MS Word document via POI
|
2015-01-03 04:34:07 -05:00
|
|
|
String foundText = getText("Bug47742.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
// (2) read text from text document (retrieved by saving the word
|
|
|
|
// document as text file using encoding UTF-8)
|
|
|
|
InputStream is = POIDataSamples.getDocumentInstance()
|
2015-01-03 04:34:07 -05:00
|
|
|
.openResourceAsStream("Bug47742-text.txt");
|
2014-06-11 16:36:30 -04:00
|
|
|
try {
|
2015-01-03 04:34:07 -05:00
|
|
|
byte[] expectedBytes = IOUtils.toByteArray(is);
|
|
|
|
String expectedText = new String(expectedBytes, "utf-8" )
|
|
|
|
.substring(1); // strip-off the unicode marker
|
2014-06-11 16:36:30 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(expectedText, foundText);
|
2014-06-11 16:36:30 -04:00
|
|
|
} finally {
|
|
|
|
is.close();
|
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-10-29 20:33:44 -04:00
|
|
|
* Bug 47958 - Exception during Escher walk of pictures
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
|
|
|
public void test47958()
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug47958.doc");
|
2011-10-29 20:33:44 -04:00
|
|
|
doc.getPicturesTable().getAllPictures();
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED FIXED] Bug 48065 - Problems with save output of HWPF (losing
|
|
|
|
* formatting)
|
|
|
|
*/
|
|
|
|
public void test48065()
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc1 = HWPFTestDataSamples.openSampleFile("Bug48065.doc");
|
|
|
|
HWPFDocument doc2 = HWPFTestDataSamples.writeOutAndReadBack(doc1);
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
Range expected = doc1.getRange();
|
|
|
|
Range actual = doc2.getRange();
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(
|
|
|
|
expected.text().replace("\r", "\n").replaceAll("\n\n", "\n" ),
|
|
|
|
actual.text().replace("\r", "\n").replaceAll("\n\n", "\n"));
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertTableStructures(expected, actual);
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test49933() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
String text = getTextOldFile("Bug49933.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
|
|
|
|
assertTrue( text.contains( "best.wine.jump.ru" ) );
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2011-10-29 20:33:44 -04:00
|
|
|
* Bug 50936 - Exception parsing MS Word 8.0 file
|
|
|
|
*/
|
|
|
|
public void test50936_1()
|
|
|
|
{
|
|
|
|
HWPFDocument hwpfDocument = HWPFTestDataSamples
|
2015-01-03 04:34:07 -05:00
|
|
|
.openSampleFile("Bug50936_1.doc");
|
2011-10-29 20:33:44 -04:00
|
|
|
hwpfDocument.getPicturesTable().getAllPictures();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 50936 - Exception parsing MS Word 8.0 file
|
|
|
|
*/
|
|
|
|
public void test50936_2()
|
|
|
|
{
|
|
|
|
HWPFDocument hwpfDocument = HWPFTestDataSamples
|
2015-01-03 04:34:07 -05:00
|
|
|
.openSampleFile("Bug50936_2.doc");
|
2011-10-29 20:33:44 -04:00
|
|
|
hwpfDocument.getPicturesTable().getAllPictures();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 50936 - Exception parsing MS Word 8.0 file
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2011-10-29 20:33:44 -04:00
|
|
|
public void test50936_3()
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2011-10-29 20:33:44 -04:00
|
|
|
HWPFDocument hwpfDocument = HWPFTestDataSamples
|
2015-01-03 04:34:07 -05:00
|
|
|
.openSampleFile("Bug50936_3.doc");
|
2011-10-29 20:33:44 -04:00
|
|
|
hwpfDocument.getPicturesTable().getAllPictures();
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* [FAILING] Bug 50955 - error while retrieving the text file
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test50955() throws IOException
|
2011-08-09 00:46:36 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
try {
|
|
|
|
getTextOldFile("Bug50955.doc");
|
2011-08-09 00:46:36 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
fixed("50955");
|
|
|
|
} catch (IllegalStateException e) {
|
|
|
|
// expected here
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-08-09 01:17:35 -04:00
|
|
|
/**
|
2015-01-03 04:34:07 -05:00
|
|
|
* [RESOLVED FIXED] Bug 51604 - replace text fails for doc (poi 3.8 beta
|
2011-08-11 09:55:35 -04:00
|
|
|
* release from download site )
|
2011-08-09 01:17:35 -04:00
|
|
|
*/
|
|
|
|
public void test51604()
|
|
|
|
{
|
|
|
|
HWPFDocument document = HWPFTestDataSamples
|
2015-01-03 04:34:07 -05:00
|
|
|
.openSampleFile("Bug51604.doc");
|
2011-08-09 01:17:35 -04:00
|
|
|
|
|
|
|
Range range = document.getRange();
|
|
|
|
int numParagraph = range.numParagraphs();
|
|
|
|
int counter = 0;
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int i = 0; i < numParagraph; i++ )
|
2011-08-09 01:17:35 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Paragraph paragraph = range.getParagraph(i);
|
2011-08-09 01:17:35 -04:00
|
|
|
int numCharRuns = paragraph.numCharacterRuns();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int j = 0; j < numCharRuns; j++ )
|
2011-08-09 01:17:35 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
CharacterRun charRun = paragraph.getCharacterRun(j);
|
2011-08-09 01:17:35 -04:00
|
|
|
String text = charRun.text();
|
2015-01-03 04:34:07 -05:00
|
|
|
charRun.replaceText(text, "+" + (++counter));
|
2011-08-09 01:17:35 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
document = HWPFTestDataSamples.writeOutAndReadBack(document);
|
2011-08-09 01:17:35 -04:00
|
|
|
String text = document.getDocumentText();
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline("+1+2+3+4+5+6+7+8+9+10+11+12", text);
|
2011-08-09 01:17:35 -04:00
|
|
|
}
|
|
|
|
|
2011-08-11 09:55:35 -04:00
|
|
|
/**
|
2015-01-03 04:34:07 -05:00
|
|
|
* [RESOLVED FIXED] Bug 51604 - replace text fails for doc (poi 3.8 beta
|
2011-08-11 09:55:35 -04:00
|
|
|
* release from download site )
|
2011-08-11 12:37:31 -04:00
|
|
|
*
|
|
|
|
* @throws IOException
|
|
|
|
* @throws FileNotFoundException
|
2011-08-11 09:55:35 -04:00
|
|
|
*/
|
2011-08-11 14:49:35 -04:00
|
|
|
public void test51604p2() throws Exception
|
2011-08-11 09:55:35 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51604.doc");
|
2011-08-11 09:55:35 -04:00
|
|
|
|
|
|
|
Range range = doc.getRange();
|
|
|
|
int numParagraph = range.numParagraphs();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int i = 0; i < numParagraph; i++ )
|
2011-08-11 09:55:35 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Paragraph paragraph = range.getParagraph(i);
|
2011-08-11 09:55:35 -04:00
|
|
|
int numCharRuns = paragraph.numCharacterRuns();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int j = 0; j < numCharRuns; j++ )
|
2011-08-11 09:55:35 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
CharacterRun charRun = paragraph.getCharacterRun(j);
|
2011-08-11 09:55:35 -04:00
|
|
|
String text = charRun.text();
|
2015-01-03 04:34:07 -05:00
|
|
|
if (text.contains("Header" ) )
|
|
|
|
charRun.replaceText(text, "added");
|
2011-08-11 09:55:35 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
|
2011-08-11 09:55:35 -04:00
|
|
|
final FileInformationBlock fileInformationBlock = doc
|
|
|
|
.getFileInformationBlock();
|
|
|
|
|
|
|
|
int totalLength = 0;
|
2015-01-03 04:34:07 -05:00
|
|
|
for (SubdocumentType type : SubdocumentType.values() )
|
2011-08-11 09:55:35 -04:00
|
|
|
{
|
|
|
|
final int partLength = fileInformationBlock
|
2015-01-03 04:34:07 -05:00
|
|
|
.getSubdocumentTextStreamLength(type);
|
|
|
|
assert (partLength >= 0);
|
2011-08-11 09:55:35 -04:00
|
|
|
|
|
|
|
totalLength += partLength;
|
|
|
|
}
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEquals(doc.getText().length(), totalLength);
|
2011-08-11 12:37:31 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2015-01-03 04:34:07 -05:00
|
|
|
* [RESOLVED FIXED] Bug 51604 - replace text fails for doc (poi 3.8 beta
|
2011-08-11 12:37:31 -04:00
|
|
|
* release from download site )
|
|
|
|
*/
|
2011-08-11 14:49:35 -04:00
|
|
|
public void test51604p3() throws Exception
|
2011-08-11 12:37:31 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51604.doc");
|
2011-08-11 12:37:31 -04:00
|
|
|
|
|
|
|
byte[] originalData = new byte[doc.getFileInformationBlock()
|
|
|
|
.getLcbDop()];
|
2015-01-03 04:34:07 -05:00
|
|
|
System.arraycopy(doc.getTableStream(), doc.getFileInformationBlock()
|
|
|
|
.getFcDop(), originalData, 0, originalData.length);
|
2011-08-11 12:37:31 -04:00
|
|
|
|
|
|
|
HWPFOutputStream outputStream = new HWPFOutputStream();
|
2015-01-03 04:34:07 -05:00
|
|
|
doc.getDocProperties().writeTo(outputStream);
|
2011-08-11 12:37:31 -04:00
|
|
|
final byte[] oldData = outputStream.toByteArray();
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(Arrays.toString(originalData ),
|
|
|
|
Arrays.toString(oldData));
|
2011-08-11 12:37:31 -04:00
|
|
|
|
|
|
|
Range range = doc.getRange();
|
|
|
|
int numParagraph = range.numParagraphs();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int i = 0; i < numParagraph; i++ )
|
2011-08-11 12:37:31 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
Paragraph paragraph = range.getParagraph(i);
|
2011-08-11 12:37:31 -04:00
|
|
|
int numCharRuns = paragraph.numCharacterRuns();
|
2015-01-03 04:34:07 -05:00
|
|
|
for (int j = 0; j < numCharRuns; j++ )
|
2011-08-11 12:37:31 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
CharacterRun charRun = paragraph.getCharacterRun(j);
|
2011-08-11 12:37:31 -04:00
|
|
|
String text = charRun.text();
|
2015-01-03 04:34:07 -05:00
|
|
|
if (text.contains("Header" ) )
|
|
|
|
charRun.replaceText(text, "added");
|
2011-08-11 12:37:31 -04:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
|
2011-08-11 12:37:31 -04:00
|
|
|
|
|
|
|
outputStream = new HWPFOutputStream();
|
2015-01-03 04:34:07 -05:00
|
|
|
doc.getDocProperties().writeTo(outputStream);
|
2011-08-11 12:37:31 -04:00
|
|
|
final byte[] newData = outputStream.toByteArray();
|
2011-08-11 09:55:35 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline(Arrays.toString(oldData ), Arrays.toString(newData));
|
2011-08-11 09:55:35 -04:00
|
|
|
}
|
2011-08-17 10:53:28 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED FIXED] Bug 51671 - HWPFDocument.write based on NPOIFSFileSystem
|
|
|
|
* throws a NullPointerException
|
|
|
|
*/
|
|
|
|
public void test51671() throws Exception
|
|
|
|
{
|
|
|
|
InputStream is = POIDataSamples.getDocumentInstance()
|
2015-01-03 04:34:07 -05:00
|
|
|
.openResourceAsStream("empty.doc");
|
|
|
|
NPOIFSFileSystem npoifsFileSystem = new NPOIFSFileSystem(is);
|
|
|
|
try {
|
|
|
|
HWPFDocument hwpfDocument = new HWPFDocument(
|
|
|
|
npoifsFileSystem.getRoot());
|
|
|
|
hwpfDocument.write(new ByteArrayOutputStream());
|
|
|
|
} finally {
|
|
|
|
npoifsFileSystem.close();
|
|
|
|
}
|
2011-08-17 10:53:28 -04:00
|
|
|
}
|
2011-08-18 10:30:15 -04:00
|
|
|
|
|
|
|
/**
|
2011-09-07 08:12:17 -04:00
|
|
|
* Bug 51678 - Extracting text from Bug51524.zip is slow Bug 51524 -
|
|
|
|
* PapBinTable constructor is slow
|
2015-01-03 04:34:07 -05:00
|
|
|
* @throws IOException
|
2011-08-18 10:30:15 -04:00
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
public void test51678And51524() throws IOException
|
2011-08-18 10:30:15 -04:00
|
|
|
{
|
2011-09-07 08:12:17 -04:00
|
|
|
// YK: the test will run only if the poi.test.remote system property is
|
|
|
|
// set.
|
2011-08-21 11:01:23 -04:00
|
|
|
// TODO: refactor into something nicer!
|
2015-01-03 04:34:07 -05:00
|
|
|
if (System.getProperty("poi.test.remote" ) != null )
|
2011-09-07 08:12:17 -04:00
|
|
|
{
|
2011-08-21 11:01:23 -04:00
|
|
|
String href = "http://domex.nps.edu/corp/files/govdocs1/007/007488.doc";
|
2011-09-07 08:12:17 -04:00
|
|
|
HWPFDocument hwpfDocument = HWPFTestDataSamples
|
2015-01-03 04:34:07 -05:00
|
|
|
.openRemoteFile(href);
|
2011-08-21 11:01:23 -04:00
|
|
|
|
2015-01-03 04:34:07 -05:00
|
|
|
WordExtractor wordExtractor = new WordExtractor(hwpfDocument);
|
|
|
|
try {
|
|
|
|
wordExtractor.getText();
|
|
|
|
} finally {
|
|
|
|
wordExtractor.close();
|
|
|
|
}
|
2011-08-21 11:01:23 -04:00
|
|
|
}
|
2011-09-30 11:49:19 -04:00
|
|
|
}
|
2011-08-21 11:01:23 -04:00
|
|
|
|
2011-09-30 11:49:19 -04:00
|
|
|
/**
|
|
|
|
* [FIXED] Bug 51902 - Picture.fillRawImageContent -
|
|
|
|
* ArrayIndexOutOfBoundsException
|
|
|
|
*/
|
|
|
|
public void testBug51890()
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Bug51890.doc");
|
|
|
|
for (Picture picture : doc.getPicturesTable().getAllPictures() )
|
2011-09-30 11:49:19 -04:00
|
|
|
{
|
|
|
|
PictureType pictureType = picture.suggestPictureType();
|
2015-01-03 04:34:07 -05:00
|
|
|
logger.log(POILogger.DEBUG,
|
2011-09-30 11:49:19 -04:00
|
|
|
"Picture at offset " + picture.getStartOffset()
|
2015-01-03 04:34:07 -05:00
|
|
|
+ " has type " + pictureType);
|
2011-09-30 11:49:19 -04:00
|
|
|
}
|
2011-08-18 10:30:15 -04:00
|
|
|
}
|
2011-10-01 21:06:22 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* [RESOLVED FIXED] Bug 51834 - Opening and Writing .doc file results in
|
|
|
|
* corrupt document
|
|
|
|
*/
|
|
|
|
public void testBug51834() throws Exception
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* we don't have Java test for this file - it should be checked using
|
|
|
|
* Microsoft BFF Validator. But check read-write-read anyway. -- sergey
|
|
|
|
*/
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFTestDataSamples.openSampleFile("Bug51834.doc");
|
|
|
|
HWPFTestDataSamples.writeOutAndReadBack(HWPFTestDataSamples
|
|
|
|
.openSampleFile("Bug51834.doc"));
|
2011-10-01 21:06:22 -04:00
|
|
|
}
|
2011-10-29 19:01:54 -04:00
|
|
|
|
2011-10-29 20:04:38 -04:00
|
|
|
/**
|
|
|
|
* Bug 51944 - PAPFormattedDiskPage.getPAPX - IndexOutOfBounds
|
|
|
|
*/
|
|
|
|
public void testBug51944() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
HWPFOldDocument doc = HWPFTestDataSamples.openOldSampleFile("Bug51944.doc");
|
|
|
|
assertNotNull(WordToTextConverter.getText(doc));
|
2011-10-29 20:04:38 -04:00
|
|
|
}
|
|
|
|
|
2011-10-29 18:34:09 -04:00
|
|
|
/**
|
|
|
|
* Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutofBoundsException
|
|
|
|
* with no stack trace (broken after revision 1178063)
|
|
|
|
*/
|
2011-10-29 19:01:54 -04:00
|
|
|
public void testBug52032_1() throws Exception
|
2011-10-29 18:34:09 -04:00
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug52032_1.doc"));
|
2011-10-29 19:01:54 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutofBoundsException
|
|
|
|
* with no stack trace (broken after revision 1178063)
|
|
|
|
*/
|
|
|
|
public void testBug52032_2() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug52032_2.doc"));
|
2011-10-29 19:01:54 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 52032 - [BUG] & [partial-PATCH] HWPF - ArrayIndexOutofBoundsException
|
|
|
|
* with no stack trace (broken after revision 1178063)
|
|
|
|
*/
|
|
|
|
public void testBug52032_3() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug52032_3.doc"));
|
2011-10-29 18:34:09 -04:00
|
|
|
}
|
2012-09-11 15:49:44 -04:00
|
|
|
|
|
|
|
/**
|
2012-09-25 17:42:09 -04:00
|
|
|
* Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
|
2012-09-11 15:49:44 -04:00
|
|
|
*/
|
|
|
|
public void testBug53380_1() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug53380_1.doc"));
|
2012-09-11 15:49:44 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2012-09-25 17:42:09 -04:00
|
|
|
* Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
|
2012-09-11 15:49:44 -04:00
|
|
|
*/
|
|
|
|
public void testBug53380_2() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug53380_2.doc"));
|
2012-09-11 15:49:44 -04:00
|
|
|
}
|
2012-09-21 03:16:03 -04:00
|
|
|
|
|
|
|
/**
|
2012-09-25 17:42:09 -04:00
|
|
|
* Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
|
2012-09-21 03:16:03 -04:00
|
|
|
*/
|
|
|
|
public void testBug53380_3() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug53380_3.doc"));
|
2012-09-21 03:16:03 -04:00
|
|
|
}
|
2012-09-25 17:42:09 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Bug 53380 - ArrayIndexOutOfBounds Exception parsing word 97 document
|
|
|
|
*/
|
|
|
|
public void testBug53380_4() throws Exception
|
|
|
|
{
|
2015-01-03 04:34:07 -05:00
|
|
|
assertNotNull(getText("Bug53380_4.doc"));
|
2012-09-25 17:42:09 -04:00
|
|
|
}
|
2014-10-09 13:58:47 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
* java.lang.UnsupportedOperationException: Non-extended character
|
|
|
|
* Pascal strings are not supported right now
|
|
|
|
*
|
|
|
|
* Disabled pending a fix for the bug
|
|
|
|
*/
|
|
|
|
public void DISABLEDtest56880() throws Exception {
|
|
|
|
HWPFDocument doc =
|
|
|
|
HWPFTestDataSamples.openSampleFile("56880.doc");
|
2015-01-03 04:34:07 -05:00
|
|
|
assertEqualsIgnoreNewline("Check Request", doc.getRange().text());
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
// These are the values the are explected to be read when the file
|
|
|
|
// is checked.
|
|
|
|
private int section1LeftMargin = 1440;
|
|
|
|
private int section1RightMargin = 1440;
|
|
|
|
private int section1TopMargin = 1440;
|
|
|
|
private int section1BottomMargin = 1440;
|
|
|
|
private int section1NumColumns = 1;
|
|
|
|
private int section2LeftMargin = 1440;
|
|
|
|
private int section2RightMargin = 1440;
|
|
|
|
private int section2TopMargin = 1440;
|
|
|
|
private int section2BottomMargin = 1440;
|
|
|
|
private int section2NumColumns = 3;
|
|
|
|
|
|
|
|
public void testHWPFSections() {
|
|
|
|
HWPFDocument document = null;
|
|
|
|
Paragraph para = null;
|
|
|
|
Section section = null;
|
|
|
|
Range overallRange = null;
|
|
|
|
int numParas = 0;
|
|
|
|
int numSections = 0;
|
|
|
|
document = HWPFTestDataSamples.openSampleFile("Bug53453Section.doc");
|
|
|
|
overallRange = document.getOverallRange();
|
|
|
|
numParas = overallRange.numParagraphs();
|
|
|
|
for(int i = 0; i < numParas; i++) {
|
|
|
|
para = overallRange.getParagraph(i);
|
|
|
|
numSections = para.numSections();
|
|
|
|
for(int j = 0; j < numSections; j++) {
|
|
|
|
section = para.getSection(j);
|
|
|
|
if(para.text().trim().equals("Section1")) {
|
|
|
|
assertEquals(section1BottomMargin, section.getMarginBottom());
|
|
|
|
assertEquals(section1LeftMargin, section.getMarginLeft());
|
|
|
|
assertEquals(section1RightMargin, section.getMarginRight());
|
|
|
|
assertEquals(section1TopMargin, section.getMarginTop());
|
|
|
|
assertEquals(section1NumColumns, section.getNumColumns());
|
|
|
|
}
|
|
|
|
else if(para.text().trim().equals("Section2")) {
|
|
|
|
assertEquals(section2BottomMargin, section.getMarginBottom());
|
|
|
|
assertEquals(section2LeftMargin, section.getMarginLeft());
|
|
|
|
assertEquals(section2RightMargin, section.getMarginRight());
|
|
|
|
assertEquals(section2TopMargin, section.getMarginTop());
|
|
|
|
assertEquals(section2NumColumns, section.getNumColumns());
|
|
|
|
|
|
|
|
// Change the margin widths
|
|
|
|
this.section2BottomMargin = (int)(1.5 * AbstractWordUtils.TWIPS_PER_INCH);
|
|
|
|
this.section2TopMargin = (int)(1.75 * AbstractWordUtils.TWIPS_PER_INCH);
|
|
|
|
this.section2LeftMargin = (int)(0.5 * AbstractWordUtils.TWIPS_PER_INCH);
|
|
|
|
this.section2RightMargin = (int)(0.75 * AbstractWordUtils.TWIPS_PER_INCH);
|
|
|
|
section.setMarginBottom(this.section2BottomMargin);
|
|
|
|
section.setMarginLeft(this.section2LeftMargin);
|
|
|
|
section.setMarginRight(this.section2RightMargin);
|
|
|
|
section.setMarginTop(this.section2TopMargin);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Save away and re-read the document to prove the chages are permanent
|
|
|
|
document = HWPFTestDataSamples.writeOutAndReadBack(document);
|
|
|
|
overallRange = document.getOverallRange();
|
|
|
|
numParas = overallRange.numParagraphs();
|
|
|
|
for(int i = 0; i < numParas; i++) {
|
|
|
|
para = overallRange.getParagraph(i);
|
|
|
|
numSections = para.numSections();
|
|
|
|
for(int j = 0; j < numSections; j++) {
|
|
|
|
section = para.getSection(j);
|
|
|
|
if(para.text().trim().equals("Section1")) {
|
|
|
|
// No changes to the margins in Section1
|
|
|
|
assertEquals(section1BottomMargin, section.getMarginBottom());
|
|
|
|
assertEquals(section1LeftMargin, section.getMarginLeft());
|
|
|
|
assertEquals(section1RightMargin, section.getMarginRight());
|
|
|
|
assertEquals(section1TopMargin, section.getMarginTop());
|
|
|
|
assertEquals(section1NumColumns, section.getNumColumns());
|
|
|
|
}
|
|
|
|
else if(para.text().trim().equals("Section2")) {
|
|
|
|
// The margins in Section2 have kept the new settings.
|
|
|
|
assertEquals(section2BottomMargin, section.getMarginBottom());
|
|
|
|
assertEquals(section2LeftMargin, section.getMarginLeft());
|
|
|
|
assertEquals(section2RightMargin, section.getMarginRight());
|
|
|
|
assertEquals(section2TopMargin, section.getMarginTop());
|
|
|
|
assertEquals(section2NumColumns, section.getNumColumns());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-10-09 13:58:47 -04:00
|
|
|
}
|
2011-08-09 00:46:36 -04:00
|
|
|
}
|