Work inspired by bug #48018 - get HWPF lists more consistent in read vs write, and preserve order as apparently that matters. Includes a fair number of list related unit tests, but not for everything
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@998943 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ca0fc0ec1e
commit
cc419a3a85
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.7-beta3" date="2010-??-??">
|
||||
<action dev="poi-developers" type="fix">48018 - Improve HWPF handling of lists in documents read and then saved, by preserving order better</action>
|
||||
<action dev="poi-developers" type="fix">49820 - Fix HWPF paragraph levels, so that outline levels can be properly fetched</action>
|
||||
<action dev="poi-developers" type="fix">47271 - Avoid infinite loops on broken HWPF documents with a corrupt CHP style with a parent of itself</action>
|
||||
<action dev="poi-developers" type="fix">49936 - Handle HWPF documents with problematic HeaderStories better</action>
|
||||
|
@ -216,10 +216,10 @@ public final class ListLevel
|
||||
LittleEndian.putShort(buf, offset, _reserved);
|
||||
offset += LittleEndian.SHORT_SIZE;
|
||||
|
||||
System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
|
||||
offset += _cbGrpprlChpx;
|
||||
System.arraycopy(_grpprlPapx, 0, buf, offset, _cbGrpprlPapx);
|
||||
offset += _cbGrpprlPapx;
|
||||
System.arraycopy(_grpprlChpx, 0, buf, offset, _cbGrpprlChpx);
|
||||
offset += _cbGrpprlChpx;
|
||||
|
||||
if (_numberText == null) {
|
||||
// TODO - write junit to test this flow
|
||||
|
@ -17,20 +17,23 @@
|
||||
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.HWPFOutputStream;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.POILogFactory;
|
||||
import org.apache.poi.util.POILogger;
|
||||
|
||||
import org.apache.poi.hwpf.model.io.*;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* @author Ryan Ackley
|
||||
*/
|
||||
@ -40,8 +43,8 @@ public final class ListTables
|
||||
private static final int LIST_FORMAT_OVERRIDE_SIZE = 16;
|
||||
private static POILogger log = POILogFactory.getLogger(ListTables.class);
|
||||
|
||||
HashMap _listMap = new HashMap();
|
||||
ArrayList _overrideList = new ArrayList();
|
||||
ListMap _listMap = new ListMap();
|
||||
ArrayList<ListFormatOverride> _overrideList = new ArrayList<ListFormatOverride>();
|
||||
|
||||
public ListTables()
|
||||
{
|
||||
@ -109,22 +112,17 @@ public final class ListTables
|
||||
public void writeListDataTo(HWPFOutputStream tableStream)
|
||||
throws IOException
|
||||
{
|
||||
|
||||
Integer[] intList = (Integer[])_listMap.keySet().toArray(new Integer[0]);
|
||||
int listSize = _listMap.size();
|
||||
|
||||
// use this stream as a buffer for the levels since their size varies.
|
||||
ByteArrayOutputStream levelBuf = new ByteArrayOutputStream();
|
||||
|
||||
// use a byte array for the lists because we know their size.
|
||||
byte[] listBuf = new byte[intList.length * LIST_DATA_SIZE];
|
||||
|
||||
byte[] shortHolder = new byte[2];
|
||||
LittleEndian.putShort(shortHolder, (short)intList.length);
|
||||
LittleEndian.putShort(shortHolder, (short)listSize);
|
||||
tableStream.write(shortHolder);
|
||||
|
||||
for (int x = 0; x < intList.length; x++)
|
||||
{
|
||||
ListData lst = (ListData)_listMap.get(intList[x]);
|
||||
for(Integer x : _listMap.sortedKeys()) {
|
||||
ListData lst = _listMap.get(x);
|
||||
tableStream.write(lst.toByteArray());
|
||||
ListLevel[] lvls = lst.getLevels();
|
||||
for (int y = 0; y < lvls.length; y++)
|
||||
@ -150,7 +148,7 @@ public final class ListTables
|
||||
|
||||
for (int x = 0; x < size; x++)
|
||||
{
|
||||
ListFormatOverride lfo = (ListFormatOverride)_overrideList.get(x);
|
||||
ListFormatOverride lfo = _overrideList.get(x);
|
||||
tableStream.write(lfo.toByteArray());
|
||||
ListFormatOverrideLevel[] lfolvls = lfo.getLevelOverrides();
|
||||
for (int y = 0; y < lfolvls.length; y++)
|
||||
@ -164,7 +162,7 @@ public final class ListTables
|
||||
|
||||
public ListFormatOverride getOverride(int lfoIndex)
|
||||
{
|
||||
return (ListFormatOverride)_overrideList.get(lfoIndex - 1);
|
||||
return _overrideList.get(lfoIndex - 1);
|
||||
}
|
||||
|
||||
public int getOverrideIndexFromListID(int lstid)
|
||||
@ -173,7 +171,7 @@ public final class ListTables
|
||||
int size = _overrideList.size();
|
||||
for (int x = 0; x < size; x++)
|
||||
{
|
||||
ListFormatOverride next = (ListFormatOverride)_overrideList.get(x);
|
||||
ListFormatOverride next = _overrideList.get(x);
|
||||
if (next.getLsid() == lstid)
|
||||
{
|
||||
// 1-based index I think
|
||||
@ -190,7 +188,7 @@ public final class ListTables
|
||||
|
||||
public ListLevel getLevel(int listID, int level)
|
||||
{
|
||||
ListData lst = (ListData)_listMap.get(Integer.valueOf(listID));
|
||||
ListData lst = _listMap.get(Integer.valueOf(listID));
|
||||
if(level < lst.numLevels()) {
|
||||
ListLevel lvl = lst.getLevels()[level];
|
||||
return lvl;
|
||||
@ -201,7 +199,7 @@ public final class ListTables
|
||||
|
||||
public ListData getListData(int listID)
|
||||
{
|
||||
return (ListData) _listMap.get(Integer.valueOf(listID));
|
||||
return _listMap.get(Integer.valueOf(listID));
|
||||
}
|
||||
|
||||
public boolean equals(Object obj)
|
||||
@ -215,12 +213,12 @@ public final class ListTables
|
||||
|
||||
if (_listMap.size() == tables._listMap.size())
|
||||
{
|
||||
Iterator it = _listMap.keySet().iterator();
|
||||
Iterator<Integer> it = _listMap.keySet().iterator();
|
||||
while (it.hasNext())
|
||||
{
|
||||
Object key = it.next();
|
||||
ListData lst1 = (ListData)_listMap.get(key);
|
||||
ListData lst2 = (ListData)tables._listMap.get(key);
|
||||
Integer key = it.next();
|
||||
ListData lst1 = _listMap.get(key);
|
||||
ListData lst2 = tables._listMap.get(key);
|
||||
if (!lst1.equals(lst2))
|
||||
{
|
||||
return false;
|
||||
@ -241,4 +239,70 @@ public final class ListTables
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private static class ListMap implements Map<Integer, ListData> {
|
||||
private ArrayList<Integer> keyList = new ArrayList<Integer>();
|
||||
private HashMap<Integer,ListData> parent = new HashMap<Integer,ListData>();
|
||||
private ListMap() {}
|
||||
|
||||
public void clear() {
|
||||
keyList.clear();
|
||||
parent.clear();
|
||||
}
|
||||
|
||||
public boolean containsKey(Object key) {
|
||||
return parent.containsKey(key);
|
||||
}
|
||||
|
||||
public boolean containsValue(Object value) {
|
||||
return parent.containsValue(value);
|
||||
}
|
||||
|
||||
public ListData get(Object key) {
|
||||
return parent.get(key);
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return parent.isEmpty();
|
||||
}
|
||||
|
||||
public ListData put(Integer key, ListData value) {
|
||||
keyList.add(key);
|
||||
return parent.put(key, value);
|
||||
}
|
||||
|
||||
public void putAll(Map<? extends Integer, ? extends ListData> map) {
|
||||
for(Entry<? extends Integer, ? extends ListData> entry : map.entrySet()) {
|
||||
put(entry.getKey(), entry.getValue());
|
||||
}
|
||||
}
|
||||
|
||||
public ListData remove(Object key) {
|
||||
keyList.remove(key);
|
||||
return parent.remove(key);
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return parent.size();
|
||||
}
|
||||
|
||||
public Set<Entry<Integer, ListData>> entrySet() {
|
||||
throw new IllegalStateException("Use sortedKeys() + get() instead");
|
||||
}
|
||||
|
||||
public List<Integer> sortedKeys() {
|
||||
return Collections.unmodifiableList(keyList);
|
||||
}
|
||||
public Set<Integer> keySet() {
|
||||
throw new IllegalStateException("Use sortedKeys() instead");
|
||||
}
|
||||
|
||||
public Collection<ListData> values() {
|
||||
ArrayList<ListData> values = new ArrayList<ListData>();
|
||||
for(Integer key : keyList) {
|
||||
values.add(parent.get(key));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -434,16 +434,30 @@ public class Paragraph extends Range implements Cloneable {
|
||||
_papx.updateSprm(SPRM_DCS, dcs.toShort());
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ilfo, an index to the document's hpllfo, which
|
||||
* describes the automatic number formatting of the paragraph.
|
||||
* A value of zero means it isn't numbered.
|
||||
*/
|
||||
public int getIlfo()
|
||||
{
|
||||
return _props.getIlfo();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the multi-level indent for the paragraph. Will be
|
||||
* zero for non-list paragraphs, and the first level of any
|
||||
* list. Subsequent levels in hold values 1-8.
|
||||
*/
|
||||
public int getIlvl()
|
||||
{
|
||||
return _props.getIlvl();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the heading level (1-8), or 9 if the paragraph
|
||||
* isn't in a heading style.
|
||||
*/
|
||||
public int getLvl()
|
||||
{
|
||||
return _props.getLvl();
|
||||
|
@ -0,0 +1,210 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.hwpf.usermodel;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import org.apache.poi.hwpf.HWPFDocument;
|
||||
import org.apache.poi.hwpf.HWPFTestDataSamples;
|
||||
|
||||
/**
|
||||
* Tests for our handling of lists
|
||||
*/
|
||||
public final class TestLists extends TestCase {
|
||||
public void testBasics() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
Range r = doc.getRange();
|
||||
|
||||
assertEquals(40, r.numParagraphs());
|
||||
assertEquals("Heading Level 1\r", r.getParagraph(0).text());
|
||||
assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
|
||||
assertEquals("The end!\r", r.getParagraph(38).text());
|
||||
assertEquals("\r", r.getParagraph(39).text());
|
||||
|
||||
assertEquals(0, r.getParagraph(0).getLvl());
|
||||
assertEquals(9, r.getParagraph(1).getLvl());
|
||||
assertEquals(9, r.getParagraph(38).getLvl());
|
||||
assertEquals(9, r.getParagraph(39).getLvl());
|
||||
}
|
||||
|
||||
public void testUnorderedLists() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
Range r = doc.getRange();
|
||||
assertEquals(40, r.numParagraphs());
|
||||
|
||||
// Normal bullet points
|
||||
assertEquals("This document has different lists in it for testing\r", r.getParagraph(1).text());
|
||||
assertEquals("Unordered list 1\r", r.getParagraph(2).text());
|
||||
assertEquals("UL 2\r", r.getParagraph(3).text());
|
||||
assertEquals("UL 3\r", r.getParagraph(4).text());
|
||||
assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
|
||||
|
||||
assertEquals(9, r.getParagraph(1).getLvl());
|
||||
assertEquals(9, r.getParagraph(2).getLvl());
|
||||
assertEquals(9, r.getParagraph(3).getLvl());
|
||||
assertEquals(9, r.getParagraph(4).getLvl());
|
||||
assertEquals(9, r.getParagraph(5).getLvl());
|
||||
|
||||
assertEquals(0, r.getParagraph(1).getIlvl());
|
||||
assertEquals(0, r.getParagraph(2).getIlvl());
|
||||
assertEquals(0, r.getParagraph(3).getIlvl());
|
||||
assertEquals(0, r.getParagraph(4).getIlvl());
|
||||
assertEquals(0, r.getParagraph(5).getIlvl());
|
||||
|
||||
// Tick bullets
|
||||
assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
|
||||
assertEquals("Tick 1\r", r.getParagraph(10).text());
|
||||
assertEquals("Tick 2\r", r.getParagraph(11).text());
|
||||
assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
|
||||
|
||||
assertEquals(9, r.getParagraph(9).getLvl());
|
||||
assertEquals(9, r.getParagraph(10).getLvl());
|
||||
assertEquals(9, r.getParagraph(11).getLvl());
|
||||
assertEquals(9, r.getParagraph(12).getLvl());
|
||||
|
||||
assertEquals(0, r.getParagraph(9).getIlvl());
|
||||
assertEquals(0, r.getParagraph(10).getIlvl());
|
||||
assertEquals(0, r.getParagraph(11).getIlvl());
|
||||
assertEquals(0, r.getParagraph(12).getIlvl());
|
||||
|
||||
// TODO Test for tick not bullet
|
||||
}
|
||||
|
||||
public void testOrderedLists() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
Range r = doc.getRange();
|
||||
assertEquals(40, r.numParagraphs());
|
||||
|
||||
assertEquals("Next up is an ordered list:\r", r.getParagraph(5).text());
|
||||
assertEquals("Ordered list 1\r", r.getParagraph(6).text());
|
||||
assertEquals("OL 2\r", r.getParagraph(7).text());
|
||||
assertEquals("OL 3\r", r.getParagraph(8).text());
|
||||
assertEquals("Now for an un-ordered list with a different bullet style:\r", r.getParagraph(9).text());
|
||||
|
||||
assertEquals(9, r.getParagraph(5).getLvl());
|
||||
assertEquals(9, r.getParagraph(6).getLvl());
|
||||
assertEquals(9, r.getParagraph(7).getLvl());
|
||||
assertEquals(9, r.getParagraph(8).getLvl());
|
||||
assertEquals(9, r.getParagraph(9).getLvl());
|
||||
|
||||
assertEquals(0, r.getParagraph(5).getIlvl());
|
||||
assertEquals(0, r.getParagraph(6).getIlvl());
|
||||
assertEquals(0, r.getParagraph(7).getIlvl());
|
||||
assertEquals(0, r.getParagraph(8).getIlvl());
|
||||
assertEquals(0, r.getParagraph(9).getIlvl());
|
||||
}
|
||||
|
||||
public void testMultiLevelLists() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
Range r = doc.getRange();
|
||||
assertEquals(40, r.numParagraphs());
|
||||
|
||||
assertEquals("Multi-level un-ordered list:\r", r.getParagraph(12).text());
|
||||
assertEquals("ML 1:1\r", r.getParagraph(13).text());
|
||||
assertEquals("ML 1:2\r", r.getParagraph(14).text());
|
||||
assertEquals("ML 2:1\r", r.getParagraph(15).text());
|
||||
assertEquals("ML 2:2\r", r.getParagraph(16).text());
|
||||
assertEquals("ML 2:3\r", r.getParagraph(17).text());
|
||||
assertEquals("ML 3:1\r", r.getParagraph(18).text());
|
||||
assertEquals("ML 4:1\r", r.getParagraph(19).text());
|
||||
assertEquals("ML 5:1\r", r.getParagraph(20).text());
|
||||
assertEquals("ML 5:2\r", r.getParagraph(21).text());
|
||||
assertEquals("ML 2:4\r", r.getParagraph(22).text());
|
||||
assertEquals("ML 1:3\r", r.getParagraph(23).text());
|
||||
assertEquals("Multi-level ordered list:\r", r.getParagraph(24).text());
|
||||
assertEquals("OL 1\r", r.getParagraph(25).text());
|
||||
assertEquals("OL 2\r", r.getParagraph(26).text());
|
||||
assertEquals("OL 2.1\r", r.getParagraph(27).text());
|
||||
assertEquals("OL 2.2\r", r.getParagraph(28).text());
|
||||
assertEquals("OL 2.2.1\r", r.getParagraph(29).text());
|
||||
assertEquals("OL 2.2.2\r", r.getParagraph(30).text());
|
||||
assertEquals("OL 2.2.2.1\r", r.getParagraph(31).text());
|
||||
assertEquals("OL 2.2.3\r", r.getParagraph(32).text());
|
||||
assertEquals("OL 3\r", r.getParagraph(33).text());
|
||||
assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
|
||||
|
||||
for(int i=12; i<=34; i++) {
|
||||
assertEquals(9, r.getParagraph(12).getLvl());
|
||||
}
|
||||
assertEquals(0, r.getParagraph(12).getIlvl());
|
||||
assertEquals(0, r.getParagraph(13).getIlvl());
|
||||
assertEquals(0, r.getParagraph(14).getIlvl());
|
||||
assertEquals(1, r.getParagraph(15).getIlvl());
|
||||
assertEquals(1, r.getParagraph(16).getIlvl());
|
||||
assertEquals(1, r.getParagraph(17).getIlvl());
|
||||
assertEquals(2, r.getParagraph(18).getIlvl());
|
||||
assertEquals(3, r.getParagraph(19).getIlvl());
|
||||
assertEquals(4, r.getParagraph(20).getIlvl());
|
||||
assertEquals(4, r.getParagraph(21).getIlvl());
|
||||
assertEquals(1, r.getParagraph(22).getIlvl());
|
||||
assertEquals(0, r.getParagraph(23).getIlvl());
|
||||
assertEquals(0, r.getParagraph(24).getIlvl());
|
||||
assertEquals(0, r.getParagraph(25).getIlvl());
|
||||
assertEquals(0, r.getParagraph(26).getIlvl());
|
||||
assertEquals(1, r.getParagraph(27).getIlvl());
|
||||
assertEquals(1, r.getParagraph(28).getIlvl());
|
||||
assertEquals(2, r.getParagraph(29).getIlvl());
|
||||
assertEquals(2, r.getParagraph(30).getIlvl());
|
||||
assertEquals(3, r.getParagraph(31).getIlvl());
|
||||
assertEquals(2, r.getParagraph(32).getIlvl());
|
||||
assertEquals(0, r.getParagraph(33).getIlvl());
|
||||
assertEquals(0, r.getParagraph(34).getIlvl());
|
||||
}
|
||||
|
||||
public void testIndentedText() {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
Range r = doc.getRange();
|
||||
|
||||
assertEquals(40, r.numParagraphs());
|
||||
assertEquals("Finally we want some indents, to tell the difference\r", r.getParagraph(34).text());
|
||||
assertEquals("Indented once\r", r.getParagraph(35).text());
|
||||
assertEquals("Indented twice\r", r.getParagraph(36).text());
|
||||
assertEquals("Indented three times\r", r.getParagraph(37).text());
|
||||
assertEquals("The end!\r", r.getParagraph(38).text());
|
||||
|
||||
assertEquals(9, r.getParagraph(34).getLvl());
|
||||
assertEquals(9, r.getParagraph(35).getLvl());
|
||||
assertEquals(9, r.getParagraph(36).getLvl());
|
||||
assertEquals(9, r.getParagraph(37).getLvl());
|
||||
assertEquals(9, r.getParagraph(38).getLvl());
|
||||
assertEquals(9, r.getParagraph(39).getLvl());
|
||||
|
||||
assertEquals(0, r.getParagraph(34).getIlvl());
|
||||
assertEquals(0, r.getParagraph(35).getIlvl());
|
||||
assertEquals(0, r.getParagraph(36).getIlvl());
|
||||
assertEquals(0, r.getParagraph(37).getIlvl());
|
||||
assertEquals(0, r.getParagraph(38).getIlvl());
|
||||
assertEquals(0, r.getParagraph(39).getIlvl());
|
||||
|
||||
// TODO Test the indent
|
||||
}
|
||||
|
||||
public void testWriteRead() throws IOException {
|
||||
HWPFDocument doc = HWPFTestDataSamples.openSampleFile("Lists.doc");
|
||||
doc = HWPFTestDataSamples.writeOutAndReadBack(doc);
|
||||
|
||||
Range r = doc.getRange();
|
||||
|
||||
// Check a couple at random
|
||||
assertEquals(4, r.getParagraph(21).getIlvl());
|
||||
assertEquals(1, r.getParagraph(22).getIlvl());
|
||||
assertEquals(0, r.getParagraph(23).getIlvl());
|
||||
}
|
||||
}
|
BIN
test-data/document/Lists.doc
Normal file
BIN
test-data/document/Lists.doc
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user