diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index f214e05e1..84c1156af 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers 30311 - More work on Conditional Formatting refactored all junits' usage of HSSF.testdata.path to one place 44739 - Small fixes for conditional formatting (regions with max row/col index) diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 7bdc284da..bb7394dd3 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers 30311 - More work on Conditional Formatting refactored all junits' usage of HSSF.testdata.path to one place 44739 - Small fixes for conditional formatting (regions with max row/col index) diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java index 0d17669c2..88c91c74f 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeader.java @@ -47,10 +47,32 @@ public abstract class ChunkHeader { ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 18); return ch; - } else if(documentVersion == 5) { - throw new RuntimeException("TODO"); + } else if(documentVersion == 5 || documentVersion == 4) { + ChunkHeaderV4V5 ch = new ChunkHeaderV4V5(); + + ch.type = (int)LittleEndian.getShort(data, offset + 0); + ch.id = (int)LittleEndian.getShort(data, offset + 2); + ch.unknown2 = (short)LittleEndian.getUnsignedByte(data, offset + 4); + ch.unknown3 = (short)LittleEndian.getUnsignedByte(data, offset + 5); + ch.unknown1 = (short)LittleEndian.getShort(data, offset + 6); + ch.length = (int)LittleEndian.getUInt(data, offset + 8); + + return ch; } else { - throw new IllegalArgumentException("Visio files with versions below 5 are not supported, yours was " + documentVersion); + throw new IllegalArgumentException("Visio files with versions below 4 are not supported, yours was " + documentVersion); + } + } + + /** + * Returns the size of a chunk header for the given document version. + */ + public static int getHeaderSize(int documentVersion) { + if(documentVersion > 6) { + return ChunkHeaderV11.getHeaderSize(); + } else if(documentVersion == 6) { + return ChunkHeaderV6.getHeaderSize(); + } else { + return ChunkHeaderV4V5.getHeaderSize(); } } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java new file mode 100644 index 000000000..335c9c543 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV4V5.java @@ -0,0 +1,56 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hdgf.chunks; + +/** + * A chunk header from v4 or v5 + */ +public class ChunkHeaderV4V5 extends ChunkHeader { + protected short unknown2; + protected short unknown3; + + public short getUnknown2() { + return unknown2; + } + public short getUnknown3() { + return unknown3; + } + + protected static int getHeaderSize() { + return 12; + } + + public int getSizeInBytes() { + return getHeaderSize(); + } + + /** + * Does the chunk have a trailer? + */ + public boolean hasTrailer() { + // V4 and V5 never has trailers + return false; + } + + /** + * Does the chunk have a separator? + */ + public boolean hasSeparator() { + // V4 and V5 never has separators + return false; + } +} diff --git a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java index e8061563a..4d13b85bd 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/chunks/ChunkHeaderV6.java @@ -30,9 +30,13 @@ public class ChunkHeaderV6 extends ChunkHeader { return unknown3; } - public int getSizeInBytes() { + protected static int getHeaderSize() { + // Looks like it ought to be 19... return 19; } + public int getSizeInBytes() { + return getHeaderSize(); + } /** * Does the chunk have a trailer? diff --git a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java index a7857e46f..034714c7b 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/extractor/VisioTextExtractor.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import org.apache.poi.POITextExtractor; import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.Chunk.Command; import org.apache.poi.hdgf.streams.ChunkStream; import org.apache.poi.hdgf.streams.PointerContainingStream; @@ -71,11 +72,13 @@ public class VisioTextExtractor extends POITextExtractor { if(stream instanceof ChunkStream) { ChunkStream cs = (ChunkStream)stream; for(int i=0; i 0) { // First command - Command cmd = cs.getChunks()[i].getCommands()[0]; + Command cmd = chunk.getCommands()[0]; if(cmd != null && cmd.getValue() != null) { text.add( cmd.getValue().toString() ); } diff --git a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java index a59fe43ff..65b69d95a 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/streams/ChunkStream.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import org.apache.poi.hdgf.chunks.Chunk; import org.apache.poi.hdgf.chunks.ChunkFactory; +import org.apache.poi.hdgf.chunks.ChunkHeader; import org.apache.poi.hdgf.pointers.Pointer; public class ChunkStream extends Stream { @@ -51,10 +52,17 @@ public class ChunkStream extends Stream { int pos = 0; byte[] contents = getStore().getContents(); while(pos < contents.length) { - Chunk chunk = chunkFactory.createChunk(contents, pos); - chunksA.add(chunk); - - pos += chunk.getOnDiskSize(); + // Ensure we have enough data to create a chunk from + int headerSize = ChunkHeader.getHeaderSize(chunkFactory.getVersion()); + if(pos+headerSize <= contents.length) { + Chunk chunk = chunkFactory.createChunk(contents, pos); + chunksA.add(chunk); + + pos += chunk.getOnDiskSize(); + } else { + System.err.println("Needed " + headerSize + " bytes to create the next chunk header, but only found " + (contents.length-pos) + " bytes, ignoring rest of data"); + pos = contents.length; + } } chunks = (Chunk[])chunksA.toArray(new Chunk[chunksA.size()]); diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd new file mode 100644 index 000000000..a597a0d93 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594-2.vsd differ diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd new file mode 100644 index 000000000..abed78b4d Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hdgf/data/44594.vsd differ diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java index a6541e9b0..8f890db3a 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/extractor/TestVisioExtractor.java @@ -17,25 +17,21 @@ package org.apache.poi.hdgf.extractor; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; import java.io.PrintStream; import junit.framework.TestCase; import org.apache.poi.hdgf.HDGFDiagram; -import org.apache.poi.hdgf.chunks.Chunk; -import org.apache.poi.hdgf.chunks.ChunkFactory; -import org.apache.poi.hdgf.pointers.Pointer; -import org.apache.poi.hdgf.pointers.PointerFactory; -import org.apache.poi.hssf.record.formula.eval.StringOperationEval; -import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public class TestVisioExtractor extends TestCase { - private String filename; + private String dirname; + private String defFilename; protected void setUp() throws Exception { - String dirname = System.getProperty("HDGF.testdata.path"); - filename = dirname + "/Test_Visio-Some_Random_Text.vsd"; + dirname = System.getProperty("HDGF.testdata.path"); + defFilename = dirname + "/Test_Visio-Some_Random_Text.vsd"; } /** @@ -44,14 +40,14 @@ public class TestVisioExtractor extends TestCase { public void testCreation() throws Exception { VisioTextExtractor extractor; - extractor = new VisioTextExtractor(new FileInputStream(filename)); + extractor = new VisioTextExtractor(new FileInputStream(defFilename)); assertNotNull(extractor); assertNotNull(extractor.getAllText()); assertEquals(3, extractor.getAllText().length); extractor = new VisioTextExtractor( new POIFSFileSystem( - new FileInputStream(filename) + new FileInputStream(defFilename) ) ); assertNotNull(extractor); @@ -61,7 +57,7 @@ public class TestVisioExtractor extends TestCase { extractor = new VisioTextExtractor( new HDGFDiagram( new POIFSFileSystem( - new FileInputStream(filename) + new FileInputStream(defFilename) ) ) ); @@ -72,7 +68,7 @@ public class TestVisioExtractor extends TestCase { public void testExtraction() throws Exception { VisioTextExtractor extractor = - new VisioTextExtractor(new FileInputStream(filename)); + new VisioTextExtractor(new FileInputStream(defFilename)); // Check the array fetch String[] text = extractor.getAllText(); @@ -88,13 +84,30 @@ public class TestVisioExtractor extends TestCase { assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS); } + public void testProblemFiles() throws Exception { + File a = new File(dirname, "44594.vsd"); + VisioTextExtractor.main(new String[] {a.toString()}); + + File b = new File(dirname, "44594-2.vsd"); + VisioTextExtractor.main(new String[] {b.toString()}); + + File c = new File(dirname, "ShortChunk1.vsd"); + VisioTextExtractor.main(new String[] {c.toString()}); + + File d = new File(dirname, "ShortChunk2.vsd"); + VisioTextExtractor.main(new String[] {d.toString()}); + + File e = new File(dirname, "ShortChunk3.vsd"); + VisioTextExtractor.main(new String[] {e.toString()}); + } + public void testMain() throws Exception { PrintStream oldOut = System.out; ByteArrayOutputStream baos = new ByteArrayOutputStream(); PrintStream capture = new PrintStream(baos); System.setOut(capture); - VisioTextExtractor.main(new String[] {filename}); + VisioTextExtractor.main(new String[] {defFilename}); // Put things back System.setOut(oldOut); diff --git a/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java new file mode 100644 index 000000000..9d4cac168 --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hdgf/streams/TestStreamBugs.java @@ -0,0 +1,102 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hdgf.streams; + +import java.io.FileInputStream; + +import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.chunks.ChunkFactory; +import org.apache.poi.hdgf.pointers.Pointer; +import org.apache.poi.hdgf.pointers.PointerFactory; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +/** + * Tests for bugs with streams + */ +public class TestStreamBugs extends StreamTest { + private byte[] contents; + private ChunkFactory chunkFactory; + private PointerFactory ptrFactory; + private POIFSFileSystem filesystem; + + protected void setUp() throws Exception { + String dirname = System.getProperty("HDGF.testdata.path"); + String filename = dirname + "/44594.vsd"; + ptrFactory = new PointerFactory(11); + chunkFactory = new ChunkFactory(11); + + FileInputStream fin = new FileInputStream(filename); + filesystem = new POIFSFileSystem(fin); + + DocumentEntry docProps = + (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument"); + + // Grab the document stream + contents = new byte[docProps.getSize()]; + filesystem.createDocumentInputStream("VisioDocument").read(contents); + } + + public void testGetTrailer() throws Exception { + Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24); + Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory); + } + + public void TOIMPLEMENTtestGetCertainChunks() throws Exception { + int offsetA = 3708; + int offsetB = 3744; + } + + public void testGetChildren() throws Exception { + Pointer trailerPointer = ptrFactory.createPointer(contents, 0x24); + TrailerStream trailer = (TrailerStream) + Stream.createStream(trailerPointer, contents, chunkFactory, ptrFactory); + + // Get without recursing + Pointer[] ptrs = trailer.getChildPointers(); + for(int i=0; i