Tag as 3.0.1-RC3

git-svn-id: https://svn.apache.org/repos/asf/poi/tags/REL_3_0_1_RC3@551531 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-06-28 11:43:11 +00:00
parent cd3e480727
commit 2949f09894
24 changed files with 522 additions and 18 deletions

View File

@ -1,5 +1,16 @@
Apache Jakarta POI
Apache POI
Copyright 2001-2007 The Apache Software Foundation
This product includes software developed by
The Apache Software Foundation (http://www.apache.org/).
Unit testing support is provided by JUnit, under the
Common Public License Version 1.0:
http://www.opensource.org/licenses/cpl.php
See http://www.junit.org/
Small parts of the POI component HDGF are based on VSDump,
and are under the GNU General Public Licence version 3 (GPL v3):
http://gplv3.fsf.org/
See http://www.gnome.ru/projects/vsdump_en.html

View File

@ -39,6 +39,7 @@
<menu-item label="HWPF" href="hwpf/index.html"/>
<menu-item label="HPSF" href="hpsf/index.html"/>
<menu-item label="HSLF" href="hslf/index.html"/>
<menu-item label="HDGF" href="hdgf/index.html"/>
<menu-item label="POI-Ruby" href="poi-ruby.html"/>
<menu-item label="POI-Utils" href="utils/index.html"/>
<menu-item label="Download" href="ext:download"/>

View File

@ -35,7 +35,7 @@
<person id="YK" name="Yegor Kozlov" email="yegor@apache.org"/>
</devs>
<release version="3.0.1-FINAL" date="2007-06-15">
<release version="3.0.1-FINAL" date="2007-07-05">
<action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artifact build process</action>
<action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
<action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@ -44,6 +44,9 @@
<action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
<action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
<action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
<action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
<action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
<action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
</release>
<release version="3.0-FINAL" date="2007-05-18">

View File

@ -0,0 +1,34 @@
<?xml version="1.0"?>
<!--
====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
====================================================================
-->
<!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
<book software="POI Project"
title="HDGF"
copyright="@year@ POI Project">
<menu label="Apache POI">
<menu-item label="Top" href="../index.html"/>
</menu>
<menu label="HDGF">
<menu-item label="Overview" href="index.html"/>
</menu>
</book>

View File

@ -0,0 +1,98 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
====================================================================
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V1.1//EN" "../dtd/document-v11.dtd">
<document>
<header>
<title>POI-HDGF - Java API To Access Microsoft Visio Format Files</title>
<subtitle>Overview</subtitle>
<authors>
<person name="Nick Burch" email="nick at apache dot org"/>
</authors>
</header>
<body>
<section>
<title>Overview</title>
<p>HDGF is the POI Project's pure Java implementation of the Visio file format.</p>
<p>Currently, HDGF provides a low-level, read-only api for
accessing Visio documents. It also provides a
<link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/extractor/">way</link>
to extract the textual content from a file.
</p>
<p>At this time, there is no <em>usermodel</em> api or similar,
only low level access to the streams, chunks and chunk commands.
Users are advised to check the unit tests to see how everything
works. They are also well advised to read the documentation
supplied with
<link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
to get a feel for how Visio files are structured.</p>
<p>To get a feel for the contents of a file, and to track down
where data of interest is stored, HDGF comes with
<link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/dev/">VSDDumper</link>
to print out the contents of the file. Users should also make
use of
<link href="http://www.gnome.ru/projects/vsdump_en.html">vsdump</link>
to probe the structure of files.</p>
<note>
This code currently lives in the
<link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
of the POI SVN repository.
Ensure that you have the scratchpad jar or the scratchpad
build area in your
classpath before experimenting with this code.
</note>
<section>
<title>Steps required for write support</title>
<p>Currently, HDGF is only able to read Visio files; it is
not able to write them back out again. We believe the
following are the steps that would need to be taken to
implement it.</p>
<ol>
<li>Re-write the decompression support in LZW4HDGF to be
less opaque, and also under the ASL.</li>
<li>Add compression support to the new LZW4HDGF.</li>
<li>Have HDGF just write back the raw bytes it read in, and
have a test to ensure the file is un-changed.</li>
<li>Have HDGF generate the bytes to write out from the
Stream stores, using the compressed data as appropriate,
without re-compressing. Plus test to ensure file is
un-changed.</li>
<li>Have HDGF generate the bytes to write out from the
Stream stores, re-compressing any streams that were
decompressed. Plus test to ensure file is un-changed.</li>
<li>Have HDGF re-generate the offsets in pointers for the
locations of the streams. Plus test to ensure file is
un-changed.</li>
<li>Have HDGF re-generate the bytes for all the chunks, from
the chunk commands. Tests to ensure the chunks are
serialized properly, and then that the file is un-changed</li>
<li>Alter the data of one command, but keep it the same
length, and check visio can open the file when written
out.</li>
<li>Alter the data of one command, to a new length, and
check that visio can open the file when written out.</li>
</ol>
</section>
</section>
</body>
</document>

View File

@ -20,7 +20,7 @@
<!DOCTYPE book PUBLIC "-//APACHE//DTD Cocoon Documentation Book V1.0//EN" "../dtd/book-cocoon-v10.dtd">
<book software="POI Project"
title="HSSF"
title="HSLF"
copyright="@year@ POI Project">
<menu label="Apache POI">

View File

@ -34,12 +34,12 @@
<title>Overview</title>
<p>HSLF is the POI Project's pure Java implementation of the Powerpoint file format.</p>
<p>HSSF provides a way to read powerpoint presentations, and extract text from it.
<p>HSLF provides a way to read powerpoint presentations, and extract text from it.
It also provides some (currently limited) edit capabilities.
</p>
<note>
This code currently lives in the
<link href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">scratchpad area</link>
<link href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">scratchpad area</link>
of the POI SVN repository.
Ensure that you have the scratchpad jar or the scratchpad
build area in your

View File

@ -460,7 +460,7 @@ some of the rows or cells. It can be found at
<code>/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/XLS2CSVmra.java</code>,
and may be called on the command line, or from within your own code.
The latest version is always available from
<link href="http://svn.apache.org/repos/asf/jakarta/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
<link href="http://svn.apache.org/repos/asf/poi/trunk/src/scratchpad/examples/src/org/apache/poi/hssf/eventusermodel/examples/">subversion</link>.
</p>
<p>
<em>This code is currently in the scratchpad section, so you will either

View File

@ -38,7 +38,7 @@
to pure Java.</p>
<p>HWPF is still in early development. It is in the <link
href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
scratchpad section of the SVN.</link> You will need to ensure you
either have a recent SVN checkout, or a recent SVN nightly build
(including the scratchpad jar!)</p>

View File

@ -30,7 +30,7 @@
<body>
<p>HWPF is still in early development. It is in the <link
href="http://svn.apache.org/viewcvs.cgi/jakarta/poi/trunk/src/scratchpad/">
href="http://svn.apache.org/viewcvs.cgi/poi/trunk/src/scratchpad/">
scratchpad section of the SVN.</link> You will need to ensure you
either have a recent SVN checkout, or a recent SVN nightly build
(including the scratchpad jar!)</p>
@ -68,7 +68,7 @@ can then get text and other properties.
<section><title>Further Examples</title>
<p>For now, the best source of additional examples is in the unit
tests. <link
href="http://svn.apache.org/viewvc/jakarta/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
href="http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hwpf/">
Browse the HWPF unit tests.</link>
</p>
</section>

View File

@ -38,6 +38,10 @@
<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">download</link>
the source and binaries from your
<link href="http://www.apache.org/dyn/closer.cgi/poi/release/">local mirror</link>.</p>
<p>We would also like to confirm that version 3.0 of Apache POI does
<em>not</em> contain any viruses. Users of broken virus checkers
which do detect a 94 byte file, sci_cec.db, as containing one are
advised to contact their vendor for a fix.</p>
</section>
<section><title>Purpose</title>
@ -107,12 +111,19 @@
development. Jump in!</p>
</section>
<section><title>HSLF for PowerPoint Documents</title>
<p>HWSL is our port of the Microsoft PowerPoint 97(-2003) file format to pure
<p>HSLF is our port of the Microsoft PowerPoint 97(-2003) file format to pure
Java. It supports read and write capabilities of some, but not yet all
of the core records. Please see <link
href="./hslf/index.html">the HSLF project page for more
information</link>.</p>
</section>
<section><title>HDGF for Visio Documents</title>
<p>HDGF is our port of the Microsoft Visio 97(-2003) file format to pure
Java. It currently only supports reading at a very low level, and
simple text extraction. Please see <link
href="./hdgf/index.html">the HDGF project page for more
information</link>.</p>
</section>
<section><title>HPSF for Document Properties</title>
<p>HPSF is our port of the OLE 2 property set format to pure
Java. Property sets are mostly use to store a document's properties

View File

@ -32,7 +32,7 @@
</developers>
<changes>
<release version="3.0.1-FINAL" date="2007-06-15">
<release version="3.0.1-FINAL" date="2007-07-05">
<action dev="POI-DEVELOPERS" type="fix">Administrative updates to the Maven POMs, and the release artifact build process</action>
<action dev="POI-DEVELOPERS" type="fix">23951 - [PATCH] Fix for HSSF setSheetOrder and tab names</action>
<action dev="POI-DEVELOPERS" type="fix">42524 - [PATCH] Better HSLF support for problem shape groups</action>
@ -41,6 +41,9 @@
<action dev="POI-DEVELOPERS" type="add">Additional HSLF support for Title and Slide Master Sheets</action>
<action dev="POI-DEVELOPERS" type="fix">42474 - [PATCH] Improved HSLF note to slide matching, and a NPE</action>
<action dev="POI-DEVELOPERS" type="fix">42481 - [PATCH] Tweak some HSLF exceptions, to make it clearer what you're catching</action>
<action dev="POI-DEVELOPERS" type="fix">42667 - [PATCH] Fix for HSLF writing of files with tables</action>
<action dev="POI-DEVELOPERS" type="add">Improved way of detecting HSSF cells that contain dates, isADateFormat</action>
<action dev="POI-DEVELOPERS" type="add">Initial, read-only support for Visio documents, as HDGF</action>
</release>
<release version="3.0-FINAL" date="2007-05-18">

View File

@ -20,6 +20,9 @@ import java.util.ArrayList;
import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil;
/**
* Base of all chunks, which hold data, flags etc
@ -44,6 +47,9 @@ public class Chunk {
/** The name of the chunk, as found from the commandDefinitions */
private String name;
/** For logging warnings about the structure of the file */
private POILogger logger = POILogFactory.getLogger(Chunk.class);
public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) {
this.header = header;
this.trailer = trailer;
@ -148,7 +154,9 @@ public class Chunk {
// Check we seem to have enough data
if(offset >= contents.length) {
System.err.println("Command offset " + offset + " past end of data at " + contents.length);
logger.log(POILogger.WARN,
"Command offset " + offset + " past end of data at " + contents.length
);
continue;
}
@ -167,9 +175,27 @@ public class Chunk {
LittleEndian.getDouble(contents, offset)
);
break;
case 12:
// A Little Endian String
// Starts 8 bytes into the data segment
// Ends at end of data, or 00 00
int startsAt = 8;
int endsAt = startsAt;
for(int j=startsAt; j<contents.length-1 && endsAt == startsAt; j++) {
if(contents[j] == 0 && contents[j+1] == 0) {
endsAt = j;
}
}
if(endsAt == startsAt) {
endsAt = contents.length;
}
int strLen = (endsAt-startsAt) / 2;
command.value = StringUtil.getFromUnicodeLE(contents, startsAt, strLen);
break;
case 25:
command.value = new Short(
LittleEndian.getShort(contents, offset)
LittleEndian.getShort(contents, offset)
);
break;
case 26:
@ -188,7 +214,8 @@ public class Chunk {
break;
default:
//System.err.println("Warning - Command of type " + type + " not processed!");
logger.log(POILogger.INFO,
"Command of type " + type + " not processed!");
}
// Add to the array

View File

@ -24,6 +24,9 @@ import java.util.ArrayList;
import java.util.Hashtable;
import java.util.StringTokenizer;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/**
* Factory class to create the appropriate chunks, which
* needs the version of the file to process the chunk header
@ -42,6 +45,9 @@ public class ChunkFactory {
private static String chunkTableName =
"/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl";
/** For logging problems we spot with the file */
private POILogger logger = POILogFactory.getLogger(ChunkFactory.class);
public ChunkFactory(int version) throws IOException {
this.version = version;
@ -107,7 +113,8 @@ public class ChunkFactory {
// Check we have enough data, and tweak the header size
// as required
if(endOfDataPos > data.length) {
System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
logger.log(POILogger.WARN,
"Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
endOfDataPos = data.length;
header.length = data.length - offset - header.getSizeInBytes();

View File

@ -24,6 +24,10 @@ public class ChunkHeaderV11 extends ChunkHeaderV6 {
* Does the chunk have a separator?
*/
public boolean hasSeparator() {
// For some reason, there are two types that don't have a
// separator despite the flags that indicate they do
if(type == 0x1f || type == 0xc9) { return false; }
// If there's a trailer, there's a separator
if(hasTrailer()) { return true; }

View File

@ -27,4 +27,8 @@ public class ChunkSeparator {
separatorData = new byte[4];
System.arraycopy(data, offset, separatorData, 0, 4);
}
/**
 * Describes this separator by the number of bytes it holds.
 */
public String toString() {
    StringBuffer description = new StringBuffer();
    description.append("<ChunkSeparator of length ");
    description.append(separatorData.length);
    description.append(">");
    return description.toString();
}
}

View File

@ -26,4 +26,8 @@ public class ChunkTrailer {
trailerData = new byte[8];
System.arraycopy(data, offset, trailerData, 0, 8);
}
/**
 * Describes this trailer by the number of bytes it holds.
 */
public String toString() {
    StringBuffer description = new StringBuffer();
    description.append("<ChunkTrailer of length ");
    description.append(trailerData.length);
    description.append(">");
    return description.toString();
}
}

View File

@ -70,6 +70,11 @@ public class VSDDumper {
" - " + Integer.toHexString(ptr.getFormat()));
System.out.println(ind + " Length is\t" + ptr.getLength() +
" - " + Integer.toHexString(ptr.getLength()));
if(ptr.destinationCompressed()) {
int decompLen = stream._getContentsLength();
System.out.println(ind + " DC.Length is\t" + decompLen +
" - " + Integer.toHexString(decompLen));
}
System.out.println(ind + " Compressed is\t" + ptr.destinationCompressed());
System.out.println(ind + " Stream is\t" + stream.getClass().getName());
@ -100,6 +105,9 @@ public class VSDDumper {
for(int i=0; i<cs.getChunks().length; i++) {
Chunk chunk = cs.getChunks()[i];
System.out.println(ind2 + "" + chunk.getName());
System.out.println(ind2 + " Length is " + chunk._getContents().length + " (" + Integer.toHexString(chunk._getContents().length) + ")");
System.out.println(ind2 + " OD Size is " + chunk.getOnDiskSize() + " (" + Integer.toHexString(chunk.getOnDiskSize()) + ")");
System.out.println(ind2 + " T / S is " + chunk.getTrailer() + " / " + chunk.getSeparator());
System.out.println(ind2 + " Holds " + chunk.getCommands().length + " commands");
for(int j=0; j<chunk.getCommands().length; j++) {
Command command = chunk.getCommands()[j];

View File

@ -0,0 +1,114 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.extractor;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk.Command;
import org.apache.poi.hdgf.streams.ChunkStream;
import org.apache.poi.hdgf.streams.PointerContainingStream;
import org.apache.poi.hdgf.streams.Stream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Class to find all the text in a Visio file, and return it.
 * Can operate on the command line (outputs to stdout), or
 * can return the text for you (eg for use with Lucene).
 */
public class VisioTextExtractor {
    /** The diagram whose streams are searched for text */
    private HDGFDiagram hdgf;
    /**
     * The filesystem the diagram was built from. Only set when
     *  constructed from a POIFSFileSystem or an InputStream; it
     *  is not read anywhere within this class.
     */
    private POIFSFileSystem fs;

    /**
     * Creates an extractor over an already opened diagram.
     */
    public VisioTextExtractor(HDGFDiagram hdgf) {
        this.hdgf = hdgf;
    }

    /**
     * Creates an extractor by opening the diagram held in the
     *  given filesystem.
     */
    public VisioTextExtractor(POIFSFileSystem fs) throws IOException {
        this(new HDGFDiagram(fs));
        this.fs = fs;
    }

    /**
     * Creates an extractor from a stream holding a Visio file.
     * The stream is not closed here; that is left to the caller.
     */
    public VisioTextExtractor(InputStream inp) throws IOException {
        this(new POIFSFileSystem(inp));
    }

    /**
     * Locates all the text entries in the file, and returns their
     *  contents.
     *
     * @return one entry per Text chunk that held a value, in the
     *  order the streams were walked
     */
    public String[] getAllText() {
        ArrayList text = new ArrayList();
        Stream[] topLevel = hdgf.getTopLevelStreams();
        for(int i=0; i<topLevel.length; i++) {
            findText(topLevel[i], text);
        }
        return (String[])text.toArray( new String[text.size()] );
    }

    /**
     * Recursively walks the given stream, adding the value of the
     *  first command of every chunk named "Text" to the list.
     *
     * @param stream the stream to search, along with any streams it points to
     * @param text the list that found strings are appended to
     */
    private void findText(Stream stream, ArrayList text) {
        // Pointer streams hold no text themselves here, so descend
        //  into whatever they point to
        if(stream instanceof PointerContainingStream) {
            Stream[] children =
                ((PointerContainingStream)stream).getPointedToStreams();
            for(int i=0; i<children.length; i++) {
                findText(children[i], text);
            }
        }

        if(stream instanceof ChunkStream) {
            ChunkStream cs = (ChunkStream)stream;
            for(int i=0; i<cs.getChunks().length; i++) {
                if(cs.getChunks()[i] == null ||
                        cs.getChunks()[i].getName() == null ||
                        !cs.getChunks()[i].getName().equals("Text")) {
                    continue;
                }

                // A Text chunk whose commands could not be parsed
                //  must not take down the whole extraction, so
                //  check there is a first command before using it
                Command[] commands = cs.getChunks()[i].getCommands();
                if(commands == null || commands.length == 0) {
                    continue;
                }

                Command cmd = commands[0];
                if(cmd != null && cmd.getValue() != null) {
                    text.add( cmd.getValue().toString() );
                }
            }
        }
    }

    /**
     * Returns the textual contents of the file.
     * Each text entry is followed by a newline, unless it already
     *  ends with a \r or a \n.
     */
    public String getText() {
        StringBuffer text = new StringBuffer();
        String[] allText = getAllText();
        for(int i=0; i<allText.length; i++) {
            text.append(allText[i]);
            if(!allText[i].endsWith("\r") &&
                    !allText[i].endsWith("\n")) {
                text.append("\n");
            }
        }
        return text.toString();
    }

    /**
     * Command line entry point: prints the text of the given file
     *  to stdout. Exits with status 1 if no file was given.
     */
    public static void main(String[] args) throws Exception {
        if(args.length == 0) {
            System.err.println("Use:");
            System.err.println(" VisioTextExtractor <file.vsd>");
            System.exit(1);
        }

        InputStream inp = new FileInputStream(args[0]);
        try {
            VisioTextExtractor extractor = new VisioTextExtractor(inp);
            // Print not PrintLn as already has \n added to it
            System.out.print(extractor.getText());
        } finally {
            // Always release the file handle, even if parsing fails
            inp.close();
        }
    }
}

View File

@ -43,6 +43,11 @@ public class ChunkStream extends Stream {
public void findChunks() {
ArrayList chunksA = new ArrayList();
if(getPointer().getOffset() == 0x64b3) {
int i = 0;
i++;
}
int pos = 0;
byte[] contents = getStore().getContents();
while(pos < contents.length) {

View File

@ -83,7 +83,7 @@ public abstract class Stream {
return new ChunkStream(pointer, store, chunkFactory);
}
else if(pointer.destinationHasStrings()) {
return new StringsStream(pointer, store);
return new StringsStream(pointer, store, chunkFactory);
}
// Give up and return a generic one

View File

@ -16,13 +16,16 @@ limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.streams;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
/**
* A Stream which holds Strings
* A Stream which holds Strings. This is just another kind
* of ChunkStream, it seems
*/
public class StringsStream extends Stream {
protected StringsStream(Pointer pointer, StreamStore store) {
protected StringsStream(Pointer pointer, StreamStore store, ChunkFactory chunkFactory) {
super(pointer, store);
// super(pointer, store, chunkFactory);
}
}

View File

@ -0,0 +1,107 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.extractor;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.PrintStream;
import junit.framework.TestCase;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
import org.apache.poi.hdgf.pointers.PointerFactory;
import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Tests for VisioTextExtractor, run against the sample file
 *  Test_Visio-Some_Random_Text.vsd
 */
public class TestVisioExtractor extends TestCase {
    /** Full path to the sample file, built in setUp */
    private String filename;

    /**
     * Builds the sample file's path from the HDGF.testdata.path
     *  system property.
     */
    protected void setUp() throws Exception {
        String dirname = System.getProperty("HDGF.testdata.path");
        filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
    }

    /**
     * Test the 3 different ways of creating one
     */
    public void testCreation() throws Exception {
        VisioTextExtractor extractor;

        // From a raw input stream
        extractor = new VisioTextExtractor(new FileInputStream(filename));
        assertNotNull(extractor);
        assertNotNull(extractor.getAllText());
        assertEquals(3, extractor.getAllText().length);

        // From a filesystem
        extractor = new VisioTextExtractor(
            new POIFSFileSystem(
                new FileInputStream(filename)
            )
        );
        assertNotNull(extractor);
        assertNotNull(extractor.getAllText());
        assertEquals(3, extractor.getAllText().length);

        // From an already opened diagram
        extractor = new VisioTextExtractor(
            new HDGFDiagram(
                new POIFSFileSystem(
                    new FileInputStream(filename)
                )
            )
        );
        assertNotNull(extractor);
        assertNotNull(extractor.getAllText());
        assertEquals(3, extractor.getAllText().length);
    }

    /**
     * Checks both the per-entry and the all-in-one text fetches
     */
    public void testExtraction() throws Exception {
        VisioTextExtractor extractor =
            new VisioTextExtractor(new FileInputStream(filename));

        // Check the array fetch
        String[] text = extractor.getAllText();
        assertNotNull(text);
        assertEquals(3, text.length);
        assertEquals("Test View\n", text[0]);
        assertEquals("I am a test view\n", text[1]);
        assertEquals("Some random text, on a page\n", text[2]);

        // And the all-in fetch
        String textS = extractor.getText();
        assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
    }

    /**
     * Runs the command line entry point with stdout captured, and
     *  checks what it printed
     */
    public void testMain() throws Exception {
        PrintStream oldOut = System.out;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream capture = new PrintStream(baos);
        System.setOut(capture);
        try {
            VisioTextExtractor.main(new String[] {filename});
        } finally {
            // Put things back, even if main threw, so that later
            //  tests don't have their output swallowed
            System.setOut(oldOut);
        }

        // Check
        capture.flush();
        String text = baos.toString();
        assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text);
    }
}

View File

@ -18,6 +18,7 @@ package org.apache.poi.hdgf.streams;
import java.io.FileInputStream;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
import org.apache.poi.hdgf.pointers.PointerFactory;
@ -202,4 +203,63 @@ public class TestStreamComplex extends StreamTest {
assertTrue(s8451.getPointedToStreams()[0] instanceof StringsStream);
assertTrue(s8451.getPointedToStreams()[1] instanceof StringsStream);
}
/**
 * Walks from the trailer down to the chunk stream at 0x64b3, and
 *  checks that its Text chunks hold the expected strings.
 */
public void testChunkWithText() throws Exception {
    // The stream we want hangs off the one at 0x7194, which
    //  is among the last children of the trailer
    Pointer ptrToTrailer = ptrFactory.createPointer(contents, trailerPointerAt);
    TrailerStream trailer = (TrailerStream)
        Stream.createStream(ptrToTrailer, contents, chunkFactory, ptrFactory);
    trailer.findChildren(contents);

    assertNotNull(trailer.getChildPointers());
    assertNotNull(trailer.getPointedToStreams());
    assertEquals(20, trailer.getChildPointers().length);
    assertEquals(20, trailer.getPointedToStreams().length);

    assertEquals(0x7194, trailer.getChildPointers()[13].getOffset());
    assertEquals(0x7194, trailer.getPointedToStreams()[13].getPointer().getOffset());

    PointerContainingStream parent = (PointerContainingStream)
        trailer.getPointedToStreams()[13];

    // Its first child sits at 0x64b3
    assertEquals(0x64b3, parent.getChildPointers()[0].getOffset());
    assertEquals(0x64b3, parent.getPointedToStreams()[0].getPointer().getOffset());

    ChunkStream chunkStream = (ChunkStream)parent.getPointedToStreams()[0];

    // Un-compressed, it should come to 26bc bytes
    assertEquals(0x26bc, chunkStream.getStore().getContents().length);
    // With plenty of chunks inside
    assertEquals(131, chunkStream.getChunks().length);

    // At least one of those chunks is a Text one
    boolean sawText = false;
    int idx = 0;
    while(idx < chunkStream.getChunks().length) {
        // Non short-circuit, so every chunk's name is still checked
        sawText |= chunkStream.getChunks()[idx].getName().equals("Text");
        idx++;
    }
    assertTrue(sawText);

    // The chunk at index 72 is a Text one
    assertEquals("Text", chunkStream.getChunks()[72].getName());
    Chunk textChunk = chunkStream.getChunks()[72];
    assertEquals("Text", textChunk.getName());

    // And its single command holds our text
    assertEquals(1, textChunk.getCommands().length);
    assertEquals("Test View\n", textChunk.getCommands()[0].getValue());

    // There's some more text almost at the end
    assertEquals("Text", chunkStream.getChunks()[128].getName());
    textChunk = chunkStream.getChunks()[128];
    assertEquals("Text", textChunk.getName());

    assertEquals(1, textChunk.getCommands().length);
    assertEquals("Some random text, on a page\n", textChunk.getCommands()[0].getValue());
}
}