Include a test for the text extraction code. Update code to use POILogger instead of System.err. Update the NOTICE file for the TLP change, and add entries for JUnit and the small GPL v3 bits of HDGF

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@551273 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2007-06-27 19:00:35 +00:00
parent 27f71146d8
commit 3f4950fa82
5 changed files with 140 additions and 6 deletions

View File

@ -1,5 +1,16 @@
Apache Jakarta POI Apache POI
Copyright 2001-2007 The Apache Software Foundation Copyright 2001-2007 The Apache Software Foundation
This product includes software developed by This product includes software developed by
The Apache Software Foundation (http://www.apache.org/). The Apache Software Foundation (http://www.apache.org/).
Unit testing support is provided by JUnit, under the
Common Public License Version 1.0:
http://www.opensource.org/licenses/cpl.php
See http://www.junit.org/
Small parts of the POI component HDGF are based on VSDump,
and are under the GNU General Public License version 3 (GPL v3):
http://gplv3.fsf.org/
See http://www.gnome.ru/projects/vsdump_en.html

View File

@ -20,6 +20,8 @@ import java.util.ArrayList;
import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition; import org.apache.poi.hdgf.chunks.ChunkFactory.CommandDefinition;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil; import org.apache.poi.util.StringUtil;
/** /**
@ -45,6 +47,9 @@ public class Chunk {
/** The name of the chunk, as found from the commandDefinitions */ /** The name of the chunk, as found from the commandDefinitions */
private String name; private String name;
/** For logging warnings about the structure of the file */
private POILogger logger = POILogFactory.getLogger(Chunk.class);
public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) { public Chunk(ChunkHeader header, ChunkTrailer trailer, ChunkSeparator separator, byte[] contents) {
this.header = header; this.header = header;
this.trailer = trailer; this.trailer = trailer;
@ -149,7 +154,9 @@ public class Chunk {
// Check we seem to have enough data // Check we seem to have enough data
if(offset >= contents.length) { if(offset >= contents.length) {
System.err.println("Command offset " + offset + " past end of data at " + contents.length); logger.log(POILogger.WARN,
"Command offset " + offset + " past end of data at " + contents.length
);
continue; continue;
} }
@ -207,7 +214,8 @@ public class Chunk {
break; break;
default: default:
//System.err.println("Warning - Command of type " + type + " not processed!"); logger.log(POILogger.INFO,
"Command of type " + type + " not processed!");
} }
// Add to the array // Add to the array

View File

@ -24,6 +24,9 @@ import java.util.ArrayList;
import java.util.Hashtable; import java.util.Hashtable;
import java.util.StringTokenizer; import java.util.StringTokenizer;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
/** /**
* Factory class to create the appropriate chunks, which * Factory class to create the appropriate chunks, which
* needs the version of the file to process the chunk header * needs the version of the file to process the chunk header
@ -42,6 +45,9 @@ public class ChunkFactory {
private static String chunkTableName = private static String chunkTableName =
"/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl"; "/org/apache/poi/hdgf/chunks/chunks_parse_cmds.tbl";
/** For logging problems we spot with the file */
private POILogger logger = POILogFactory.getLogger(ChunkFactory.class);
public ChunkFactory(int version) throws IOException { public ChunkFactory(int version) throws IOException {
this.version = version; this.version = version;
@ -107,7 +113,8 @@ public class ChunkFactory {
// Check we have enough data, and tweak the header size // Check we have enough data, and tweak the header size
// as required // as required
if(endOfDataPos > data.length) { if(endOfDataPos > data.length) {
System.err.println("Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!"); logger.log(POILogger.WARN,
"Header called for " + header.getLength() +" bytes, but that would take us passed the end of the data!");
endOfDataPos = data.length; endOfDataPos = data.length;
header.length = data.length - offset - header.getSizeInBytes(); header.length = data.length - offset - header.getSizeInBytes();

View File

@ -57,7 +57,6 @@ public class VisioTextExtractor {
for(int i=0; i<hdgf.getTopLevelStreams().length; i++) { for(int i=0; i<hdgf.getTopLevelStreams().length; i++) {
findText(hdgf.getTopLevelStreams()[i], text); findText(hdgf.getTopLevelStreams()[i], text);
} }
System.err.println("Found " + text.size() + " text string");
return (String[])text.toArray( new String[text.size()] ); return (String[])text.toArray( new String[text.size()] );
} }
private void findText(Stream stream, ArrayList text) { private void findText(Stream stream, ArrayList text) {
@ -108,6 +107,8 @@ public class VisioTextExtractor {
VisioTextExtractor extractor = VisioTextExtractor extractor =
new VisioTextExtractor(new FileInputStream(args[0])); new VisioTextExtractor(new FileInputStream(args[0]));
System.out.println(extractor.getText());
// Print not PrintLn as already has \n added to it
System.out.print(extractor.getText());
} }
} }

View File

@ -0,0 +1,107 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf.extractor;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.PrintStream;
import junit.framework.TestCase;
import org.apache.poi.hdgf.HDGFDiagram;
import org.apache.poi.hdgf.chunks.Chunk;
import org.apache.poi.hdgf.chunks.ChunkFactory;
import org.apache.poi.hdgf.pointers.Pointer;
import org.apache.poi.hdgf.pointers.PointerFactory;
import org.apache.poi.hssf.record.formula.eval.StringOperationEval;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * Tests for {@link VisioTextExtractor}, run against the sample file
 *  Test_Visio-Some_Random_Text.vsd, which is looked up in the directory
 *  named by the HDGF.testdata.path system property.
 */
public class TestVisioExtractor extends TestCase {
    /** Path of the sample Visio file, built in {@link #setUp()} */
    private String filename;

    /**
     * Works out where the sample file lives.
     * Fails straight away, with a clear message, if the test data
     *  property is missing, rather than later trying to open "null/...".
     */
    protected void setUp() throws Exception {
        String dirname = System.getProperty("HDGF.testdata.path");
        assertNotNull(
                "The HDGF.testdata.path system property must be set",
                dirname
        );
        filename = dirname + "/Test_Visio-Some_Random_Text.vsd";
    }

    /**
     * Test the 3 different ways of creating one
     */
    public void testCreation() throws Exception {
        FileInputStream in;

        // Directly from an InputStream
        in = new FileInputStream(filename);
        try {
            assertFindsThreeStrings(new VisioTextExtractor(in));
        } finally {
            in.close();
        }

        // From a POIFSFileSystem
        in = new FileInputStream(filename);
        try {
            assertFindsThreeStrings(
                    new VisioTextExtractor(new POIFSFileSystem(in))
            );
        } finally {
            in.close();
        }

        // From a HDGFDiagram
        in = new FileInputStream(filename);
        try {
            assertFindsThreeStrings(
                    new VisioTextExtractor(
                            new HDGFDiagram(new POIFSFileSystem(in))
                    )
            );
        } finally {
            in.close();
        }
    }

    /**
     * Checks both the array fetch and the all-in-one fetch
     *  return the text expected from the sample file.
     */
    public void testExtraction() throws Exception {
        FileInputStream in = new FileInputStream(filename);
        try {
            VisioTextExtractor extractor = new VisioTextExtractor(in);

            // Check the array fetch
            String[] text = extractor.getAllText();
            assertNotNull(text);
            assertEquals(3, text.length);

            assertEquals("Test View\n", text[0]);
            assertEquals("I am a test view\n", text[1]);
            assertEquals("Some random text, on a page\n", text[2]);

            // And the all-in fetch
            String textS = extractor.getText();
            assertEquals("Test View\nI am a test view\nSome random text, on a page\n", textS);
        } finally {
            in.close();
        }
    }

    /**
     * Runs the command line entry point with System.out captured,
     *  and checks what it printed.
     */
    public void testMain() throws Exception {
        PrintStream oldOut = System.out;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        PrintStream capture = new PrintStream(baos);
        System.setOut(capture);

        try {
            VisioTextExtractor.main(new String[] {filename});
        } finally {
            // Always put things back, even if main() throws, so
            //  that later tests still get the real System.out
            System.setOut(oldOut);
            capture.flush();
        }

        // Check
        String text = baos.toString();
        assertEquals("Test View\nI am a test view\nSome random text, on a page\n", text);
    }

    /**
     * Asserts that the extractor was created, and that it finds
     *  the three text strings held in the sample file.
     */
    private static void assertFindsThreeStrings(VisioTextExtractor extractor) {
        assertNotNull(extractor);
        assertNotNull(extractor.getAllText());
        assertEquals(3, extractor.getAllText().length);
    }
}