Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-647199 via svnmerge from

https://svn.apache.org:443/repos/asf/poi/trunk

........
  r646854 | nick | 2008-04-10 16:54:02 +0100 (Thu, 10 Apr 2008) | 1 line
  
  Initial support for getting and changing chart titles and series titles
........
  r646870 | nick | 2008-04-10 17:59:10 +0100 (Thu, 10 Apr 2008) | 1 line
  
  Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document
........
  r647152 | nick | 2008-04-11 14:04:11 +0100 (Fri, 11 Apr 2008) | 1 line
  
  Update the sample embedded documents to be from known sources, so we can test better
........
  r647186 | nick | 2008-04-11 15:43:05 +0100 (Fri, 11 Apr 2008) | 1 line
  
  Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents
........


git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@647203 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2008-04-11 15:52:17 +00:00
parent 077fb6c6f9
commit 58889f782a
30 changed files with 676 additions and 69 deletions

View File

@ -577,6 +577,8 @@ under the License.
file="${main.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HPSF.testdata.path"
file="${main.src.test}/org/apache/poi/hpsf/data"/>
<sysproperty key="POIFS.testdata.path"
file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<formatter type="xml"/>
@ -612,6 +614,8 @@ under the License.
file="${main.src.test}/org/apache/poi/hpsf/data"/>
<sysproperty key="HWPF.testdata.path"
file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="POIFS.testdata.path"
file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<batchtest todir="${main.reports.test}">
@ -641,6 +645,7 @@ under the License.
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<formatter type="xml"/>
@ -657,6 +662,7 @@ under the License.
<classpath refid="test.classpath"/>
<sysproperty key="HSSF.testdata.path" file="${main.src.test}/org/apache/poi/hssf/data"/>
<sysproperty key="HPSF.testdata.path" file="${main.src.test}/org/apache/poi/hpsf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<test name="${testcase}"/>
@ -695,6 +701,7 @@ under the License.
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<formatter type="xml"/>
@ -729,6 +736,7 @@ under the License.
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>

View File

@ -37,6 +37,10 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action>
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action>
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>

View File

@ -34,6 +34,10 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.0.3-beta1" date="2008-04-??">
<action dev="POI-DEVELOPERS" type="add">Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embedded documents</action>
<action dev="POI-DEVELOPERS" type="add">Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embedded word document</action>
<action dev="POI-DEVELOPERS" type="add">Initial support for getting and changing chart and series titles</action>
<action dev="POI-DEVELOPERS" type="add">Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it</action>
<action dev="POI-DEVELOPERS" type="fix">44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord.</action>
<action dev="POI-DEVELOPERS" type="fix">43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers</action>
<action dev="POI-DEVELOPERS" type="add">30311 - More work on Conditional Formatting</action>

View File

@ -29,6 +29,7 @@ import org.apache.poi.hpsf.PropertySet;
import org.apache.poi.hpsf.PropertySetFactory;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.Entry;
@ -50,12 +51,23 @@ public abstract class POIDocument {
protected DocumentSummaryInformation dsInf;
/** The open POIFS FileSystem that contains our document */
protected POIFSFileSystem filesystem;
/** The directory that our document lives in */
protected DirectoryNode directory;
/** For our own logging use */
protected POILogger logger = POILogFactory.getLogger(this.getClass());
/* Have the property streams been read yet? (Only done on-demand) */
protected boolean initialized = false;
/**
* Creates a document that lives in the given directory of the
* given filesystem. Only the two references are stored here.
*
* @param dir the directory that the document lives in
* @param fs the POIFS FileSystem that contains the document
*/
protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) {
this.filesystem = fs;
this.directory = dir;
}
/**
* Creates a document that lives in the root directory of the
* given filesystem.
*
* @param fs the POIFS FileSystem that contains the document;
* its root (as returned by getRoot()) is used as the directory
*/
protected POIDocument(POIFSFileSystem fs) {
this(fs.getRoot(), fs);
}
/**
* Fetch the Document Summary Information of the document
@ -110,7 +122,7 @@ public abstract class POIDocument {
DocumentInputStream dis;
try {
// Find the entry, and get an input stream for it
dis = filesystem.createDocumentInputStream(setName);
dis = directory.createDocumentInputStream(setName);
} catch(IOException ie) {
// Oh well, doesn't exist
logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie);

View File

@ -17,6 +17,8 @@
package org.apache.poi.hssf.record;
import java.io.ByteArrayInputStream;
/**
* This is purely for the biff viewer. During normal operations we don't want
* to be seeing this.
@ -35,6 +37,21 @@ public class DrawingRecordForBiffViewer
super(in);
}
/**
* Creates a biff viewer record from an existing DrawingRecord.
* The record is turned into a RecordInputStream by
* convertToInputStream and handed to the superclass constructor,
* after which convertRawBytesToEscherRecords() is called.
*
* @param r the drawing record to build this record from
*/
public DrawingRecordForBiffViewer(DrawingRecord r)
{
super(convertToInputStream(r));
convertRawBytesToEscherRecords();
}
/**
 * Serializes the given record and wraps the resulting bytes in a
 * RecordInputStream on which nextRecord() has already been called once.
 *
 * @param r the drawing record to serialize
 * @return a RecordInputStream over the serialized bytes
 */
private static RecordInputStream convertToInputStream(DrawingRecord r)
{
    ByteArrayInputStream rawBytes = new ByteArrayInputStream(r.serialize());
    RecordInputStream recordStream = new RecordInputStream(rawBytes);
    recordStream.nextRecord();
    return recordStream;
}
protected String getRecordName()
{
return "MSODRAWING";

View File

@ -77,6 +77,8 @@ public class RecordFactory
NoteRecord.class, ObjectProtectRecord.class, ScenarioProtectRecord.class,
FileSharingRecord.class, ChartTitleFormatRecord.class,
DVRecord.class, DVALRecord.class, UncalcedRecord.class,
ChartRecord.class, LegendRecord.class, ChartTitleFormatRecord.class,
SeriesRecord.class, SeriesTextRecord.class,
HyperlinkRecord.class,
ExternalNameRecord.class, // TODO - same changes in non-@deprecated version of this class
SupBookRecord.class,

View File

@ -1571,18 +1571,14 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet
}
/**
* Returns the top-level drawing patriach, if there is
* one.
* This will hold any graphics or charts for the sheet.
* Returns the aggregate escher records for this sheet,
* if there is one.
* WARNING - calling this will trigger a parsing of the
* associated escher records. Any that aren't supported
* (such as charts and complex drawing types) will almost
* certainly be lost or corrupted when written out. Only
* use this with simple drawings, otherwise call
* {@link HSSFSheet#createDrawingPatriarch()} and
* start from scratch!
* certainly be lost or corrupted when written out.
*/
public HSSFPatriarch getDrawingPatriarch() {
public EscherAggregate getDrawingEscherAggregate() {
book.findDrawingGroup();
// If there's now no drawing manager, then there's
@ -1601,6 +1597,25 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet
// Grab our aggregate record, and wire it up
EscherAggregate agg = (EscherAggregate) sheet.findFirstRecordBySid(EscherAggregate.sid);
return agg;
}
/**
* Returns the top-level drawing patriarch, if there is
* one.
* This will hold any graphics or charts for the sheet.
* WARNING - calling this will trigger a parsing of the
* associated escher records. Any that aren't supported
* (such as charts and complex drawing types) will almost
* certainly be lost or corrupted when written out. Only
* use this with simple drawings, otherwise call
* {@link HSSFSheet#createDrawingPatriarch()} and
* start from scratch!
*/
public HSSFPatriarch getDrawingPatriarch() {
EscherAggregate agg = getDrawingEscherAggregate();
if(agg == null) return null;
HSSFPatriarch patriarch = new HSSFPatriarch(this, agg);
agg.setPatriarch(patriarch);

View File

@ -62,6 +62,7 @@ import org.apache.poi.hssf.record.formula.Area3DPtg;
import org.apache.poi.hssf.record.formula.MemFuncPtg;
import org.apache.poi.hssf.record.formula.UnionPtg;
import org.apache.poi.hssf.util.CellReference;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.CreationHelper;
import org.apache.poi.util.POILogFactory;
@ -155,6 +156,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm
protected HSSFWorkbook( Workbook book )
{
super(null, null);
workbook = book;
sheets = new ArrayList( INITIAL_CAPACITY );
names = new ArrayList( INITIAL_CAPACITY );
@ -176,17 +178,37 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem
* @exception IOException if the stream cannot be read
*/
public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes)
throws IOException
{
this(fs.getRoot(), fs, preserveNodes);
}
/**
* given a POI POIFSFileSystem object, and a specific directory
* within it, read in its Workbook and populate the high and
* low level models. If you're reading in a workbook...start here.
*
* @param directory the POI filesystem directory to process from
* @param fs the POI filesystem that contains the Workbook stream.
* @param preserveNodes whether to preserve other nodes, such as
* macros. This takes more memory, so only say yes if you
* need to. If set, will store all of the POIFSFileSystem
* in memory
* @see org.apache.poi.poifs.filesystem.POIFSFileSystem
* @exception IOException if the stream cannot be read
*/
public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes)
throws IOException
{
super(directory, fs);
this.preserveNodes = preserveNodes;
this.filesystem = fs;
// If we're not preserving nodes, don't track the
// POIFS any more
if(! preserveNodes) {
this.filesystem = null;
this.directory = null;
}
sheets = new ArrayList(INITIAL_CAPACITY);
@ -197,13 +219,13 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm
// put theirs in one called "WORKBOOK"
String workbookName = "Workbook";
try {
fs.getRoot().getEntry(workbookName);
directory.getEntry(workbookName);
// Is the default name
} catch(FileNotFoundException fe) {
// Try the upper case form
try {
workbookName = "WORKBOOK";
fs.getRoot().getEntry(workbookName);
directory.getEntry(workbookName);
} catch(FileNotFoundException wfe) {
// Doesn't contain it in either form
throw new IllegalArgumentException("The supplied POIFSFileSystem contained neither a 'Workbook' entry, nor a 'WORKBOOK' entry. Is it really an excel file?");
@ -213,7 +235,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm
// Grab the data from the workbook stream, however
// it happens to be spelt.
InputStream stream = fs.createDocumentInputStream(workbookName);
InputStream stream = directory.createDocumentInputStream(workbookName);
EventRecordFactory factory = new EventRecordFactory();

View File

@ -0,0 +1,81 @@
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.poifs.dev;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
 * A lister of the entries in POIFS files.
 *
 * Much simpler than {@link POIFSViewer}
 */
public class POIFSLister {
    /**
     * Display the entries of multiple POIFS files
     *
     * @param args the names of the files to be displayed
     * @exception IOException if one of the files cannot be read
     */
    public static void main(final String args[]) throws IOException {
        if (args.length == 0)
        {
            System.err.println("Must specify at least one file to view");
            System.exit(1);
        }
        for (int j = 0; j < args.length; j++)
        {
            viewFile(args[ j ]);
        }
    }

    /**
     * Opens the named file as a POIFS filesystem and prints its
     * directory tree, starting from the root, to standard output.
     *
     * @param filename the name of the file to be displayed
     * @exception IOException if the file cannot be opened or read
     */
    public static void viewFile(final String filename) throws IOException
    {
        FileInputStream in = new FileInputStream(filename);
        try {
            POIFSFileSystem fs = new POIFSFileSystem(in);
            displayDirectory(fs.getRoot(), "");
        } finally {
            // Always release the file handle, even if parsing or
            // listing fails part way through
            in.close();
        }
    }

    /**
     * Prints the name of the given directory, followed by all of its
     * entries, one level further indented. Child directories are
     * listed recursively.
     *
     * @param dir the directory to list
     * @param indent the prefix to print before the directory's name
     */
    public static void displayDirectory(DirectoryNode dir, String indent) {
        System.out.println(indent + dir.getName() + " -");
        String newIndent = indent + " ";
        for(Iterator it = dir.getEntries(); it.hasNext(); ) {
            Object entry = it.next();
            if(entry instanceof DirectoryNode) {
                displayDirectory((DirectoryNode)entry, newIndent);
            } else {
                DocumentNode doc = (DocumentNode)entry;
                String name = doc.getName();
                // Names starting with a character below 10 are shown
                // without it, followed by a readable form of the full
                // name. Guard against an empty name before looking
                // at the first character.
                if(name.length() > 0 && name.charAt(0) < 10) {
                    String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1);
                    name = name.substring(1) + " <" + altname + ">";
                }
                System.out.println(newIndent + name);
            }
        }
    }
}

View File

@ -105,6 +105,31 @@ public class DirectoryNode
{
return _path;
}
/**
 * Opens one of this directory's documents for reading.
 *
 * @param documentName the name of the document to be opened
 *
 * @return a newly opened DocumentInputStream
 *
 * @exception IOException if the document does not exist or the
 *            name is that of a DirectoryEntry
 */
public DocumentInputStream createDocumentInputStream(
        final String documentName)
    throws IOException
{
    final Entry entry = getEntry(documentName);

    if (entry.isDocumentEntry())
    {
        return new DocumentInputStream(( DocumentEntry ) entry);
    }
    throw new IOException("Entry '" + documentName
                          + "' is not a DocumentEntry");
}
/**
* create a new DocumentEntry

View File

@ -287,7 +287,7 @@ public class POIFSFileSystem
{
return getRoot().createDirectory(name);
}
/**
* Write the filesystem out
*
@ -422,7 +422,7 @@ public class POIFSFileSystem
* @return the root entry
*/
public DirectoryEntry getRoot()
public DirectoryNode getRoot()
{
if (_root == null)
{
@ -446,14 +446,7 @@ public class POIFSFileSystem
final String documentName)
throws IOException
{
Entry document = getRoot().getEntry(documentName);
if (!document.isDocumentEntry())
{
throw new IOException("Entry '" + documentName
+ "' is not a DocumentEntry");
}
return new DocumentInputStream(( DocumentEntry ) document);
return getRoot().createDocumentInputStream(documentName);
}
/**

View File

@ -53,7 +53,7 @@ public class HDGFDiagram extends POIDocument {
private PointerFactory ptrFactory;
public HDGFDiagram(POIFSFileSystem fs) throws IOException {
filesystem = fs;
super(fs);
DocumentEntry docProps =
(DocumentEntry)filesystem.getRoot().getEntry("VisioDocument");

View File

@ -45,6 +45,7 @@ import org.apache.poi.hslf.record.Record;
import org.apache.poi.hslf.record.UserEditAtom;
import org.apache.poi.hslf.usermodel.ObjectData;
import org.apache.poi.hslf.usermodel.PictureData;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.DocumentInputStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@ -124,7 +125,21 @@ public class HSLFSlideShow extends POIDocument
*/
public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException
{
this.filesystem = filesystem;
this(filesystem.getRoot(), filesystem);
}
/**
* Constructs a Powerpoint document from a specific point in a
* POIFS Filesystem. Parses the document and places all the
* important stuff into data structures.
*
* @param dir the POIFS directory to read from
* @param filesystem the POIFS FileSystem to read from
* @throws IOException if there is a problem while parsing the document.
*/
public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException
{
super(dir, filesystem);
// First up, grab the "Current User" stream
// We need this before we can detect Encrypted Documents
@ -186,11 +201,11 @@ public class HSLFSlideShow extends POIDocument
{
// Get the main document stream
DocumentEntry docProps =
(DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document");
(DocumentEntry)directory.getEntry("PowerPoint Document");
// Grab the document stream
_docstream = new byte[docProps.getSize()];
filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream);
directory.createDocumentInputStream("PowerPoint Document").read(_docstream);
}
/**
@ -272,7 +287,7 @@ public class HSLFSlideShow extends POIDocument
*/
private void readCurrentUserStream() {
try {
currentUser = new CurrentUserAtom(filesystem);
currentUser = new CurrentUserAtom(directory);
} catch(IOException ie) {
logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie);
currentUser = new CurrentUserAtom();
@ -293,9 +308,9 @@ public class HSLFSlideShow extends POIDocument
byte[] pictstream;
try {
DocumentEntry entry = (DocumentEntry)filesystem.getRoot().getEntry("Pictures");
DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures");
pictstream = new byte[entry.getSize()];
DocumentInputStream is = filesystem.createDocumentInputStream("Pictures");
DocumentInputStream is = directory.createDocumentInputStream("Pictures");
is.read(pictstream);
} catch (FileNotFoundException e){
// Silently catch exceptions if the presentation doesn't

View File

@ -93,9 +93,15 @@ public class CurrentUserAtom
* Find the Current User in the filesystem, and create from that
*/
public CurrentUserAtom(POIFSFileSystem fs) throws IOException {
this(fs.getRoot());
}
/**
* Find the Current User in the filesystem, and create from that
*/
public CurrentUserAtom(DirectoryNode dir) throws IOException {
// Decide how big it is
DocumentEntry docProps =
(DocumentEntry)fs.getRoot().getEntry("Current User");
(DocumentEntry)dir.getEntry("Current User");
_contents = new byte[docProps.getSize()];
// Check it's big enough - if it's not at least 28 bytes long, then
@ -105,7 +111,7 @@ public class CurrentUserAtom
}
// Grab the contents
InputStream in = fs.createDocumentInputStream("Current User");
InputStream in = dir.createDocumentInputStream("Current User");
in.read(_contents);
// Set everything up

View File

@ -19,14 +19,58 @@
package org.apache.poi.hssf.usermodel;
import org.apache.poi.hssf.record.*;
import org.apache.poi.hssf.record.formula.Area3DPtg;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import org.apache.poi.hssf.record.AreaFormatRecord;
import org.apache.poi.hssf.record.AxisLineFormatRecord;
import org.apache.poi.hssf.record.AxisOptionsRecord;
import org.apache.poi.hssf.record.AxisParentRecord;
import org.apache.poi.hssf.record.AxisRecord;
import org.apache.poi.hssf.record.AxisUsedRecord;
import org.apache.poi.hssf.record.BOFRecord;
import org.apache.poi.hssf.record.BarRecord;
import org.apache.poi.hssf.record.BeginRecord;
import org.apache.poi.hssf.record.CategorySeriesAxisRecord;
import org.apache.poi.hssf.record.ChartFormatRecord;
import org.apache.poi.hssf.record.ChartRecord;
import org.apache.poi.hssf.record.ChartTitleFormatRecord;
import org.apache.poi.hssf.record.DataFormatRecord;
import org.apache.poi.hssf.record.DefaultDataLabelTextPropertiesRecord;
import org.apache.poi.hssf.record.DimensionsRecord;
import org.apache.poi.hssf.record.EOFRecord;
import org.apache.poi.hssf.record.EndRecord;
import org.apache.poi.hssf.record.FontBasisRecord;
import org.apache.poi.hssf.record.FontIndexRecord;
import org.apache.poi.hssf.record.FooterRecord;
import org.apache.poi.hssf.record.FrameRecord;
import org.apache.poi.hssf.record.HCenterRecord;
import org.apache.poi.hssf.record.HeaderRecord;
import org.apache.poi.hssf.record.LegendRecord;
import org.apache.poi.hssf.record.LineFormatRecord;
import org.apache.poi.hssf.record.LinkedDataFormulaField;
import org.apache.poi.hssf.record.LinkedDataRecord;
import org.apache.poi.hssf.record.PlotAreaRecord;
import org.apache.poi.hssf.record.PlotGrowthRecord;
import org.apache.poi.hssf.record.PrintSetupRecord;
import org.apache.poi.hssf.record.ProtectRecord;
import org.apache.poi.hssf.record.Record;
import org.apache.poi.hssf.record.SCLRecord;
import org.apache.poi.hssf.record.SeriesIndexRecord;
import org.apache.poi.hssf.record.SeriesRecord;
import org.apache.poi.hssf.record.SeriesTextRecord;
import org.apache.poi.hssf.record.SeriesToChartGroupRecord;
import org.apache.poi.hssf.record.SheetPropertiesRecord;
import org.apache.poi.hssf.record.TextRecord;
import org.apache.poi.hssf.record.TickRecord;
import org.apache.poi.hssf.record.UnitsRecord;
import org.apache.poi.hssf.record.UnknownRecord;
import org.apache.poi.hssf.record.VCenterRecord;
import org.apache.poi.hssf.record.ValueRangeRecord;
import org.apache.poi.hssf.record.formula.Area3DPtg;
/**
* Has methods for construction of a chart object.
*
@ -35,11 +79,13 @@ import java.util.Stack;
public class HSSFChart
{
private ChartRecord chartRecord;
private SeriesRecord seriesRecord;
private LegendRecord legendRecord;
private ChartTitleFormatRecord chartTitleFormat;
private SeriesTextRecord chartTitleText;
private List series = new ArrayList();
private HSSFChart(ChartRecord chartRecord) {
this.chartRecord = chartRecord;
}
@ -121,8 +167,8 @@ public class HSSFChart
/**
* Returns all the charts for the given sheet.
*
* NOTE: Does not yet work... checking it in just so others
* can take a look.
* NOTE: You won't be able to do very much with
* these charts yet, as this is very limited support
*/
public static HSSFChart[] getSheetCharts(HSSFSheet sheet) {
List charts = new ArrayList();
@ -132,33 +178,49 @@ public class HSSFChart
List records = sheet.getSheet().getRecords();
for(Iterator it = records.iterator(); it.hasNext();) {
Record r = (Record)it.next();
System.err.println(r);
if(r instanceof DrawingRecord) {
DrawingRecord dr = (DrawingRecord)r;
}
if(r instanceof ChartRecord) {
lastChart = new HSSFChart((ChartRecord)r);
charts.add(lastChart);
}
if(r instanceof LegendRecord) {
lastChart.legendRecord = (LegendRecord)r;
}
if(r instanceof SeriesRecord) {
lastChart.seriesRecord = (SeriesRecord)r;
HSSFSeries series = lastChart.new HSSFSeries( (SeriesRecord)r );
lastChart.series.add(series);
}
if(r instanceof ChartTitleFormatRecord) {
lastChart.chartTitleFormat =
(ChartTitleFormatRecord)r;
}
if(r instanceof SeriesTextRecord) {
lastChart.chartTitleText =
(SeriesTextRecord)r;
// Applies to a series, unless we've seen
// a legend already
SeriesTextRecord str = (SeriesTextRecord)r;
if(lastChart.legendRecord == null &&
lastChart.series.size() > 0) {
HSSFSeries series = (HSSFSeries)
lastChart.series.get(lastChart.series.size()-1);
series.seriesTitleText = str;
} else {
lastChart.chartTitleText = str;
}
}
}
return (HSSFChart[])
charts.toArray( new HSSFChart[charts.size()] );
}
/**
 * Returns all of the series held by this chart, as an array
 */
public HSSFSeries[] getSeries() {
    HSSFSeries[] result = new HSSFSeries[series.size()];
    series.toArray(result);
    return result;
}
/**
* Returns the chart's title, if there is one,
@ -184,7 +246,6 @@ public class HSSFChart
}
}
private EOFRecord createEOFRecord()
{
@ -858,4 +919,51 @@ public class HSSFChart
r.setUnits( (short) 0 );
return r;
}
/**
 * One series of a chart. Wraps the series' {@link SeriesRecord}
 * and, where one was found, the record holding its title text.
 */
public class HSSFSeries {
    private SeriesRecord series;
    private SeriesTextRecord seriesTitleText;

    private HSSFSeries(SeriesRecord series) {
        this.series = series;
    }

    /**
     * Returns the number of values, as reported by the
     * underlying {@link SeriesRecord}
     */
    public short getNumValues() {
        return series.getNumValues();
    }

    /**
     * Returns the values data type of the underlying record.
     * See {@link SeriesRecord}
     */
    public short getValueType() {
        return series.getValuesDataType();
    }

    /**
     * Returns the title of the series, or null if
     * the series has no title
     */
    public String getSeriesTitle() {
        return (seriesTitleText == null) ? null : seriesTitleText.getText();
    }

    /**
     * Changes the title of the series. Only possible
     * if the series already had one.
     * TODO - add in the records if not
     *
     * @throws IllegalStateException if the series has no title to change
     */
    public void setSeriesTitle(String title) {
        if(seriesTitleText == null) {
            throw new IllegalStateException("No series title found to change");
        }
        seriesTitleText.setText(title);
    }
}
}

View File

@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream;
import java.util.Iterator;
import org.apache.poi.POIDocument;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.common.POIFSConstants;
@ -95,7 +96,7 @@ public class HWPFDocument extends POIDocument
protected HWPFDocument()
{
super(null, null);
}
/**
@ -132,7 +133,7 @@ public class HWPFDocument extends POIDocument
//do Ole stuff
this( verifyAndBuildPOIFS(istream) );
}
/**
* This constructor loads a Word document from a POIFSFileSystem
*
@ -141,16 +142,31 @@ public class HWPFDocument extends POIDocument
* in POIFSFileSystem.
*/
public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException
{
this(pfilesystem.getRoot(), pfilesystem);
}
/**
* This constructor loads a Word document from a specific point
* in a POIFSFileSystem, probably not the default.
* Used typically to open embedded documents.
*
* @param directory The directory within the POIFSFileSystem that holds the Word document's streams.
* @param pfilesystem The POIFSFileSystem that contains the Word document.
* @throws IOException If there is an unexpected IOException from the passed
* in POIFSFileSystem.
*/
public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException
{
// Sort out the hpsf properties
filesystem = pfilesystem;
super(directory, pfilesystem);
readProperties();
// read in the main stream.
DocumentEntry documentProps =
(DocumentEntry)filesystem.getRoot().getEntry("WordDocument");
DocumentEntry documentProps = (DocumentEntry)
directory.getEntry("WordDocument");
_mainStream = new byte[documentProps.getSize()];
filesystem.createDocumentInputStream("WordDocument").read(_mainStream);
directory.createDocumentInputStream("WordDocument").read(_mainStream);
// use the fib to determine the name of the table stream.
_fib = new FileInformationBlock(_mainStream);
@ -165,14 +181,14 @@ public class HWPFDocument extends POIDocument
DocumentEntry tableProps;
try {
tableProps =
(DocumentEntry)filesystem.getRoot().getEntry(name);
(DocumentEntry)directory.getEntry(name);
} catch(FileNotFoundException fnfe) {
throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)");
}
// read in the table stream.
_tableStream = new byte[tableProps.getSize()];
filesystem.createDocumentInputStream(name).read(_tableStream);
directory.createDocumentInputStream(name).read(_tableStream);
_fib.fillVariableFields(_mainStream, _tableStream);
@ -180,7 +196,7 @@ public class HWPFDocument extends POIDocument
try
{
DocumentEntry dataProps =
(DocumentEntry) filesystem.getRoot().getEntry("Data");
(DocumentEntry)directory.getEntry("Data");
_dataStream = new byte[dataProps.getSize()];
filesystem.createDocumentInputStream("Data").read(_dataStream);
}

View File

@ -21,6 +21,12 @@
package org.apache.poi.hslf.extractor;
import java.io.FileInputStream;
import org.apache.poi.hslf.HSLFSlideShow;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import junit.framework.TestCase;
/**
@ -35,6 +41,8 @@ public class TextExtractor extends TestCase {
private PowerPointExtractor ppe2;
/** Where to go looking for our test files */
private String dirname;
/** Where our embeded files live */
private String pdirname;
public TextExtractor() throws Exception {
dirname = System.getProperty("HSLF.testdata.path");
@ -42,6 +50,8 @@ public class TextExtractor extends TestCase {
ppe = new PowerPointExtractor(filename);
String filename2 = dirname + "/with_textbox.ppt";
ppe2 = new PowerPointExtractor(filename2);
pdirname = System.getProperty("POIFS.testdata.path");
}
public void testReadSheetText() throws Exception {
@ -123,9 +133,87 @@ public class TextExtractor extends TestCase {
char[] expC = exp.toCharArray();
char[] actC = act.toCharArray();
for(int i=0; i<expC.length; i++) {
System.out.println(i + "\t" + expC[i] + " " + actC[i]);
assertEquals(expC[i],actC[i]);
assertEquals("Char " + i, expC[i], actC[i]);
}
assertEquals(exp,act);
}
/**
 * Checks that text can be pulled out of the two PowerPoint documents
 * that are stored as directory entries inside the sample Excel file.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void testExtractFromEmbeded() throws Exception {
    String filename3 = pdirname + "/excel_with_embeded.xls";

    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(filename3);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HSLFSlideShow ss;

        DirectoryNode dirA = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B6");
        DirectoryNode dirB = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B3");

        // Both directories must carry a PowerPoint stream
        assertNotNull(dirA.getEntry("PowerPoint Document"));
        assertNotNull(dirB.getEntry("PowerPoint Document"));

        // Check the first file
        ss = new HSLFSlideShow(dirA, fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
            ppe.getText(true, false)
        );

        // And the second
        ss = new HSLFSlideShow(dirB, fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
            ppe.getText(true, false)
        );
    } finally {
        in.close();
    }
}
/**
 * A PowerPoint file with embedded PowerPoint files.
 * TODO - figure out how to handle this, as ppt
 *  appears to embed not as ole2 streams
 *
 * The method name lacks the "test" prefix, so it is currently disabled.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void DISABLEDtestExtractFromOwnEmbeded() throws Exception {
    String filename3 = pdirname + "/ppt_with_embeded.ppt";

    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(filename3);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HSLFSlideShow ss;

        DirectoryNode dirA = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B6");
        DirectoryNode dirB = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B3");

        assertNotNull(dirA.getEntry("PowerPoint Document"));
        assertNotNull(dirB.getEntry("PowerPoint Document"));

        // Check the first file
        ss = new HSLFSlideShow(dirA, fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("Sample PowerPoint file\nThis is the 1st file\nNot much too it\n",
            ppe.getText(true, false)
        );

        // And the second
        ss = new HSLFSlideShow(dirB, fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("Sample PowerPoint file\nThis is the 2nd file\nNot much too it either\n",
            ppe.getText(true, false)
        );

        // Check the master doc two ways: via the root directory...
        ss = new HSLFSlideShow(fs.getRoot(), fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("I have embeded files in me\n",
            ppe.getText(true, false)
        );

        // ...and via the filesystem itself
        ss = new HSLFSlideShow(fs);
        ppe = new PowerPointExtractor(ss);
        assertEquals("I have embeded files in me\n",
            ppe.getText(true, false)
        );
    } finally {
        in.close();
    }
}
}

View File

@ -19,6 +19,8 @@ package org.apache.poi.hssf.usermodel;
import java.io.File;
import java.io.FileInputStream;
import org.apache.poi.hssf.record.SeriesRecord;
import junit.framework.TestCase;
public class TestHSSFChart extends TestCase {
@ -29,14 +31,65 @@ public class TestHSSFChart extends TestCase {
}
/**
 * Loads WithChart.xls and checks that only the second sheet reports a
 * chart, and that the chart has two titled series and no chart title.
 *
 * @throws Exception if the sample workbook cannot be opened or parsed
 */
public void testSingleChart() throws Exception {
    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(new File(dirName, "WithChart.xls"));
    try {
        HSSFWorkbook wb = new HSSFWorkbook(in);

        HSSFSheet s1 = wb.getSheetAt(0);
        HSSFSheet s2 = wb.getSheetAt(1);
        HSSFSheet s3 = wb.getSheetAt(2);

        assertEquals(0, HSSFChart.getSheetCharts(s1).length);
        assertEquals(1, HSSFChart.getSheetCharts(s2).length);
        assertEquals(0, HSSFChart.getSheetCharts(s3).length);

        // Check the chart on the 2nd sheet
        HSSFChart[] charts = HSSFChart.getSheetCharts(s2);
        assertEquals(1, charts.length);

        assertEquals(2, charts[0].getSeries().length);
        assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
        assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
        assertEquals(null, charts[0].getChartTitle());
    } finally {
        in.close();
    }
}
/**
 * Loads WithTwoCharts.xls and checks that the second and third sheets
 * each report one chart with two titled series and no chart title.
 *
 * @throws Exception if the sample workbook cannot be opened or parsed
 */
public void testTwoCharts() throws Exception {
    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(new File(dirName, "WithTwoCharts.xls"));
    try {
        HSSFWorkbook wb = new HSSFWorkbook(in);

        HSSFSheet s1 = wb.getSheetAt(0);
        HSSFSheet s2 = wb.getSheetAt(1);
        HSSFSheet s3 = wb.getSheetAt(2);

        assertEquals(0, HSSFChart.getSheetCharts(s1).length);
        assertEquals(1, HSSFChart.getSheetCharts(s2).length);
        assertEquals(1, HSSFChart.getSheetCharts(s3).length);

        HSSFChart[] charts;

        // Check the chart on the 2nd sheet
        charts = HSSFChart.getSheetCharts(s2);
        assertEquals(1, charts.length);

        assertEquals(2, charts[0].getSeries().length);
        assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
        assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
        assertEquals(null, charts[0].getChartTitle());

        // And the third sheet
        charts = HSSFChart.getSheetCharts(s3);
        assertEquals(1, charts.length);

        assertEquals(2, charts[0].getSeries().length);
        assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle());
        assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle());
        assertEquals(null, charts[0].getChartTitle());
    } finally {
        in.close();
    }
}
public void BROKENtestThreeCharts() throws Exception {
public void testThreeCharts() throws Exception {
HSSFWorkbook wb = new HSSFWorkbook(
new FileInputStream(new File(dirName, "WithThreeCharts.xls"))
);
@ -51,11 +104,30 @@ public class TestHSSFChart extends TestCase {
HSSFChart[] charts;
// Check the charts on the 2nd sheet
charts = HSSFChart.getSheetCharts(s2);
assertNull(charts[0].getChartTitle());
assertEquals(2, charts.length);
assertEquals(2, charts[0].getSeries().length);
assertEquals("1st Column", charts[0].getSeries()[0].getSeriesTitle());
assertEquals("2nd Column", charts[0].getSeries()[1].getSeriesTitle());
assertEquals(6, charts[0].getSeries()[0].getNumValues());
assertEquals(6, charts[0].getSeries()[1].getNumValues());
assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[0].getValueType());
assertEquals(SeriesRecord.CATEGORY_DATA_TYPE_NUMERIC, charts[0].getSeries()[1].getValueType());
assertEquals(null, charts[0].getChartTitle());
assertEquals(1, charts[1].getSeries().length);
assertEquals(null, charts[1].getSeries()[0].getSeriesTitle());
assertEquals("Pie Chart Title Thingy", charts[1].getChartTitle());
// And the third sheet
charts = HSSFChart.getSheetCharts(s3);
assertEquals("Sheet 3 Chart with Title", charts[1].getChartTitle());
assertEquals(1, charts.length);
assertEquals(2, charts[0].getSeries().length);
assertEquals("Squares", charts[0].getSeries()[0].getSeriesTitle());
assertEquals("Base Numbers", charts[0].getSeries()[1].getSeriesTitle());
assertEquals("Sheet 3 Chart with Title", charts[0].getChartTitle());
}
}

View File

@ -17,15 +17,13 @@
package org.apache.poi.hwpf.extractor;
import java.io.FileInputStream;
import java.util.Iterator;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.model.TextPiece;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import junit.framework.TestCase;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
* Test the different routes to extracting text
*
@ -54,12 +52,16 @@ public class TestWordExtractor extends TestCase {
private WordExtractor extractor;
// Corrupted document - can't do paragraph based stuff
private WordExtractor extractor2;
// A Word doc embedded in an Excel file
private String filename3;
protected void setUp() throws Exception {
String dirname = System.getProperty("HWPF.testdata.path");
String pdirname = System.getProperty("POIFS.testdata.path");
String filename = dirname + "/test2.doc";
String filename2 = dirname + "/test.doc";
filename3 = pdirname + "/excel_with_embeded.xls";
extractor = new WordExtractor(new FileInputStream(filename));
extractor2 = new WordExtractor(new FileInputStream(filename2));
@ -101,4 +103,50 @@ public class TestWordExtractor extends TestCase {
String text = extractor.getTextFromPieces();
assertEquals(p_text1_block, text);
}
/**
 * Test that we can get data from two different
 *  embedded Word documents, each stored as a directory
 *  entry of the file named by filename3.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void testExtractFromEmbeded() throws Exception {
    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(filename3);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);
        HWPFDocument doc;
        WordExtractor extractor3;

        DirectoryNode dirA = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B7");
        DirectoryNode dirB = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B2");

        // Should have WordDocument and 1Table
        assertNotNull(dirA.getEntry("1Table"));
        assertNotNull(dirA.getEntry("WordDocument"));

        assertNotNull(dirB.getEntry("1Table"));
        assertNotNull(dirB.getEntry("WordDocument"));

        // Check each in turn
        doc = new HWPFDocument(dirA, fs);
        extractor3 = new WordExtractor(doc);

        assertNotNull(extractor3.getText());
        assertTrue(extractor3.getText().length() > 20);
        assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n",
            extractor3.getText());
        assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle());
        assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject());

        doc = new HWPFDocument(dirB, fs);
        extractor3 = new WordExtractor(doc);

        assertNotNull(extractor3.getText());
        assertTrue(extractor3.getText().length() > 20);
        assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n",
            extractor3.getText());
        assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle());
        assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject());
    } finally {
        in.close();
    }
}
}

View File

@ -17,12 +17,15 @@
package org.apache.poi.hssf.extractor;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import junit.framework.TestCase;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
/**
*
@ -118,4 +121,72 @@ public final class TestExcelExtractor extends TestCase {
assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
}
/**
 * Embedded in a non-Excel file: reads two workbooks stored under the
 * "ObjectPool" directory of a Word document and checks their text
 * and summary titles.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void testWithEmbeded() throws Exception {
    String pdirname = System.getProperty("POIFS.testdata.path");
    String filename = pdirname + "/word_with_embeded.doc";

    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(filename);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);

        DirectoryNode objPool = (DirectoryNode)
            fs.getRoot().getEntry("ObjectPool");
        DirectoryNode dirA = (DirectoryNode)
            objPool.getEntry("_1269427460");
        DirectoryNode dirB = (DirectoryNode)
            objPool.getEntry("_1269427461");

        HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
        HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);

        ExcelExtractor exA = new ExcelExtractor(wbA);
        ExcelExtractor exB = new ExcelExtractor(wbB);

        assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
            exA.getText());
        assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());

        assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
            exB.getText());
        assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
    } finally {
        in.close();
    }
}
/**
 * Excel embedded in Excel: reads two workbooks stored as directory
 * entries of an Excel file, then the containing file itself, and
 * checks the text and summary title of each.
 *
 * @throws Exception if the sample file cannot be opened or parsed
 */
public void testWithEmbededInOwn() throws Exception {
    String pdirname = System.getProperty("POIFS.testdata.path");
    String filename = pdirname + "/excel_with_embeded.xls";

    // Hold on to the stream so it is released even if an assertion fails
    FileInputStream in = new FileInputStream(filename);
    try {
        POIFSFileSystem fs = new POIFSFileSystem(in);

        DirectoryNode dirA = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B5");
        DirectoryNode dirB = (DirectoryNode)
            fs.getRoot().getEntry("MBD0000A3B4");

        HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
        HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);

        ExcelExtractor exA = new ExcelExtractor(wbA);
        ExcelExtractor exB = new ExcelExtractor(wbB);

        assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
            exA.getText());
        assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());

        assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
            exB.getText());
        assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());

        // And the base file too
        ExcelExtractor ex = new ExcelExtractor(fs);
        assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
            ex.getText());
        assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
    } finally {
        in.close();
    }
}
}