diff --git a/build.xml b/build.xml index 64845c4d6..9b66bab53 100644 --- a/build.xml +++ b/build.xml @@ -577,6 +577,8 @@ under the License. file="${main.src.test}/org/apache/poi/hwpf/data"/> + @@ -612,6 +614,8 @@ under the License. file="${main.src.test}/org/apache/poi/hpsf/data"/> + @@ -641,6 +645,7 @@ under the License. + @@ -657,6 +662,7 @@ under the License. + @@ -695,6 +701,7 @@ under the License. + @@ -729,6 +736,7 @@ under the License. + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 35e5a8f01..1cd4891fd 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,10 @@ + Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embeded documents + Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embeded word document + Initial support for getting and changing chart and series titles + Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it 44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord. 
43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers 30311 - More work on Conditional Formatting diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index c102f37e8..33bfce8ce 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,10 @@ + Update HSLFSlideShow and HSSFWorkbook to take advantage of POIFS updates, and allow reading embeded documents + Improve how POIFS works with directory entries, and update HWPFDocument to support reading an embeded word document + Initial support for getting and changing chart and series titles + Implement a proxy HSSFListener which tracks the format records, and lets you lookup the format string for a given cell. Convert the xls to csv example to use it 44792 - fixed encode/decode problems in ExternalNameRecord and CRNRecord. 43670, 44501 - Fix how HDGF deals with trailing data in the list of chunk headers 30311 - More work on Conditional Formatting diff --git a/src/java/org/apache/poi/POIDocument.java b/src/java/org/apache/poi/POIDocument.java index 075fa4538..01e50231c 100644 --- a/src/java/org/apache/poi/POIDocument.java +++ b/src/java/org/apache/poi/POIDocument.java @@ -29,6 +29,7 @@ import org.apache.poi.hpsf.PropertySet; import org.apache.poi.hpsf.PropertySetFactory; import org.apache.poi.hpsf.SummaryInformation; import org.apache.poi.poifs.filesystem.DirectoryEntry; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.Entry; @@ -50,12 +51,23 @@ public abstract class POIDocument { protected DocumentSummaryInformation dsInf; /** The open POIFS FileSystem that contains our document */ protected POIFSFileSystem filesystem; + /** The directory that our document lives in */ + protected DirectoryNode directory; /** For our own logging use 
*/ protected POILogger logger = POILogFactory.getLogger(this.getClass()); /* Have the property streams been read yet? (Only done on-demand) */ protected boolean initialized = false; + + + protected POIDocument(DirectoryNode dir, POIFSFileSystem fs) { + this.filesystem = fs; + this.directory = dir; + } + protected POIDocument(POIFSFileSystem fs) { + this(fs.getRoot(), fs); + } /** * Fetch the Document Summary Information of the document @@ -110,7 +122,7 @@ public abstract class POIDocument { DocumentInputStream dis; try { // Find the entry, and get an input stream for it - dis = filesystem.createDocumentInputStream(setName); + dis = directory.createDocumentInputStream(setName); } catch(IOException ie) { // Oh well, doesn't exist logger.log(POILogger.WARN, "Error getting property set with name " + setName + "\n" + ie); diff --git a/src/java/org/apache/poi/hssf/record/DrawingRecordForBiffViewer.java b/src/java/org/apache/poi/hssf/record/DrawingRecordForBiffViewer.java index b0a1bbf2e..4e5536f30 100644 --- a/src/java/org/apache/poi/hssf/record/DrawingRecordForBiffViewer.java +++ b/src/java/org/apache/poi/hssf/record/DrawingRecordForBiffViewer.java @@ -17,6 +17,8 @@ package org.apache.poi.hssf.record; +import java.io.ByteArrayInputStream; + /** * This is purely for the biff viewer. During normal operations we don't want * to be seeing this. 
@@ -35,6 +37,21 @@ public class DrawingRecordForBiffViewer super(in); } + public DrawingRecordForBiffViewer(DrawingRecord r) + { + super(convertToInputStream(r)); + convertRawBytesToEscherRecords(); + } + private static RecordInputStream convertToInputStream(DrawingRecord r) + { + byte[] data = r.serialize(); + RecordInputStream rinp = new RecordInputStream( + new ByteArrayInputStream(data) + ); + rinp.nextRecord(); + return rinp; + } + protected String getRecordName() { return "MSODRAWING"; diff --git a/src/java/org/apache/poi/hssf/record/RecordFactory.java b/src/java/org/apache/poi/hssf/record/RecordFactory.java index 1dada4450..5a627988f 100644 --- a/src/java/org/apache/poi/hssf/record/RecordFactory.java +++ b/src/java/org/apache/poi/hssf/record/RecordFactory.java @@ -77,6 +77,8 @@ public class RecordFactory NoteRecord.class, ObjectProtectRecord.class, ScenarioProtectRecord.class, FileSharingRecord.class, ChartTitleFormatRecord.class, DVRecord.class, DVALRecord.class, UncalcedRecord.class, + ChartRecord.class, LegendRecord.class, ChartTitleFormatRecord.class, + SeriesRecord.class, SeriesTextRecord.class, HyperlinkRecord.class, ExternalNameRecord.class, // TODO - same changes in non-@deprecated version of this class SupBookRecord.class, diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java index ca0c126f4..992a0f5ee 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFSheet.java @@ -1571,18 +1571,14 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet } /** - * Returns the top-level drawing patriach, if there is - * one. - * This will hold any graphics or charts for the sheet. + * Returns the agregate escher records for this sheet, + * it there is one. * WARNING - calling this will trigger a parsing of the * associated escher records. 
Any that aren't supported * (such as charts and complex drawing types) will almost - * certainly be lost or corrupted when written out. Only - * use this with simple drawings, otherwise call - * {@link HSSFSheet#createDrawingPatriarch()} and - * start from scratch! + * certainly be lost or corrupted when written out. */ - public HSSFPatriarch getDrawingPatriarch() { + public EscherAggregate getDrawingEscherAggregate() { book.findDrawingGroup(); // If there's now no drawing manager, then there's @@ -1601,6 +1597,25 @@ public class HSSFSheet implements org.apache.poi.ss.usermodel.Sheet // Grab our aggregate record, and wire it up EscherAggregate agg = (EscherAggregate) sheet.findFirstRecordBySid(EscherAggregate.sid); + return agg; + } + + /** + * Returns the top-level drawing patriach, if there is + * one. + * This will hold any graphics or charts for the sheet. + * WARNING - calling this will trigger a parsing of the + * associated escher records. Any that aren't supported + * (such as charts and complex drawing types) will almost + * certainly be lost or corrupted when written out. Only + * use this with simple drawings, otherwise call + * {@link HSSFSheet#createDrawingPatriarch()} and + * start from scratch! 
+ */ + public HSSFPatriarch getDrawingPatriarch() { + EscherAggregate agg = getDrawingEscherAggregate(); + if(agg == null) return null; + HSSFPatriarch patriarch = new HSSFPatriarch(this, agg); agg.setPatriarch(patriarch); diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index 51499a99e..57f44148b 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -62,6 +62,7 @@ import org.apache.poi.hssf.record.formula.Area3DPtg; import org.apache.poi.hssf.record.formula.MemFuncPtg; import org.apache.poi.hssf.record.formula.UnionPtg; import org.apache.poi.hssf.util.CellReference; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.ss.usermodel.CreationHelper; import org.apache.poi.util.POILogFactory; @@ -155,6 +156,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm protected HSSFWorkbook( Workbook book ) { + super(null, null); workbook = book; sheets = new ArrayList( INITIAL_CAPACITY ); names = new ArrayList( INITIAL_CAPACITY ); @@ -176,17 +178,37 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm * @see org.apache.poi.poifs.filesystem.POIFSFileSystem * @exception IOException if the stream cannot be read */ - public HSSFWorkbook(POIFSFileSystem fs, boolean preserveNodes) throws IOException { + this(fs.getRoot(), fs, preserveNodes); + } + + /** + * given a POI POIFSFileSystem object, and a specific directory + * within it, read in its Workbook and populate the high and + * low level models. If you're reading in a workbook...start here. + * + * @param directory the POI filesystem directory to process from + * @param fs the POI filesystem that contains the Workbook stream. + * @param preserveNodes whether to preseve other nodes, such as + * macros. 
This takes more memory, so only say yes if you + * need to. If set, will store all of the POIFSFileSystem + * in memory + * @see org.apache.poi.poifs.filesystem.POIFSFileSystem + * @exception IOException if the stream cannot be read + */ + public HSSFWorkbook(DirectoryNode directory, POIFSFileSystem fs, boolean preserveNodes) + throws IOException + { + super(directory, fs); this.preserveNodes = preserveNodes; - this.filesystem = fs; // If we're not preserving nodes, don't track the // POIFS any more if(! preserveNodes) { this.filesystem = null; + this.directory = null; } sheets = new ArrayList(INITIAL_CAPACITY); @@ -197,13 +219,13 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm // put theirs in one called "WORKBOOK" String workbookName = "Workbook"; try { - fs.getRoot().getEntry(workbookName); + directory.getEntry(workbookName); // Is the default name } catch(FileNotFoundException fe) { // Try the upper case form try { workbookName = "WORKBOOK"; - fs.getRoot().getEntry(workbookName); + directory.getEntry(workbookName); } catch(FileNotFoundException wfe) { // Doesn't contain it in either form throw new IllegalArgumentException("The supplied POIFSFileSystem contained neither a 'Workbook' entry, nor a 'WORKBOOK' entry. Is it really an excel file?"); @@ -213,7 +235,7 @@ public class HSSFWorkbook extends POIDocument implements org.apache.poi.ss.userm // Grab the data from the workbook stream, however // it happens to be spelt. 
- InputStream stream = fs.createDocumentInputStream(workbookName); + InputStream stream = directory.createDocumentInputStream(workbookName); EventRecordFactory factory = new EventRecordFactory(); diff --git a/src/java/org/apache/poi/poifs/dev/POIFSLister.java b/src/java/org/apache/poi/poifs/dev/POIFSLister.java new file mode 100644 index 000000000..c9fa349d6 --- /dev/null +++ b/src/java/org/apache/poi/poifs/dev/POIFSLister.java @@ -0,0 +1,81 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.poifs.dev; + +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Iterator; + +import org.apache.poi.poifs.filesystem.DirectoryEntry; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.DocumentNode; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; + +/** + * A lister of the entries in POIFS files. 
+ * + * Much simpler than {@link POIFSViewer} + */ +public class POIFSLister { + /** + * Display the entries of multiple POIFS files + * + * @param args the names of the files to be displayed + */ + public static void main(final String args[]) throws IOException { + if (args.length == 0) + { + System.err.println("Must specify at least one file to view"); + System.exit(1); + } + + for (int j = 0; j < args.length; j++) + { + viewFile(args[ j ]); + } + } + + public static void viewFile(final String filename) throws IOException + { + POIFSFileSystem fs = new POIFSFileSystem( + new FileInputStream(filename) + ); + displayDirectory(fs.getRoot(), ""); + } + + public static void displayDirectory(DirectoryNode dir, String indent) { + System.out.println(indent + dir.getName() + " -"); + String newIndent = indent + " "; + + for(Iterator it = dir.getEntries(); it.hasNext(); ) { + Object entry = it.next(); + if(entry instanceof DirectoryNode) { + displayDirectory((DirectoryNode)entry, newIndent); + } else { + DocumentNode doc = (DocumentNode)entry; + String name = doc.getName(); + if(name.charAt(0) < 10) { + String altname = "(0x0" + (int)name.charAt(0) + ")" + name.substring(1); + name = name.substring(1) + " <" + altname + ">"; + } + System.out.println(newIndent + name); + } + } + } +} \ No newline at end of file diff --git a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java index cb8039033..6805e5197 100644 --- a/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java +++ b/src/java/org/apache/poi/poifs/filesystem/DirectoryNode.java @@ -105,6 +105,31 @@ public class DirectoryNode { return _path; } + + /** + * open a document in the directory's entry's list of entries + * + * @param documentName the name of the document to be opened + * + * @return a newly opened DocumentInputStream + * + * @exception IOException if the document does not exist or the + * name is that of a DirectoryEntry + */ + + 
public DocumentInputStream createDocumentInputStream( + final String documentName) + throws IOException + { + Entry document = getEntry(documentName); + + if (!document.isDocumentEntry()) + { + throw new IOException("Entry '" + documentName + + "' is not a DocumentEntry"); + } + return new DocumentInputStream(( DocumentEntry ) document); + } /** * create a new DocumentEntry diff --git a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java index 61774dc67..7c693a5de 100644 --- a/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java +++ b/src/java/org/apache/poi/poifs/filesystem/POIFSFileSystem.java @@ -287,7 +287,7 @@ public class POIFSFileSystem { return getRoot().createDirectory(name); } - + /** * Write the filesystem out * @@ -422,7 +422,7 @@ public class POIFSFileSystem * @return the root entry */ - public DirectoryEntry getRoot() + public DirectoryNode getRoot() { if (_root == null) { @@ -446,14 +446,7 @@ public class POIFSFileSystem final String documentName) throws IOException { - Entry document = getRoot().getEntry(documentName); - - if (!document.isDocumentEntry()) - { - throw new IOException("Entry '" + documentName - + "' is not a DocumentEntry"); - } - return new DocumentInputStream(( DocumentEntry ) document); + return getRoot().createDocumentInputStream(documentName); } /** diff --git a/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java b/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java index 955cbc5ab..af6616307 100644 --- a/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java +++ b/src/scratchpad/src/org/apache/poi/hdgf/HDGFDiagram.java @@ -53,7 +53,7 @@ public class HDGFDiagram extends POIDocument { private PointerFactory ptrFactory; public HDGFDiagram(POIFSFileSystem fs) throws IOException { - filesystem = fs; + super(fs); DocumentEntry docProps = (DocumentEntry)filesystem.getRoot().getEntry("VisioDocument"); diff --git 
a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java index 12afcc49f..716651d1c 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -45,6 +45,7 @@ import org.apache.poi.hslf.record.Record; import org.apache.poi.hslf.record.UserEditAtom; import org.apache.poi.hslf.usermodel.ObjectData; import org.apache.poi.hslf.usermodel.PictureData; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.filesystem.DocumentInputStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -124,7 +125,21 @@ public class HSLFSlideShow extends POIDocument */ public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException { - this.filesystem = filesystem; + this(filesystem.getRoot(), filesystem); + } + + /** + * Constructs a Powerpoint document from a specific point in a + * POIFS Filesystem. Parses the document and places all the + * important stuff into data structures. + * + * @param dir the POIFS directory to read from + * @param filesystem the POIFS FileSystem to read from + * @throws IOException if there is a problem while parsing the document. 
+ */ + public HSLFSlideShow(DirectoryNode dir, POIFSFileSystem filesystem) throws IOException + { + super(dir, filesystem); // First up, grab the "Current User" stream // We need this before we can detect Encrypted Documents @@ -186,11 +201,11 @@ public class HSLFSlideShow extends POIDocument { // Get the main document stream DocumentEntry docProps = - (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document"); + (DocumentEntry)directory.getEntry("PowerPoint Document"); // Grab the document stream _docstream = new byte[docProps.getSize()]; - filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream); + directory.createDocumentInputStream("PowerPoint Document").read(_docstream); } /** @@ -272,7 +287,7 @@ public class HSLFSlideShow extends POIDocument */ private void readCurrentUserStream() { try { - currentUser = new CurrentUserAtom(filesystem); + currentUser = new CurrentUserAtom(directory); } catch(IOException ie) { logger.log(POILogger.ERROR, "Error finding Current User Atom:\n" + ie); currentUser = new CurrentUserAtom(); @@ -293,9 +308,9 @@ public class HSLFSlideShow extends POIDocument byte[] pictstream; try { - DocumentEntry entry = (DocumentEntry)filesystem.getRoot().getEntry("Pictures"); + DocumentEntry entry = (DocumentEntry)directory.getEntry("Pictures"); pictstream = new byte[entry.getSize()]; - DocumentInputStream is = filesystem.createDocumentInputStream("Pictures"); + DocumentInputStream is = directory.createDocumentInputStream("Pictures"); is.read(pictstream); } catch (FileNotFoundException e){ // Silently catch exceptions if the presentation doesn't diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/CurrentUserAtom.java b/src/scratchpad/src/org/apache/poi/hslf/record/CurrentUserAtom.java index 91a548523..e0810dbca 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/record/CurrentUserAtom.java +++ b/src/scratchpad/src/org/apache/poi/hslf/record/CurrentUserAtom.java @@ -93,9 +93,15 @@ public class CurrentUserAtom * 
Find the Current User in the filesystem, and create from that */ public CurrentUserAtom(POIFSFileSystem fs) throws IOException { + this(fs.getRoot()); + } + /** + * Find the Current User in the filesystem, and create from that + */ + public CurrentUserAtom(DirectoryNode dir) throws IOException { // Decide how big it is DocumentEntry docProps = - (DocumentEntry)fs.getRoot().getEntry("Current User"); + (DocumentEntry)dir.getEntry("Current User"); _contents = new byte[docProps.getSize()]; // Check it's big enough - if it's not at least 28 bytes long, then @@ -105,7 +111,7 @@ public class CurrentUserAtom } // Grab the contents - InputStream in = fs.createDocumentInputStream("Current User"); + InputStream in = dir.createDocumentInputStream("Current User"); in.read(_contents); // Set everything up diff --git a/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java b/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java index d708a5c1d..5b4850193 100644 --- a/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java +++ b/src/scratchpad/src/org/apache/poi/hssf/usermodel/HSSFChart.java @@ -19,14 +19,58 @@ package org.apache.poi.hssf.usermodel; -import org.apache.poi.hssf.record.*; -import org.apache.poi.hssf.record.formula.Area3DPtg; - import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Stack; +import org.apache.poi.hssf.record.AreaFormatRecord; +import org.apache.poi.hssf.record.AxisLineFormatRecord; +import org.apache.poi.hssf.record.AxisOptionsRecord; +import org.apache.poi.hssf.record.AxisParentRecord; +import org.apache.poi.hssf.record.AxisRecord; +import org.apache.poi.hssf.record.AxisUsedRecord; +import org.apache.poi.hssf.record.BOFRecord; +import org.apache.poi.hssf.record.BarRecord; +import org.apache.poi.hssf.record.BeginRecord; +import org.apache.poi.hssf.record.CategorySeriesAxisRecord; +import org.apache.poi.hssf.record.ChartFormatRecord; +import org.apache.poi.hssf.record.ChartRecord; 
+import org.apache.poi.hssf.record.ChartTitleFormatRecord; +import org.apache.poi.hssf.record.DataFormatRecord; +import org.apache.poi.hssf.record.DefaultDataLabelTextPropertiesRecord; +import org.apache.poi.hssf.record.DimensionsRecord; +import org.apache.poi.hssf.record.EOFRecord; +import org.apache.poi.hssf.record.EndRecord; +import org.apache.poi.hssf.record.FontBasisRecord; +import org.apache.poi.hssf.record.FontIndexRecord; +import org.apache.poi.hssf.record.FooterRecord; +import org.apache.poi.hssf.record.FrameRecord; +import org.apache.poi.hssf.record.HCenterRecord; +import org.apache.poi.hssf.record.HeaderRecord; +import org.apache.poi.hssf.record.LegendRecord; +import org.apache.poi.hssf.record.LineFormatRecord; +import org.apache.poi.hssf.record.LinkedDataFormulaField; +import org.apache.poi.hssf.record.LinkedDataRecord; +import org.apache.poi.hssf.record.PlotAreaRecord; +import org.apache.poi.hssf.record.PlotGrowthRecord; +import org.apache.poi.hssf.record.PrintSetupRecord; +import org.apache.poi.hssf.record.ProtectRecord; +import org.apache.poi.hssf.record.Record; +import org.apache.poi.hssf.record.SCLRecord; +import org.apache.poi.hssf.record.SeriesIndexRecord; +import org.apache.poi.hssf.record.SeriesRecord; +import org.apache.poi.hssf.record.SeriesTextRecord; +import org.apache.poi.hssf.record.SeriesToChartGroupRecord; +import org.apache.poi.hssf.record.SheetPropertiesRecord; +import org.apache.poi.hssf.record.TextRecord; +import org.apache.poi.hssf.record.TickRecord; +import org.apache.poi.hssf.record.UnitsRecord; +import org.apache.poi.hssf.record.UnknownRecord; +import org.apache.poi.hssf.record.VCenterRecord; +import org.apache.poi.hssf.record.ValueRangeRecord; +import org.apache.poi.hssf.record.formula.Area3DPtg; + /** * Has methods for construction of a chart object. 
* @@ -35,11 +79,13 @@ import java.util.Stack; public class HSSFChart { private ChartRecord chartRecord; - private SeriesRecord seriesRecord; + private LegendRecord legendRecord; private ChartTitleFormatRecord chartTitleFormat; private SeriesTextRecord chartTitleText; + private List series = new ArrayList(); + private HSSFChart(ChartRecord chartRecord) { this.chartRecord = chartRecord; } @@ -121,8 +167,8 @@ public class HSSFChart /** * Returns all the charts for the given sheet. * - * NOTE: Does not yet work... checking it in just so others - * can take a look. + * NOTE: You won't be able to do very much with + * these charts yet, as this is very limited support */ public static HSSFChart[] getSheetCharts(HSSFSheet sheet) { List charts = new ArrayList(); @@ -132,33 +178,49 @@ public class HSSFChart List records = sheet.getSheet().getRecords(); for(Iterator it = records.iterator(); it.hasNext();) { Record r = (Record)it.next(); - System.err.println(r); - - if(r instanceof DrawingRecord) { - DrawingRecord dr = (DrawingRecord)r; - } if(r instanceof ChartRecord) { lastChart = new HSSFChart((ChartRecord)r); charts.add(lastChart); } + if(r instanceof LegendRecord) { + lastChart.legendRecord = (LegendRecord)r; + } if(r instanceof SeriesRecord) { - lastChart.seriesRecord = (SeriesRecord)r; + HSSFSeries series = lastChart.new HSSFSeries( (SeriesRecord)r ); + lastChart.series.add(series); } if(r instanceof ChartTitleFormatRecord) { lastChart.chartTitleFormat = (ChartTitleFormatRecord)r; } if(r instanceof SeriesTextRecord) { - lastChart.chartTitleText = - (SeriesTextRecord)r; + // Applies to a series, unless we've seen + // a legend already + SeriesTextRecord str = (SeriesTextRecord)r; + if(lastChart.legendRecord == null && + lastChart.series.size() > 0) { + HSSFSeries series = (HSSFSeries) + lastChart.series.get(lastChart.series.size()-1); + series.seriesTitleText = str; + } else { + lastChart.chartTitleText = str; + } } } return (HSSFChart[]) charts.toArray( new 
HSSFChart[charts.size()] ); } + + /** + * Returns the series of the chart + */ + public HSSFSeries[] getSeries() { + return (HSSFSeries[]) + series.toArray(new HSSFSeries[series.size()]); + } /** * Returns the chart's title, if there is one, @@ -184,7 +246,6 @@ public class HSSFChart } } - private EOFRecord createEOFRecord() { @@ -858,4 +919,51 @@ public class HSSFChart r.setUnits( (short) 0 ); return r; } + + + /** + * A series in a chart + */ + public class HSSFSeries { + private SeriesRecord series; + private SeriesTextRecord seriesTitleText; + + private HSSFSeries(SeriesRecord series) { + this.series = series; + } + + public short getNumValues() { + return series.getNumValues(); + } + /** + * See {@link SeriesRecord} + */ + public short getValueType() { + return series.getValuesDataType(); + } + + /** + * Returns the series' title, if there is one, + * or null if not + */ + public String getSeriesTitle() { + if(seriesTitleText != null) { + return seriesTitleText.getText(); + } + return null; + } + + /** + * Changes the series' title, but only if there + * was one already. 
+ * TODO - add in the records if not + */ + public void setSeriesTitle(String title) { + if(seriesTitleText != null) { + seriesTitleText.setText(title); + } else { + throw new IllegalStateException("No series title found to change"); + } + } + } } diff --git a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java index 557060aa5..a54e50de4 100644 --- a/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java +++ b/src/scratchpad/src/org/apache/poi/hwpf/HWPFDocument.java @@ -29,6 +29,7 @@ import java.io.ByteArrayInputStream; import java.util.Iterator; import org.apache.poi.POIDocument; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.poifs.filesystem.DocumentEntry; import org.apache.poi.poifs.common.POIFSConstants; @@ -95,7 +96,7 @@ public class HWPFDocument extends POIDocument protected HWPFDocument() { - + super(null, null); } /** @@ -132,7 +133,7 @@ public class HWPFDocument extends POIDocument //do Ole stuff this( verifyAndBuildPOIFS(istream) ); } - + /** * This constructor loads a Word document from a POIFSFileSystem * @@ -141,16 +142,31 @@ public class HWPFDocument extends POIDocument * in POIFSFileSystem. */ public HWPFDocument(POIFSFileSystem pfilesystem) throws IOException + { + this(pfilesystem.getRoot(), pfilesystem); + } + + /** + * This constructor loads a Word document from a specific point + * in a POIFSFileSystem, probably not the default. + * Used typically to open embeded documents. + * + * @param pfilesystem The POIFSFileSystem that contains the Word document. + * @throws IOException If there is an unexpected IOException from the passed + * in POIFSFileSystem. + */ + public HWPFDocument(DirectoryNode directory, POIFSFileSystem pfilesystem) throws IOException { // Sort out the hpsf properties - filesystem = pfilesystem; + super(directory, pfilesystem); readProperties(); // read in the main stream. 
- DocumentEntry documentProps = - (DocumentEntry)filesystem.getRoot().getEntry("WordDocument"); + DocumentEntry documentProps = (DocumentEntry) + directory.getEntry("WordDocument"); _mainStream = new byte[documentProps.getSize()]; - filesystem.createDocumentInputStream("WordDocument").read(_mainStream); + + directory.createDocumentInputStream("WordDocument").read(_mainStream); // use the fib to determine the name of the table stream. _fib = new FileInformationBlock(_mainStream); @@ -165,14 +181,14 @@ public class HWPFDocument extends POIDocument DocumentEntry tableProps; try { tableProps = - (DocumentEntry)filesystem.getRoot().getEntry(name); + (DocumentEntry)directory.getEntry(name); } catch(FileNotFoundException fnfe) { throw new IllegalStateException("Table Stream '" + name + "' wasn't found - Either the document is corrupt, or is Word95 (or earlier)"); } // read in the table stream. _tableStream = new byte[tableProps.getSize()]; - filesystem.createDocumentInputStream(name).read(_tableStream); + directory.createDocumentInputStream(name).read(_tableStream); _fib.fillVariableFields(_mainStream, _tableStream); @@ -180,7 +196,7 @@ public class HWPFDocument extends POIDocument try { DocumentEntry dataProps = - (DocumentEntry) filesystem.getRoot().getEntry("Data"); + (DocumentEntry)directory.getEntry("Data"); _dataStream = new byte[dataProps.getSize()]; filesystem.createDocumentInputStream("Data").read(_dataStream); } diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java index f8618ff0b..d6197a598 100644 --- a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java @@ -21,6 +21,12 @@ package org.apache.poi.hslf.extractor; +import java.io.FileInputStream; + +import org.apache.poi.hslf.HSLFSlideShow; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import 
org.apache.poi.poifs.filesystem.POIFSFileSystem; + import junit.framework.TestCase; /** @@ -35,6 +41,8 @@ public class TextExtractor extends TestCase { private PowerPointExtractor ppe2; /** Where to go looking for our test files */ private String dirname; + /** Where our embeded files live */ + private String pdirname; public TextExtractor() throws Exception { dirname = System.getProperty("HSLF.testdata.path"); @@ -42,6 +50,8 @@ public class TextExtractor extends TestCase { ppe = new PowerPointExtractor(filename); String filename2 = dirname + "/with_textbox.ppt"; ppe2 = new PowerPointExtractor(filename2); + + pdirname = System.getProperty("POIFS.testdata.path"); } public void testReadSheetText() throws Exception { @@ -123,9 +133,87 @@ public class TextExtractor extends TestCase { char[] expC = exp.toCharArray(); char[] actC = act.toCharArray(); for(int i=0; i 20); + assertEquals("I am a sample document\r\nNot much on me\r\nI am document 1\r\n", + extractor3.getText()); + assertEquals("Sample Doc 1", extractor3.getSummaryInformation().getTitle()); + assertEquals("Sample Test", extractor3.getSummaryInformation().getSubject()); + + + doc = new HWPFDocument(dirB, fs); + extractor3 = new WordExtractor(doc); + + assertNotNull(extractor3.getText()); + assertTrue(extractor3.getText().length() > 20); + assertEquals("I am another sample document\r\nNot much on me\r\nI am document 2\r\n", + extractor3.getText()); + assertEquals("Sample Doc 2", extractor3.getSummaryInformation().getTitle()); + assertEquals("Another Sample Test", extractor3.getSummaryInformation().getSubject()); + } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 80611e6e6..ad311eb27 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -17,12 +17,15 @@ package org.apache.poi.hssf.extractor; +import 
java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import junit.framework.TestCase; import org.apache.poi.hssf.HSSFTestDataSamples; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.POIFSFileSystem; /** * @@ -118,4 +121,72 @@ public final class TestExcelExtractor extends TestCase { assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText()); } + + /** + * Embedded in a non-Excel file + */ + public void testWithEmbeded() throws Exception { + String pdirname = System.getProperty("POIFS.testdata.path"); + String filename = pdirname + "/word_with_embeded.doc"; + POIFSFileSystem fs = new POIFSFileSystem( + new FileInputStream(filename) + ); + + DirectoryNode objPool = (DirectoryNode) + fs.getRoot().getEntry("ObjectPool"); + DirectoryNode dirA = (DirectoryNode) + objPool.getEntry("_1269427460"); + DirectoryNode dirB = (DirectoryNode) + objPool.getEntry("_1269427461"); + + HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); + HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); + + ExcelExtractor exA = new ExcelExtractor(wbA); + ExcelExtractor exB = new ExcelExtractor(wbB); + + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + } + + /** + * Excel embedded in Excel + */ + public void testWithEmbededInOwn() throws Exception { + String pdirname = System.getProperty("POIFS.testdata.path"); + String filename = pdirname + "/excel_with_embeded.xls"; + POIFSFileSystem fs = new POIFSFileSystem( + new FileInputStream(filename) + ); + + DirectoryNode dirA = (DirectoryNode) + fs.getRoot().getEntry("MBD0000A3B5"); + DirectoryNode dirB = 
(DirectoryNode) + fs.getRoot().getEntry("MBD0000A3B4"); + + HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true); + HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); + + ExcelExtractor exA = new ExcelExtractor(wbA); + ExcelExtractor exB = new ExcelExtractor(wbB); + + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + + // And the base file too + ExcelExtractor ex = new ExcelExtractor(fs); + assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", + ex.getText()); + assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); + } } diff --git a/src/testcases/org/apache/poi/poifs/data/excel_with_embeded.xls b/src/testcases/org/apache/poi/poifs/data/excel_with_embeded.xls index ca2d4031d..789cccfee 100644 Binary files a/src/testcases/org/apache/poi/poifs/data/excel_with_embeded.xls and b/src/testcases/org/apache/poi/poifs/data/excel_with_embeded.xls differ diff --git a/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.ppt b/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.ppt new file mode 100644 index 000000000..588dcefe9 Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.ppt differ diff --git a/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.xls b/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.xls deleted file mode 100644 index 72e7232cd..000000000 Binary files a/src/testcases/org/apache/poi/poifs/data/ppt_with_embeded.xls and /dev/null differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample1.doc b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.doc new file mode 100644 index 000000000..0e935aa52 Binary files /dev/null and 
b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.doc differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample1.ppt b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.ppt new file mode 100644 index 000000000..7d72e0236 Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.ppt differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample1.xls b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.xls new file mode 100644 index 000000000..c00643cc5 Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/source-files/sample1.xls differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample2.doc b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.doc new file mode 100644 index 000000000..df5022a04 Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.doc differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample2.ppt b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.ppt new file mode 100644 index 000000000..40227fdca Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.ppt differ diff --git a/src/testcases/org/apache/poi/poifs/data/source-files/sample2.xls b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.xls new file mode 100644 index 000000000..66a1b1124 Binary files /dev/null and b/src/testcases/org/apache/poi/poifs/data/source-files/sample2.xls differ diff --git a/src/testcases/org/apache/poi/poifs/data/word_with_embeded.doc b/src/testcases/org/apache/poi/poifs/data/word_with_embeded.doc index d9fea29c2..36417221d 100644 Binary files a/src/testcases/org/apache/poi/poifs/data/word_with_embeded.doc and b/src/testcases/org/apache/poi/poifs/data/word_with_embeded.doc differ