From e6b361637be42524c90ab4897f712ac8d123ec03 Mon Sep 17 00:00:00 2001 From: Avik Sengupta Date: Sat, 28 May 2005 05:36:00 +0000 Subject: [PATCH] Initial Powerpoint support, by Nick Burch git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@353701 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/hslf/HSLFSlideShow.java | 347 ++++++++++++++++++ .../poi/hslf/dev/PPDrawingTextListing.java | 86 +++++ .../apache/poi/hslf/dev/SLWTTextListing.java | 91 +++++ .../hslf/dev/SlideAndNotesAtomListing.java | 66 ++++ .../poi/hslf/dev/SlideShowRecordDumper.java | 167 +++++++++ .../hslf/dev/UserEditAndPersistListing.java | 95 +++++ .../InvalidRecordFormatException.java | 34 ++ .../hslf/extractor/PowerPointExtractor.java | 182 +++++++++ .../src/org/apache/poi/hslf/model/Notes.java | 73 ++++ .../src/org/apache/poi/hslf/model/Sheet.java | 92 +++++ .../src/org/apache/poi/hslf/model/Slide.java | 118 ++++++ .../org/apache/poi/hslf/model/TextRun.java | 141 +++++++ .../poi/hslf/record/CurrentUserAtom.java | 218 +++++++++++ .../hslf/record/DummyRecordWithChildren.java | 70 ++++ .../poi/hslf/record/EscherTextboxWrapper.java | 90 +++++ .../src/org/apache/poi/hslf/record/Notes.java | 95 +++++ .../org/apache/poi/hslf/record/NotesAtom.java | 120 ++++++ .../org/apache/poi/hslf/record/PPDrawing.java | 191 ++++++++++ .../poi/hslf/record/PersistPtrHolder.java | 67 ++++ .../record/PositionDependentRecordAtom.java | 44 +++ .../org/apache/poi/hslf/record/Record.java | 192 ++++++++++ .../apache/poi/hslf/record/RecordAtom.java | 38 ++ .../poi/hslf/record/RecordContainer.java | 109 ++++++ .../src/org/apache/poi/hslf/record/Slide.java | 95 +++++ .../org/apache/poi/hslf/record/SlideAtom.java | 206 +++++++++++ .../poi/hslf/record/SlideListWithText.java | 148 ++++++++ .../poi/hslf/record/SlidePersistAtom.java | 114 ++++++ .../apache/poi/hslf/record/TextBytesAtom.java | 91 +++++ .../apache/poi/hslf/record/TextCharsAtom.java | 91 +++++ .../poi/hslf/record/TextHeaderAtom.java | 91 +++++ 
.../hslf/record/UnknownRecordPlaceholder.java | 64 ++++ .../apache/poi/hslf/record/UserEditAtom.java | 141 +++++++ .../apache/poi/hslf/usermodel/SlideShow.java | 281 ++++++++++++++ .../util/MutableByteArrayOutputStream.java | 42 +++ .../org/apache/poi/hslf/TestReWrite.java | 73 ++++ .../apache/poi/hslf/TestReWriteSanity.java | 100 +++++ .../org/apache/poi/hslf/TestRecordCounts.java | 86 +++++ .../poi/hslf/data/basic_test_ppt_file.ppt | Bin 0 -> 15360 bytes .../poi/hslf/data/next_test_ppt_file.ppt | Bin 0 -> 13824 bytes .../poi/hslf/extractor/TextExtractor.java | 67 ++++ .../poi/hslf/record/TestSlidePersistAtom.java | 60 +++ .../poi/hslf/record/TestTextBytesAtom.java | 83 +++++ .../poi/hslf/record/TestTextCharsAtom.java | 80 ++++ .../poi/hslf/record/TestTextHeaderAtom.java | 61 +++ .../apache/poi/hslf/usermodel/TestCounts.java | 54 +++ .../poi/hslf/usermodel/TestNotesText.java | 61 +++ .../poi/hslf/usermodel/TestSheetText.java | 61 +++ 47 files changed, 4876 insertions(+) create mode 100644 src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/SLWTTextListing.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/SlideAndNotesAtomListing.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/SlideShowRecordDumper.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/exceptions/InvalidRecordFormatException.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/model/Notes.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/model/Sheet.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/model/Slide.java create mode 100644 
src/scratchpad/src/org/apache/poi/hslf/model/TextRun.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/CurrentUserAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/Notes.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/PersistPtrHolder.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/PositionDependentRecordAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/Record.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/RecordAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/RecordContainer.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/Slide.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/SlideAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/SlideListWithText.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/SlidePersistAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/TextBytesAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/TextCharsAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/TextHeaderAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/UnknownRecordPlaceholder.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/record/UserEditAtom.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/usermodel/SlideShow.java create mode 100644 src/scratchpad/src/org/apache/poi/hslf/util/MutableByteArrayOutputStream.java create mode 100644 
src/scratchpad/testcases/org/apache/poi/hslf/TestReWrite.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/TestReWriteSanity.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/TestRecordCounts.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/data/basic_test_ppt_file.ppt create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/data/next_test_ppt_file.ppt create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/record/TestSlidePersistAtom.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/record/TestTextBytesAtom.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/record/TestTextCharsAtom.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/record/TestTextHeaderAtom.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestCounts.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java create mode 100644 src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestSheetText.java diff --git a/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java new file mode 100644 index 000000000..2701bb1cd --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/HSLFSlideShow.java @@ -0,0 +1,347 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf; + +import java.util.*; +import java.io.*; + +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.poifs.filesystem.POIFSDocument; +import org.apache.poi.poifs.filesystem.DocumentEntry; +import org.apache.poi.poifs.filesystem.DocumentInputStream; + +import org.apache.poi.hpsf.PropertySet; +import org.apache.poi.hpsf.PropertySetFactory; +import org.apache.poi.hpsf.MutablePropertySet; +import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.hpsf.DocumentSummaryInformation; + +import org.apache.poi.util.LittleEndian; + +import org.apache.poi.hslf.record.*; + +/** + * This class contains the main functionality for the Powerpoint file + * "reader". It is only a very basic class for now + * + * @author Nick Burch + */ + +public class HSLFSlideShow +{ + private InputStream istream; + private POIFSFileSystem filesystem; + + // Holds metadata on our document + private SummaryInformation sInf; + private DocumentSummaryInformation dsInf; + private CurrentUserAtom currentUser; + + // Low level contents of the file + private byte[] _docstream; + + // Low level contents + private Record[] _records; + + /** + * Constructs a Powerpoint document from fileName. Parses the document + * and places all the important stuff into data structures. + * + * @param fileName The name of the file to read. + * @throws IOException if there is a problem while parsing the document. 
+ */ + public HSLFSlideShow(String fileName) throws IOException + { + this(new FileInputStream(fileName)); + } + + /** + * Constructs a Powerpoint document from an input stream. Parses the + * document and places all the important stuff into data structures. + * + * @param inputStream the source of the data + * @throws IOException if there is a problem while parsing the document. + */ + public HSLFSlideShow(InputStream inputStream) throws IOException + { + //do Ole stuff + this(new POIFSFileSystem(inputStream)); + istream = inputStream; + } + + /** + * Constructs a Powerpoint document from a POIFS Filesystem. Parses the + * document and places all the important stuff into data structures. + * + * @param filesystem the POIFS FileSystem to read from + * @throws IOException if there is a problem while parsing the document. + */ + public HSLFSlideShow(POIFSFileSystem filesystem) throws IOException + { + this.filesystem = filesystem; + + // Go find a PowerPoint document in the stream + // Save anything useful we come across + readFIB(); + + // Look for Property Streams: + readProperties(); + } + + + /** + * Shuts things down. Closes underlying streams etc + * + * @throws IOException + */ + public void close() throws IOException + { + if(istream != null) { + istream.close(); + } + filesystem = null; + } + + + /** + * Extracts the main document stream from the POI file then hands off + * to other functions that parse other areas. 
+ * + * @throws IOException + */ + private void readFIB() throws IOException + { + // Get the main document stream + DocumentEntry docProps = + (DocumentEntry)filesystem.getRoot().getEntry("PowerPoint Document"); + + // Grab the document stream + _docstream = new byte[docProps.getSize()]; + filesystem.createDocumentInputStream("PowerPoint Document").read(_docstream); + + // The format of records in a powerpoint file are: + // + // + // + // If it has a zero length, following it will be another record + // + // If it has a length, depending on its type it may have children or data + // If it has children, these will follow straight away + // > + // If it has data, this will come straigh after, and run for the length + // + // All lengths given exclude the 8 byte record header + // (Data records are known as Atoms) + + // Document should start with: + // 0F 00 E8 03 ## ## ## ## + // (type 1000 = document, info 00 0f is normal, rest is document length) + // 01 00 E9 03 28 00 00 00 + // (type 1001 = document atom, info 00 01 normal, 28 bytes long) + // 80 16 00 00 E0 10 00 00 xx xx xx xx xx xx xx xx + // 05 00 00 00 0A 00 00 00 xx xx xx + // (the contents of the document atom, not sure what it means yet) + // (records then follow) + + // When parsing a document, look to see if you know about that type + // of the current record. 
If you know it's a type that has children, + // process the record's data area looking for more records + // If you know about the type and it doesn't have children, either do + // something with the data (eg TextRun) or skip over it + // If you don't know about the type, play safe and skip over it (using + // its length to know where the next record will start) + // + // For now, this work is handled by Record.findChildRecords + + _records = Record.findChildRecords(_docstream,0,_docstream.length); + } + + + /** + * Find the properties from the filesystem, and load them + */ + public void readProperties() { + // DocumentSummaryInformation + dsInf = (DocumentSummaryInformation)getPropertySet("\005DocumentSummaryInformation"); + + // SummaryInformation + sInf = (SummaryInformation)getPropertySet("\005SummaryInformation"); + + // Current User + try { + currentUser = new CurrentUserAtom(filesystem); + } catch(IOException ie) { + System.err.println("Error finding Current User Atom:\n" + ie); + currentUser = new CurrentUserAtom(); + } + } + + + /** + * For a given named property entry, either return it or null if + * if it wasn't found + */ + public PropertySet getPropertySet(String setName) { + DocumentInputStream dis; + try { + // Find the entry, and get an input stream for it + dis = filesystem.createDocumentInputStream(setName); + } catch(IOException ie) { + // Oh well, doesn't exist + System.err.println("Error getting property set with name " + setName + "\n" + ie); + return null; + } + + try { + // Create the Property Set + PropertySet set = PropertySetFactory.create(dis); + return set; + } catch(IOException ie) { + // Must be corrupt or something like that + System.err.println("Error creating property set with name " + setName + "\n" + ie); + } catch(org.apache.poi.hpsf.HPSFException he) { + // Oh well, doesn't exist + System.err.println("Error creating property set with name " + setName + "\n" + he); + } + return null; + } + + + /** + * Writes out the slideshow 
file the is represented by an instance of + * this class + * @param out The OutputStream to write to. + * @throws IOException If there is an unexpected IOException from the passed + * in OutputStream + */ + public void write(OutputStream out) throws IOException { + // Get a new Filesystem to write into + POIFSFileSystem outFS = new POIFSFileSystem(); + + // Write out the Property Streams + if(sInf != null) { + writePropertySet("\005SummaryInformation",sInf,outFS); + } + if(dsInf != null) { + writePropertySet("\005DocumentSummaryInformation",dsInf,outFS); + } + + // Need to take special care of PersistPtrHolder and UserEditAtoms + // Store where they used to be, and where they are now + Hashtable persistPtrHolderPos = new Hashtable(); + Hashtable userEditAtomsPos = new Hashtable(); + int lastUserEditAtomPos = -1; + + // Write ourselves out + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for(int i=0; i<_records.length; i++) { + // If it's a special record, record where it was and now is + if(_records[i] instanceof PersistPtrHolder) { + // Update position + PersistPtrHolder pph = (PersistPtrHolder)_records[i]; + int oldPos = pph.getLastOnDiskOffset(); + int newPos = baos.size(); + pph.setLastOnDiskOffet(newPos); + persistPtrHolderPos.put(new Integer(oldPos),new Integer(newPos)); + } + if(_records[i] instanceof UserEditAtom) { + // Update position + UserEditAtom uea = (UserEditAtom)_records[i]; + int oldPos = uea.getLastOnDiskOffset(); + int newPos = baos.size(); + lastUserEditAtomPos = newPos; + uea.setLastOnDiskOffet(newPos); + userEditAtomsPos.put(new Integer(oldPos),new Integer(newPos)); + + // Update internal positions + if(uea.getLastUserEditAtomOffset() != 0) { + Integer ueNewPos = (Integer)userEditAtomsPos.get( new Integer( uea.getLastUserEditAtomOffset() ) ); + uea.setLastUserEditAtomOffset(ueNewPos.intValue()); + } + if(uea.getPersistPointersOffset() != 0) { + Integer ppNewPos = (Integer)persistPtrHolderPos.get( new Integer( 
uea.getPersistPointersOffset() ) ); + uea.setPersistPointersOffset(ppNewPos.intValue()); + } + } + + // Finally, write out + _records[i].writeOut(baos); + } + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); + outFS.createDocument(bais,"PowerPoint Document"); + + // Update and write out the Current User atom + if(lastUserEditAtomPos != -1) { + currentUser.setCurrentEditOffset(lastUserEditAtomPos); + } + currentUser.writeToFS(outFS); + + // Send the POIFSFileSystem object out + outFS.writeFilesystem(out); + } + + + /** + * Writes out a given ProperySet + */ + private void writePropertySet(String name, PropertySet set, POIFSFileSystem fs) throws IOException { + try { + MutablePropertySet mSet = new MutablePropertySet(set); + ByteArrayOutputStream bOut = new ByteArrayOutputStream(); + mSet.write(bOut); + byte[] data = bOut.toByteArray(); + ByteArrayInputStream bIn = new ByteArrayInputStream(data); + fs.createDocument(bIn,name); + System.out.println("Wrote property set " + name + " of size " + data.length); + } catch(org.apache.poi.hpsf.WritingNotSupportedException wnse) { + System.err.println("Couldn't write property set with name " + name + " as not supported by HPSF yet"); + } + } + + + /* ******************* fetching methods follow ********************* */ + + + /** + * Returns an array of all the records found in the slideshow + */ + public Record[] getRecords() { return _records; } + + /** + * Returns an array of the bytes of the file. Only correct after a + * call to open or write - at all other times might be wrong! 
+ */ + public byte[] getUnderlyingBytes() { return _docstream; } + + /** + * Fetch the Document Summary Information of the document + */ + public DocumentSummaryInformation getDocumentSummaryInformation() { return dsInf; } + + /** + * Fetch the Summary Information of the document + */ + public SummaryInformation getSummaryInformation() { return sInf; } + + /** + * Fetch the Current User Atom of the document + */ + public CurrentUserAtom getCurrentUserAtom() { return currentUser; } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java b/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java new file mode 100644 index 000000000..680e63edb --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/PPDrawingTextListing.java @@ -0,0 +1,86 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.dev; + +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.model.*; +import org.apache.poi.hslf.record.*; +import org.apache.poi.hslf.usermodel.*; + +import java.io.*; + +/** + * Uses record level code to locate PPDrawing entries. + * Having found them, it sees if they have DDF Textbox records, and if so, + * searches those for text. 
Prints out any text it finds + */ +public class PPDrawingTextListing { + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Need to give a filename"); + System.exit(1); + } + + HSLFSlideShow ss = new HSLFSlideShow(args[0]); + + // Find PPDrawings at any second level position + Record[] records = ss.getRecords(); + for(int i=0; i"); + return; + } + + String filename = args[0]; + + SlideShowRecordDumper foo = new SlideShowRecordDumper(filename); + + foo.printDump(); + foo.close(); + } + + + /** + * Constructs a Powerpoint dump from fileName. Parses the document + * and dumps out the contents + * + * @param fileName The name of the file to read. + * @throws IOException if there is a problem while parsing the document. + */ + public SlideShowRecordDumper(String fileName) throws IOException + { + doc = new HSLFSlideShow(fileName); + } + + /** + * Shuts things down. Closes underlying streams etc + * + * @throws IOException + */ + public void close() throws IOException + { + if(doc != null) { + doc.close(); + } + doc = null; + } + + + public void printDump() throws IOException { + // Prints out the records in the tree + walkTree(0,0,doc.getRecords()); + } + + public String makeHex(int number, int padding) { + String hex = Integer.toHexString(number).toUpperCase(); + while(hex.length() < padding) { + hex = "0" + hex; + } + return hex; + } + + public String reverseHex(String s) { + StringBuffer ret = new StringBuffer(); + + // Get to a multiple of two + if((s.length() / 2) * 2 != s.length()) { s = "0" + s; } + + // Break up into blocks + char[] c = s.toCharArray(); + for(int i=c.length; i>0; i-=2) { + ret.append(c[i-2]); + ret.append(c[i-1]); + if(i != 2) { ret.append(' '); } + } + return ret.toString(); + } + + public int getDiskLen(Record r) throws IOException { + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + r.writeOut(baos); + byte[] b = baos.toByteArray(); + return b.length; + } + + + public void 
walkTree(int depth, int pos, Record[] records) throws IOException { + int indent = depth; + String ind = ""; + for(int i=0; i " + rHexType + " )"); + System.out.println(ind + " Len is " + (len-8) + " (" + makeHex((len-8),8) + "), on disk len is " + len ); + System.out.println(); + + // If it has children, show them + if(r.getChildRecords() != null) { + walkTree((depth+3),pos+8,r.getChildRecords()); + } + + // Wind on the position marker + pos += len; + } + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java b/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java new file mode 100644 index 000000000..5fff3fa03 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/dev/UserEditAndPersistListing.java @@ -0,0 +1,95 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.dev; + +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.record.*; + +import java.io.*; + +/** + * Uses record level code to locate UserEditAtom records, and other + * persistence related atoms. 
Tries to match them together, to help + * illuminate quite what all the offsets mean + */ +public class UserEditAndPersistListing { + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Need to give a filename"); + System.exit(1); + } + + HSLFSlideShow ss = new HSLFSlideShow(args[0]); + System.out.println(""); + + // Find any persist ones first + Record[] records = ss.getRecords(); + int pos = 0; + for(int i=0; i"); + System.exit(1); + } + + boolean notes = false; + String file; + if(args.length > 1) { + notes = true; + file = args[1]; + } else { + file = args[0]; + } + + PowerPointExtractor ppe = new PowerPointExtractor(file); + System.out.println(ppe.getText(true,notes)); + ppe.close(); + } + + /** + * Creates a PowerPointExtractor + * @param fileName + */ + public PowerPointExtractor(String fileName) throws IOException { + _hslfshow = new HSLFSlideShow(fileName); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + /** + * Creates a PowerPointExtractor + * @param iStream + */ + public PowerPointExtractor(InputStream iStream) throws IOException { + _hslfshow = new HSLFSlideShow(iStream); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + /** + * Creates a PowerPointExtractor + * @param fs + */ + public PowerPointExtractor(POIFSFileSystem fs) throws IOException { + _hslfshow = new HSLFSlideShow(fs); + _show = new SlideShow(_hslfshow); + _slides = _show.getSlides(); + _notes = _show.getNotes(); + } + + + /** + * Shuts down the underlying streams + */ + public void close() throws IOException { + _hslfshow.close(); + _hslfshow = null; + _show = null; + _slides = null; + _notes = null; + } + + + /** + * Fetches all the slide text from the slideshow, but not the notes + */ + public String getText() { + return getText(true,false); + } + + /** + * Fetches all the notes text from the slideshow, but not the slide text + 
*/ + public String getNotes() { + return getText(false,true); + } + + /** + * Fetches text from the slideshow, be it slide text or note text + * @param getSlideText fetch slide text + * @param getNoteText fetch note text + */ + public String getText(boolean getSlideText, boolean getNoteText) { + StringBuffer ret = new StringBuffer(); + + if(getSlideText) { + for(int i=0; i<_slides.length; i++) { + Slide slide = _slides[i]; + TextRun[] runs = slide.getTextRuns(); + for(int j=0; j 0) { + for(int j=0; j= start+len) { + byte[] textBytes = new byte[len]; + System.arraycopy(_contents,start,textBytes,0,len); + lastEditUser = StringUtil.getFromUnicodeLE(textBytes); + } else { + // Fake from the 8 bit version + byte[] textBytes = new byte[(int)usernameLen]; + System.arraycopy(_contents,28,textBytes,0,(int)usernameLen); + lastEditUser = StringUtil.getFromCompressedUnicode(textBytes,0,(int)usernameLen); + } + } + + + /** + * Writes ourselves back out + */ + public void writeOut(OutputStream out) throws IOException { + // Decide on the size + // 8 = atom header + // 20 = up to name + // 4 = revision + // 3 * len = ascii + unicode + int size = 8 + 20 + 4 + (3 * lastEditUser.length()); + _contents = new byte[size]; + + // First we have a 8 byte atom header + System.arraycopy(atomHeader,0,_contents,0,4); + // Size is 20+user len + revision len(4) + int atomSize = 20+4+lastEditUser.length(); + LittleEndian.putInt(_contents,4,atomSize); + + // Now we have the size of the details, which is 20 + LittleEndian.putInt(_contents,8,20); + + // Now the ppt magic number (4 bytes) + System.arraycopy(magicNumber,0,_contents,12,4); + + // Now the current edit offset + LittleEndian.putInt(_contents,16,(int)currentEditOffset); + + // Now the file versions, 2+2+1+1 + LittleEndian.putShort(_contents,20,(short)docFinalVersionA); + LittleEndian.putShort(_contents,22,(short)docFinalVersionB); + _contents[24] = docMajorNo; + _contents[25] = docMinorNo; + + // 2 bytes blank + _contents[26] = 0; + 
_contents[27] = 0; + + // username in bytes in us ascii + byte[] asciiUN = new byte[lastEditUser.length()]; + StringUtil.putCompressedUnicode(lastEditUser,asciiUN,0); + System.arraycopy(asciiUN,0,_contents,28,asciiUN.length); + + // 4 byte release version + LittleEndian.putInt(_contents,28+asciiUN.length,(int)releaseVersion); + + // username in unicode + byte [] ucUN = new byte[lastEditUser.length()*2]; + StringUtil.putUnicodeLE(lastEditUser,ucUN,0); + System.arraycopy(ucUN,0,_contents,28+asciiUN.length+4,ucUN.length); + + // Write out + out.write(_contents); + } + + /** + * Writes ourselves back out to a filesystem + */ + public void writeToFS(POIFSFileSystem fs) throws IOException { + // Grab contents + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + writeOut(baos); + ByteArrayInputStream bais = + new ByteArrayInputStream(baos.toByteArray()); + + // Write out + fs.createDocument(bais,"Current User"); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java b/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java new file mode 100644 index 000000000..5a30fae6f --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/DummyRecordWithChildren.java @@ -0,0 +1,70 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * If we come across a record we know has children of (potential) + * interest, but where the record itself is boring, we create one + * of these. It allows us to get at the children, but not much else + * + * @author Nick Burch + */ + +public class DummyRecordWithChildren extends RecordContainer +{ + private Record[] _children; + private byte[] _header; + private long _type; + + /** + * Create a new holder for a boring record with children + */ + protected DummyRecordWithChildren(byte[] source, int start, int len) { + // Just grab the header, not the whole contents + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + _type = LittleEndian.getUShort(_header,2); + + // Find our children + _children = Record.findChildRecords(source,start+8,len-8); + } + + /** + * Return the value we were given at creation + */ + public long getRecordType() { return _type; } + + /** + * Return any children + */ + public Record[] getChildRecords() { return _children; } + + /** + * Write the contents of the record back, so it can be written + * to disk + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0],_header[1],_type,_children,out); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java b/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java new file mode 100644 index 000000000..04d388d25 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/EscherTextboxWrapper.java @@ -0,0 +1,90 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file 
except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.ddf.*; + +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * A wrapper around a DDF (Escher) EscherTextbox Record. Causes the DDF + * Record to be accessible as if it were a HSLF record. + * Note: when asked to write out, will simply put any child records correctly + * into the Escher layer. A call to the escher layer to write out (by the + * parent PPDrawing) will do the actual write out + * + * @author Nick Burch + */ + +public class EscherTextboxWrapper extends RecordContainer +{ + private EscherTextboxRecord _escherRecord; + private Record[] _children; + private long _type; + + /** + * Returns the underlying DDF Escher Record + */ + public EscherTextboxRecord getEscherRecord() { return _escherRecord; } + + /** + * Creates the wrapper for the given DDF Escher Record: takes the record type from its record id, and parses all of its data bytes into child HSLF records + */ + protected EscherTextboxWrapper(EscherTextboxRecord textbox) { + _escherRecord = textbox; + _type = (long)_escherRecord.getRecordId(); // NOTE(review): if getRecordId() returns a signed short, ids in the 0xFnnn range would sign-extend to a negative long here - confirm + + // Find the child records in the escher data + byte[] data = _escherRecord.getData(); + _children = Record.findChildRecords(data,0,data.length); + } + + + /** + * Return the type of the escher record (normally in the 0xFnnn range) + */ + public long getRecordType() { return _type; } + + /** + * Return the child records parsed from the escher data at creation + */ + public Record[] getChildRecords() { return _children; } + + /** + * Stores the data for the
child records back into the Escher layer. + * Doesn't actually do the writing out, that's left to the Escher + * layer to do. Must be called before writeOut/serialize is called + * on the underlying Escher object! Nothing is written to the supplied stream: the children are serialised into a temporary buffer, which is then handed to setData() on the escher record + */ + public void writeOut(OutputStream out) throws IOException { + // Write out our children, and stuff them into the Escher layer + + // Grab the children's data + ByteArrayOutputStream baos = new ByteArrayOutputStream(); + for(int i=0; i<_children.length; i++) { + _children[i].writeOut(baos); + } + byte[] data = baos.toByteArray(); + + // Save in the escher layer + _escherRecord.setData(data); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java b/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java new file mode 100644 index 000000000..86e692b36 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/Notes.java @@ -0,0 +1,95 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; + +/** + * Master container for Notes.
There is one of these for every page of + * notes, and they have certain specific children + * + * @author Nick Burch + */ + +public class Notes extends RecordContainer +{ + private Record[] _children; + private byte[] _header; + private static long _type = 1008l; + + // Links to our more interesting children + private NotesAtom notesAtom; + private PPDrawing ppDrawing; + + /** + * Returns the NotesAtom of this Notes, or null if no NotesAtom child was found at creation + */ + public NotesAtom getNotesAtom() { return notesAtom; } + /** + * Returns the PPDrawing of this Notes, which has all the + * interesting data in it, or null if no PPDrawing child was found at creation + */ + public PPDrawing getPPDrawing() { return ppDrawing; } + + + /** + * Set things up, and find our more interesting children: keeps a copy of the 8 byte header at source[start], parses the following len-8 bytes into child records, and remembers the last NotesAtom and the last PPDrawing among them + */ + protected Notes(byte[] source, int start, int len) { + // Grab the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Find our children + _children = Record.findChildRecords(source,start+8,len-8); + + // Find the interesting ones in there + for(int i=0; i<_children.length; i++) { + if(_children[i] instanceof NotesAtom) { + notesAtom = (NotesAtom)_children[i]; + //System.out.println("Found notes for sheet " + notesAtom.getSlideID()); + } + if(_children[i] instanceof PPDrawing) { + ppDrawing = (PPDrawing)_children[i]; + } + } + } + + + /** + * We are of type 1008 + */ + public long getRecordType() { return _type; } + + /** + * Return all the child records found at creation (the array itself, not a copy) + */ + public Record[] getChildRecords() { return _children; } + + /** + * Write the contents of the record back, so it can be written + * to disk. Passes the first two saved header bytes, the type and the children to the five-argument writeOut (not defined in this class, presumably inherited from RecordContainer) + */ + public void writeOut(OutputStream out) throws IOException { + writeOut(_header[0],_header[1],_type,_children,out); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java b/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java new file mode 100644 index 000000000..dba56256e --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/NotesAtom.java @@ -0,0 +1,120 @@ + +/*
==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; +import java.io.IOException; +import java.io.OutputStream; + +/** + * A Notes Atom (type 1009). Holds information on the parent Notes, such + * as what slide it is tied to + * + * @author Nick Burch + */ + +public class NotesAtom extends RecordAtom +{ + private byte[] _header; + private static long _type = 1009l; + + private int slideID; + private boolean followMasterObjects; + private boolean followMasterScheme; + private boolean followMasterBackground; + private byte[] reserved; + + + public int getSlideID() { return slideID; } + public void setSlideID(int id) { slideID = id; } + + public boolean getFollowMasterObjects() { return followMasterObjects; } + public boolean getFollowMasterScheme() { return followMasterScheme; } + public boolean getFollowMasterBackground() { return followMasterBackground; } + public void setFollowMasterObjects(boolean flag) { followMasterObjects = flag; } + public void setFollowMasterScheme(boolean flag) { followMasterScheme = flag; } + public void setFollowMasterBackground(boolean flag) { followMasterBackground = flag; } + + + /* *************** record code follows ********************** */ + + /** + * For the Notes Atom: reads the 8 byte header at source[start], the slide ID (4 bytes at start+8), the flag bits (2 bytes at start+12), and keeps whatever follows up to len as the reserved field. A len below 14 is treated as 14, in which case the reserved field is empty + */ +
protected NotesAtom(byte[] source, int start, int len) { + // Sanity Checking - the header (8), slide ID (4) and flags (2) are always read, so a shorter len would make len-14 negative when sizing the reserved field below + if(len < 14) { len = 14; } + + // Get the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Get the slide ID + slideID = (int)LittleEndian.getInt(source,start+8); + + // Grok the flags, stored as bits + int flags = LittleEndian.getUShort(source,start+12); + if((flags&4) == 4) { + followMasterBackground = true; + } else { + followMasterBackground = false; + } + if((flags&2) == 2) { + followMasterScheme = true; + } else { + followMasterScheme = false; + } + if((flags&1) == 1) { + followMasterObjects = true; + } else { + followMasterObjects = false; + } + + // There might be 2 more bytes, which are a reserved field + reserved = new byte[len-14]; + System.arraycopy(source,start+14,reserved,0,reserved.length); + } + + /** + * We are of type 1009 + */ + public long getRecordType() { return _type; } + + /** + * Write the contents of the record back, so it can be written + * to disk: the saved header, then the slide ID, then the flags (bit 1 = objects, 2 = scheme, 4 = background), then the reserved bytes exactly as they were read + */ + public void writeOut(OutputStream out) throws IOException { + // Header + out.write(_header); + + // Slide ID + writeLittleEndian(slideID,out); + + // Flags + short flags = 0; + if(followMasterObjects) { flags += 1; } + if(followMasterScheme) { flags += 2; } + if(followMasterBackground) { flags += 4; } + writeLittleEndian(flags,out); + + // Reserved fields + out.write(reserved); + } +} diff --git a/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java new file mode 100644 index 000000000..e19bc0a99 --- /dev/null +++ b/src/scratchpad/src/org/apache/poi/hslf/record/PPDrawing.java @@ -0,0 +1,191 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hslf.record; + +import org.apache.poi.util.LittleEndian; + +import org.apache.poi.ddf.*; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; +import java.util.Vector; + +/** + * These are actually wrappers onto Escher drawings. Make use of + * the DDF classes to do useful things with them. + * For now, creates a tree of the Escher records, and then creates any + * PowerPoint (hslf) records found within the EscherTextboxRecord + * (msofbtClientTextbox) records. + * Also provides easy access to the EscherTextboxRecords, so that their + * text may be extracted and used in Sheets + * + * @author Nick Burch + */ + +// For now, pretending to be an atom. 
Might not always be, but that +// would require a wrapping class +public class PPDrawing extends RecordAtom +{ + private byte[] _header; + private long _type; + + private EscherRecord[] childRecords; + private EscherTextboxWrapper[] textboxWrappers; + + + /** + * Get access to the underlying Escher Records + */ + public EscherRecord[] getEscherRecords() { return childRecords; } + + /** + * Get access to the atoms inside Textboxes + */ + public EscherTextboxWrapper[] getTextboxWrappers() { return textboxWrappers; } + + + /* ******************** record stuff follows ********************** */ + + /** + * Sets everything up, groks the escher etc + */ + protected PPDrawing(byte[] source, int start, int len) { + // Get the header + _header = new byte[8]; + System.arraycopy(source,start,_header,0,8); + + // Get the type + _type = LittleEndian.getUShort(_header,2); + + // Get the contents for now + byte[] contents = new byte[len]; + System.arraycopy(source,start,contents,0,len); + + + // Build up a tree of Escher records contained within + DefaultEscherRecordFactory erf = new DefaultEscherRecordFactory(); + Vector escherChildren = new Vector(); + findEscherChildren(erf,contents,8,len-8,escherChildren); + + childRecords = new EscherRecord[escherChildren.size()]; + for(int i=0; i= 8) { + findEscherChildren(erf, source, startPos, lenToGo, found); + } + } + + /** + * Look for EscherTextboxRecords + */ + private void findEscherTextboxRecord(EscherRecord[] toSearch, Vector found) { + for(int i=0; i i) { + Vector thisSetsV = sortedSetsV[i]; + sets = new SlideAtomsSet[thisSetsV.size()]; + for(int j=0; jbta-{#U~FbGs;5@7?CD#MP?#^C>?zbqby2y4G(kg5B#H#{dkTeuDHK8S zU^sv{up78e#^QN^2%rkUHLn5G0_p(c0OJAmfCj(>z!U)2eInvXfW{!tvO|zQ6fhNV z7~pWg5rAm`)*lS}4+qYc6#lZ}%g3Z2aavYll|~;9fy7v9s0(6OUEKYhMSqxh*>iHM z+;Pk^#UsVe+kVfJpSjP0>M#O^s1>uXkT~XLDyca5vHV7PNn@5m@k3Z)0n#xGbfSmAL&|>;{4np7mxUpTp+ejs+qxO?|-qA2hf6--*hIF`{&8>7@mIcdQ!*ZF%oFEX`r@k4zj^8F}rW!jZGG;fjbM!9;B(cPyY 
zp6QEpBNp`6m}zNI;@GQDcu`9XN69B82u$!DpQI^6T3pkE%<1qtD0*6+i{v6qMrC|t zjmDC{oCQ*yZ^CZ`JS9I#85?pygR9ANAkxS>!X+|Z`&~>4tG$HHYcD$$zp@#7C?@wh zD5||rOA1ac`Z_L1scn%XU&2%zqn0gL_6oM(z8|R@cDpxdeNkCYXthzw|TA_s6fjetfH?kZCFG8G_{oFtlEWB9D4GY0y zT`HBJ;TdnztzcZIfhgJ6-kZ$$`0wU}^F>U6!c4>TQG}cjZNoHko)jZVfj`QK)a~kbI4g60C=o z=W;oX@7n&9G&ipoS-)P^4?^Zc?XJD{T5b3Gwl*oOPsm_1nkGuy+40s77Gx06+&m~h z?kNn9_m>?n={R;g;&Ijez$Jl62a3mgLffrau|nfQ;T35f9F#(9E9M_*C%|86`Ruuf zQPI6vmZlycfq;>AIgG+~=T8^e_UKy0|DKf{+UHNV95##}sM2=Q3>wkCnRsm?dIc%` z!zpTK#%oTjm#3ZwP2k<>SDc~k@_!$hw|B|sbUf1W$3OmF)~xx8Ir&8D-2N(@ms#AT`Q2=ug*hc>`$4A)EEMICPA45EI+H&v8MK%^MuN;d}=;NR?sh4LuzgnOJFOHb7X0GOiYo3kj54)ol*vV-Oez{BRy0Zs>asraXABz^H~$FYnAGN}J*eWU zwcN`mG>+tFqs)=IyiuP96Z`ARIPJ&5Nmo=N`9~2?3d&uS)7@AL%Xbozt1wcP6WtDo2NNrp(*c?rp9QPCe8}^P?~O3oQ!cX*bJ!LmZvFxVb+k8X=sagrTV-5 zzW5j7eKa{~n?60YJe}Gms z=865U5L@i4DquhRcVoqVQ_awXQtY|6>s*6<3(}Rbhr=G=Oh?k$3;&%J@#hZat_#a% z_m;8Z@5ef(l;Y3fYu^C=?q2vmz8C%rF{b6eetoB0gBkb3Z?2?x{L3B(#`M^b|&s)*2uBs8sikb62aLYGtE9_l|Za61~@k zCBbSI?o%$hq30pQmL9JHJwx?lrRSS<+hV2kaMl_z^w2k`OwZnXb40d@;D~N1Pf<78 zSc)P6MP0CwvrxWDB2FZ*j;nx<138}vyT~uXIyuFqgjyZNb(cP_xvj8n#o&h!_th-z|8nETo92PhqXEVs^XbRMY)-`J{F!?z3X7+HFX`H z-?+UYJTs7lr7xYC<(zpTVoNhr=RRa*Kk+EGJ+zzN!iTUn!`=djsR_J=JCUYr+>W@V zST^usM%nnJ){jy)(8tQgxkY?(QO*@Q4YI+LCDeV6HQ%hxMsbFnDFcg5X)n{_b!1n+F2m|^h)7~4;#Qn~6+|TyL z{r(h=>-+ie8fEs74j{dq29ww zKKYM1GX|MYObreQ(1BD_A{^PVf_Dfn!A!kVcL;Z1V(t(aGw=?f9rpzU-XUy7Iqwh{ zGaRUQ2pbf4$$y(f;0~b$zY*}BQg;|j09*&=DZ}&>sQszDm?m(I7_%+o5$lH>zLD6U zec8{pXIqZRarl*l&w-NrU*7#w#CiYAJ6&>`XMYqKdb-8;zr6cpn}c`1V8VVD_TU`6 zinm6gK)Kld)tmnH+M?dr53a72!0wLjB0M>T^*s~Wv*|{7*nYjT#x!DAu zw!9la8QKn@&3^&Fv!Yi4l%og+qa4*E=6FwIFnSsg_3(v=-iDx9>p9lKRL_debT+fE zGUC=8^x^mSS;?++Dw9fN{Y8mHvMcVlrdGt$t*K;xR$5R$bksv^rs?h14;-zY8S=4b zp49tm?v;}O9|z1qhD*w_TKRgc2T1WGQoO4`O_iQ18SZ#+D!3cRlSEC`yDQ~p-FZjq ze%pl_424=^d_4M-n>$_hGBu{$(etQTp1pa$P&Np`h?eQ2UG&HFafiob{=K*}CW)`R zJ2)M}8q9{!g4^13va?S!<$uh0o{l%ynX0i1Jwhz&NSk{Ueoh>Bk{qWR!CqYzUwu_W 
zA8`a%e?85)_>CaXKYksvlyl7mW(L=oatZRJ9K>Dc%(j&{mtBfRL7>Etkg4;gs1zAo z>63`$9WNN?wYP!hV==>tA1xKbmyU}|@h85-)X-2FXYP-3VPE>k-SY638|EGnTYDY!&z#qOME&y^ z_(8kFRi?h-xN88^BLe{P>TUpS*lz)}V}k(hipK!dOIrcdM^6K|S}y>o(_RK}>|Frb zGwLPUGU_GjE53KY_LekailEg~?6au<_)J$i9`E8jUS&nUU6fdI5LFeBY^`^x&lsU+H8;)jdiN zhBwQB({Ku%#-Dj-<;ywXEnj1eHXIHKHg?DSj;0^q1I`;w!%?;w>-?h}cGS%d@cG}7 zH$~0R6K~U-qL+iuJ-c3WUVrP}6#18$H%0uktn{X6x)k&|6;!!7>j)5`Y9ITRQR#c5 zPf^7NPqwJ$2^?E(AmiW|gG!pm7*wTJp%-iVH}&4gBEU&30{ZdDIYam7{NA$&l(8rl z4|mfA2(%Jq#8bv^?N%reg;?3>CQfXabK!yNybkl@P6vTY{REg{c+9o#+Rbq z7vemlS=yP+Ci{EZc}PPQb1Hy=MvHUd5eBmed^}|oka@*L*L679Mghm^K-e%suzgsy z_fajoHcYUsCCB#Vj%r(rkK?dhV?L@yAL1N7Fm+MxLo~w2Wsw`g$7RJwwflM|(GVP$ z(UsZfQEi2ppI#cCx%jAdUym6kl$n|QEjg;i%#{z?-u`|?e z4Rf)(1%&aa_F=>w#j=6#8Olan>qmK1i#}F1&IN1ik+zF+%EnuHRQqtz_HtLQDo)GJjLZc;9G_Tz`0s{`N(@jlG90|Il-mC@dVdL{rxkHN~ChLC%Ad2 z_5I-lH+S4Ma_|J#SXo+HZf3^T@`g`vQFWkraIL_#8(?E3<^A)nk+27M{jNe`1H9_D z;#GLD2X{V1t0dr6_}lHlorSGu9Vjj(K{c6{v+f;uF>VpCazq4192$cFQK1VtMi1@` zbg~ZLTj0Tk9XOuV7*1J!{l7VIxy{$$R+U^Rch|me2ksiU5Dob^>cHhoW*)r*S2{D- q??83nXk&TEMH^c^llnL&dyYmy$iDt<2Huk56pOT4`^r}PXZSB<_=u$d literal 0 HcmV?d00001 diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/data/next_test_ppt_file.ppt b/src/scratchpad/testcases/org/apache/poi/hslf/data/next_test_ppt_file.ppt new file mode 100644 index 0000000000000000000000000000000000000000..69b2c4787774ba3173b1ca2fcd9d8ad32463b222 GIT binary patch literal 13824 zcmeHN3viUx6+Zv}Z=NLVLc&We{4n@Nh@`cSZDAdeVwE9dVyljpgk(2K3E9l<(ty}u zskKFv8q34dIy{ulpdH)PK7ACKsaD6P8Yys0_!wQSML=ZxefR!1+3bTRK4HpD z&ffRA=bU@)Irs7RJ~d+RmS3IpPss=~#E~PZ5-Ch8w~!{N5D@Vr!Tga_Dy0iWkUSBN zBME!}S}QSm4!{Q}0C3KW0L6e3Kq+7tpbStB7!DW(;JlAOJQ7gh#;Y%Ei)-I4VmqV}lQ)p)p2av;~3HE1G^f@2L^re@*U@`_J5$j;Z6# z+s5tu#-kQYhgL8|H5k29LKv5+Rz-s=vyg6-#j*%Bq$+m-rqGV#qqgrj100G^g0o97g#l)H7xv zE^V2%`e~0662vd(`DbceWQwQahxSV3Za-;d+LjVD56WGB&K`91gPDjYxgt%71^rdb zv;-A%?3GHrrAo9!DJR7cgy32(SyKWD+PVjYQ}5JM^;CJnm+&#^mtnpx6-#m@3#2ev zg+ 
zNZW!?I^=za>@##AVe-Tpg#Vc!6}~U4T0I8wr$Z39w=68HqAGPmm9m0W0g_i%oR12> ztg7IYfH6NsHDCSJfbKdu7fLB|a|O%c@O-4H*VMH5<$$vlj@*{F9QtI^Y1u_wAZekjs$P;sE=n*TsyvZMsQAIXFG_Xw z29XULWJ3>hKBL|G_3Kr;_xIFFYC~9hs?ju@-Ok|K(36rLKy`JG{IWUKhwp{`Z!0_o zAL&^AXV4PHpxe^%J*V1rc6O>bm3mLAdwL{QQ-ksQ+X?7byj*=HVpKFOkVTQl$RMD< zT>@Cx?wScAdv>l-^lx8UuloG$&T)P8fh*Ncs=*`LSCg)8l9!UyD^^-NjjuYqL0)_v zJVAD|S6!;wC136zw`JkA3Lp7+`su$&SJ!tXv2mNEdcyMR?)&@jg@K3k&6shGitDdh zCgE@jbAfTemzj^tXPl<$&v8e|o*k>@mSr7s>-=fb^Ba*{Hf{!&cll@F*UGYn)*!6m z6!a7aW&~KSM^f8VE5oKz*`Eby3z=K()P(BK+N{<9~9c=8( z6FIPqO~X=|hGk?LmOvVo0L*2X@9#kC7d!bR;(!zBq>gy#iQ zNSAp25A>-~?#1lZkNS?|mk}N{e;Q_No+axrdU>)AqljmSRT75SMdG5i*2aZyJmQ9$ zTI25AhFCll)n?m`#h12)MDFyH`-!@f+@p3L%;^~-XQPocoH-t6#-|(3%2uzFq41P+ z$Ww(slP8X9dpvDaG->CesoAFVHaAb9C2`GCth_eV7-?^E+d|(CwbA87O?q}@aWvlI zRm^S;n>3}s)izO$$g`oPPKmZUCps!k#NT+aB zAX7SADMgl^ajq#d;~Z9Gv!C^60k$Y-ZPpbf%3!M#?*Zl1nJ4vsKy0Wl$V2`0n}$k# zcTw-~Z0fnTOKeSj5b6BXW5e#z%#EZqfc`^y>E{aOs>{^PN45@?em77*Dw}?yFa8kp z+XLu-b^!fzfYZod9l&5d4gD*q^w=-M!W6Huez@soS!=!U#}@3GN1_k*nh=i9p#h)i zSdda6V?Sf|DVh78p>G~vpGtf4G|s%;Us?9Ni_IqY)U$A^4dXnt7$+j0yqS;Tgw%|a zCN`MPArNlK+h#MIz$Ubc7Wg`p(*j2kS9ung9<~U)`^8C?s`^1%VDv$ArOS^aHnOzo zhH~gaSM8A@*(qR=nj#nH#BAGJY9GP4<;fa0@@(&5S0c$f*ry4`vT&Vp(ltL%AU6Cs zdHC6J^-%e_wPa5qn;(u^B$^-Y4f69daBcSMHVN#vEx9>rLL0-8&*P{OK5{b3my6Hx zS=u`;2RRn>d<6U=PlP2h)24hEE-hQ3rw!>|OWWOJ(Q7>kip@+EdeX_z}Ym#dZ*89qbn@TBPX_Zb9sN9)W9$u7e(7vZ^1X zM?fFr5u(6hwA@Cy3wml{Z@AZ64)B4D;Jxc$X=rcgQDJ2qwR`vW-ZKhdYbtjaXKFJ# zgEGCB;e?26?6c{$>#od^)$ZCGxvQk!aVz$gXO0Xa;pwwSW;sWmgV^v4 z(|HtKIW~3_UF&Xop1Xw$FgJa>1u#?O?G_$Dn!0fx;$XUN;9y4G_?oI8q;8;((Tyw9 zPL@(+~-j{~o@$hHr-u0m)8M>!D zl^G?Ew*kX20NPhy#v#4E2N*!urcN?-EumP*ZHR{4cuUA_kKnq_4M(EhtsAY(NVFxd z4BUMYYYq(p<;T3Wd2H@Jk?0t5W2q}!kuDtKUV;hv5iY5>^N~;fqwkEt=5u4c3j%Z? 
z*VG70_IL6Pp&Xe{>kZ+d8}u6jV+Ot<)Zx8=z&C{5DCZjjV}|4P4PmpQU6Ma06L>=i z;@1b+Q|rdD1i*Pn*}wFG{C zTd|yd`mdj>LRk<>!fydsUk_l<765f;C4f3}2Y`Ch4WPAr2tXa$3!u+`1HikY0|4rg z56GxTWr&IIB_QKY1FD|$;RMC1p19^jnmZe!@z~MINZWL;56|CoTN|U1SR@>G=Y_+q zjUl%t(iw`@L|WV95=4Ds*(^ROs&q0s_jR7%S#I91xmIQZz6zLv3}=*O#d1@i8AS0W zQk>P`ra;{)Y3VrFRB$y8Ba13kW-pa3YUPbr>+LXVfC{yw_#E`7G!NPArE7G#rS799 zJLctmiEI{v)-2sebx}XMkKKop`P1=cOi8?N@8@s`T^J1^h}YU^YeSpLaV80>Rw0s#n zn`b=82si%#%^&B4Dbj}*0~PBM-rQ-b$^ECt#YDx$*X0XpJsK|h9vv5C(@%OyspcU+ z&0HV3!qM!H{mfsVxnt_dfi<_o{!BUe3)-JukO%z^XPNefc)I|!BijI!73~du*dqY? zu^s?d#V!Et(ry6l(Mtf%)*ArYw08l-eHcK0M!Q5`M!Q6N#qS-kyd{d5DroE!`^;-U zC)OAZh1%WPmI$6DX}{REE)s2QSrA!*q@tDIosedZ&zN}9V`Np{#Ltc$rf6fHG%{qy z<3RxVCeM2HQW~>D;fBR+ad%cI*4o^z9J86w^rfiQgsI@3S3NkpX3nQe8L$&GIpWB( zBkl2+dqphNqzHv9R27AwhjQyei#pmG;-LZc6ku&zYg5RrQ_b5FPKB@#R!Yy)s)tF-iM+@pO$QSM!(ahr40Yur2|SJq{Y zoPLdav=1GI=NOst&AP_L$oVhfKKpB2ehl3)`Bc!Ibe3Rl%r&mo4gKhQ0F3b(H;K4D zT{m#np>C{H^@F^|MIWOZS3)%Ay4yxMb>ri_#!aT-=U0chbnkFerUnf48g~^&G3JNs zm-a7+-(1(YW=jk6GR19ZYs)eqZUa7k>9-kFo(lmwFvX2JL0kb&?mrF6s*=X}d}L#|vP# KIuVXb3H%qLW1k)X literal 0 HcmV?d00001 diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java new file mode 100644 index 000000000..a8294a11d --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hslf/extractor/TextExtractor.java @@ -0,0 +1,67 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.extractor; + + +import junit.framework.TestCase; + +/** + * Tests that the extractor correctly gets the text out of our sample file + * + * @author Nick Burch (nick at torchbox dot com) + */ +public class TextExtractor extends TestCase { + // Extractor primed on the test data + private PowerPointExtractor ppe; + + public TextExtractor() throws Exception { + String dirname = System.getProperty("HSLF.testdata.path"); + String filename = dirname + "/basic_test_ppt_file.ppt"; + ppe = new PowerPointExtractor(filename); + } + + public void testReadSheetText() throws Exception { + String sheetText = ppe.getText(); + String expectText = "This is a test title\nThis is a test subtitle\nThis is on page 1\nThis is the title on page 2\nThis is page two\nIt has several blocks of text\nNone of them have formatting\n"; + + assertEquals(expectText.length(),sheetText.length()); + char[] st = sheetText.toCharArray(); + char[] et = expectText.toCharArray(); + for(int i=0; i two notes, plus the notes on the slide master + assertEquals(3, notes.length); + } +} diff --git a/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java new file mode 100644 index 000000000..e4d6590be --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hslf/usermodel/TestNotesText.java @@ -0,0 +1,61 @@ + +/* ==================================================================== + Copyright 2002-2004 Apache Software Foundation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + + +package org.apache.poi.hslf.usermodel; + + +import junit.framework.TestCase; +import org.apache.poi.hslf.*; +import org.apache.poi.hslf.model.*; + +/** + * Tests that SlideShow returns MetaSheets which have the right text in them + * + * @author Nick Burch (nick at torchbox dot com) + */ +public class TestNotesText extends TestCase { + // SlideShow primed on the test data + private SlideShow ss; + + public TestNotesText() throws Exception { + String dirname = System.getProperty("HSLF.testdata.path"); + String filename = dirname + "/basic_test_ppt_file.ppt"; + HSLFSlideShow hss = new HSLFSlideShow(filename); + ss = new SlideShow(hss); + } + + public void testNotesOne() throws Exception { + Notes notes = ss.getNotes()[1]; + + String[] expectText = new String[] {"These are the notes for page 1"}; + assertEquals(expectText.length, notes.getTextRuns().length); + for(int i=0; i