diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index 9690fdddf..c6f809459 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -126,7 +126,9 @@ public class PowerPointExtractor } /** - * Fetches text from the slideshow, be it slide text or note text + * Fetches text from the slideshow, be it slide text or note text. + * Because the final block of text in a TextRun normally has its + * last \n stripped, we add it back * @param getSlideText fetch slide text * @param getNoteText fetch note text */ @@ -139,10 +141,12 @@ public class PowerPointExtractor TextRun[] runs = slide.getTextRuns(); for(int j=0; j"); + System.exit(1); + } + + String file = args[0]; + + QuickButCruddyTextExtractor ppe = new QuickButCruddyTextExtractor(file); + System.out.println(ppe.getTextAsString()); + ppe.close(); + } + + /** + * Creates an extractor from a given file name + * @param fileName + */ + public QuickButCruddyTextExtractor(String fileName) throws IOException { + this(new FileInputStream(fileName)); + } + + /** + * Creates an extractor from a given input stream + * @param iStream + */ + public QuickButCruddyTextExtractor(InputStream iStream) throws IOException { + this(new POIFSFileSystem(iStream)); + is = iStream; + } + + /** + * Creates an extractor from a POIFS Filesystem + * @param poifs + */ + public QuickButCruddyTextExtractor(POIFSFileSystem poifs) throws IOException { + fs = poifs; + + // Find the PowerPoint bit, and get out the bytes + DocumentEntry docProps = + (DocumentEntry)fs.getRoot().getEntry("PowerPoint Document"); + pptContents = new byte[docProps.getSize()]; + fs.createDocumentInputStream("PowerPoint Document").read(pptContents); + } + + + /** + * Shuts down the underlying streams + */ + public void close() throws IOException { + 
if(is != null) { is.close(); } + fs = null; + } + + /** + * Fetches ALL the text of the powerpoint file, as a single string + */ + public String getTextAsString() { + StringBuffer ret = new StringBuffer(); + Vector textV = getTextAsVector(); + for(int i=0; i (pptContents.length - 8)) { + newPos = -1; + } + return newPos; + } +}