diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 569691b6c..46f8c4389 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -52,6 +52,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments 45538 - Include excel headers and footers in the output of ExcelExtractor 44894 - refactor duplicate logic from EventRecordFactory to RecordFactory Support for Headers / Footers in HSLF diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 1b665f1ae..af9648e52 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -49,6 +49,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments 45538 - Include excel headers and footers in the output of ExcelExtractor 44894 - refactor duplicate logic from EventRecordFactory to RecordFactory Support for Headers / Footers in HSLF diff --git a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java index cd9fa2825..9dddb700b 100644 --- a/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java +++ b/src/scratchpad/src/org/apache/poi/hslf/extractor/PowerPointExtractor.java @@ -27,6 +27,8 @@ import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.hslf.*; import org.apache.poi.hslf.model.*; +import org.apache.poi.hslf.record.Comment2000; +import org.apache.poi.hslf.record.Record; import org.apache.poi.hslf.usermodel.*; /** @@ -44,6 +46,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor private boolean slidesByDefault = true; private boolean notesByDefault = false; + private boolean commentsByDefault = false; /** * Basic extractor. Returns all the text, and optionally all the notes @@ -57,16 +60,20 @@ public class PowerPointExtractor extends POIOLE2TextExtractor } boolean notes = false; + boolean comments = false; String file; if(args.length > 1) { notes = true; file = args[1]; + if(args.length > 2) { + comments = true; + } } else { file = args[0]; } PowerPointExtractor ppe = new PowerPointExtractor(file); - System.out.println(ppe.getText(true,notes)); + System.out.println(ppe.getText(true,notes,comments)); ppe.close(); } @@ -127,6 +134,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor public void setNotesByDefault(boolean notesByDefault) { this.notesByDefault = notesByDefault; } + /** + * Should a call to getText() return comments text? + * Default is no + */ + public void setCommentsByDefault(boolean commentsByDefault) { + this.commentsByDefault = commentsByDefault; + } /** * Fetches all the slide text from the slideshow, @@ -135,7 +149,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor * to change this */ public String getText() { - return getText(slidesByDefault,notesByDefault); + return getText(slidesByDefault,notesByDefault,commentsByDefault); } /** @@ -153,6 +167,9 @@ public class PowerPointExtractor extends POIOLE2TextExtractor * @param getNoteText fetch note text */ public String getText(boolean getSlideText, boolean getNoteText) { + return getText(getSlideText, getNoteText, commentsByDefault); + } + public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) { StringBuffer ret = new StringBuffer(); if(getSlideText) { @@ -169,6 +186,18 @@ public class PowerPointExtractor extends POIOLE2TextExtractor } } } + + if(getCommentText) { + Comment[] comments = slide.getComments(); + for(int j=0; j ProgBinaryTag -> BinaryTagData + RecordContainer progTags = (RecordContainer) + getSheetContainer().findFirstOfType( + RecordTypes.ProgTags.typeID + ); + if(progTags != null) { + RecordContainer progBinaryTag = (RecordContainer) + progTags.findFirstOfType( + RecordTypes.ProgBinaryTag.typeID + ); + if(progBinaryTag != null) { + RecordContainer binaryTags = (RecordContainer) + progBinaryTag.findFirstOfType( + RecordTypes.BinaryTagData.typeID + ); + if(binaryTags != null) { + // This is where they'll be + int count = 0; + for(int i=0; i