Merged revisions 638786-638802,638805-638811,638813-638814,638816-639230,639233-639241,639243-639253,639255-639486,639488-639601,639603-639835,639837-639917,639919-640056,640058-640710,640712-641156,641158-641184,641186-641795,641797-641798,641800-641933,641935-641963,641965-641966,641968-641995,641997-642230,642232-642562,642564-642565,642568-642570,642572-642573,642576-642736,642739-642877,642879,642881-642890,642892-642903,642905-642945,642947-643624,643626-643653,643655-643669,643671,643673-643830,643832-643833,643835-644342,644344-644472,644474-644508,644510-645347,645349-645351,645353-645559,645561-645565,645568-645951,645953-646193,646195-646311,646313-646404,646406-646665,646667-646853,646855-646869,646871-647151,647153-647185,647187-647277,647279-647566,647568-647573,647575,647578-647711,647714-647737,647739-647823,647825-648155,648157-648202,648204-648273,648275,648277-648302,648304-648333,648335-648588,648590-648622,648625-648673,648675-649141,649144,649146-649556,649558-649795,649799,649801-649910,649912-649913,649915-650128,650131-650132,650134-650137,650140-650914,650916-651991,651993-652284,652286-652287,652289,652291,652293-652297,652299-652328,652330-652425,652427-652445,652447-652560,652562-652933,652935,652937-652993,652995-653116,653118-653124,653126-653483,653487-653519,653522-653550,653552-653607,653609-653667,653669-653674,653676-653814,653817-653830,653832-653891,653893-653944,653946-654055,654057-654355,654357-654365,654367-654648,654651-655215,655217-655277,655279-655281,655283-655911,655913-656212,656214,656216-656251,656253-656698,656700-656756,656758-656892,656894-657135,657137-657165,657168-657179,657181-657354,657356-657357,657359-657701,657703-657874,657876-658032,658034-658284,658286,658288-658301,658303-658307,658309-658321,658323-658335,658337-658348,658351,658353-658832,658834-658983,658985,658987-659066,659068-659402,659404-659428,659430-659451,659453-659454,659456-659461,659463-659477,659479-659524,659526-659571,659574,659576-660255,660257-660262,660264-660279,660281-660343,660345-660473,660475-660827,660829-660833,660835-660888,660890-663321,663323-663435,663437-663764,663766-663854,663856-664219,664221-664489,664494-664514,664516-668013,668015-668142,668144-668152,668154,668156-668256,668258,668260-669139,669141-669455,669457-669657,669659-669808,669810-670189,670191-671321,671323-672229,672231-672549,672551-672552,672554-672561,672563-672566,672568,672571-673049,673051-673852,673854-673862,673864-673986,673988-673996,673998-674347,674349-674890,674892-674910,674912-674936,674938-674952,674954-675078,675080-675085,675087-675217,675219-675660,675662-675670,675672-675716,675718-675726,675728-675733,675735-675775,675777-675782,675784,675786-675791,675794-675852,675854-676200,676202,676204,676206-676220,676222-676309,676311-676456,676458-676994,676996-677027,677030-677040,677042-677056,677058-677375,677377-677968,677970-677971,677973,677975-677994,677996-678286,678288-678538,678540-680393,680395-680469,680471-680529,680531-680852,680854-681529,681531-681571,681573-682224,682226,682228,682231-682281,682283-682335,682337-682507,682509,682512-682517,682519-682533 via svnmerge from
https://svn.apache.org/repos/asf/poi/trunk ........ r682533 | nick | 2008-08-04 23:48:39 +0100 (Mon, 04 Aug 2008) | 1 line Fix bug #45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments ........ git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@682535 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
60aced4a6d
commit
6b2768f4d7
@ -52,6 +52,7 @@
|
||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
||||
</release>
|
||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
||||
|
@ -49,6 +49,7 @@
|
||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
||||
</release>
|
||||
<release version="3.1.1-alpha1" date="2008-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45543 - Optionally extract comment text with PowerPointExtractor, and initial hslf model support for comments</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45538 - Include excel headers and footers in the output of ExcelExtractor</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">44894 - refactor duplicate logic from EventRecordFactory to RecordFactory</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">Support for Headers / Footers in HSLF</action>
|
||||
|
@ -27,6 +27,8 @@ import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
||||
import org.apache.poi.hslf.*;
|
||||
import org.apache.poi.hslf.model.*;
|
||||
import org.apache.poi.hslf.record.Comment2000;
|
||||
import org.apache.poi.hslf.record.Record;
|
||||
import org.apache.poi.hslf.usermodel.*;
|
||||
|
||||
/**
|
||||
@ -44,6 +46,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
|
||||
private boolean slidesByDefault = true;
|
||||
private boolean notesByDefault = false;
|
||||
private boolean commentsByDefault = false;
|
||||
|
||||
/**
|
||||
* Basic extractor. Returns all the text, and optionally all the notes
|
||||
@ -57,16 +60,20 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
}
|
||||
|
||||
boolean notes = false;
|
||||
boolean comments = false;
|
||||
String file;
|
||||
if(args.length > 1) {
|
||||
notes = true;
|
||||
file = args[1];
|
||||
if(args.length > 2) {
|
||||
comments = true;
|
||||
}
|
||||
} else {
|
||||
file = args[0];
|
||||
}
|
||||
|
||||
PowerPointExtractor ppe = new PowerPointExtractor(file);
|
||||
System.out.println(ppe.getText(true,notes));
|
||||
System.out.println(ppe.getText(true,notes,comments));
|
||||
ppe.close();
|
||||
}
|
||||
|
||||
@ -127,6 +134,13 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
public void setNotesByDefault(boolean notesByDefault) {
|
||||
this.notesByDefault = notesByDefault;
|
||||
}
|
||||
/**
|
||||
* Should a call to getText() return comments text?
|
||||
* Default is no
|
||||
*/
|
||||
public void setCommentsByDefault(boolean commentsByDefault) {
|
||||
this.commentsByDefault = commentsByDefault;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches all the slide text from the slideshow,
|
||||
@ -135,7 +149,7 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
* to change this
|
||||
*/
|
||||
public String getText() {
|
||||
return getText(slidesByDefault,notesByDefault);
|
||||
return getText(slidesByDefault,notesByDefault,commentsByDefault);
|
||||
}
|
||||
|
||||
/**
|
||||
@ -153,6 +167,9 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
* @param getNoteText fetch note text
|
||||
*/
|
||||
public String getText(boolean getSlideText, boolean getNoteText) {
|
||||
return getText(getSlideText, getNoteText, commentsByDefault);
|
||||
}
|
||||
public String getText(boolean getSlideText, boolean getNoteText, boolean getCommentText) {
|
||||
StringBuffer ret = new StringBuffer();
|
||||
|
||||
if(getSlideText) {
|
||||
@ -169,6 +186,18 @@ public class PowerPointExtractor extends POIOLE2TextExtractor
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(getCommentText) {
|
||||
Comment[] comments = slide.getComments();
|
||||
for(int j=0; j<comments.length; j++) {
|
||||
ret.append(
|
||||
comments[j].getAuthor() +
|
||||
" - " +
|
||||
comments[j].getText() +
|
||||
"\n"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
if(getNoteText) {
|
||||
ret.append("\n");
|
||||
|
54
src/scratchpad/src/org/apache/poi/hslf/model/Comment.java
Normal file
54
src/scratchpad/src/org/apache/poi/hslf/model/Comment.java
Normal file
@ -0,0 +1,54 @@
|
||||
package org.apache.poi.hslf.model;
|
||||
|
||||
import org.apache.poi.hslf.record.Comment2000;
|
||||
|
||||
public class Comment {
|
||||
private Comment2000 comment2000;
|
||||
|
||||
public Comment(Comment2000 comment2000) {
|
||||
this.comment2000 = comment2000;
|
||||
}
|
||||
|
||||
protected Comment2000 getComment2000() {
|
||||
return comment2000;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Author of this comment
|
||||
*/
|
||||
public String getAuthor() {
|
||||
return comment2000.getAuthor();
|
||||
}
|
||||
/**
|
||||
* Set the Author of this comment
|
||||
*/
|
||||
public void setAuthor(String author) {
|
||||
comment2000.setAuthor(author);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the Author's Initials of this comment
|
||||
*/
|
||||
public String getAuthorInitials() {
|
||||
return comment2000.getAuthorInitials();
|
||||
}
|
||||
/**
|
||||
* Set the Author's Initials of this comment
|
||||
*/
|
||||
public void setAuthorInitials(String initials) {
|
||||
comment2000.setAuthorInitials(initials);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the text of this comment
|
||||
*/
|
||||
public String getText() {
|
||||
return comment2000.getText();
|
||||
}
|
||||
/**
|
||||
* Set the text of this comment
|
||||
*/
|
||||
public void setText(String text) {
|
||||
comment2000.setText(text);
|
||||
}
|
||||
}
|
@ -362,6 +362,59 @@ public class Slide extends Sheet
|
||||
return super.getColorScheme();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the comment(s) for this slide.
|
||||
* Note - for now, only works on PPT 2000 and
|
||||
* PPT 2003 files. Doesn't work for PPT 97
|
||||
* ones, as they do their comments oddly.
|
||||
*/
|
||||
public Comment[] getComments() {
|
||||
// If there are any, they're in
|
||||
// ProgTags -> ProgBinaryTag -> BinaryTagData
|
||||
RecordContainer progTags = (RecordContainer)
|
||||
getSheetContainer().findFirstOfType(
|
||||
RecordTypes.ProgTags.typeID
|
||||
);
|
||||
if(progTags != null) {
|
||||
RecordContainer progBinaryTag = (RecordContainer)
|
||||
progTags.findFirstOfType(
|
||||
RecordTypes.ProgBinaryTag.typeID
|
||||
);
|
||||
if(progBinaryTag != null) {
|
||||
RecordContainer binaryTags = (RecordContainer)
|
||||
progBinaryTag.findFirstOfType(
|
||||
RecordTypes.BinaryTagData.typeID
|
||||
);
|
||||
if(binaryTags != null) {
|
||||
// This is where they'll be
|
||||
int count = 0;
|
||||
for(int i=0; i<binaryTags.getChildRecords().length; i++) {
|
||||
if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Now build
|
||||
Comment[] comments = new Comment[count];
|
||||
count = 0;
|
||||
for(int i=0; i<binaryTags.getChildRecords().length; i++) {
|
||||
if(binaryTags.getChildRecords()[i] instanceof Comment2000) {
|
||||
comments[i] = new Comment(
|
||||
(Comment2000)binaryTags.getChildRecords()[i]
|
||||
);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
return comments;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// None found
|
||||
return new Comment[0];
|
||||
}
|
||||
|
||||
public void draw(Graphics2D graphics){
|
||||
MasterSheet master = getMasterSheet();
|
||||
if(getFollowMasterBackground()) master.getBackground().draw(graphics);
|
||||
|
@ -123,6 +123,20 @@ public abstract class RecordContainer extends Record
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Finds the first child record of the given type,
|
||||
* or null if none of the child records are of the
|
||||
* given type. Does not descend.
|
||||
*/
|
||||
public Record findFirstOfType(long type) {
|
||||
for(int i=0; i<_children.length; i++) {
|
||||
if(_children[i].getRecordType() == type) {
|
||||
return _children[i];
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/* ===============================================================
|
||||
* External Move Methods
|
||||
* ===============================================================
|
||||
|
BIN
src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt
Normal file
BIN
src/scratchpad/testcases/org/apache/poi/hslf/data/45543.ppt
Normal file
Binary file not shown.
BIN
src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt
Executable file
BIN
src/scratchpad/testcases/org/apache/poi/hslf/data/WithComments.ppt
Executable file
Binary file not shown.
@ -216,4 +216,36 @@ public class TextExtractor extends TestCase {
|
||||
ppe.getText(true, false)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* From bug #45543
|
||||
*/
|
||||
public void testWithComments() throws Exception {
|
||||
String filename;
|
||||
|
||||
// New file
|
||||
filename = dirname + "/WithComments.ppt";
|
||||
ppe = new PowerPointExtractor(filename);
|
||||
|
||||
String text = ppe.getText();
|
||||
assertFalse("Comments not in by default", text.contains("This is a test comment"));
|
||||
|
||||
ppe.setCommentsByDefault(true);
|
||||
|
||||
text = ppe.getText();
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("This is a test comment"));
|
||||
|
||||
|
||||
// And another file
|
||||
filename = dirname + "/45543.ppt";
|
||||
ppe = new PowerPointExtractor(filename);
|
||||
|
||||
text = ppe.getText();
|
||||
assertFalse("Comments not in by default", text.contains("testdoc"));
|
||||
|
||||
ppe.setCommentsByDefault(true);
|
||||
|
||||
text = ppe.getText();
|
||||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user