diff --git a/build.xml b/build.xml
index e1cb26c35..bcb253536 100644
--- a/build.xml
+++ b/build.xml
@@ -650,6 +650,7 @@ under the License.
+
@@ -799,6 +800,7 @@ under the License.
+
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index f26c6271b..1381dd8d2 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -46,6 +46,7 @@
Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx
+ 45043 - Support for getting excel cell comments when extracting text
Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level
45025 - improved FormulaParser parse error messages
45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 4fc778a5f..35e3ab751 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -43,6 +43,7 @@
Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx
+ 45043 - Support for getting excel cell comments when extracting text
Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level
45025 - improved FormulaParser parse error messages
45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable
diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
index 2a9c455ca..75a73c654 100644
--- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
+++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFComment;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
private HSSFWorkbook wb;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
@@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is false
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retreives the text contents of the file
@@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
break;
}
+ // Output the comment, if requested and exists
+ HSSFComment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
// Output a tab if we're not on the last cell
if(outputContents && k < (lastCell-1)) {
text.append("\t");
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
index 9ebb3f053..2d27f5d33 100644
--- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
@@ -16,25 +16,20 @@
==================================================================== */
package org.apache.poi.xssf.extractor;
-import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
/**
* Helper class to extract text from an OOXML Excel file
@@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
private Workbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(path));
@@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is false
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retreives the text contents of the file
@@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
for (Object rawR : sheet) {
Row row = (Row)rawR;
- for(Iterator ri = row.cellIterator(); ri.hasNext();) {
- Cell cell = (Cell)ri.next();
+ for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+ Cell cell = ri.next();
// Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
@@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
text.append(xc.getRawValue());
}
+ // Output the comment, if requested and exists
+ Comment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
if(ri.hasNext())
text.append("\t");
}
diff --git a/src/ooxml/testcases/org/apache/poi/TestEmbeded.java b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java
new file mode 100644
index 000000000..5e127e21c
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java
@@ -0,0 +1,83 @@
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi;
+
+import java.io.File;
+import java.util.Iterator;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Class to test that we handle embedded bits in
+ * OOXML files properly
+ */
+public class TestEmbeded extends TestCase
+{
+ public String dirname;
+
+ public void setUp() {
+ dirname = System.getProperty("OOXML.testdata.path");
+ assertNotNull(dirname);
+ }
+
+ public void testExcel() throws Exception {
+ File f = new File(dirname, "ExcelWithAttachments.xlsx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ public void testWord() throws Exception {
+ File f = new File(dirname, "WordWithAttachments.docx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
+ test(doc, 4);
+ }
+
+ public void testPowerPoint() throws Exception {
+ File f = new File(dirname, "PPTWithAttachments.pptx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ private void test(POIXMLDocument doc, int expectedCount) throws Exception {
+ assertNotNull(doc.getAllEmbedds());
+ assertEquals(expectedCount, doc.getAllEmbedds().size());
+
+ for(int i=0; i<doc.getAllEmbedds().size(); i++) { // every embedded part must exist and be non-empty
+ PackagePart pp = doc.getAllEmbedds().get(i);
+ assertNotNull(pp);
+
+ byte[] b = IOUtils.toByteArray(pp.getInputStream());
+ assertTrue(b.length > 0);
+ }
+ }
+}
diff --git a/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
new file mode 100644
index 000000000..66dd9185e
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls differ
diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index 63d67ee77..9bb137ff6 100644
--- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase {
);
}
+ public void testWithComments() throws Exception {
+ ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
+ extractor.setIncludeSheetNames(false);
+
+ // Cell comments are off by default, so only the cell values appear
+ assertEquals(
+ "1.0\tone\n" +
+ "2.0\ttwo\n" +
+ "3.0\tthree\n",
+ extractor.getText()
+ );
+
+ // Now with cell comments switched on: each is appended after its cell's text
+ extractor.setIncludeCellComments(true);
+ assertEquals(
+ "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
+ "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
+ "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
+ extractor.getText()
+ );
+ }
+
/**
* Embded in a non-excel file
|