diff --git a/build.xml b/build.xml index e1cb26c35..bcb253536 100644 --- a/build.xml +++ b/build.xml @@ -650,6 +650,7 @@ under the License. + @@ -799,6 +800,7 @@ under the License. + diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index f26c6271b..1381dd8d2 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -46,6 +46,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level 45025 - improved FormulaParser parse error messages 45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 4fc778a5f..35e3ab751 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -43,6 +43,7 @@ Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx + 45043 - Support for getting excel cell comments when extracting text Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level 45025 - improved FormulaParser parse error messages 45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index 2a9c455ca..75a73c654 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; 
+import org.apache.poi.hssf.usermodel.HSSFComment; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor { private HSSFWorkbook wb; private boolean includeSheetNames = true; private boolean formulasNotResults = false; + private boolean includeCellComments = false; public ExcelExtractor(HSSFWorkbook wb) { super(wb); @@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor { public void setFormulasNotResults(boolean formulasNotResults) { this.formulasNotResults = formulasNotResults; } + /** + * Should cell comments be included? Default is false + */ + public void setIncludeCellComments(boolean includeCellComments) { + this.includeCellComments = includeCellComments; + } /** * Retreives the text contents of the file @@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor { break; } + // Output the comment, if requested and exists + HSSFComment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by "+comment.getAuthor()+": "+commentText); + } + // Output a tab if we're not on the last cell if(outputContents && k < (lastCell-1)) { text.append("\t"); diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index 9ebb3f053..2d27f5d33 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -16,25 +16,20 @@ ==================================================================== */ package org.apache.poi.xssf.extractor; -import java.io.File; import java.io.IOException; import 
java.util.Iterator; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Comment; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFCell; -import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; /** * Helper class to extract text from an OOXML Excel file @@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { private Workbook workbook; private boolean includeSheetNames = true; private boolean formulasNotResults = false; + private boolean includeCellComments = false; public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { this(new XSSFWorkbook(path)); @@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { public void setFormulasNotResults(boolean formulasNotResults) { this.formulasNotResults = formulasNotResults; } + /** + * Should cell comments be included? Default is false + */ + public void setIncludeCellComments(boolean includeCellComments) { + this.includeCellComments = includeCellComments; + } /** * Retreives the text contents of the file @@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { for (Object rawR : sheet) { Row row = (Row)rawR; - for(Iterator ri = row.cellIterator(); ri.hasNext();) { - Cell cell = (Cell)ri.next(); + for(Iterator ri = row.cellIterator(); ri.hasNext();) { + Cell cell = ri.next(); // Is it a formula one? 
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) { @@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { text.append(xc.getRawValue()); } + // Output the comment, if requested and exists + Comment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by "+comment.getAuthor()+": "+commentText); + } + if(ri.hasNext()) text.append("\t"); } diff --git a/src/ooxml/testcases/org/apache/poi/TestEmbeded.java b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java new file mode 100644 index 000000000..5e127e21c --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java @@ -0,0 +1,83 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi; + +import java.io.File; +import java.util.Iterator; + +import org.apache.poi.util.IOUtils; +import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xwpf.XWPFDocument; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; + +import junit.framework.TestCase; + +/** + * Class to test that we handle embeded bits in + * OOXML files properly + */ +public class TestEmbeded extends TestCase +{ + public String dirname; + + public void setUp() { + dirname = System.getProperty("OOXML.testdata.path"); + assertNotNull(dirname); + } + + public void testExcel() throws Exception { + File f = new File(dirname, "ExcelWithAttachments.xlsx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString())); + test(doc, 0); + } + + public void testWord() throws Exception { + File f = new File(dirname, "WordWithAttachments.docx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XWPFDocument(Package.open(f.toString())); + test(doc, 4); + } + + public void testPowerPoint() throws Exception { + File f = new File(dirname, "PPTWithAttachments.pptx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString())); + test(doc, 0); + } + + private void test(POIXMLDocument doc, int expectedCount) throws Exception { + assertNotNull(doc.getAllEmbedds()); + assertEquals(expectedCount, doc.getAllEmbedds().size()); + + for(int i=0; i 0); + } + } +} diff --git a/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls new file mode 100644 index 000000000..66dd9185e Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls differ diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java 
b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 63d67ee77..9bb137ff6 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase { ); } + public void testWithComments() throws Exception { + ExcelExtractor extractor = createExtractor("SimpleWithComments.xls"); + extractor.setIncludeSheetNames(false); + + // Check without comments + assertEquals( + "1.0\tone\n" + + "2.0\ttwo\n" + + "3.0\tthree\n", + extractor.getText() + ); + + // Now with + extractor.setIncludeCellComments(true); + assertEquals( + "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + + "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + + "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", + extractor.getText() + ); + } + /** * Embded in a non-excel file