More merging, plus tests for embedded OOXML files
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@660488 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c132523b46
commit
fb7f5240fe
@ -650,6 +650,7 @@ under the License.
|
||||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
|
||||
<sysproperty key="java.awt.headless" value="true"/>
|
||||
<formatter type="plain" usefile="no"/>
|
||||
<formatter type="xml"/>
|
||||
@ -799,6 +800,7 @@ under the License.
|
||||
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
||||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
|
||||
<sysproperty key="java.awt.headless" value="true"/>
|
||||
<formatter type="plain"/>
|
||||
<formatter type="xml"/>
|
||||
|
@ -46,6 +46,7 @@
|
||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of whether they are .xls or .xlsx</action>
|
||||
</release>
|
||||
<release version="3.1-final" date="2008-06-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
||||
|
@ -43,6 +43,7 @@
|
||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of whether they are .xls or .xlsx</action>
|
||||
</release>
|
||||
<release version="3.1-final" date="2008-06-??">
|
||||
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
|
||||
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
||||
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
||||
|
||||
import org.apache.poi.POIOLE2TextExtractor;
|
||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||
import org.apache.poi.hssf.usermodel.HSSFComment;
|
||||
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||
@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
||||
private HSSFWorkbook wb;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
private boolean includeCellComments = false;
|
||||
|
||||
public ExcelExtractor(HSSFWorkbook wb) {
|
||||
super(wb);
|
||||
@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||
this.formulasNotResults = formulasNotResults;
|
||||
}
|
||||
/**
|
||||
* Should cell comments be included? Default is false
|
||||
*/
|
||||
public void setIncludeCellComments(boolean includeCellComments) {
|
||||
this.includeCellComments = includeCellComments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the text contents of the file
|
||||
@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
||||
break;
|
||||
}
|
||||
|
||||
// Output the comment, if requested and exists
|
||||
HSSFComment comment = cell.getCellComment();
|
||||
if(includeCellComments && comment != null) {
|
||||
// Replace any newlines with spaces, otherwise it
|
||||
// breaks the output
|
||||
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
|
||||
}
|
||||
|
||||
// Output a tab if we're not on the last cell
|
||||
if(outputContents && k < (lastCell-1)) {
|
||||
text.append("\t");
|
||||
|
@ -16,25 +16,20 @@
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.extractor;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.POIXMLTextExtractor;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
import org.apache.poi.ss.usermodel.Comment;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Sheet;
|
||||
import org.apache.poi.ss.usermodel.Workbook;
|
||||
import org.apache.poi.xssf.usermodel.XSSFCell;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.openxml4j.exceptions.OpenXML4JException;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
|
||||
|
||||
/**
|
||||
* Helper class to extract text from an OOXML Excel file
|
||||
@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
private Workbook workbook;
|
||||
private boolean includeSheetNames = true;
|
||||
private boolean formulasNotResults = false;
|
||||
private boolean includeCellComments = false;
|
||||
|
||||
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||
this(new XSSFWorkbook(path));
|
||||
@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||
this.formulasNotResults = formulasNotResults;
|
||||
}
|
||||
/**
|
||||
* Should cell comments be included? Default is false
|
||||
*/
|
||||
public void setIncludeCellComments(boolean includeCellComments) {
|
||||
this.includeCellComments = includeCellComments;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves the text contents of the file
|
||||
@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
|
||||
for (Object rawR : sheet) {
|
||||
Row row = (Row)rawR;
|
||||
for(Iterator ri = row.cellIterator(); ri.hasNext();) {
|
||||
Cell cell = (Cell)ri.next();
|
||||
for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
|
||||
Cell cell = ri.next();
|
||||
|
||||
// Is it a formula one?
|
||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
||||
@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
||||
text.append(xc.getRawValue());
|
||||
}
|
||||
|
||||
// Output the comment, if requested and exists
|
||||
Comment comment = cell.getCellComment();
|
||||
if(includeCellComments && comment != null) {
|
||||
// Replace any newlines with spaces, otherwise it
|
||||
// breaks the output
|
||||
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
|
||||
}
|
||||
|
||||
if(ri.hasNext())
|
||||
text.append("\t");
|
||||
}
|
||||
|
83
src/ooxml/testcases/org/apache/poi/TestEmbeded.java
Normal file
83
src/ooxml/testcases/org/apache/poi/TestEmbeded.java
Normal file
@ -0,0 +1,83 @@
|
||||
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
|
||||
package org.apache.poi;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.util.IOUtils;
|
||||
import org.apache.poi.xslf.XSLFSlideShow;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.poi.xwpf.XWPFDocument;
|
||||
import org.openxml4j.opc.Package;
|
||||
import org.openxml4j.opc.PackagePart;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
/**
|
||||
* Class to test that we handle embedded bits in
|
||||
* OOXML files properly
|
||||
*/
|
||||
public class TestEmbeded extends TestCase
|
||||
{
|
||||
public String dirname;
|
||||
|
||||
public void setUp() {
|
||||
dirname = System.getProperty("OOXML.testdata.path");
|
||||
assertNotNull(dirname);
|
||||
}
|
||||
|
||||
public void testExcel() throws Exception {
|
||||
File f = new File(dirname, "ExcelWithAttachments.xlsx");
|
||||
assertTrue(f.exists());
|
||||
|
||||
POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
|
||||
test(doc, 0);
|
||||
}
|
||||
|
||||
public void testWord() throws Exception {
|
||||
File f = new File(dirname, "WordWithAttachments.docx");
|
||||
assertTrue(f.exists());
|
||||
|
||||
POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
|
||||
test(doc, 4);
|
||||
}
|
||||
|
||||
public void testPowerPoint() throws Exception {
|
||||
File f = new File(dirname, "PPTWithAttachments.pptx");
|
||||
assertTrue(f.exists());
|
||||
|
||||
POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
|
||||
test(doc, 0);
|
||||
}
|
||||
|
||||
private void test(POIXMLDocument doc, int expectedCount) throws Exception {
|
||||
assertNotNull(doc.getAllEmbedds());
|
||||
assertEquals(expectedCount, doc.getAllEmbedds().size());
|
||||
|
||||
for(int i=0; i<doc.getAllEmbedds().size(); i++) {
|
||||
PackagePart pp = doc.getAllEmbedds().get(i);
|
||||
assertNotNull(pp);
|
||||
|
||||
byte[] b = IOUtils.toByteArray(pp.getInputStream());
|
||||
assertTrue(b.length > 0);
|
||||
}
|
||||
}
|
||||
}
|
BIN
src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
Normal file
BIN
src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
Normal file
Binary file not shown.
@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase {
|
||||
);
|
||||
}
|
||||
|
||||
public void testWithComments() throws Exception {
|
||||
ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
|
||||
extractor.setIncludeSheetNames(false);
|
||||
|
||||
// Check without comments
|
||||
assertEquals(
|
||||
"1.0\tone\n" +
|
||||
"2.0\ttwo\n" +
|
||||
"3.0\tthree\n",
|
||||
extractor.getText()
|
||||
);
|
||||
|
||||
// Now with
|
||||
extractor.setIncludeCellComments(true);
|
||||
assertEquals(
|
||||
"1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
|
||||
"2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
|
||||
"3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
|
||||
extractor.getText()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Embedded in a non-Excel file
|
||||
|
Loading…
Reference in New Issue
Block a user