More merging, plus tests for embedded OOXML files
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@660488 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c132523b46
commit
fb7f5240fe
@ -650,6 +650,7 @@ under the License.
|
|||||||
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
|
||||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||||
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
|
||||||
|
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain" usefile="no"/>
|
<formatter type="plain" usefile="no"/>
|
||||||
<formatter type="xml"/>
|
<formatter type="xml"/>
|
||||||
@ -799,6 +800,7 @@ under the License.
|
|||||||
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
|
||||||
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
|
||||||
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
|
||||||
|
<sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
|
||||||
<sysproperty key="java.awt.headless" value="true"/>
|
<sysproperty key="java.awt.headless" value="true"/>
|
||||||
<formatter type="plain"/>
|
<formatter type="plain"/>
|
||||||
<formatter type="xml"/>
|
<formatter type="xml"/>
|
||||||
|
@ -46,6 +46,7 @@
|
|||||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
||||||
</release>
|
</release>
|
||||||
<release version="3.1-final" date="2008-06-??">
|
<release version="3.1-final" date="2008-06-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
||||||
|
@ -43,6 +43,7 @@
|
|||||||
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
|
||||||
</release>
|
</release>
|
||||||
<release version="3.1-final" date="2008-06-??">
|
<release version="3.1-final" date="2008-06-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
|
||||||
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
|
||||||
|
@ -20,6 +20,7 @@ import java.io.IOException;
|
|||||||
|
|
||||||
import org.apache.poi.POIOLE2TextExtractor;
|
import org.apache.poi.POIOLE2TextExtractor;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFCell;
|
import org.apache.poi.hssf.usermodel.HSSFCell;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFComment;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFRow;
|
import org.apache.poi.hssf.usermodel.HSSFRow;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||||
@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
|||||||
private HSSFWorkbook wb;
|
private HSSFWorkbook wb;
|
||||||
private boolean includeSheetNames = true;
|
private boolean includeSheetNames = true;
|
||||||
private boolean formulasNotResults = false;
|
private boolean formulasNotResults = false;
|
||||||
|
private boolean includeCellComments = false;
|
||||||
|
|
||||||
public ExcelExtractor(HSSFWorkbook wb) {
|
public ExcelExtractor(HSSFWorkbook wb) {
|
||||||
super(wb);
|
super(wb);
|
||||||
@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
|||||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
this.formulasNotResults = formulasNotResults;
|
this.formulasNotResults = formulasNotResults;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Should cell comments be included? Default is false
|
||||||
|
*/
|
||||||
|
public void setIncludeCellComments(boolean includeCellComments) {
|
||||||
|
this.includeCellComments = includeCellComments;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the text contents of the file
|
* Retrieves the text contents of the file
|
||||||
@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Output the comment, if requested and exists
|
||||||
|
HSSFComment comment = cell.getCellComment();
|
||||||
|
if(includeCellComments && comment != null) {
|
||||||
|
// Replace any newlines with spaces, otherwise it
|
||||||
|
// breaks the output
|
||||||
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
|
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
|
||||||
|
}
|
||||||
|
|
||||||
// Output a tab if we're not on the last cell
|
// Output a tab if we're not on the last cell
|
||||||
if(outputContents && k < (lastCell-1)) {
|
if(outputContents && k < (lastCell-1)) {
|
||||||
text.append("\t");
|
text.append("\t");
|
||||||
|
@ -16,25 +16,20 @@
|
|||||||
==================================================================== */
|
==================================================================== */
|
||||||
package org.apache.poi.xssf.extractor;
|
package org.apache.poi.xssf.extractor;
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
|
|
||||||
import org.apache.poi.POIXMLTextExtractor;
|
import org.apache.poi.POIXMLTextExtractor;
|
||||||
import org.apache.poi.ss.usermodel.Cell;
|
import org.apache.poi.ss.usermodel.Cell;
|
||||||
|
import org.apache.poi.ss.usermodel.Comment;
|
||||||
import org.apache.poi.ss.usermodel.Row;
|
import org.apache.poi.ss.usermodel.Row;
|
||||||
import org.apache.poi.ss.usermodel.Sheet;
|
import org.apache.poi.ss.usermodel.Sheet;
|
||||||
import org.apache.poi.ss.usermodel.Workbook;
|
import org.apache.poi.ss.usermodel.Workbook;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFCell;
|
import org.apache.poi.xssf.usermodel.XSSFCell;
|
||||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
|
||||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.openxml4j.exceptions.OpenXML4JException;
|
import org.openxml4j.exceptions.OpenXML4JException;
|
||||||
import org.openxml4j.opc.Package;
|
import org.openxml4j.opc.Package;
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
|
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
|
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
|
|
||||||
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper class to extract text from an OOXML Excel file
|
* Helper class to extract text from an OOXML Excel file
|
||||||
@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||||||
private Workbook workbook;
|
private Workbook workbook;
|
||||||
private boolean includeSheetNames = true;
|
private boolean includeSheetNames = true;
|
||||||
private boolean formulasNotResults = false;
|
private boolean formulasNotResults = false;
|
||||||
|
private boolean includeCellComments = false;
|
||||||
|
|
||||||
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(new XSSFWorkbook(path));
|
this(new XSSFWorkbook(path));
|
||||||
@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
this.formulasNotResults = formulasNotResults;
|
this.formulasNotResults = formulasNotResults;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Should cell comments be included? Default is false
|
||||||
|
*/
|
||||||
|
public void setIncludeCellComments(boolean includeCellComments) {
|
||||||
|
this.includeCellComments = includeCellComments;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the text contents of the file
|
* Retrieves the text contents of the file
|
||||||
@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||||||
|
|
||||||
for (Object rawR : sheet) {
|
for (Object rawR : sheet) {
|
||||||
Row row = (Row)rawR;
|
Row row = (Row)rawR;
|
||||||
for(Iterator ri = row.cellIterator(); ri.hasNext();) {
|
for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
|
||||||
Cell cell = (Cell)ri.next();
|
Cell cell = ri.next();
|
||||||
|
|
||||||
// Is it a formula one?
|
// Is it a formula one?
|
||||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
|
||||||
@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
|
|||||||
text.append(xc.getRawValue());
|
text.append(xc.getRawValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Output the comment, if requested and exists
|
||||||
|
Comment comment = cell.getCellComment();
|
||||||
|
if(includeCellComments && comment != null) {
|
||||||
|
// Replace any newlines with spaces, otherwise it
|
||||||
|
// breaks the output
|
||||||
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
|
text.append(" Comment by "+comment.getAuthor()+": "+commentText);
|
||||||
|
}
|
||||||
|
|
||||||
if(ri.hasNext())
|
if(ri.hasNext())
|
||||||
text.append("\t");
|
text.append("\t");
|
||||||
}
|
}
|
||||||
|
83
src/ooxml/testcases/org/apache/poi/TestEmbeded.java
Normal file
83
src/ooxml/testcases/org/apache/poi/TestEmbeded.java
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
|
||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
|
||||||
|
package org.apache.poi;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.poi.util.IOUtils;
|
||||||
|
import org.apache.poi.xslf.XSLFSlideShow;
|
||||||
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||||
|
import org.apache.poi.xwpf.XWPFDocument;
|
||||||
|
import org.openxml4j.opc.Package;
|
||||||
|
import org.openxml4j.opc.PackagePart;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class to test that we handle embedded bits in
|
||||||
|
* OOXML files properly
|
||||||
|
*/
|
||||||
|
public class TestEmbeded extends TestCase
|
||||||
|
{
|
||||||
|
public String dirname;
|
||||||
|
|
||||||
|
public void setUp() {
|
||||||
|
dirname = System.getProperty("OOXML.testdata.path");
|
||||||
|
assertNotNull(dirname);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testExcel() throws Exception {
|
||||||
|
File f = new File(dirname, "ExcelWithAttachments.xlsx");
|
||||||
|
assertTrue(f.exists());
|
||||||
|
|
||||||
|
POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
|
||||||
|
test(doc, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testWord() throws Exception {
|
||||||
|
File f = new File(dirname, "WordWithAttachments.docx");
|
||||||
|
assertTrue(f.exists());
|
||||||
|
|
||||||
|
POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
|
||||||
|
test(doc, 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testPowerPoint() throws Exception {
|
||||||
|
File f = new File(dirname, "PPTWithAttachments.pptx");
|
||||||
|
assertTrue(f.exists());
|
||||||
|
|
||||||
|
POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
|
||||||
|
test(doc, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void test(POIXMLDocument doc, int expectedCount) throws Exception {
|
||||||
|
assertNotNull(doc.getAllEmbedds());
|
||||||
|
assertEquals(expectedCount, doc.getAllEmbedds().size());
|
||||||
|
|
||||||
|
for(int i=0; i<doc.getAllEmbedds().size(); i++) {
|
||||||
|
PackagePart pp = doc.getAllEmbedds().get(i);
|
||||||
|
assertNotNull(pp);
|
||||||
|
|
||||||
|
byte[] b = IOUtils.toByteArray(pp.getInputStream());
|
||||||
|
assertTrue(b.length > 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
Normal file
BIN
src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
Normal file
Binary file not shown.
@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testWithComments() throws Exception {
|
||||||
|
ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
|
||||||
|
extractor.setIncludeSheetNames(false);
|
||||||
|
|
||||||
|
// Check without comments
|
||||||
|
assertEquals(
|
||||||
|
"1.0\tone\n" +
|
||||||
|
"2.0\ttwo\n" +
|
||||||
|
"3.0\tthree\n",
|
||||||
|
extractor.getText()
|
||||||
|
);
|
||||||
|
|
||||||
|
// Now with
|
||||||
|
extractor.setIncludeCellComments(true);
|
||||||
|
assertEquals(
|
||||||
|
"1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
|
||||||
|
"2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
|
||||||
|
"3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
|
||||||
|
extractor.getText()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Embedded in a non-Excel file
|
* Embedded in a non-Excel file
|
||||||
|
Loading…
Reference in New Issue
Block a user