Patch from Shaun Kalley from bug #56022 - XSSF Event Text Extractor header/footer support
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1563657 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
85bee89fe3
commit
eb2e8ffe0a
@ -18,8 +18,10 @@ package org.apache.poi.xssf.extractor;
|
|||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Locale;
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
import javax.xml.parsers.SAXParser;
|
import javax.xml.parsers.SAXParser;
|
||||||
@ -56,9 +58,10 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
private POIXMLProperties properties;
|
private POIXMLProperties properties;
|
||||||
|
|
||||||
private Locale locale;
|
private Locale locale;
|
||||||
private boolean includeSheetNames = true;
|
|
||||||
private boolean formulasNotResults = false;
|
|
||||||
private boolean includeTextBoxes = true;
|
private boolean includeTextBoxes = true;
|
||||||
|
private boolean includeSheetNames = true;
|
||||||
|
private boolean includeHeadersFooters = true;
|
||||||
|
private boolean formulasNotResults = false;
|
||||||
|
|
||||||
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
|
||||||
this(OPCPackage.open(path));
|
this(OPCPackage.open(path));
|
||||||
@ -94,7 +97,12 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
public void setFormulasNotResults(boolean formulasNotResults) {
|
public void setFormulasNotResults(boolean formulasNotResults) {
|
||||||
this.formulasNotResults = formulasNotResults;
|
this.formulasNotResults = formulasNotResults;
|
||||||
}
|
}
|
||||||
|
/**
|
||||||
|
* Should headers and footers be included? Default is true
|
||||||
|
*/
|
||||||
|
public void setIncludeHeadersFooters(boolean includeHeadersFooters) {
|
||||||
|
this.includeHeadersFooters = includeHeadersFooters;
|
||||||
|
}
|
||||||
/**
|
/**
|
||||||
* Should text from textboxes be included? Default is true
|
* Should text from textboxes be included? Default is true
|
||||||
*/
|
*/
|
||||||
@ -186,7 +194,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
|
||||||
|
|
||||||
StringBuffer text = new StringBuffer();
|
StringBuffer text = new StringBuffer();
|
||||||
SheetTextExtractor sheetExtractor = new SheetTextExtractor(text);
|
SheetTextExtractor sheetExtractor = new SheetTextExtractor();
|
||||||
|
|
||||||
while (iter.hasNext()) {
|
while (iter.hasNext()) {
|
||||||
InputStream stream = iter.next();
|
InputStream stream = iter.next();
|
||||||
@ -195,9 +203,17 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
text.append('\n');
|
text.append('\n');
|
||||||
}
|
}
|
||||||
processSheet(sheetExtractor, styles, strings, stream);
|
processSheet(sheetExtractor, styles, strings, stream);
|
||||||
|
if (includeHeadersFooters) {
|
||||||
|
sheetExtractor.appendHeaderText(text);
|
||||||
|
}
|
||||||
|
sheetExtractor.appendCellText(text);
|
||||||
if (includeTextBoxes){
|
if (includeTextBoxes){
|
||||||
processShapes(iter.getShapes(), text);
|
processShapes(iter.getShapes(), text);
|
||||||
}
|
}
|
||||||
|
if (includeHeadersFooters) {
|
||||||
|
sheetExtractor.appendFooterText(text);
|
||||||
|
}
|
||||||
|
sheetExtractor.reset();
|
||||||
stream.close();
|
stream.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -238,10 +254,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
|
|
||||||
protected class SheetTextExtractor implements SheetContentsHandler {
|
protected class SheetTextExtractor implements SheetContentsHandler {
|
||||||
private final StringBuffer output;
|
private final StringBuffer output;
|
||||||
private boolean firstCellOfRow = true;
|
private boolean firstCellOfRow;
|
||||||
|
private final Map<String, String> headerFooterMap;
|
||||||
|
|
||||||
protected SheetTextExtractor(StringBuffer output) {
|
protected SheetTextExtractor() {
|
||||||
this.output = output;
|
this.output = new StringBuffer();
|
||||||
|
this.firstCellOfRow = true;
|
||||||
|
this.headerFooterMap = includeHeadersFooters ? new HashMap<String, String>() : null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void startRow(int rowNum) {
|
public void startRow(int rowNum) {
|
||||||
@ -262,7 +281,84 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
public void headerFooter(String text, boolean isHeader, String tagName) {
|
public void headerFooter(String text, boolean isHeader, String tagName) {
|
||||||
// We don't include headers in the output yet, so ignore
|
if (headerFooterMap != null) {
|
||||||
|
headerFooterMap.put(tagName, text);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the text for the named header or footer if found.
|
||||||
|
*/
|
||||||
|
private void appendHeaderFooterText(StringBuffer buffer, String name) {
|
||||||
|
String text = headerFooterMap.get(name);
|
||||||
|
if (text != null && text.length() > 0) {
|
||||||
|
// this is a naive way of handling the left, center, and right
|
||||||
|
// header and footer delimiters, but it seems to be as good as
|
||||||
|
// the method used by XSSFExcelExtractor
|
||||||
|
text = handleHeaderFooterDelimiter(text, "&L");
|
||||||
|
text = handleHeaderFooterDelimiter(text, "&C");
|
||||||
|
text = handleHeaderFooterDelimiter(text, "&R");
|
||||||
|
buffer.append(text).append('\n');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Remove the delimiter if it's found at the beginning of the text,
|
||||||
|
* or replace it with a tab if it's in the middle.
|
||||||
|
*/
|
||||||
|
private String handleHeaderFooterDelimiter(String text, String delimiter) {
|
||||||
|
int index = text.indexOf(delimiter);
|
||||||
|
if (index == 0) {
|
||||||
|
text = text.substring(2);
|
||||||
|
} else if (index > 0) {
|
||||||
|
text = text.substring(0, index) + "\t" + text.substring(index + 2);
|
||||||
|
}
|
||||||
|
return text;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the text for each header type in the same order
|
||||||
|
* they are appended in XSSFExcelExtractor.
|
||||||
|
* @see XSSFExcelExtractor#getText()
|
||||||
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||||
|
*/
|
||||||
|
private void appendHeaderText(StringBuffer buffer) {
|
||||||
|
appendHeaderFooterText(buffer, "firstHeader");
|
||||||
|
appendHeaderFooterText(buffer, "oddHeader");
|
||||||
|
appendHeaderFooterText(buffer, "evenHeader");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the text for each footer type in the same order
|
||||||
|
* they are appended in XSSFExcelExtractor.
|
||||||
|
* @see XSSFExcelExtractor#getText()
|
||||||
|
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
|
||||||
|
*/
|
||||||
|
private void appendFooterText(StringBuffer buffer) {
|
||||||
|
// append the text for each footer type in the same order
|
||||||
|
// they are appended in XSSFExcelExtractor
|
||||||
|
appendHeaderFooterText(buffer, "firstFooter");
|
||||||
|
appendHeaderFooterText(buffer, "oddFooter");
|
||||||
|
appendHeaderFooterText(buffer, "evenFooter");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Append the cell contents we have collected.
|
||||||
|
*/
|
||||||
|
private void appendCellText(StringBuffer buffer) {
|
||||||
|
buffer.append(output);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reset this <code>SheetTextExtractor</code> for the next sheet.
|
||||||
|
*/
|
||||||
|
private void reset() {
|
||||||
|
output.setLength(0);
|
||||||
|
firstCellOfRow = true;
|
||||||
|
if (headerFooterMap != null) {
|
||||||
|
headerFooterMap.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -209,4 +209,35 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
|
|||||||
fixture.close();
|
fixture.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we return the same output headers and footers as the
|
||||||
|
* non-event-based XSSFExcelExtractor.
|
||||||
|
*/
|
||||||
|
public void testHeadersAndFootersComparedToNonEventBasedExtractor()
|
||||||
|
throws Exception {
|
||||||
|
|
||||||
|
String expectedOutputWithHeadersAndFooters =
|
||||||
|
"Sheet1\n" +
|
||||||
|
"&\"Calibri,Regular\"&K000000top left\t&\"Calibri,Regular\"&K000000top center\t&\"Calibri,Regular\"&K000000top right\n" +
|
||||||
|
"abc\t123\n" +
|
||||||
|
"&\"Calibri,Regular\"&K000000bottom left\t&\"Calibri,Regular\"&K000000bottom center\t&\"Calibri,Regular\"&K000000bottom right\n";
|
||||||
|
|
||||||
|
String expectedOutputWithoutHeadersAndFooters =
|
||||||
|
"Sheet1\n" +
|
||||||
|
"abc\t123\n";
|
||||||
|
|
||||||
|
XSSFExcelExtractor extractor = new XSSFExcelExtractor(
|
||||||
|
XSSFTestDataSamples.openSampleWorkbook("headerFooterTest.xlsx"));
|
||||||
|
assertEquals(expectedOutputWithHeadersAndFooters, extractor.getText());
|
||||||
|
extractor.setIncludeHeadersFooters(false);
|
||||||
|
assertEquals(expectedOutputWithoutHeadersAndFooters, extractor.getText());
|
||||||
|
|
||||||
|
XSSFEventBasedExcelExtractor fixture =
|
||||||
|
new XSSFEventBasedExcelExtractor(
|
||||||
|
XSSFTestDataSamples.openSamplePackage("headerFooterTest.xlsx"));
|
||||||
|
assertEquals(expectedOutputWithHeadersAndFooters, fixture.getText());
|
||||||
|
fixture.setIncludeHeadersFooters(false);
|
||||||
|
assertEquals(expectedOutputWithoutHeadersAndFooters, fixture.getText());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BIN
test-data/spreadsheet/headerFooterTest.xlsx
Normal file
BIN
test-data/spreadsheet/headerFooterTest.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user