Add a limit on the maximum number of characters that can be extracted, to prevent applications from running out of memory when processing very large documents
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1711520 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b9cf1bdc38
commit
50baa731d3
@ -23,6 +23,7 @@ import org.apache.poi.POIXMLProperties.CoreProperties;
|
|||||||
import org.apache.poi.POIXMLProperties.CustomProperties;
|
import org.apache.poi.POIXMLProperties.CustomProperties;
|
||||||
import org.apache.poi.POIXMLProperties.ExtendedProperties;
|
import org.apache.poi.POIXMLProperties.ExtendedProperties;
|
||||||
import org.apache.poi.openxml4j.opc.OPCPackage;
|
import org.apache.poi.openxml4j.opc.OPCPackage;
|
||||||
|
import org.apache.poi.openxml4j.util.ZipSecureFile;
|
||||||
|
|
||||||
public abstract class POIXMLTextExtractor extends POITextExtractor {
|
public abstract class POIXMLTextExtractor extends POITextExtractor {
|
||||||
/** The POIXMLDocument that's open */
|
/** The POIXMLDocument that's open */
|
||||||
@ -88,4 +89,18 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
|
|||||||
}
|
}
|
||||||
super.close();
|
super.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected void checkMaxTextSize(StringBuffer text, String string) {
|
||||||
|
if(string == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int size = text.length() + string.length();
|
||||||
|
if(size > ZipSecureFile.getMaxTextSize()) {
|
||||||
|
throw new IllegalStateException("The text would exceed the max allowed overall size of extracted text. "
|
||||||
|
+ "By default this is prevented as some documents may exhaust available memory and it may indicate that the file is used to inflate memory usage and thus could pose a security risk. "
|
||||||
|
+ "You can adjust this limit via ZipSecureFile.setMaxTextSize() if you need to work with files which have a lot of text. "
|
||||||
|
+ "Size: " + size + ", limit: MAX_TEXT_SIZE: " + ZipSecureFile.getMaxTextSize());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -50,6 +50,9 @@ public class ZipSecureFile extends ZipFile {
|
|||||||
// don't alert for expanded sizes smaller than 100k
|
// don't alert for expanded sizes smaller than 100k
|
||||||
private static long GRACE_ENTRY_SIZE = 100*1024;
|
private static long GRACE_ENTRY_SIZE = 100*1024;
|
||||||
|
|
||||||
|
// The default maximum size of extracted text
|
||||||
|
private static long MAX_TEXT_SIZE = 10*1024*1024;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the ratio between de- and inflated bytes to detect zipbomb.
|
* Sets the ratio between de- and inflated bytes to detect zipbomb.
|
||||||
* It defaults to 1% (= 0.01d), i.e. when the compression is better than
|
* It defaults to 1% (= 0.01d), i.e. when the compression is better than
|
||||||
@ -100,6 +103,34 @@ public class ZipSecureFile extends ZipFile {
|
|||||||
return MAX_ENTRY_SIZE;
|
return MAX_ENTRY_SIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the maximum number of characters of text that are
|
||||||
|
* extracted before an exception is thrown during extracting
|
||||||
|
* text from documents.
|
||||||
|
*
|
||||||
|
* This can be used to limit memory consumption and protect against
|
||||||
|
* security vulnerabilities when documents are provided by users.
|
||||||
|
*
|
||||||
|
* @param maxTextSize the max. number of characters of text that may be extracted
|
||||||
|
*/
|
||||||
|
public static void setMaxTextSize(long maxTextSize) {
|
||||||
|
if (maxTextSize < 0 || maxTextSize > 0xFFFFFFFFl) {
|
||||||
|
throw new IllegalArgumentException("Max text size is bounded [0-4GB].");
|
||||||
|
}
|
||||||
|
MAX_TEXT_SIZE = maxTextSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current maximum allowed text size.
|
||||||
|
*
|
||||||
|
* See setMaxTextSize() for details.
|
||||||
|
*
|
||||||
|
* @return The max accepted text size.
|
||||||
|
*/
|
||||||
|
public static long getMaxTextSize() {
|
||||||
|
return MAX_TEXT_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
public ZipSecureFile(File file, int mode) throws IOException {
|
public ZipSecureFile(File file, int mode) throws IOException {
|
||||||
super(file, mode);
|
super(file, mode);
|
||||||
}
|
}
|
||||||
|
@ -283,11 +283,13 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
output.append('\t');
|
output.append('\t');
|
||||||
}
|
}
|
||||||
if (formattedValue != null) {
|
if (formattedValue != null) {
|
||||||
|
checkMaxTextSize(output, formattedValue);
|
||||||
output.append(formattedValue);
|
output.append(formattedValue);
|
||||||
}
|
}
|
||||||
if (includeCellComments && comment != null) {
|
if (includeCellComments && comment != null) {
|
||||||
String commentText = comment.getString().getString().replace('\n', ' ');
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
output.append(formattedValue != null ? " Comment by " : "Comment by ");
|
output.append(formattedValue != null ? " Comment by " : "Comment by ");
|
||||||
|
checkMaxTextSize(output, commentText);
|
||||||
if (commentText.startsWith(comment.getAuthor() + ": ")) {
|
if (commentText.startsWith(comment.getAuthor() + ": ")) {
|
||||||
output.append(commentText);
|
output.append(commentText);
|
||||||
} else {
|
} else {
|
||||||
@ -363,6 +365,7 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
|
|||||||
* Append the cell contents we have collected.
|
* Append the cell contents we have collected.
|
||||||
*/
|
*/
|
||||||
private void appendCellText(StringBuffer buffer) {
|
private void appendCellText(StringBuffer buffer) {
|
||||||
|
checkMaxTextSize(buffer, output.toString());
|
||||||
buffer.append(output);
|
buffer.append(output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -168,7 +168,9 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor
|
|||||||
// Is it a formula one?
|
// Is it a formula one?
|
||||||
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
|
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA) {
|
||||||
if (formulasNotResults) {
|
if (formulasNotResults) {
|
||||||
text.append(cell.getCellFormula());
|
String contents = cell.getCellFormula();
|
||||||
|
checkMaxTextSize(text, contents);
|
||||||
|
text.append(contents);
|
||||||
} else {
|
} else {
|
||||||
if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
|
if (cell.getCachedFormulaResultType() == Cell.CELL_TYPE_STRING) {
|
||||||
handleStringCell(text, cell);
|
handleStringCell(text, cell);
|
||||||
@ -188,6 +190,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor
|
|||||||
// Replace any newlines with spaces, otherwise it
|
// Replace any newlines with spaces, otherwise it
|
||||||
// breaks the output
|
// breaks the output
|
||||||
String commentText = comment.getString().getString().replace('\n', ' ');
|
String commentText = comment.getString().getString().replace('\n', ' ');
|
||||||
|
checkMaxTextSize(text, commentText);
|
||||||
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
|
text.append(" Comment by ").append(comment.getAuthor()).append(": ").append(commentText);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -230,8 +233,11 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor
|
|||||||
}
|
}
|
||||||
|
|
||||||
private void handleStringCell(StringBuffer text, Cell cell) {
|
private void handleStringCell(StringBuffer text, Cell cell) {
|
||||||
text.append(cell.getRichStringCellValue().getString());
|
String contents = cell.getRichStringCellValue().getString();
|
||||||
|
checkMaxTextSize(text, contents);
|
||||||
|
text.append(contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
|
private void handleNonStringCell(StringBuffer text, Cell cell, DataFormatter formatter) {
|
||||||
int type = cell.getCellType();
|
int type = cell.getCellType();
|
||||||
if (type == Cell.CELL_TYPE_FORMULA) {
|
if (type == Cell.CELL_TYPE_FORMULA) {
|
||||||
@ -242,16 +248,18 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor
|
|||||||
CellStyle cs = cell.getCellStyle();
|
CellStyle cs = cell.getCellStyle();
|
||||||
|
|
||||||
if (cs != null && cs.getDataFormatString() != null) {
|
if (cs != null && cs.getDataFormatString() != null) {
|
||||||
text.append(formatter.formatRawCellContents(
|
String contents = formatter.formatRawCellContents(
|
||||||
cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString()
|
cell.getNumericCellValue(), cs.getDataFormat(), cs.getDataFormatString());
|
||||||
));
|
checkMaxTextSize(text, contents);
|
||||||
|
text.append(contents);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No supported styling applies to this cell
|
// No supported styling applies to this cell
|
||||||
XSSFCell xcell = (XSSFCell)cell;
|
String contents = ((XSSFCell)cell).getRawValue();
|
||||||
text.append( xcell.getRawValue() );
|
checkMaxTextSize(text, contents);
|
||||||
|
text.append( contents );
|
||||||
}
|
}
|
||||||
|
|
||||||
private String extractHeaderFooter(HeaderFooter hf) {
|
private String extractHeaderFooter(HeaderFooter hf) {
|
||||||
|
Loading…
Reference in New Issue
Block a user