Avoid exceptions when using POI in Tika, see bugs 51771 and 51770
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1169679 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ac144f7d82
commit
effaab4dc7
@ -34,6 +34,8 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.8-beta5" date="2011-??-??">
|
<release version="3.8-beta5" date="2011-??-??">
|
||||||
|
<action dev="poi-developers" type="add">51196 - prevent NPE in XWPFPicture.getPictureData() </action>
|
||||||
|
<action dev="poi-developers" type="add">51771 - prevent NPE when getting object data from OLEShape in HSLF</action>
|
||||||
<action dev="poi-developers" type="add">51196 - more progress with Chart APi in XSSF</action>
|
<action dev="poi-developers" type="add">51196 - more progress with Chart APi in XSSF</action>
|
||||||
<action dev="poi-developers" type="fix">51785 - Allow XSSF setForceFormulaRecalculation to work with the minimal ooxml-schemas jar</action>
|
<action dev="poi-developers" type="fix">51785 - Allow XSSF setForceFormulaRecalculation to work with the minimal ooxml-schemas jar</action>
|
||||||
<action dev="poi-developers" type="fix">51772 - IllegalArgumentException Parsing MS Word 97 - 2003</action>
|
<action dev="poi-developers" type="fix">51772 - IllegalArgumentException Parsing MS Word 97 - 2003</action>
|
||||||
|
@ -18,7 +18,6 @@
|
|||||||
package org.apache.poi.hssf.record;
|
package org.apache.poi.hssf.record;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Iterator;
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
@ -26,6 +25,7 @@ import java.util.Map;
|
|||||||
|
|
||||||
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
import org.apache.poi.ddf.DefaultEscherRecordFactory;
|
||||||
import org.apache.poi.ddf.EscherBoolProperty;
|
import org.apache.poi.ddf.EscherBoolProperty;
|
||||||
|
import org.apache.poi.ddf.EscherChildAnchorRecord;
|
||||||
import org.apache.poi.ddf.EscherClientAnchorRecord;
|
import org.apache.poi.ddf.EscherClientAnchorRecord;
|
||||||
import org.apache.poi.ddf.EscherClientDataRecord;
|
import org.apache.poi.ddf.EscherClientDataRecord;
|
||||||
import org.apache.poi.ddf.EscherContainerRecord;
|
import org.apache.poi.ddf.EscherContainerRecord;
|
||||||
@ -33,7 +33,6 @@ import org.apache.poi.ddf.EscherDgRecord;
|
|||||||
import org.apache.poi.ddf.EscherDggRecord;
|
import org.apache.poi.ddf.EscherDggRecord;
|
||||||
import org.apache.poi.ddf.EscherOptRecord;
|
import org.apache.poi.ddf.EscherOptRecord;
|
||||||
import org.apache.poi.ddf.EscherProperties;
|
import org.apache.poi.ddf.EscherProperties;
|
||||||
import org.apache.poi.ddf.EscherProperty;
|
|
||||||
import org.apache.poi.ddf.EscherRecord;
|
import org.apache.poi.ddf.EscherRecord;
|
||||||
import org.apache.poi.ddf.EscherRecordFactory;
|
import org.apache.poi.ddf.EscherRecordFactory;
|
||||||
import org.apache.poi.ddf.EscherSerializationListener;
|
import org.apache.poi.ddf.EscherSerializationListener;
|
||||||
@ -46,14 +45,16 @@ import org.apache.poi.hssf.model.CommentShape;
|
|||||||
import org.apache.poi.hssf.model.ConvertAnchor;
|
import org.apache.poi.hssf.model.ConvertAnchor;
|
||||||
import org.apache.poi.hssf.model.DrawingManager2;
|
import org.apache.poi.hssf.model.DrawingManager2;
|
||||||
import org.apache.poi.hssf.model.TextboxShape;
|
import org.apache.poi.hssf.model.TextboxShape;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFAnchor;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFChildAnchor;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
|
import org.apache.poi.hssf.usermodel.HSSFClientAnchor;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFPatriarch;
|
import org.apache.poi.hssf.usermodel.HSSFPatriarch;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFPicture;
|
import org.apache.poi.hssf.usermodel.HSSFPicture;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFShape;
|
import org.apache.poi.hssf.usermodel.HSSFShape;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFShapeContainer;
|
import org.apache.poi.hssf.usermodel.HSSFShapeContainer;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFShapeGroup;
|
import org.apache.poi.hssf.usermodel.HSSFShapeGroup;
|
||||||
import org.apache.poi.hssf.usermodel.HSSFTextbox;
|
|
||||||
import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
|
import org.apache.poi.hssf.usermodel.HSSFSimpleShape;
|
||||||
|
import org.apache.poi.hssf.usermodel.HSSFTextbox;
|
||||||
import org.apache.poi.util.POILogFactory;
|
import org.apache.poi.util.POILogFactory;
|
||||||
import org.apache.poi.util.POILogger;
|
import org.apache.poi.util.POILogger;
|
||||||
|
|
||||||
@ -584,28 +585,42 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
convertRecordsToUserModelRecursive(tcc, patriarch, null);
|
||||||
|
|
||||||
|
// Now, clear any trace of what records make up
|
||||||
|
// the patriarch
|
||||||
|
// Otherwise, everything will go horribly wrong
|
||||||
|
// when we try to write out again....
|
||||||
|
// clearEscherRecords();
|
||||||
|
drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
|
||||||
|
|
||||||
|
// TODO: Support converting our records
|
||||||
|
// back into shapes
|
||||||
|
// log.log(POILogger.WARN, "Not processing objects into Patriarch!");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void convertRecordsToUserModelRecursive(List tcc, HSSFShapeContainer container, HSSFShape parent) {
|
||||||
// Now process the containers for each group
|
// Now process the containers for each group
|
||||||
// and objects
|
// and objects
|
||||||
for(int i=1; i<tcc.size(); i++) {
|
for(int i=1; i<tcc.size(); i++) {
|
||||||
EscherContainerRecord shapeContainer =
|
EscherContainerRecord shapeContainer = (EscherContainerRecord)tcc.get(i);
|
||||||
(EscherContainerRecord)tcc.get(i);
|
|
||||||
//System.err.println("\n\n*****\n\n");
|
|
||||||
//System.err.println(shapeContainer);
|
|
||||||
|
|
||||||
// Could be a group, or a base object
|
// Could be a group, or a base object
|
||||||
|
|
||||||
if (shapeContainer.getRecordId() == EscherContainerRecord.SPGR_CONTAINER)
|
if (shapeContainer.getRecordId() == EscherContainerRecord.SPGR_CONTAINER)
|
||||||
{
|
{
|
||||||
// Group
|
// Group
|
||||||
if (shapeContainer.getChildRecords().size() > 0)
|
final int shapeChildren = shapeContainer.getChildRecords().size();
|
||||||
|
if (shapeChildren > 0)
|
||||||
{
|
{
|
||||||
HSSFShapeGroup group = new HSSFShapeGroup( null,
|
HSSFShapeGroup group = new HSSFShapeGroup( parent, new HSSFClientAnchor() );
|
||||||
new HSSFClientAnchor() );
|
addToParentOrContainer(group, container, parent);
|
||||||
patriarch.getChildren().add( group );
|
|
||||||
|
|
||||||
EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer
|
EscherContainerRecord groupContainer = (EscherContainerRecord) shapeContainer.getChild( 0 );
|
||||||
.getChild( 0 );
|
|
||||||
convertRecordsToUserModel( groupContainer, group );
|
convertRecordsToUserModel( groupContainer, group );
|
||||||
|
|
||||||
|
if (shapeChildren>1){
|
||||||
|
convertRecordsToUserModelRecursive(shapeContainer.getChildRecords(), container, group);
|
||||||
|
}
|
||||||
} else
|
} else
|
||||||
{
|
{
|
||||||
log.log( POILogger.WARN,
|
log.log( POILogger.WARN,
|
||||||
@ -621,9 +636,9 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
switch (type)
|
switch (type)
|
||||||
{
|
{
|
||||||
case ST_TEXTBOX:
|
case ST_TEXTBOX:
|
||||||
HSSFTextbox box = new HSSFTextbox( null,
|
HSSFTextbox box = new HSSFTextbox( parent,
|
||||||
new HSSFClientAnchor() );
|
new HSSFClientAnchor() );
|
||||||
patriarch.addShape( box );
|
addToParentOrContainer(box, container, parent);
|
||||||
|
|
||||||
convertRecordsToUserModel( shapeContainer, box );
|
convertRecordsToUserModel( shapeContainer, box );
|
||||||
break;
|
break;
|
||||||
@ -645,14 +660,34 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
EscherClientAnchorRecord anchorRecord = (EscherClientAnchorRecord) getEscherChild(
|
EscherClientAnchorRecord anchorRecord = (EscherClientAnchorRecord) getEscherChild(
|
||||||
shapeContainer,
|
shapeContainer,
|
||||||
EscherClientAnchorRecord.RECORD_ID );
|
EscherClientAnchorRecord.RECORD_ID );
|
||||||
HSSFClientAnchor anchor = toClientAnchor(anchorRecord);
|
|
||||||
|
|
||||||
HSSFPicture picture = new HSSFPicture( null, anchor );
|
EscherChildAnchorRecord childRecord = (EscherChildAnchorRecord) getEscherChild(
|
||||||
|
shapeContainer,
|
||||||
|
EscherChildAnchorRecord.RECORD_ID );
|
||||||
|
|
||||||
|
if (anchorRecord!=null && childRecord!=null){
|
||||||
|
log.log( POILogger.WARN, "Picture with both CLIENT and CHILD anchor: "+ type );
|
||||||
|
}
|
||||||
|
|
||||||
|
HSSFAnchor anchor;
|
||||||
|
if (anchorRecord!=null){
|
||||||
|
anchor = toClientAnchor(anchorRecord);
|
||||||
|
}else{
|
||||||
|
anchor = toChildAnchor(childRecord);
|
||||||
|
}
|
||||||
|
|
||||||
|
HSSFPicture picture = new HSSFPicture( parent, anchor );
|
||||||
picture.setPictureIndex( pictureIndex );
|
picture.setPictureIndex( pictureIndex );
|
||||||
patriarch.addShape( picture );
|
|
||||||
|
addToParentOrContainer(picture, container, parent);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
final HSSFSimpleShape shape = new HSSFSimpleShape( parent,
|
||||||
|
new HSSFClientAnchor() );
|
||||||
|
addToParentOrContainer(shape, container, parent);
|
||||||
|
convertRecordsToUserModel( shapeContainer, shape);
|
||||||
|
|
||||||
log.log( POILogger.WARN, "Unhandled shape type: "
|
log.log( POILogger.WARN, "Unhandled shape type: "
|
||||||
+ type );
|
+ type );
|
||||||
break;
|
break;
|
||||||
@ -663,20 +698,19 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Now, clear any trace of what records make up
|
|
||||||
// the patriarch
|
|
||||||
// Otherwise, everything will go horribly wrong
|
|
||||||
// when we try to write out again....
|
|
||||||
// clearEscherRecords();
|
|
||||||
drawingManager.getDgg().setFileIdClusters(new EscherDggRecord.FileIdCluster[0]);
|
|
||||||
|
|
||||||
// TODO: Support converting our records
|
|
||||||
// back into shapes
|
|
||||||
// log.log(POILogger.WARN, "Not processing objects into Patriarch!");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){
|
private static void addToParentOrContainer(HSSFShape shape, HSSFShapeContainer container, HSSFShape parent) {
|
||||||
|
|
||||||
|
if (parent instanceof HSSFShapeGroup)
|
||||||
|
((HSSFShapeGroup) parent).addShape(shape);
|
||||||
|
else if (container instanceof HSSFPatriarch)
|
||||||
|
((HSSFPatriarch) container).addShape(shape);
|
||||||
|
else
|
||||||
|
container.getChildren().add(shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static HSSFClientAnchor toClientAnchor(EscherClientAnchorRecord anchorRecord){
|
||||||
HSSFClientAnchor anchor = new HSSFClientAnchor();
|
HSSFClientAnchor anchor = new HSSFClientAnchor();
|
||||||
anchor.setAnchorType(anchorRecord.getFlag());
|
anchor.setAnchorType(anchorRecord.getFlag());
|
||||||
anchor.setCol1( anchorRecord.getCol1() );
|
anchor.setCol1( anchorRecord.getCol1() );
|
||||||
@ -690,7 +724,21 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
return anchor;
|
return anchor;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) {
|
private static HSSFChildAnchor toChildAnchor(EscherChildAnchorRecord anchorRecord){
|
||||||
|
HSSFChildAnchor anchor = new HSSFChildAnchor();
|
||||||
|
// anchor.setAnchorType(anchorRecord.getFlag());
|
||||||
|
// anchor.setCol1( anchorRecord.getCol1() );
|
||||||
|
// anchor.setCol2( anchorRecord.getCol2() );
|
||||||
|
anchor.setDx1( anchorRecord.getDx1() );
|
||||||
|
anchor.setDx2( anchorRecord.getDx2() );
|
||||||
|
anchor.setDy1( anchorRecord.getDy1() );
|
||||||
|
anchor.setDy2( anchorRecord.getDy2() );
|
||||||
|
// anchor.setRow1( anchorRecord.getRow1() );
|
||||||
|
// anchor.setRow2( anchorRecord.getRow2() );
|
||||||
|
return anchor;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void convertRecordsToUserModel(EscherContainerRecord shapeContainer, Object model) {
|
||||||
for(Iterator<EscherRecord> it = shapeContainer.getChildIterator(); it.hasNext();) {
|
for(Iterator<EscherRecord> it = shapeContainer.getChildIterator(); it.hasNext();) {
|
||||||
EscherRecord r = it.next();
|
EscherRecord r = it.next();
|
||||||
if(r instanceof EscherSpgrRecord) {
|
if(r instanceof EscherSpgrRecord) {
|
||||||
@ -728,6 +776,10 @@ public final class EscherAggregate extends AbstractEscherHolderRecord {
|
|||||||
}
|
}
|
||||||
else if(r instanceof EscherSpRecord) {
|
else if(r instanceof EscherSpRecord) {
|
||||||
// Use flags if needed
|
// Use flags if needed
|
||||||
|
final EscherSpRecord spr = (EscherSpRecord) r;
|
||||||
|
if (model instanceof HSSFShape){
|
||||||
|
final HSSFShape s = (HSSFShape) model;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else if(r instanceof EscherOptRecord) {
|
else if(r instanceof EscherOptRecord) {
|
||||||
// Use properties if needed
|
// Use properties if needed
|
||||||
|
@ -56,6 +56,11 @@ public class HSSFShapeGroup
|
|||||||
return group;
|
return group;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addShape(HSSFShape shape){
|
||||||
|
shape._patriarch = this._patriarch;
|
||||||
|
shapes.add(shape);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new simple shape under this group.
|
* Create a new simple shape under this group.
|
||||||
* @param anchor the position of the shape.
|
* @param anchor the position of the shape.
|
||||||
@ -177,4 +182,4 @@ public class HSSFShapeGroup
|
|||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
|
|||||||
|
|
||||||
import org.apache.poi.POIXMLDocumentPart;
|
import org.apache.poi.POIXMLDocumentPart;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||||
|
import org.openxmlformats.schemas.drawingml.x2006.main.CTBlipFillProperties;
|
||||||
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
|
import org.openxmlformats.schemas.drawingml.x2006.picture.CTPicture;
|
||||||
|
|
||||||
|
|
||||||
@ -58,7 +59,14 @@ public class XWPFPicture {
|
|||||||
* Note - not all kinds of picture have data
|
* Note - not all kinds of picture have data
|
||||||
*/
|
*/
|
||||||
public XWPFPictureData getPictureData(){
|
public XWPFPictureData getPictureData(){
|
||||||
String blipId = ctPic.getBlipFill().getBlip().getEmbed();
|
CTBlipFillProperties blipProps = ctPic.getBlipFill();
|
||||||
|
|
||||||
|
if(blipProps == null || !blipProps.isSetBlip()) {
|
||||||
|
// return null if Blip data is missing
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
String blipId = blipProps.getBlip().getEmbed();
|
||||||
POIXMLDocumentPart part = run.getParagraph().getPart();
|
POIXMLDocumentPart part = run.getParagraph().getPart();
|
||||||
if (part != null)
|
if (part != null)
|
||||||
{
|
{
|
||||||
|
@ -129,4 +129,26 @@ public class TestXWPFPictureData extends TestCase {
|
|||||||
public void testGetChecksum() {
|
public void testGetChecksum() {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testBug51770() throws InvalidFormatException, IOException {
|
||||||
|
XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Bug51170.docx");
|
||||||
|
XWPFHeaderFooterPolicy policy = doc.getHeaderFooterPolicy();
|
||||||
|
XWPFHeader header = policy.getDefaultHeader();
|
||||||
|
for (XWPFParagraph paragraph : header.getParagraphs()) {
|
||||||
|
for (XWPFRun run : paragraph.getRuns()) {
|
||||||
|
for (XWPFPicture picture : run.getEmbeddedPictures()) {
|
||||||
|
if (paragraph.getDocument() != null) {
|
||||||
|
System.out.println(picture.getCTPicture());
|
||||||
|
XWPFPictureData data = picture.getPictureData();
|
||||||
|
if(data != null) System.out.println(data.getFileName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
private void process(XWPFParagraph paragraph){
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -83,16 +83,17 @@ public final class OLEShape extends Picture {
|
|||||||
ObjectData[] ole = ppt.getEmbeddedObjects();
|
ObjectData[] ole = ppt.getEmbeddedObjects();
|
||||||
|
|
||||||
//persist reference
|
//persist reference
|
||||||
int ref = getExEmbed().getExOleObjAtom().getObjStgDataRef();
|
ExEmbed exEmbed = getExEmbed();
|
||||||
|
|
||||||
ObjectData data = null;
|
ObjectData data = null;
|
||||||
|
if(exEmbed != null) {
|
||||||
|
int ref = exEmbed.getExOleObjAtom().getObjStgDataRef();
|
||||||
|
|
||||||
for (int i = 0; i < ole.length; i++) {
|
for (int i = 0; i < ole.length; i++) {
|
||||||
if(ole[i].getExOleObjStg().getPersistId() == ref) {
|
if(ole[i].getExOleObjStg().getPersistId() == ref) {
|
||||||
data=ole[i];
|
data=ole[i];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (data==null) {
|
if (data==null) {
|
||||||
logger.log(POILogger.WARN, "OLE data not found");
|
logger.log(POILogger.WARN, "OLE data not found");
|
||||||
}
|
}
|
||||||
|
BIN
test-data/document/Bug51170.docx
Normal file
BIN
test-data/document/Bug51170.docx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user