Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7b043c2bc4
commit
ede8beb227
@ -34,6 +34,7 @@
|
|||||||
|
|
||||||
<changes>
|
<changes>
|
||||||
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
||||||
|
<action dev="POI-DEVELOPERS" type="fix">49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action>
|
<action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action>
|
<action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action>
|
||||||
<action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action>
|
<action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action>
|
||||||
|
@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel;
|
|||||||
import org.apache.poi.POIXMLDocumentPart;
|
import org.apache.poi.POIXMLDocumentPart;
|
||||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||||
|
import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
|
||||||
import org.apache.xmlbeans.XmlException;
|
import org.apache.xmlbeans.XmlException;
|
||||||
import org.apache.xmlbeans.XmlOptions;
|
import org.apache.xmlbeans.XmlOptions;
|
||||||
import org.apache.xmlbeans.XmlObject;
|
import org.apache.xmlbeans.XmlObject;
|
||||||
@ -54,6 +55,11 @@ import schemasMicrosoftComOfficeExcel.STObjectType;
|
|||||||
* need a file format for drawings are strongly encouraged to use preferentially DrawingML
|
* need a file format for drawings are strongly encouraged to use preferentially DrawingML
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
* <p>
|
||||||
|
* Warning - Excel is known to put invalid XML into these files!
|
||||||
|
* For example, {@code <br>} without being closed or escaped crops up.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf
|
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf
|
||||||
*
|
*
|
||||||
* @author Yegor Kozlov
|
* @author Yegor Kozlov
|
||||||
@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
|
|||||||
|
|
||||||
|
|
||||||
protected void read(InputStream is) throws IOException, XmlException {
|
protected void read(InputStream is) throws IOException, XmlException {
|
||||||
XmlObject root = XmlObject.Factory.parse(is);
|
XmlObject root = XmlObject.Factory.parse(
|
||||||
|
new EvilUnclosedBRFixingInputStream(is)
|
||||||
|
);
|
||||||
|
|
||||||
_qnames = new ArrayList<QName>();
|
_qnames = new ArrayList<QName>();
|
||||||
_items = new ArrayList<XmlObject>();
|
_items = new ArrayList<XmlObject>();
|
||||||
|
@ -0,0 +1,116 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
package org.apache.poi.xssf.util;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
/**
 * This is a seriously sick fix for the fact that some .xlsx
 *  files contain raw bits of HTML, without being escaped
 *  or properly turned into XML.
 * The result is that they contain things like {@code <br>},
 *  which breaks the XML parsing.
 * This very sick InputStream wrapper spots these going past and
 *  rewrites every {@code <br>} as {@code <br/>}.
 * Only works for UTF-8 and US-ASCII based streams!
 * It should only be used where experience shows the problem
 *  can occur...
 *
 * <p>The rewrite is done by a small state machine that remembers, across
 *  calls, how much of {@code <br} has just been passed through. Tags that
 *  straddle two reads of the underlying stream, or that sit at the very
 *  end of the data, are therefore fixed as well.</p>
 *
 * <p>Instances are not safe for use by multiple threads.</p>
 */
public class EvilUnclosedBRFixingInputStream extends InputStream {
    /** Upper bound on the number of raw bytes fetched from the source in one go. */
    private static final int BUFFER_SIZE = 4096;

    /** The bytes that, when directly followed by '>', form the unclosed tag. */
    private static final byte[] OPEN_TAG = new byte[] {
        (byte)'<', (byte)'b', (byte)'r'
    };

    private final InputStream source;

    /** Raw bytes fetched from the source but not yet handed to the caller. */
    private final byte[] raw = new byte[BUFFER_SIZE];
    private int rawPos;
    private int rawEnd;

    /** How many bytes of {@link #OPEN_TAG} the most recently emitted bytes match. */
    private int matched;

    /**
     * Set when the inserted '/' was the last byte that fitted into the
     *  caller's buffer, so the '>' still has to be delivered by the next read.
     */
    private boolean closePending;

    /**
     * @param source the stream to read the (possibly invalid) XML from
     */
    public EvilUnclosedBRFixingInputStream(InputStream source) {
        this.source = source;
    }

    /**
     * Reads a single, already fixed, byte.
     *
     * @return the next byte as a value from 0 to 255, or -1 at end of stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        byte[] single = new byte[1];
        int count = read(single, 0, 1);
        return (count < 0) ? -1 : (single[0] & 0xFF);
    }

    /**
     * Reads up to <code>len</code> fixed bytes into <code>b</code>.
     * Never writes outside <code>b[off] .. b[off+len-1]</code>, and never
     *  pulls more bytes from the underlying stream than were asked for.
     *
     * @param b   destination buffer
     * @param off index in <code>b</code> of the first byte to write
     * @param len maximum number of bytes to write
     * @return the number of bytes written, or -1 at end of stream
     * @throws IOException if the underlying stream fails
     * @throws IndexOutOfBoundsException if <code>off</code>/<code>len</code>
     *          do not describe a range inside <code>b</code>
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if(b == null) {
            throw new NullPointerException("b");
        }
        if(off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", b.length=" + b.length
            );
        }
        if(len == 0) {
            return 0;
        }

        int written = 0;
        if(closePending) {
            // Finish the "/>" that was cut in half by the previous call
            b[off] = (byte)'>';
            written = 1;
            closePending = false;
        }

        while(written < len) {
            if(rawPos == rawEnd) {
                if(written > 0) {
                    // Hand back what we have rather than block for more
                    break;
                }
                // Fetch at most len bytes, so we never read ahead of the caller
                int fetched = source.read(raw, 0, Math.min(raw.length, len));
                if(fetched < 0) {
                    return -1;
                }
                rawPos = 0;
                rawEnd = fetched;
                continue;
            }

            byte c = raw[rawPos++];
            if(matched == OPEN_TAG.length && c == '>') {
                // Just passed "<br" and now see '>': slip in the missing '/'
                b[off + written++] = (byte)'/';
                matched = 0;
                if(written < len) {
                    b[off + written++] = c;
                } else {
                    closePending = true;
                }
            } else {
                if(matched < OPEN_TAG.length && c == OPEN_TAG[matched]) {
                    matched++;
                } else {
                    // '<' only occurs at the start of the tag, so that is
                    //  the only byte that can begin a fresh match
                    matched = (c == '<') ? 1 : 0;
                }
                b[off + written++] = c;
            }
        }
        return written;
    }

    /**
     * Reads up to <code>b.length</code> fixed bytes into <code>b</code>.
     *
     * @param b destination buffer
     * @return the number of bytes written, or -1 at end of stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read(byte[] b) throws IOException {
        return this.read(b, 0, b.length);
    }

    /**
     * Closes the wrapped stream.
     *
     * @throws IOException if closing the underlying stream fails
     */
    @Override
    public void close() throws IOException {
        source.close();
    }
}
|
@ -138,4 +138,14 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
|
|||||||
assertEquals(1, rels.size());
|
assertEquals(1, rels.size());
|
||||||
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment());
|
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Excel will sometimes write a button with a textbox
|
||||||
|
* containing >br< (not closed!).
|
||||||
|
* Clearly Excel shouldn't do this, but test that we can
|
||||||
|
* read the file despite the naughtyness
|
||||||
|
*/
|
||||||
|
public void test49020() throws Exception {
|
||||||
|
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,79 @@
|
|||||||
|
/* ====================================================================
|
||||||
|
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
contributor license agreements. See the NOTICE file distributed with
|
||||||
|
this work for additional information regarding copyright ownership.
|
||||||
|
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
(the "License"); you may not use this file except in compliance with
|
||||||
|
the License. You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License.
|
||||||
|
==================================================================== */
|
||||||
|
|
||||||
|
package org.apache.poi.xssf.util;
|
||||||
|
|
||||||
|
import java.io.ByteArrayInputStream;
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
|
||||||
|
public void testOK() throws Exception {
|
||||||
|
byte[] ok = "<p><div>Hello There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||||
|
|
||||||
|
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
|
||||||
|
new ByteArrayInputStream(ok)
|
||||||
|
);
|
||||||
|
|
||||||
|
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||||
|
boolean going = true;
|
||||||
|
while(going) {
|
||||||
|
byte[] b = new byte[1024];
|
||||||
|
int r = inp.read(b);
|
||||||
|
if(r > 0) {
|
||||||
|
bout.write(b, 0, r);
|
||||||
|
} else {
|
||||||
|
going = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] result = bout.toByteArray();
|
||||||
|
assertEquals(ok, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testProblem() throws Exception {
|
||||||
|
byte[] orig = "<p><div>Hello<br>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||||
|
byte[] fixed = "<p><div>Hello<br/>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||||
|
|
||||||
|
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
|
||||||
|
new ByteArrayInputStream(orig)
|
||||||
|
);
|
||||||
|
|
||||||
|
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||||
|
boolean going = true;
|
||||||
|
while(going) {
|
||||||
|
byte[] b = new byte[1024];
|
||||||
|
int r = inp.read(b);
|
||||||
|
if(r > 0) {
|
||||||
|
bout.write(b, 0, r);
|
||||||
|
} else {
|
||||||
|
going = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] result = bout.toByteArray();
|
||||||
|
assertEquals(fixed, result);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void assertEquals(byte[] a, byte[] b) {
|
||||||
|
assertEquals(a.length, b.length);
|
||||||
|
for(int i=0; i<a.length; i++) {
|
||||||
|
assertEquals("Wrong byte at index " + i, a[i], b[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
test-data/spreadsheet/BrNotClosed.xlsx
Normal file
BIN
test-data/spreadsheet/BrNotClosed.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user