Fix bug #49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@941399 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
7b043c2bc4
commit
ede8beb227
@ -34,6 +34,7 @@
|
||||
|
||||
<changes>
|
||||
<release version="3.7-SNAPSHOT" date="2010-??-??">
|
||||
<action dev="POI-DEVELOPERS" type="fix">49020 - Workaround Excel outputting invalid XML in button definitions by not closing BR tags</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49050 - Improve performance of AbstractEscherHolderRecord when there are lots of Continue Records</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49194 - Correct text size limit for OOXML .xlsx files</action>
|
||||
<action dev="POI-DEVELOPERS" type="fix">49254 - Fix CellUtils.setFont to use the correct type internally</action>
|
||||
|
@ -20,6 +20,7 @@ package org.apache.poi.xssf.usermodel;
|
||||
import org.apache.poi.POIXMLDocumentPart;
|
||||
import org.apache.poi.openxml4j.opc.PackagePart;
|
||||
import org.apache.poi.openxml4j.opc.PackageRelationship;
|
||||
import org.apache.poi.xssf.util.EvilUnclosedBRFixingInputStream;
|
||||
import org.apache.xmlbeans.XmlException;
|
||||
import org.apache.xmlbeans.XmlOptions;
|
||||
import org.apache.xmlbeans.XmlObject;
|
||||
@ -54,6 +55,11 @@ import schemasMicrosoftComOfficeExcel.STObjectType;
|
||||
* need a file format for drawings are strongly encouraged to use preferentially DrawingML
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* Warning - Excel is known to put invalid XML into these files!
|
||||
* For example, {@code <br>} without being closed or escaped crops up.
|
||||
* </p>
|
||||
*
|
||||
* See 6.4 VML - SpreadsheetML Drawing in Office Open XML Part 4 - Markup Language Reference.pdf
|
||||
*
|
||||
* @author Yegor Kozlov
|
||||
@ -98,7 +104,9 @@ public final class XSSFVMLDrawing extends POIXMLDocumentPart {
|
||||
|
||||
|
||||
protected void read(InputStream is) throws IOException, XmlException {
|
||||
XmlObject root = XmlObject.Factory.parse(is);
|
||||
XmlObject root = XmlObject.Factory.parse(
|
||||
new EvilUnclosedBRFixingInputStream(is)
|
||||
);
|
||||
|
||||
_qnames = new ArrayList<QName>();
|
||||
_items = new ArrayList<XmlObject>();
|
||||
|
@ -0,0 +1,116 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
package org.apache.poi.xssf.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
 * This is a seriously sick fix for the fact that some .xlsx
 *  files contain raw bits of HTML, without being escaped
 *  or properly turned into XML.
 * The result is that they contain things like {@code <br>},
 *  which breaks the XML parsing.
 * This very sick InputStream wrapper spots these as they go
 *  past and turns each one into a well-formed {@code <br/>}.
 * <p>
 * Every read method applies the fix, including the single byte
 *  {@link #read()}, and a tag is still caught when it is split
 *  across two reads of the underlying stream or sits at the very
 *  end of the data.
 * </p>
 * <p>
 * Only the exact, lower case byte sequence {@code <br>} is
 *  matched, so this only works for UTF-8 and US-ASCII based
 *  streams! The wrapper reads ahead from the underlying stream
 *  into a private buffer, so the underlying stream should not be
 *  read directly once it has been wrapped.
 * </p>
 * It should only be used where experience shows the problem
 *  can occur...
 */
public class EvilUnclosedBRFixingInputStream extends InputStream {
    /** The invalid, unclosed tag to look for */
    private static final byte[] DETECT = new byte[] {
        (byte)'<', (byte)'b', (byte)'r', (byte)'>'
    };

    /** The well-formed tag written out in its place */
    private static final byte[] REPLACEMENT = new byte[] {
        (byte)'<', (byte)'b', (byte)'r', (byte)'/', (byte)'>'
    };

    /** How much is fetched from the underlying stream in one go */
    private static final int BUFFER_SIZE = 4096;

    private final InputStream source;

    /** Bytes fetched from the source but not yet examined */
    private final byte[] inBuf = new byte[BUFFER_SIZE];
    private int inPos;
    private int inLen;
    private boolean sourceExhausted;

    /** Bytes already decided on, waiting to be handed to the caller */
    private final byte[] outBuf = new byte[REPLACEMENT.length];
    private int outPos;
    private int outLen;

    /**
     * @param source the stream to read the possibly broken XML from
     */
    public EvilUnclosedBRFixingInputStream(InputStream source) {
        this.source = source;
    }

    /**
     * Returns the next byte of the fixed up stream.
     *
     * @return the next byte (0-255), or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        // Hand out anything queued up by an earlier (partial) match first
        if (outPos < outLen) {
            return outBuf[outPos++] & 0xff;
        }

        int first = nextSourceByte();
        if (first != DETECT[0]) {
            // Ordinary byte, or -1 for the end of the stream
            return first;
        }

        // Saw a '<', so see how much of the unclosed tag follows it
        int matched = 1;
        while (matched < DETECT.length) {
            int next = nextSourceByte();
            if (next != DETECT[matched]) {
                if (next >= 0) {
                    // Not part of the tag. Put it back so it gets looked at
                    //  afresh, as it may itself be the '<' of a real match
                    inPos--;
                }
                break;
            }
            matched++;
        }

        // A full match is replaced; a partial one is passed through as-is.
        // '<' only occurs at the start of the pattern, so a partial match
        //  can never hide the beginning of another one.
        byte[] emit = DETECT;
        int emitLen = matched;
        if (matched == DETECT.length) {
            emit = REPLACEMENT;
            emitLen = REPLACEMENT.length;
        }

        outPos = 0;
        outLen = emitLen - 1;
        System.arraycopy(emit, 1, outBuf, 0, outLen);
        return emit[0] & 0xff;
    }

    /**
     * Reads up to <code>len</code> fixed up bytes into the array. Never
     *  writes outside of <code>b[off]</code> to <code>b[off+len-1]</code>.
     *
     * @param b the buffer to read into
     * @param off where in the buffer to start writing
     * @param len the maximum number of bytes to read
     * @return the number of bytes read, or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read(byte[] b, int off, int len) throws IOException {
        if (b == null) {
            throw new NullPointerException("b");
        }
        if (off < 0 || len < 0 || len > b.length - off) {
            throw new IndexOutOfBoundsException(
                    "off=" + off + ", len=" + len + ", buffer length=" + b.length
            );
        }
        if (len == 0) {
            return 0;
        }

        int first = read();
        if (first < 0) {
            return -1;
        }
        b[off] = (byte)first;
        int count = 1;

        // Carry on for as long as there is data already to hand, rather
        //  than going back to the source just to fill the buffer up
        while (count < len && (outPos < outLen || inPos < inLen)) {
            int next = read();
            if (next < 0) {
                break;
            }
            b[off + count] = (byte)next;
            count++;
        }
        return count;
    }

    /**
     * Reads up to <code>b.length</code> fixed up bytes into the array.
     *
     * @param b the buffer to read into
     * @return the number of bytes read, or -1 at the end of the stream
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read(byte[] b) throws IOException {
        return this.read(b, 0, b.length);
    }

    /**
     * Closes the underlying stream.
     *
     * @throws IOException if the underlying stream fails to close
     */
    @Override
    public void close() throws IOException {
        source.close();
    }

    /**
     * Returns the next unexamined byte, refilling the read-ahead
     *  buffer from the underlying stream when it has run dry.
     */
    private int nextSourceByte() throws IOException {
        if (inPos >= inLen) {
            if (sourceExhausted) {
                return -1;
            }
            int got;
            do {
                got = source.read(inBuf, 0, inBuf.length);
            } while (got == 0);
            if (got < 0) {
                sourceExhausted = true;
                return -1;
            }
            inPos = 0;
            inLen = got;
        }
        return inBuf[inPos++] & 0xff;
    }
}
|
@ -138,4 +138,14 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
|
||||
assertEquals(1, rels.size());
|
||||
assertEquals("Sheet1!A1", rels.get(0).getPackageRelationship().getTargetURI().getFragment());
|
||||
}
|
||||
|
||||
/**
|
||||
* Excel will sometimes write a button with a textbox
|
||||
* containing {@code <br>} (not closed!).
|
||||
* Clearly Excel shouldn't do this, but test that we can
|
||||
* read the file despite the naughtiness
|
||||
*/
|
||||
public void test49020() throws Exception {
|
||||
XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("BrNotClosed.xlsx");
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,79 @@
|
||||
/* ====================================================================
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
==================================================================== */
|
||||
|
||||
package org.apache.poi.xssf.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
|
||||
public void testOK() throws Exception {
|
||||
byte[] ok = "<p><div>Hello There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||
|
||||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
|
||||
new ByteArrayInputStream(ok)
|
||||
);
|
||||
|
||||
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||
boolean going = true;
|
||||
while(going) {
|
||||
byte[] b = new byte[1024];
|
||||
int r = inp.read(b);
|
||||
if(r > 0) {
|
||||
bout.write(b, 0, r);
|
||||
} else {
|
||||
going = false;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] result = bout.toByteArray();
|
||||
assertEquals(ok, result);
|
||||
}
|
||||
|
||||
public void testProblem() throws Exception {
|
||||
byte[] orig = "<p><div>Hello<br>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||
byte[] fixed = "<p><div>Hello<br/>There!</div> <div>Tags!</div></p>".getBytes("UTF-8");
|
||||
|
||||
EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
|
||||
new ByteArrayInputStream(orig)
|
||||
);
|
||||
|
||||
ByteArrayOutputStream bout = new ByteArrayOutputStream();
|
||||
boolean going = true;
|
||||
while(going) {
|
||||
byte[] b = new byte[1024];
|
||||
int r = inp.read(b);
|
||||
if(r > 0) {
|
||||
bout.write(b, 0, r);
|
||||
} else {
|
||||
going = false;
|
||||
}
|
||||
}
|
||||
|
||||
byte[] result = bout.toByteArray();
|
||||
assertEquals(fixed, result);
|
||||
}
|
||||
|
||||
protected void assertEquals(byte[] a, byte[] b) {
|
||||
assertEquals(a.length, b.length);
|
||||
for(int i=0; i<a.length; i++) {
|
||||
assertEquals("Wrong byte at index " + i, a[i], b[i]);
|
||||
}
|
||||
}
|
||||
}
|
BIN
test-data/spreadsheet/BrNotClosed.xlsx
Normal file
BIN
test-data/spreadsheet/BrNotClosed.xlsx
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user