Fix bug #50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1067217 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-02-04 16:42:57 +00:00
parent e325b96167
commit 63a225e051
3 changed files with 136 additions and 14 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta1" date="2010-??-??">
<action dev="poi-developers" type="fix">50539 - Better fix for html-style br tags (invalid XML) inside XSSF documents</action>
<action dev="poi-developers" type="add">49928 - allow overridden built-in formats in HSSFCellStyle</action>
<action dev="POI-DEVELOPERS" type="add">50607 - Added implementation for CLEAN(), CHAR() and ADDRESS()</action>
<action dev="poi-developers" type="add">50587 - Improved documentation on user-defined functions</action>

View File

@ -54,16 +54,26 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
@Override
public int read(byte[] b, int off, int len) throws IOException {
if(spare != null) {
// This is risky, but spare is normally only a byte or two...
System.arraycopy(spare, 0, b, off, spare.length);
int ret = spare.length;
spare = null;
return ret;
// Grab any data left from last time
int readA = readFromSpare(b, off, len);
// Now read from the stream
int readB = source.read(b, off+readA, len-readA);
// Figure out how much we've done
int read;
if(readB == -1 || readB == 0) {
read = readA;
} else {
read = readA + readB;
}
int read = source.read(b, off, len);
read = fixUp(b, off, read);
// Fix up our data
if(read > 0) {
read = fixUp(b, off, read);
}
// All done
return read;
}
@ -71,11 +81,72 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
public int read(byte[] b) throws IOException {
    // Offer the whole of the buffer to the offset/length based read
    final int wholeLength = b.length;
    return read(b, 0, wholeLength);
}
/**
 * Reads into the buffer from the spare bytes. If more is spare than
 * was asked for, the remainder is kept as the new spare.
 *
 * @param b the buffer to copy the spare bytes into
 * @param offset the position in <code>b</code> to start writing at
 * @param len the maximum number of bytes to copy
 * @return the number of bytes copied, which is 0 if nothing is spare
 *         or if <code>len</code> is 0
 */
private int readFromSpare(byte[] b, int offset, int len) {
    if(spare == null) {
        return 0;
    }
    if(len == 0) {
        // A zero length read is legal on an InputStream and must give
        //  back 0, so leave the spare untouched for the next call
        return 0;
    }

    if(spare.length <= len) {
        // All fits, good
        int read = spare.length;
        System.arraycopy(spare, 0, b, offset, read);
        spare = null;
        return read;
    }

    // We have more spare than they can cope with, so hand over
    //  the first len bytes and keep the rest for next time
    byte[] newspare = new byte[spare.length-len];
    System.arraycopy(spare, 0, b, offset, len);
    System.arraycopy(spare, len, newspare, 0, newspare.length);
    spare = newspare;
    return len;
}
/**
 * Stores <code>len</code> bytes of <code>b</code>, starting at
 * <code>offset</code>, in the spare bytes.
 *
 * @param b the buffer holding the bytes to keep
 * @param offset the position in <code>b</code> of the first byte to keep
 * @param len how many bytes to keep
 * @param atTheEnd if true the bytes go after any existing spare
 *        bytes, otherwise they go in front of them
 */
private void addToSpare(byte[] b, int offset, int len, boolean atTheEnd) {
    if(spare == null) {
        // Nothing held back yet, so these bytes become the spare as-is
        spare = new byte[len];
        System.arraycopy(b, offset, spare, 0, len);
        return;
    }

    // Build one array holding both, with the order picked by atTheEnd
    final byte[] held = spare;
    final byte[] combined = new byte[held.length + len];
    final int heldPos = atTheEnd ? 0 : len;
    final int incomingPos = atTheEnd ? held.length : 0;
    System.arraycopy(held, 0, combined, heldPos, held.length);
    System.arraycopy(b, offset, combined, incomingPos, len);
    spare = combined;
}
private int fixUp(byte[] b, int offset, int read) {
// Do we have any potential overhanging ones?
for(int i=0; i<detect.length-1; i++) {
int base = offset+read-1-i;
if(base < 0) continue;
boolean going = true;
for(int j=0; j<=i && going; j++) {
if(b[base+j] == detect[j]) {
// Matches
} else {
going = false;
}
}
if(going) {
// There could be a <br> hanging over the end, eg <br|
addToSpare(b, base, i+1, true);
read -= 1;
read -= i;
break;
}
}
// Find places to fix
ArrayList<Integer> fixAt = new ArrayList<Integer>();
for(int i=offset; i<offset+read-4; i++) {
for(int i=offset; i<=offset+read-detect.length; i++) {
boolean going = true;
for(int j=0; j<detect.length && going; j++) {
if(b[i+j] != detect[j]) {
@ -91,18 +162,37 @@ public class EvilUnclosedBRFixingInputStream extends InputStream {
return read;
}
// Save a bit, if needed to fit
int overshoot = offset+read+fixAt.size() - b.length;
// If there isn't space in the buffer to contain
// all the fixes, then save the overshoot for next time
int needed = offset+read+fixAt.size();
int overshoot = needed - b.length;
if(overshoot > 0) {
spare = new byte[overshoot];
System.arraycopy(b, b.length-overshoot, spare, 0, overshoot);
// Make sure we don't lose part of a <br>!
int fixes = 0;
for(int at : fixAt) {
if(at > offset+read-detect.length-overshoot-fixes) {
overshoot = needed - at - 1 - fixes;
break;
}
fixes++;
}
addToSpare(b, offset+read-overshoot, overshoot, false);
read -= overshoot;
}
// Fix them, in reverse order so the
// positions are valid
for(int j=fixAt.size()-1; j>=0; j--) {
int i = fixAt.get(j);
int i = fixAt.get(j);
if(i >= read+offset) {
// This one has moved into the overshoot
continue;
}
if(i > read-3) {
// This one has moved into the overshoot
continue;
}
byte[] tmp = new byte[read-i-3];
System.arraycopy(b, i+3, tmp, 0, tmp.length);

View File

@ -70,6 +70,37 @@ public final class TestEvilUnclosedBRFixingInputStream extends TestCase {
assertEquals(fixed, result);
}
/**
 * Checks that we can cope with br tags that land on, or are split
 *  across, the boundaries of the buffer being read into
 */
public void testBufferSize() throws Exception {
    byte[] orig = "<p><div>Hello<br> <br>There!</div> <div>Tags!<br><br></div></p>".getBytes("UTF-8");
    byte[] fixed = "<p><div>Hello<br/> <br/>There!</div> <div>Tags!<br/><br/></div></p>".getBytes("UTF-8");

    // Try every buffer size from 5 bytes up to one short of the input,
    //  so that a br ends up in the overflow, or only partly in the buffer
    for(int size=5; size<orig.length; size++) {
        EvilUnclosedBRFixingInputStream inp = new EvilUnclosedBRFixingInputStream(
                new ByteArrayInputStream(orig)
        );
        ByteArrayOutputStream bout = new ByteArrayOutputStream();

        // Keep reading, with a fresh buffer each time, until a read gives nothing
        for(;;) {
            byte[] b = new byte[size];
            int r = inp.read(b);
            if(r <= 0) {
                break;
            }
            bout.write(b, 0, r);
        }

        assertEquals(fixed, bout.toByteArray());
    }
}
protected void assertEquals(byte[] a, byte[] b) {
assertEquals(a.length, b.length);
for(int i=0; i<a.length; i++) {