bug 50955 -- fix for java 7
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1790130 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
af51ea4c10
commit
3c62c898a4
@ -18,6 +18,7 @@
|
||||
package org.apache.poi.hwpf.model;
|
||||
|
||||
|
||||
import org.apache.poi.util.CodePageUtil;
|
||||
import org.apache.poi.util.Internal;
|
||||
import org.apache.poi.util.NotImplemented;
|
||||
|
||||
@ -40,11 +41,19 @@ public class OldTextPiece extends TextPiece {
|
||||
public OldTextPiece(int start, int end, byte[] text, PieceDescriptor pd) {
|
||||
super(start, end, text, pd);
|
||||
this.rawBytes = text;
|
||||
if (end < start) {
|
||||
throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void validateLengths(int start, int end, int length, PieceDescriptor pd) {
|
||||
//things are still wonky with Big5 char/byte length mapping
|
||||
//sometimes working w/ Java 8 but not w/ Java 7!
|
||||
//for now, if we're dealing w/ Big5 don't bother checking
|
||||
if (pd.getCharset() != null &&
|
||||
CodePageUtil.VARIABLE_BYTE_CHARSETS.contains(pd.getCharset())) {
|
||||
return;
|
||||
}
|
||||
super.validateLengths(start, end, length, pd);
|
||||
}
|
||||
/**
|
||||
* @return nothing, ever. Always throws an UnsupportedOperationException
|
||||
* @throws UnsupportedOperationException
|
||||
@ -56,6 +65,7 @@ public class OldTextPiece extends TextPiece {
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public StringBuilder getStringBuilder() {
|
||||
return (StringBuilder) _buf;
|
||||
}
|
||||
|
@ -60,14 +60,17 @@ public class TextPiece extends PropertyNode<TextPiece> {
|
||||
|
||||
// Validate
|
||||
int textLength = ((CharSequence) _buf).length();
|
||||
if (end - start != textLength) {
|
||||
throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
|
||||
}
|
||||
validateLengths(start, end, textLength, pd);
|
||||
if (end < start) {
|
||||
throw new IllegalStateException("Told we're of negative size! start=" + start + " end=" + end);
|
||||
}
|
||||
}
|
||||
|
||||
protected void validateLengths(int start, int end, int textLength, PieceDescriptor pd) {
|
||||
if (end - start != textLength) {
|
||||
throw new IllegalStateException("Told we're for characters " + start + " -> " + end + ", but actually covers " + textLength + " characters!");
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Create the StringBuilder from the text and unicode flag
|
||||
*/
|
||||
|
@ -226,6 +226,26 @@ public final class TestHWPFOldDocument extends HWPFTestCase {
|
||||
assertContains(txt, "also maintain");//this is at a critical juncture
|
||||
assertContains(txt, "which are available for");//this too
|
||||
|
||||
/*
|
||||
The bytes for the following test:
|
||||
170 : 78 : x
|
||||
171 : 0 :
|
||||
172 : d : <r>
|
||||
173 : 35 : 5
|
||||
174 : 39 : 9
|
||||
175 : 0 :
|
||||
176 : 2d : -
|
||||
177 : 0 :
|
||||
178 : 35 : 5
|
||||
179 : 0 :
|
||||
180 : 35 : 5
|
||||
|
||||
Note that we are skipping over the value "5" at offset 173.
|
||||
This is an apparently invalid sequence in MS's encoding scheme
|
||||
|
||||
When I open the document in MSWord, I also see "\r9-55"
|
||||
*/
|
||||
assertContains(txt, "\n9-55 xxxxx block5");
|
||||
//TODO: figure out why these two aren't passing
|
||||
// assertContains(txt, "\u2019\u0078 block2");//make sure smart quote is extracted correctly
|
||||
// assertContains(txt, "We are able to");//not sure if we can get this easily?
|
||||
|
Loading…
Reference in New Issue
Block a user