diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 829fb4d18..ef6511128 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -36,6 +36,7 @@ + 41064 - [PATCH] Support for String continue records 27511 - [PATCH] Support for data validation, via DVRecord and DVALRecord 43877 and 39512 - Fix for handling mixed OBJ and CONTINUE records. 43807 - Throw an IllegalArgumentException if asked to create a merged region with invalid columns or rows, rather than writing out a corrupt file diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 18a305017..d09ccb87c 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,7 @@ + 41064 - [PATCH] Support for String continue records 27511 - [PATCH] Support for data validation, via DVRecord and DVALRecord 43877 - Fix for handling mixed OBJ and CONTINUE records 39512 - Fix for handling mixed OBJ and CONTINUE records diff --git a/src/java/org/apache/poi/hssf/record/RecordFactory.java b/src/java/org/apache/poi/hssf/record/RecordFactory.java index 984291cdb..cf705a316 100644 --- a/src/java/org/apache/poi/hssf/record/RecordFactory.java +++ b/src/java/org/apache/poi/hssf/record/RecordFactory.java @@ -147,6 +147,9 @@ public class RecordFactory } else if (record.getSid() == ContinueRecord.sid && (lastRecord instanceof DrawingGroupRecord)) { ((DrawingGroupRecord)lastRecord).processContinueRecord(((ContinueRecord)record).getData()); + } else if (record.getSid() == ContinueRecord.sid && + (lastRecord instanceof StringRecord)) { + ((StringRecord)lastRecord).processContinueRecord(((ContinueRecord)record).getData()); } else if (record.getSid() == ContinueRecord.sid) { if (lastRecord instanceof UnknownRecord) { //Gracefully handle records that we dont know about, diff --git a/src/java/org/apache/poi/hssf/record/StringRecord.java b/src/java/org/apache/poi/hssf/record/StringRecord.java index a880d7235..b3a42aaba 100644 --- a/src/java/org/apache/poi/hssf/record/StringRecord.java +++ b/src/java/org/apache/poi/hssf/record/StringRecord.java @@ -83,6 +83,14 @@ public class StringRecord field_3_string = StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length); } } + + public void processContinueRecord(byte[] data) { + if(isUnCompressedUnicode()) { + field_3_string += StringUtil.getFromUnicodeLE(data, 0, field_1_string_length - field_3_string.length()); + } else { + field_3_string += StringUtil.getFromCompressedUnicode(data, 0, field_1_string_length - field_3_string.length()); + } + } public boolean isInValueSection() { diff --git a/src/java/org/apache/poi/util/StringUtil.java b/src/java/org/apache/poi/util/StringUtil.java index 9dd8e4838..673b5246e 100644 --- a/src/java/org/apache/poi/util/StringUtil.java +++ b/src/java/org/apache/poi/util/StringUtil.java @@ -161,7 +161,8 @@ public class StringUtil { final int offset, final int len) { try { - return new String(string, offset, len, "ISO-8859-1"); + int len_to_use = Math.min(len, string.length - offset); + return new String(string, offset, len_to_use, "ISO-8859-1"); } catch (UnsupportedEncodingException e) { throw new InternalError(); /* unreachable */ } diff --git a/src/testcases/org/apache/poi/hssf/data/StringContinueRecords.xls b/src/testcases/org/apache/poi/hssf/data/StringContinueRecords.xls new file mode 100644 index 000000000..f2ada9eb2 Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/StringContinueRecords.xls differ diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 027495a1b..0aef5c765 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -68,6 +68,19 @@ public class TestExcelExtractor extends TestCase { ); } + public void testwithContinueRecords() throws Exception { + String path = System.getProperty("HSSF.testdata.path"); + FileInputStream fin = new FileInputStream(path + File.separator + "StringContinueRecords.xls"); + + ExcelExtractor extractor = new ExcelExtractor(new POIFSFileSystem(fin)); + + extractor.getText(); + + // Has masses of text + // Until we fixed bug #41064, this would've + // failed by now + assertTrue(extractor.getText().length() > 40960); + } public void testStringConcat() throws Exception { String path = System.getProperty("HSSF.testdata.path");