SST Rich Text Fix.
git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/branches/REL_1_5_BRANCH@352656 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9a47a0c1ea
commit
aac81d881b
|
@ -1,8 +1,68 @@
|
|||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2002 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||
* "Apache POI" must not be used to endorse or promote products
|
||||
* derived from this software without prior written permission. For
|
||||
* written permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache",
|
||||
* "Apache POI", nor may "Apache" appear in their name, without
|
||||
* prior written permission of the Apache Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import org.apache.poi.util.LittleEndianConsts;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
|
||||
/**
|
||||
* Process a single record. That is, an SST record or a continue record.
|
||||
* Refactored from code originally in SSTRecord.
|
||||
*
|
||||
* @author Glen Stampoultzis (glens at apache.org)
|
||||
*/
|
||||
class RecordProcessor
|
||||
{
|
||||
private byte[] data;
|
||||
|
|
|
@ -58,7 +58,6 @@ import org.apache.poi.util.BinaryTree;
|
|||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianConsts;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
|
@ -259,7 +258,8 @@ public class SSTRecord
|
|||
rval = field_3_strings.size();
|
||||
field_2_num_unique_strings++;
|
||||
integer = new Integer( rval );
|
||||
field_3_strings.put( integer, ucs );
|
||||
addToStringTable( integer, ucs );
|
||||
// field_3_strings.put( integer, ucs );
|
||||
}
|
||||
return rval;
|
||||
}
|
||||
|
@ -324,11 +324,10 @@ public class SSTRecord
|
|||
|
||||
public String getString( final int id )
|
||||
{
|
||||
return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) )
|
||||
.getString();
|
||||
return ( (UnicodeString) field_3_strings.get( new Integer( id ) ) ).getString();
|
||||
}
|
||||
|
||||
public boolean getString16bit( final int id )
|
||||
public boolean isString16bit( final int id )
|
||||
{
|
||||
UnicodeString unicodeString = ( (UnicodeString) field_3_strings.get( new Integer( id ) ) );
|
||||
return ( ( unicodeString.getOptionFlags() & 0x01 ) == 1 );
|
||||
|
@ -456,7 +455,8 @@ public class SSTRecord
|
|||
_unfinished_string );
|
||||
Integer integer = new Integer( field_3_strings.size() );
|
||||
|
||||
field_3_strings.put( integer, string );
|
||||
// field_3_strings.put( integer, string );
|
||||
addToStringTable( integer, string );
|
||||
manufactureStrings( record,
|
||||
_total_length_bytes
|
||||
- LittleEndianConsts
|
||||
|
@ -725,35 +725,27 @@ public class SSTRecord
|
|||
private void setupStringParameters( final byte[] data, final int index,
|
||||
final int char_count )
|
||||
{
|
||||
byte flag = data[index + LittleEndianConsts.SHORT_SIZE];
|
||||
byte optionFlag = data[index + LittleEndianConsts.SHORT_SIZE];
|
||||
|
||||
_wide_char = ( flag & 1 ) == 1;
|
||||
boolean extended = ( flag & 4 ) == 4;
|
||||
boolean formatted_run = ( flag & 8 ) == 8;
|
||||
_wide_char = ( optionFlag & 1 ) == 1;
|
||||
boolean extended = ( optionFlag & 4 ) == 4;
|
||||
boolean rich_text = ( optionFlag & 8 ) == 8;
|
||||
|
||||
_total_length_bytes = STRING_MINIMAL_OVERHEAD
|
||||
+ calculateByteCount( char_count );
|
||||
_total_length_bytes = STRING_MINIMAL_OVERHEAD + calculateByteCount( char_count );
|
||||
_string_data_offset = STRING_MINIMAL_OVERHEAD;
|
||||
if ( formatted_run )
|
||||
if ( rich_text )
|
||||
{
|
||||
short run_count = LittleEndian.getShort( data,
|
||||
index
|
||||
+ _string_data_offset );
|
||||
short run_count = LittleEndian.getShort( data, index + _string_data_offset );
|
||||
|
||||
_string_data_offset += LittleEndianConsts.SHORT_SIZE;
|
||||
_total_length_bytes += LittleEndianConsts.SHORT_SIZE
|
||||
+ ( LittleEndianConsts.INT_SIZE
|
||||
* run_count );
|
||||
_total_length_bytes += LittleEndianConsts.SHORT_SIZE + ( LittleEndianConsts.INT_SIZE * run_count );
|
||||
}
|
||||
if ( extended )
|
||||
{
|
||||
int extension_length = LittleEndian.getInt( data,
|
||||
index
|
||||
+ _string_data_offset );
|
||||
int extension_length = LittleEndian.getInt( data, index + _string_data_offset );
|
||||
|
||||
_string_data_offset += LittleEndianConsts.INT_SIZE;
|
||||
_total_length_bytes += LittleEndianConsts.INT_SIZE
|
||||
+ extension_length;
|
||||
_total_length_bytes += LittleEndianConsts.INT_SIZE + extension_length;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -771,6 +763,7 @@ public class SSTRecord
|
|||
LittleEndian.putShort( bstring, offset, char_count );
|
||||
offset += LittleEndianConsts.SHORT_SIZE;
|
||||
bstring[offset] = str_data[offset];
|
||||
System.out.println( "_string_data_offset = " + _string_data_offset );
|
||||
System.arraycopy( str_data, _string_data_offset, bstring,
|
||||
STRING_MINIMAL_OVERHEAD,
|
||||
bstring.length - STRING_MINIMAL_OVERHEAD );
|
||||
|
@ -785,30 +778,35 @@ public class SSTRecord
|
|||
else
|
||||
{
|
||||
Integer integer = new Integer( field_3_strings.size() );
|
||||
addToStringTable( integer, string );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Okay, we are doing some major cheating here. Because we can't handle rich text strings properly
|
||||
* we end up getting duplicate strings. To get around this I'm doing two things: 1. Converting rich
|
||||
* text to normal text and 2. If there's a duplicate I'm adding a space onto the end. Sneaky perhaps
|
||||
* but it gets the job done until we can handle this a little better.
|
||||
*/
|
||||
private void addToStringTable( Integer integer, UnicodeString string )
|
||||
{
|
||||
if (string.isRichText())
|
||||
string.setOptionFlags( (byte)(string.getOptionFlags() & (~8) ) );
|
||||
|
||||
// This retry loop is a nasty hack that lets us get around the issue of duplicate
|
||||
// strings in the SST record. There should never be duplicates but because we don't
|
||||
// handle rich text records correctly this may occur. Also some Excel alternatives
|
||||
// do not seem to correctly add strings to this table.
|
||||
//
|
||||
// The hack bit is that we add spaces to the end of the string until we don't get an
|
||||
// illegal argument exception when adding. One day we will have to deal with this
|
||||
// more gracefully.
|
||||
boolean added = false;
|
||||
while ( !added )
|
||||
while (added == false)
|
||||
{
|
||||
try
|
||||
{
|
||||
field_3_strings.put( integer, string );
|
||||
added = true;
|
||||
}
|
||||
catch ( IllegalArgumentException duplicateValue )
|
||||
catch( Exception ignore )
|
||||
{
|
||||
string.setString( string.getString() + " " );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private void setExpectedChars( final int count )
|
||||
{
|
||||
|
|
|
@ -1,3 +1,57 @@
|
|||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
* Copyright (c) 2002 The Apache Software Foundation. All rights
|
||||
* reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* 3. The end-user documentation included with the redistribution,
|
||||
* if any, must include the following acknowledgment:
|
||||
* "This product includes software developed by the
|
||||
* Apache Software Foundation (http://www.apache.org/)."
|
||||
* Alternately, this acknowledgment may appear in the software itself,
|
||||
* if and wherever such third-party acknowledgments normally appear.
|
||||
*
|
||||
* 4. The names "Apache" and "Apache Software Foundation" and
|
||||
* "Apache POI" must not be used to endorse or promote products
|
||||
* derived from this software without prior written permission. For
|
||||
* written permission, please contact apache@apache.org.
|
||||
*
|
||||
* 5. Products derived from this software may not be called "Apache",
|
||||
* "Apache POI", nor may "Apache" appear in their name, without
|
||||
* prior written permission of the Apache Software Foundation.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
||||
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
||||
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
||||
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
||||
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
||||
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
* ====================================================================
|
||||
*
|
||||
* This software consists of voluntary contributions made by many
|
||||
* individuals on behalf of the Apache Software Foundation. For more
|
||||
* information on the Apache Software Foundation, please see
|
||||
* <http://www.apache.org/>.
|
||||
*/
|
||||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import org.apache.poi.util.BinaryTree;
|
||||
|
@ -6,6 +60,12 @@ import org.apache.poi.util.LittleEndianConsts;
|
|||
import java.util.List;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* This class handles serialization of SST records. It utilizes the record processor
|
||||
* class to write individual records. This has been refactored from the SSTRecord class.
|
||||
*
|
||||
* @author Glen Stampoultzis (glens at apache.org)
|
||||
*/
|
||||
class SSTSerializer
|
||||
{
|
||||
|
||||
|
|
|
@ -66,6 +66,7 @@ import org.apache.poi.util.StringUtil;
|
|||
* REFERENCE: PG 264 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
|
||||
* @author Andrew C. Oliver
|
||||
* @author Marc Johnson (mjohnson at apache dot org)
|
||||
* @author Glen Stampoultzis (glens at apache.org)
|
||||
* @version 2.0-pre
|
||||
*/
|
||||
|
||||
|
@ -77,12 +78,28 @@ public class UnicodeString
|
|||
private short field_1_charCount; // = 0;
|
||||
private byte field_2_optionflags; // = 0;
|
||||
private String field_3_string; // = null;
|
||||
private final int RICH_TEXT_BIT = 8;
|
||||
|
||||
public UnicodeString()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
public int hashCode()
|
||||
{
|
||||
return field_1_charCount;
|
||||
int stringHash = 0;
|
||||
if (field_3_string != null)
|
||||
stringHash = field_3_string.hashCode();
|
||||
return field_1_charCount + stringHash;
|
||||
}
|
||||
|
||||
/**
|
||||
* Our handling of equals is inconsistent with compareTo. The trouble is that because we don't truly understand
|
||||
* rich text fields yet it's difficult to make a sound comparison.
|
||||
*
|
||||
* @param o The object to compare.
|
||||
* @return true if the object is actually equal.
|
||||
*/
|
||||
public boolean equals(Object o)
|
||||
{
|
||||
if ((o == null) || (o.getClass() != this.getClass()))
|
||||
|
@ -96,10 +113,6 @@ public class UnicodeString
|
|||
&& field_3_string.equals(other.field_3_string));
|
||||
}
|
||||
|
||||
public UnicodeString()
|
||||
{
|
||||
}
|
||||
|
||||
/**
|
||||
* construct a unicode string record and fill its fields, ID is ignored
|
||||
* @param id - ignored
|
||||
|
@ -278,20 +291,10 @@ public class UnicodeString
|
|||
|
||||
public int serialize(int offset, byte [] data)
|
||||
{
|
||||
int charsize = 1;
|
||||
|
||||
// Note: I suspect this may not be right
|
||||
if ((getOptionFlags() & 0x01) == 1)
|
||||
{
|
||||
charsize = 2;
|
||||
}
|
||||
|
||||
// byte[] retval = new byte[ 3 + (getString().length() * charsize) ];
|
||||
LittleEndian.putShort(data, 0 + offset, getCharCount());
|
||||
data[ 2 + offset ] = getOptionFlags();
|
||||
|
||||
// System.out.println("Unicode: We've got "+retval[2]+" for our option flag");
|
||||
if ((getOptionFlags() & 0x01) == 0)
|
||||
if (!isUncompressedUnicode())
|
||||
{
|
||||
StringUtil.putCompressedUnicode(getString(), data, 0x3 + offset);
|
||||
}
|
||||
|
@ -303,14 +306,14 @@ public class UnicodeString
|
|||
return getRecordSize();
|
||||
}
|
||||
|
||||
private boolean isUncompressedUnicode()
|
||||
{
|
||||
return (getOptionFlags() & 0x01) == 1;
|
||||
}
|
||||
|
||||
public int getRecordSize()
|
||||
{
|
||||
int charsize = 1;
|
||||
|
||||
if ((getOptionFlags() & 0x01) == 1)
|
||||
{
|
||||
charsize = 2;
|
||||
}
|
||||
int charsize = isUncompressedUnicode() ? 2 : 1;
|
||||
return 3 + (getString().length() * charsize);
|
||||
}
|
||||
|
||||
|
@ -339,11 +342,16 @@ public class UnicodeString
|
|||
return this.getString().compareTo(str.getString());
|
||||
}
|
||||
|
||||
public boolean isRichText()
|
||||
{
|
||||
return (getOptionFlags() & RICH_TEXT_BIT) != 0;
|
||||
}
|
||||
|
||||
int maxBrokenLength(final int proposedBrokenLength)
|
||||
{
|
||||
int rval = proposedBrokenLength;
|
||||
|
||||
if ((field_2_optionflags & 1) == 1)
|
||||
if (isUncompressedUnicode())
|
||||
{
|
||||
int proposedStringLength = proposedBrokenLength - 3;
|
||||
|
||||
|
@ -356,12 +364,4 @@ public class UnicodeString
|
|||
return rval;
|
||||
}
|
||||
|
||||
// public boolean equals(Object obj) {
|
||||
// if (!(obj instanceof UnicodeString)) return false;
|
||||
//
|
||||
// UnicodeString str = (UnicodeString)obj;
|
||||
//
|
||||
//
|
||||
// return this.getString().equals(str.getString());
|
||||
// }
|
||||
}
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -1,4 +1,3 @@
|
|||
|
||||
/* ====================================================================
|
||||
* The Apache Software License, Version 1.1
|
||||
*
|
||||
|
@ -55,13 +54,18 @@
|
|||
|
||||
package org.apache.poi.hssf.record;
|
||||
|
||||
import org.apache.poi.util.*;
|
||||
|
||||
import junit.framework.*;
|
||||
import junit.framework.TestCase;
|
||||
import org.apache.poi.util.LittleEndian;
|
||||
import org.apache.poi.util.LittleEndianConsts;
|
||||
import org.apache.poi.hssf.model.Workbook;
|
||||
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
|
||||
import org.apache.poi.hssf.usermodel.HSSFSheet;
|
||||
|
||||
import java.io.*;
|
||||
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author Marc Johnson (mjohnson at apache dot org)
|
||||
|
@ -571,4 +575,46 @@ public class TestSSTRecord
|
|||
}
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests that workbooks with rich text that duplicates a non rich text cell can be read and written.
|
||||
*/
|
||||
public void testReadWriteDuplicatedRichText1()
|
||||
throws Exception
|
||||
{
|
||||
File file = new File( _test_file_path + File.separator + "duprich1.xls" );
|
||||
InputStream stream = new FileInputStream(file);
|
||||
HSSFWorkbook wb = new HSSFWorkbook(stream);
|
||||
stream.close();
|
||||
HSSFSheet sheet = wb.getSheetAt(1);
|
||||
assertEquals("01/05 (Wed) ", sheet.getRow(0).getCell((short)8).getStringCellValue());
|
||||
assertEquals("01/05 (Wed)", sheet.getRow(1).getCell((short)8).getStringCellValue());
|
||||
|
||||
file = File.createTempFile("testout", "xls");
|
||||
FileOutputStream outStream = new FileOutputStream(file);
|
||||
wb.write(outStream);
|
||||
outStream.close();
|
||||
file.delete();
|
||||
|
||||
// test the second file.
|
||||
file = new File( _test_file_path + File.separator + "duprich2.xls" );
|
||||
stream = new FileInputStream(file);
|
||||
wb = new HSSFWorkbook(stream);
|
||||
stream.close();
|
||||
sheet = wb.getSheetAt(0);
|
||||
int row = 0;
|
||||
assertEquals("Testing ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
assertEquals("rich", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
assertEquals("text", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
assertEquals("strings", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
assertEquals("Testing ", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
assertEquals("Testing", sheet.getRow(row++).getCell((short)0).getStringCellValue());
|
||||
|
||||
// file = new File("/tryme.xls");
|
||||
file = File.createTempFile("testout", ".xls");
|
||||
outStream = new FileOutputStream(file);
|
||||
wb.write(outStream);
|
||||
outStream.close();
|
||||
file.delete();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue