http://nagoya.apache.org/bugzilla/show_bug.cgi?id=11010 - great patch by

Sergei... if he'd learn to do diff -u patches instead of the nasty kind, I'd be loving his work a lot right now ;-) PR: Obtained from: Submitted by: Reviewed by: git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352831 13f79535-47bb-0310-9956-ffa450edef68
2002-09-02 02:11:16 +00:00 · 2002-09-02 02:11:16 +00:00 · 0cb31a8d4d
commit 0cb31a8d4d
parent 8b555a177c
9 changed files with 347 additions and 89 deletions
--- a/src/java/org/apache/poi/hssf/dev/BiffViewer.java
+++ b/src/java/org/apache/poi/hssf/dev/BiffViewer.java
@ -624,6 +624,9 @@ public class BiffViewer {
            case StringRecord.sid:
                retval = new StringRecord( rectype, size, data );
                break;
+            case NameRecord.sid:
+                retval = new NameRecord( rectype, size, data );
+                break;
            default:
                retval = new UnknownRecord( rectype, size, data );
        }
--- a/src/java/org/apache/poi/hssf/eventmodel/HSSFEventFactory.java
+++ b/src/java/org/apache/poi/hssf/eventmodel/HSSFEventFactory.java
@ -185,6 +185,24 @@ public class HSSFEventFactory
 			{

 				sid = LittleEndian.getShort(sidbytes);
+                
+                //
+                // For some reason the workbook has to be at least 4096 bytes long,
+                // so a shorter workbook is padded at the end with zeros (many zeros).
+                //
+                // That padding is a problem for this reader:
+                // every all-zero record is read 4 bytes at a time ( 2 - id and 2 - size ),
+                // so if length( all zero records ) % 4 == 1,
+                // the last single byte is read WRONGLY ( the id must be 2 bytes ).
+                //
+                // So it is better to check whether the sid is zero and, if it is, to stop
+                // reading. A zero sid shows that the rest of the stream is only filler
+                // added to bring the workbook up to a certain size.
+                //
+                if ( sid == 0 )
+                    break;
+
+
 				if ((rec != null) && (sid != ContinueRecord.sid))
 				{
 					userCode = req.processRecord(rec);
--- a/src/java/org/apache/poi/hssf/record/FormatRecord.java
+++ b/src/java/org/apache/poi/hssf/record/FormatRecord.java
@ -73,7 +73,8 @@ public class FormatRecord
    public final static short sid = 0x41e;
    private short             field_1_index_code;
    private byte              field_2_formatstring_len;
-    private short             field_3_zero;   // undocumented 2 bytes of 0
+    private short             field_3_unicode_len;      // unicode string length
+    private boolean          field_3_unicode_flag;     // it is not undocumented - it is unicode flag
    private String            field_4_formatstring;

    public FormatRecord()
@ -118,10 +119,19 @@ public class FormatRecord
    protected void fillFields(byte [] data, short size, int offset)
    {
        field_1_index_code       = LittleEndian.getShort(data, 0 + offset);
-        field_2_formatstring_len = data[ 2 + offset ];
-        field_3_zero             = LittleEndian.getShort(data, 3 + offset);
-        field_4_formatstring     = new String(data, 5 + offset,
-                                              LittleEndian.ubyteToInt(field_2_formatstring_len));
+        // field_2_formatstring_len = data[ 2 + offset ];
+        field_3_unicode_len      = LittleEndian.getShort( data, 2 + offset );
+        field_3_unicode_flag     = ( data[ 4 + offset ] & (byte)0x01 ) != 0;
+                                              
+                                              
+      if ( field_3_unicode_flag  ) {
+          // unicode
+          field_4_formatstring = StringUtil.getFromUnicodeHigh( data, 5 + offset, field_3_unicode_len );
+      }
+      else {
+          // not unicode
+          field_4_formatstring = new String(data, 5 + offset, field_3_unicode_len );
+      }
    }

    /**
@ -203,11 +213,15 @@ public class FormatRecord
        buffer.append("[FORMAT]\n");
        buffer.append("    .indexcode       = ")
            .append(Integer.toHexString(getIndexCode())).append("\n");
+        /*
        buffer.append("    .formatstringlen = ")
            .append(Integer.toHexString(getFormatStringLength()))
            .append("\n");
-        buffer.append("    .zero            = ")
-            .append(Integer.toHexString(field_3_zero)).append("\n");
+        */
+        buffer.append("    .unicode length  = ")
+            .append(Integer.toHexString(field_3_unicode_len)).append("\n");
+        buffer.append("    .isUnicode       = ")
+            .append( field_3_unicode_flag ).append("\n");
        buffer.append("    .formatstring    = ").append(getFormatString())
            .append("\n");
        buffer.append("[/FORMAT]\n");
@ -217,20 +231,29 @@ public class FormatRecord
    public int serialize(int offset, byte [] data)
    {
        LittleEndian.putShort(data, 0 + offset, sid);
-        LittleEndian.putShort(data, 2 + offset,
-                              ( short ) (5 + getFormatStringLength()));
-
-        // 9 - 4(len/sid) + format string length
+        LittleEndian.putShort(data, 2 + offset, (short)( 2 + 2 + 1 + ( (field_3_unicode_flag) 
+                                                                  ? 2 * field_3_unicode_len 
+                                                                  : field_3_unicode_len ) ) );
+                                                  // index + len + flag + format string length
        LittleEndian.putShort(data, 4 + offset, getIndexCode());
-        data[ 6 + offset ] = getFormatStringLength();
-        LittleEndian.putShort(data, 7 + offset, ( short ) 0);
+        LittleEndian.putShort(data, 6 + offset, field_3_unicode_len);
+        data[ 8 + offset ] = (byte)( (field_3_unicode_flag) ? 0x01 : 0x00 );
+
+      if ( field_3_unicode_flag ) {
+          // unicode
+          StringUtil.putUncompressedUnicode( getFormatString(), data, 9 + offset );
+      }
+      else {
+          // not unicode
          StringUtil.putCompressedUnicode( getFormatString(), data, 9 + offset );
+      }
+      
        return getRecordSize();
    }

    public int getRecordSize()
    {
-        return 9 + getFormatStringLength();
+        return 9 + ( ( field_3_unicode_flag ) ? 2 * field_3_unicode_len : field_3_unicode_len );
    }

    public short getSid()
--- a/src/java/org/apache/poi/hssf/record/NameRecord.java
+++ b/src/java/org/apache/poi/hssf/record/NameRecord.java
@ -55,6 +55,7 @@

 package org.apache.poi.hssf.record;

+import org.apache.poi.util.HexDump;
 import org.apache.poi.util.LittleEndian;
 import org.apache.poi.util.StringUtil;
 import java.util.Stack;
@ -70,6 +71,7 @@ import org.apache.poi.hssf.util.SheetReferences;
 * Description:  Defines a named range within a workbook. <P>
 * REFERENCE:  <P>
 * @author Libin Roman (Vista Portal LDT. Developer)
+ * @author  Sergei Kozello (sergeikozello at mail.ru)
 * @version 1.0-pre
 */

@ -88,9 +90,10 @@ public class NameRecord extends Record {
    private byte              field_9_length_help_topic_text;
    private byte              field_10_length_status_bar_text;
    private byte              field_11_compressed_unicode_flag;   // not documented
+    private byte              field_12_builtIn_name;
    private String            field_12_name_text;
    private Stack             field_13_name_definition;
-    private byte[]            field_13_raw_name_definition = null; // raw data
+    private byte[]            field_13_raw_name_definition;       // raw data
    private String            field_14_custom_menu_text;
    private String            field_15_description_text;
    private String            field_16_help_topic_text;
@ -378,7 +381,7 @@ public class NameRecord extends Record {
     */
    public int serialize(int offset, byte[] data) {
        LittleEndian.putShort(data, 0 + offset, sid);
-        LittleEndian.putShort(data, 2 + offset, (short)( 15 + getTextsLength()));
+        // size defined below
        LittleEndian.putShort(data, 4 + offset, getOptionFlag());
        data[6 + offset] = getKeyboardShortcut();
        data[7 + offset] = getNameTextLength();
@ -391,6 +394,18 @@ public class NameRecord extends Record {
        data [17 + offset] =  getStatusBarLength();
        data [18 + offset] =  getCompressedUnicodeFlag();
        
+      if ( ( field_1_option_flag & (short)0x20 ) != 0 ) {
+          LittleEndian.putShort(data, 2 + offset, (short)( 16 + field_13_raw_name_definition.length ));
+          
+            data [19 + offset] =  field_12_builtIn_name;
+            System.arraycopy( field_13_raw_name_definition, 0, data, 20 + offset, field_13_raw_name_definition.length );
+            
+            return 20 + field_13_raw_name_definition.length;
+      }
+      else {
+          LittleEndian.putShort(data, 2 + offset, (short)( 15 + getTextsLength()));
+          
+          
            StringUtil.putCompressedUnicode(getNameText(), data , 19 + offset);
    
            int start_of_name_definition    = 19  + field_3_length_name_text;
@ -413,9 +428,9 @@ public class NameRecord extends Record {
            int start_of_status_bar_text       = start_of_help_topic_text + field_10_length_status_bar_text;
            StringUtil.putCompressedUnicode(getStatusBarText(), data , start_of_status_bar_text + offset);
            
-
          return getRecordSize();
      }
+    }

    private void serializePtgs(byte [] data, int offset) {
        int pos = offset;
@ -579,6 +594,29 @@ public class NameRecord extends Record {
        field_9_length_help_topic_text  = data [12 + offset];
        field_10_length_status_bar_text = data [13 + offset];

+        
+        if ( ( field_1_option_flag & (short)0x20 ) != 0 ) {
+            // DEBUG
+            // System.out.println( "Built-in name" );
+            
+            field_11_compressed_unicode_flag = data[ 14 + offset ];
+            field_12_builtIn_name = data[ 15 + offset ];
+
+            if ( (field_12_builtIn_name & (short)0x07) != 0 ) {
+                field_12_name_text = "Print_Titles";
+                
+                // DEBUG
+                // System.out.println( field_12_name_text );
+                
+                field_13_raw_name_definition = new byte[ field_4_length_name_definition ];
+                System.arraycopy( data, 16 + offset, field_13_raw_name_definition, 0, field_13_raw_name_definition.length );
+                
+                // DEBUG
+                // System.out.println( HexDump.toHex( field_13_raw_name_definition ) );
+            }
+        }
+        else {
+    
            field_11_compressed_unicode_flag= data [14 + offset];
            field_12_name_text = new String(data, 15 + offset,
            LittleEndian.ubyteToInt(field_3_length_name_text));
@ -602,7 +640,7 @@ public class NameRecord extends Record {
            int start_of_status_bar_text       = start_of_help_topic_text + field_10_length_status_bar_text;
            field_17_status_bar_text        = new String(data, start_of_status_bar_text +  offset,
            LittleEndian.ubyteToInt(field_10_length_status_bar_text));
-
+        }
    }

    private Stack getParsedExpressionTokens(byte [] data, short size,
@ -635,5 +673,101 @@ public class NameRecord extends Record {
    public short getSid() {
        return this.sid;
    }
+    /*
+      20 00 
+      00 
+      01 
+      1A 00 // sz = 0x1A = 26
+      00 00 
+      01 00 
+      00 
+      00 
+      00 
+      00 
+      00 // unicode flag
+      07 // name
+      
+      29 17 00 3B 00 00 00 00 FF FF 00 00 02 00 3B 00 //{ 26
+      00 07 00 07 00 00 00 FF 00 10                   //  }
+      
+      
+      
+      20 00 
+      00 
+      01 
+      0B 00 // sz = 0xB = 11
+      00 00 
+      01 00 
+      00 
+      00 
+      00 
+      00 
+      00 // unicode flag
+      07 // name
+      
+      3B 00 00 07 00 07 00 00 00 FF 00   // { 11 }
+  */
+    /*
+      18, 00, 
+      1B, 00, 
+      
+      20, 00, 
+      00, 
+      01, 
+      0B, 00, 
+      00, 
+      00, 
+      00, 
+      00, 
+      00, 
+      07, 
+      3B 00 00 07 00 07 00 00 00 FF 00 ]     
+     */
+
+    /**
+     * @see Object#toString()
+     */
+    public String toString() {
+        StringBuffer buffer = new StringBuffer();
+
+        buffer.append("[NAME]\n");
+        buffer.append("    .option flags         = ").append( HexDump.toHex( field_1_option_flag ) )
+            .append("\n");
+        buffer.append("    .keyboard shortcut    = ").append( HexDump.toHex( field_2_keyboard_shortcut ) )
+            .append("\n");
+        buffer.append("    .length of the name   = ").append( field_3_length_name_text )
+            .append("\n");
+        buffer.append("    .size of the formula data = ").append( field_4_length_name_definition )
+            .append("\n");
+        buffer.append("    .unused                 = ").append( field_5_index_to_sheet )
+            .append("\n");
+        buffer.append("    .( 0 = Global name, otherwise index to sheet (one-based) ) = ").append( field_6_equals_to_index_to_sheet )
+            .append("\n");
+        buffer.append("    .Length of menu text (character count)        = ").append( field_7_length_custom_menu )
+            .append("\n");
+        buffer.append("    .Length of description text (character count) = ").append( field_8_length_description_text )
+            .append("\n");
+        buffer.append("    .Length of help topic text (character count)  = ").append( field_9_length_help_topic_text )
+            .append("\n");
+        buffer.append("    .Length of status bar text (character count)  = ").append( field_10_length_status_bar_text )
+            .append("\n");
+        buffer.append("    .Name (Unicode flag)  = ").append( field_11_compressed_unicode_flag )
+            .append("\n");
+        buffer.append("    .Name (Unicode text)  = ").append( field_12_name_text )
+            .append("\n");
+        buffer.append("    .Formula data (RPN token array without size field)      = ").append( HexDump.toHex( field_13_raw_name_definition ) )
+            .append("\n");
+        buffer.append("    .Menu text (Unicode string without length field)        = ").append( field_14_custom_menu_text )
+            .append("\n");
+        buffer.append("    .Description text (Unicode string without length field) = ").append( field_15_description_text )
+            .append("\n");
+        buffer.append("    .Help topic text (Unicode string without length field)  = ").append( field_16_help_topic_text )
+            .append("\n");
+        buffer.append("    .Status bar text (Unicode string without length field)  = ").append( field_17_status_bar_text )
+            .append("\n");
+        buffer.append("[/NAME]\n");
+        
+        return buffer.toString();
+    }

 }
--- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
+++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java
@ -202,6 +202,9 @@ public class HSSFWorkbook
        // none currently
    }

+    public final static byte ENCODING_COMPRESSED_UNICODE = 0;
+    public final static byte ENCODING_UTF_16             = 1;
+    
    /**
     * set the sheet name.
     * @param sheet number (0 based)
@ -209,12 +212,28 @@ public class HSSFWorkbook
     */

    public void setSheetName(int sheet, String name)
+    {
+        workbook.setSheetName( sheet, name, ENCODING_COMPRESSED_UNICODE );
+    }
+
+    public void setSheetName( int sheet, String name, short encoding )
    {
        if (sheet > (sheets.size() - 1))
        {
            throw new RuntimeException("Sheet out of bounds");
        }
-        workbook.setSheetName(sheet, name);
+        
+        switch ( encoding ) {
+        case ENCODING_COMPRESSED_UNICODE:
+        case ENCODING_UTF_16:
+            break;
+            
+        default:
+            // TODO java.io.UnsupportedEncodingException
+            throw new RuntimeException( "Unsupported encoding" );
+        }
+        
+        workbook.setSheetName( sheet, name, encoding );
    }

    /**
--- a/src/java/org/apache/poi/util/StringUtil.java
+++ b/src/java/org/apache/poi/util/StringUtil.java
@ -63,7 +63,13 @@ import java.text.FieldPosition;
 /**
 *  Title: String Utility Description: Collection of string handling utilities
 * 
+ * Note that the naming is currently quite confusing: the method pairs in which
+ * one method writes the data and the other reads the written data back are:
+ * putUncompressedUnicodeHigh and getFromUnicode
+ * putUncompressedUnicode     and getFromUnicodeHigh
+ *
 *@author     Andrew C. Oliver
+ *@author     Sergei Kozello (sergeikozello at mail.ru)
 *@created    May 10, 2002
 *@version    1.0
 */
@ -79,6 +85,8 @@ public class StringUtil {
     *  given a byte array of 16-bit unicode characters, compress to 8-bit and
     *  return a string
     *
+     * { 0x16, 0x00 } -> 0x16
+     * 
     *@param  string                              the byte array to be converted
     *@param  offset                              the initial offset into the
     *      byte array. it is assumed that string[ offset ] and string[ offset +
@ -103,23 +111,38 @@ public class StringUtil {
        if ((len < 0) || (((string.length - offset) / 2) < len)) {
            throw new IllegalArgumentException("Illegal length");
        }
-        byte[] bstring = new byte[len];
-        int index = offset;
-        // start with high bits.
        
-        for (int k = 0; k < len; k++) {
-            bstring[k] = string[index];
-            index += 2;
-        }
-        return new String(bstring);
+        char[] chars = new char[ len ];
+        for ( int i = 0; i < chars.length; i++ ) {
+            chars[i] = (char)( string[ offset + ( 2*i ) ] + 
+                             ( string[ offset + ( 2*i+1 ) ] << 8 ) );
        }

+        return new String( chars );
+    }
    
    
    /**
     *  given a byte array of 16-bit unicode characters, compress to 8-bit and
     *  return a string
     * 
+     * { 0x16, 0x00 } -> 0x16
+     *
+     *@param  string  the byte array to be converted
+     *@return         the converted string
+     */
+
+    public static String getFromUnicodeHigh( final byte[] string ) {
+        return getFromUnicodeHigh( string, 0, string.length / 2 );
+    }
+
+
+    /**
+     *  given a byte array of 16-bit unicode characters, compress to 8-bit and
+     *  return a string
+     * 
+     * { 0x00, 0x16 } -> 0x16
+     *
     *@param  string                              the byte array to be converted
     *@param  offset                              the initial offset into the
     *      byte array. it is assumed that string[ offset ] and string[ offset +
@ -144,15 +167,15 @@ public class StringUtil {
        if ((len < 0) || (((string.length - offset) / 2) < len)) {
            throw new IllegalArgumentException("Illegal length");
        }
-        byte[] bstring = new byte[len];
-        int index = offset + 1;
-        // start with low bits.

-        for (int k = 0; k < len; k++) {
-            bstring[k] = string[index];
-            index += 2;
+        
+        char[] chars = new char[ len ];
+        for ( int i = 0; i < chars.length; i++ ) {
+            chars[i] = (char)( ( string[ offset + ( 2*i ) ] << 8 ) +
+                              string[ offset + ( 2*i+1 ) ] );
        }
-        return new String(bstring);
+        
+        return new String( chars );
    }


@ -160,6 +183,8 @@ public class StringUtil {
     *  given a byte array of 16-bit unicode characters, compress to 8-bit and
     *  return a string
     * 
+     * { 0x00, 0x16 } -> 0x16
+     *
     *@param  string  the byte array to be converted
     *@return         the converted string
     */
--- a/src/testcases/org/apache/poi/hssf/usermodel/TestCellStyle.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestCellStyle.java
@ -129,7 +129,6 @@ public class TestCellStyle
        out.close();
        SanityChecker sanityChecker = new SanityChecker();
        sanityChecker.checkHSSFWorkbook(wb);
-        assertEquals("FILE LENGTH == 87040", file.length(), 87040);  // remove me
        assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
        assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());

@ -169,7 +168,6 @@ public class TestCellStyle
        SanityChecker sanityChecker = new SanityChecker();
        sanityChecker.checkHSSFWorkbook(wb);

-        assertEquals("FILE LENGTH ", 5632, file.length());      // remove me
        assertEquals("LAST ROW ", 0, s.getLastRowNum());
        assertEquals("FIRST ROW ", 0, s.getFirstRowNum());

@ -232,7 +230,6 @@ public class TestCellStyle
        out.close();
        SanityChecker sanityChecker = new SanityChecker();
        sanityChecker.checkHSSFWorkbook(wb);
-        assertEquals("FILE LENGTH == 87040", file.length(), 87040);     // remove me
        assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
        assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());

--- a/src/testcases/org/apache/poi/hssf/usermodel/TestWorkbook.java
+++ b/src/testcases/org/apache/poi/hssf/usermodel/TestWorkbook.java
@ -142,8 +142,6 @@ public class TestWorkbook
        wb.write(out);
        out.close();
        sanityChecker.checkHSSFWorkbook(wb);
-        assertEquals("FILE LENGTH == 87040", 87040,
-                     file.length());                            // remove me
        assertEquals("LAST ROW == 99", 99, s.getLastRowNum());
        assertEquals("FIRST ROW == 0", 0, s.getFirstRowNum());

@ -201,8 +199,6 @@ public class TestWorkbook
        out.close();

        sanityChecker.checkHSSFWorkbook(wb);
-        assertEquals("FILE LENGTH == 45568", 45568,
-                     file.length());   // remove
        assertEquals("LAST ROW == 74", 74, s.getLastRowNum());
        assertEquals("FIRST ROW == 25", 25, s.getFirstRowNum());
    }
--- a/src/testcases/org/apache/poi/util/TestStringUtil.java
+++ b/src/testcases/org/apache/poi/util/TestStringUtil.java
@ -64,6 +64,7 @@ import java.text.NumberFormat;
 *
 * @author  Marc Johnson (mjohnson at apache dot org
 * @author  Glen Stampoultzis (glens at apache.org)
+ * @author  Sergei Kozello (sergeikozello at mail.ru)
 */

 public class TestStringUtil
@ -99,6 +100,48 @@ public class TestStringUtil
                     StringUtil.getFromUnicode(test_data));
    }

+    /**
+     * Test the simple form of getFromUnicode with symbols whose codes are both below and above 127
+     */
+
+    public void testGetFromUnicodeSymbolsWithCodesMoreThan127()
+    {
+        byte[] test_data = new byte[] {   0x04, 0x22,
+                                          0x04, 0x35,
+                                          0x04, 0x41,
+                                          0x04, 0x42,
+                                          0x00, 0x20,
+                                          0x00, 0x74,
+                                          0x00, 0x65,
+                                          0x00, 0x73,
+                                          0x00, 0x74,
+                                          };
+
+        assertEquals("\u0422\u0435\u0441\u0442 test",
+                     StringUtil.getFromUnicode(test_data));
+    }
+
+    /**
+     * Test getFromUnicodeHigh with symbols whose codes are both below and above 127
+     */
+
+    public void testGetFromUnicodeHighSymbolsWithCodesMoreThan127()
+    {
+        byte[] test_data = new byte[] {   0x22, 0x04,
+                                          0x35, 0x04,
+                                          0x41, 0x04,
+                                          0x42, 0x04,
+                                          0x20, 0x00,
+                                          0x74, 0x00,
+                                          0x65, 0x00,
+                                          0x73, 0x00,
+                                          0x74, 0x00,
+                                          };
+
+        assertEquals("\u0422\u0435\u0441\u0442 test",
+                     StringUtil.getFromUnicodeHigh( test_data ) );
+    }
+
    /**
     * Test more complex form of getFromUnicode
     */