from russia with love (LOL)

http://nagoya.apache.org/bugzilla/show_bug.cgi?id=10548 #1
PR:
Obtained from:
Submitted by:
Reviewed by:


git-svn-id: https://svn.apache.org/repos/asf/jakarta/poi/trunk@352795 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew C. Oliver 2002-07-21 03:03:57 +00:00
parent d8798390bc
commit a3c338967e
7 changed files with 291 additions and 82 deletions

View File

@ -158,4 +158,35 @@
We will probably enhance HSSF in the future to make this process easier.
</answer>
</faq>
<faq>
<question>
I tried to set cell values and the Excel sheet name in my native language,
but it did not work. :(
</question>
<answer>
By default HSSF stores cell values and sheet names as compressed unicode
(8 bits per character), so to support localization you should use Unicode
(UTF-16). You have to select that encoding explicitly:
<source>
//
// for sheet name
//
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet s = wb.createSheet();
wb.setSheetName( 0, "SomeUnicodeName", HSSFWorkbook.ENCODING_UTF_16 );
//
// for cell value
//
HSSFRow r = s.createRow( 0 );
HSSFCell c = r.createCell( (short)0 );
c.setCellType( HSSFCell.CELL_TYPE_STRING );
c.setEncoding( HSSFCell.ENCODING_UTF_16 );
c.setCellValue( "\u0422\u0435\u0441\u0442\u043E\u0432\u0430\u044F" );
</source>
</answer>
</faq>
</faqs>

View File

@ -7,6 +7,7 @@
<authors>
<person email="acoliver2@users.sourceforge.net" name="Andrew C. Oliver" id="AO"/>
<person email="glens@apache.org" name="Glen Stampoultzis" id="GJS"/>
<person email="sergeikozello@mail.ru" name="Sergei Kozello" id="SK"/>
</authors>
</header>
<body>
@ -42,7 +43,10 @@
sequence to the workbook. Sheets do not in themselves have a sheet
name (the tab at the bottom); you set
the name associated with a sheet by calling
HSSFWorkbook.setSheetName(sheetindex,&quot;SheetName&quot;).</p>
HSSFWorkbook.setSheetName(sheetindex,&quot;SheetName&quot;,encoding).
The name may be in 8bit format (HSSFWorkbook.ENCODING_COMPRESSED_UNICODE)
or Unicode (HSSFWorkbook.ENCODING_UTF_16). Default encoding is 8bit per char.
</p>
<p>Rows are created by calling createRow(rowNumber) from an existing
instance of HSSFSheet. Only rows that have cell values should be
added to the sheet. To set the row's height, you just call
@ -98,18 +102,20 @@ HSSFFont f2 = wb.createFont();
//set font 1 to 12 point type
f.setFontHeightInPoints((short) 12);
//make it red
f.setColor((short) HSSFColor.RED.index);
//make it blue
f.setColor( (short)0xc );
// make it bold
//arial is the default font
f.setBoldweight(f.BOLDWEIGHT_BOLD);
f.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
//set font 2 to 10 point type
f2.setFontHeightInPoints((short) 10);
//make it the color at palette index 0xf (white)
f2.setColor((short) HSSFColor.WHITE.index);
//make it red
f2.setColor( (short)HSSFFont.COLOR_RED );
//make it bold
f2.setBoldweight(f2.BOLDWEIGHT_BOLD);
f2.setBoldweight(HSSFFont.BOLDWEIGHT_BOLD);
f2.setStrikeout( true );
//set cell style
cs.setFont(f);
@ -120,16 +126,18 @@ cs.setDataFormat(HSSFDataFormat.getFormat("($#,##0_);[Red]($#,##0)"));
cs2.setBorderBottom(cs2.BORDER_THIN);
//fill w fg fill color
cs2.setFillPattern((short) HSSFCellStyle.SOLID_FOREGROUND);
// set foreground fill to red
cs2.setFillForegroundColor((short) HSSFColor.RED.index);
// set the font
cs2.setFont(f2);
// set the sheet name to HSSF Test
wb.setSheetName(0, "HSSF Test");
// create a sheet with 300 rows (0-299)
for (rownum = (short) 0; rownum < 300; rownum++)
// set the sheet name in Unicode
wb.setSheetName(0, "\u0422\u0435\u0441\u0442\u043E\u0432\u0430\u044F " +
"\u0421\u0442\u0440\u0430\u043D\u0438\u0447\u043A\u0430",
HSSFWorkbook.ENCODING_UTF_16 );
// in case of compressed Unicode
// wb.setSheetName(0, "HSSF Test", HSSFWorkbook.ENCODING_COMPRESSED_UNICODE );
// create a sheet with 30 rows (0-29)
for (rownum = (short) 0; rownum < 30; rownum++)
{
// create a row
r = s.createRow(rownum);
@ -141,8 +149,8 @@ for (rownum = (short) 0; rownum < 300; rownum++)
}
//r.setRowNum(( short ) rownum);
// create 50 cells (0-49) (the += 2 becomes apparent later
for (short cellnum = (short) 0; cellnum < 50; cellnum += 2)
// create 10 cells (0-9) (the += 2 becomes apparent later
for (short cellnum = (short) 0; cellnum < 10; cellnum += 2)
{
// create a numeric cell
c = r.createCell(cellnum);
@ -151,29 +159,31 @@ for (rownum = (short) 0; rownum < 300; rownum++)
+ (((double) rownum / 1000)
+ ((double) cellnum / 10000)));
String cellValue;
// create a string cell (see why += 2 in the
c = r.createCell((short) (cellnum + 1));
// on every other row
if ((rownum % 2) == 0)
{
// set this cell to the first cell style we defined
c.setCellStyle(cs);
// set the cell's string value to "Test"
c.setEncoding( HSSFCell.ENCODING_COMPRESSED_UNICODE );
c.setCellValue( "Test" );
}
else
{
c.setCellStyle(cs2);
// set the cell's string value to "\u0422\u0435\u0441\u0442"
c.setEncoding( HSSFCell.ENCODING_UTF_16 );
c.setCellValue( "\u0422\u0435\u0441\u0442" );
}
// create a string cell (see why += 2 in the
c = r.createCell((short) (cellnum + 1));
// set the cell's string value to "TEST"
c.setCellValue("TEST");
// make this column a bit wider
s.setColumnWidth((short) (cellnum + 1), (short) ((50 * 8) / ((double) 1 / 20)));
// on every other row
if ((rownum % 2) == 0)
{
// set this to the white on red cell style
// we defined above
c.setCellStyle(cs2);
}
}
}

View File

@ -60,6 +60,7 @@ import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import java.util.Locale;
import org.apache.poi.util.POILogger;
@ -85,6 +86,7 @@ import org.apache.poi.hssf.util.HSSFColor;
*
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Glen Stampoultzis (glens at apache.org)
* @author Sergei Kozello (sergeikozello at mail.ru)
* @see org.apache.poi.hssf.usermodel.HSSFWorkbook
* @version 1.0-pre
*/
@ -410,6 +412,7 @@ public class Workbook {
return ( BackupRecord ) records.get(backuppos);
}
/**
* sets the name for a given sheet. If the boundsheet record doesn't exist and
* its only one more than we have, go ahead and create it. If its > 1 more than
@ -419,12 +422,18 @@ public class Workbook {
* @param sheetname the name for the sheet
*/
// kept for compatibility: callers that do not name an encoding get the
// flag value 0, i.e. the sheet name is stored as 8 bit compressed unicode
public void setSheetName(int sheetnum, String sheetname ) {
    final byte compressedUnicode = 0;
    setSheetName( sheetnum, sheetname, compressedUnicode );
}
public void setSheetName(int sheetnum, String sheetname, short encoding ) {
checkSheets(sheetnum);
(( BoundSheetRecord ) boundsheets.get(sheetnum))
.setSheetname(sheetname);
(( BoundSheetRecord ) boundsheets.get(sheetnum))
.setSheetnameLength(( byte ) sheetname.length());
BoundSheetRecord sheet = (BoundSheetRecord)boundsheets.get( sheetnum );
sheet.setSheetname(sheetname);
sheet.setSheetnameLength( (byte)sheetname.length() );
sheet.setCompressedUnicodeFlag( (byte)encoding );
}
/**
@ -1586,7 +1595,8 @@ public class Workbook {
}
/**
* Creates the Country record with the default and current country set to 1
* Creates the Country record with the default country set to 1
* and current country set to 7 in case of russian locale ("ru_RU") and 1 otherwise
* @return record containing a CountryRecord
* @see org.apache.poi.hssf.record.CountryRecord
* @see org.apache.poi.hssf.record.Record
@ -1596,7 +1606,15 @@ public class Workbook {
CountryRecord retval = new CountryRecord();
retval.setDefaultCountry(( short ) 1);
// from Russia with love ;)
if ( Locale.getDefault().toString().equals( "ru_RU" ) ) {
retval.setCurrentCountry(( short ) 7);
}
else {
retval.setCurrentCountry(( short ) 1);
}
return retval;
}

View File

@ -55,8 +55,13 @@
package org.apache.poi.hssf.record;
import java.io.*;
import java.io.UnsupportedEncodingException;
import org.apache.poi.util.BinaryTree;
import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.StringUtil;
import sun.awt.image.ByteInterleavedRaster;
/**
* Title: Bound Sheet Record (aka BundleSheet) <P>
@ -65,6 +70,7 @@ import org.apache.poi.util.StringUtil;
* file. <P>
* REFERENCE: PG 291 Microsoft Excel 97 Developer's Kit (ISBN: 1-57231-498-2)<P>
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Sergei Kozello (sergeikozello at mail.ru)
* @version 2.0-pre
*/
@ -117,16 +123,31 @@ public class BoundSheetRecord
}
}
/**
* COMPRESSED UNICODE (8 bit):
* sid + len + bof + flags + len(str) + unicode + str
* 2 + 2 + 4 + 2 + 1 + 1 + len(str)
*
* UNICODE:
* sid + len + bof + flags + len(str) + unicode + str
* 2 + 2 + 4 + 2 + 1 + 1 + 2 * len(str)
*
*/
protected void fillFields(byte [] data, short size, int offset)
{
field_1_position_of_BOF = LittleEndian.getInt(data,
0 + offset);
field_2_option_flags = LittleEndian.getShort(data,
4 + offset);
field_3_sheetname_length = data[ 6 + offset ];
field_4_compressed_unicode_flag = data[ 7 + offset ];
field_5_sheetname = new String(data, 8 + offset,
LittleEndian.ubyteToInt( field_3_sheetname_length));
field_1_position_of_BOF = LittleEndian.getInt(data, 0 + offset); // bof
field_2_option_flags = LittleEndian.getShort(data, 4 + offset); // flags
field_3_sheetname_length = data[ 6 + offset ]; // len(str)
field_4_compressed_unicode_flag = data[ 7 + offset ]; // unicode
int nameLength = LittleEndian.ubyteToInt( field_3_sheetname_length );
if ( ( field_4_compressed_unicode_flag & 0x01 ) == 1 ) {
field_5_sheetname = StringUtil.getFromUnicodeHigh( data, 8 + offset, nameLength );
}
else {
field_5_sheetname = new String( data, 8 + offset, nameLength );
}
}
/**
@ -218,6 +239,20 @@ public class BoundSheetRecord
return field_3_sheetname_length;
}
/**
 * get the length of the sheetname as it is stored in the record. For an
 * 8 bit name this is the sheetname length itself; when bit 0 of the
 * compressed unicode flag is set the name is stored as 16 bit characters
 * and the sheetname length is doubled. The result is narrowed to a byte.
 *
 * @return length of the raw (encoded) sheet name
 */
public byte getRawSheetnameLength()
{
    final boolean storedAs16Bit = ( field_4_compressed_unicode_flag & 0x01 ) == 1;

    if ( storedAs16Bit )
    {
        return (byte)( 2 * field_3_sheetname_length );
    }
    return (byte)field_3_sheetname_length;
}
/**
* get whether or not to interpret the Sheetname as compressed unicode (8/16 bit)
* (This is undocumented but can be found as Q187919 on the Microsoft(tm) Support site)
@ -262,21 +297,46 @@ public class BoundSheetRecord
public int serialize(int offset, byte [] data)
{
LittleEndian.putShort(data, 0 + offset, sid);
LittleEndian.putShort(data, 2 + offset,
( short ) (0x08 + getSheetnameLength()));
LittleEndian.putShort( data, 2 + offset, (short)( 8 + getRawSheetnameLength() ) );
LittleEndian.putInt(data, 4 + offset, getPositionOfBof());
LittleEndian.putShort(data, 8 + offset, getOptionFlags());
data[ 10 + offset ] = getSheetnameLength();
data[ 10 + offset ] = (byte)( getSheetnameLength() );
data[ 11 + offset ] = getCompressedUnicodeFlag();
// we assume compressed unicode (bein the dern americans we are ;-p)
if ( ( field_4_compressed_unicode_flag & 0x01 ) == 1 )
StringUtil.putUncompressedUnicode( getSheetname(), data, 12 + offset );
else
StringUtil.putCompressedUnicode( getSheetname(), data, 12 + offset );
return getRecordSize();
/*
byte[] fake = new byte[] { (byte)0x85, 0x00, // sid
0x1a, 0x00, // length
0x3C, 0x09, 0x00, 0x00, // bof
0x00, 0x00, // flags
0x09, // len( str )
0x01, // unicode
// <str>
0x21, 0x04, 0x42, 0x04, 0x40, 0x04, 0x30, 0x04, 0x3D,
0x04, 0x38, 0x04, 0x47, 0x04, 0x3A, 0x04, 0x30, 0x04
// </str>
};
sid + len + bof + flags + len(str) + unicode + str
2 + 2 + 4 + 2 + 1 + 1 + len(str)
System.arraycopy( fake, 0, data, offset, fake.length );
return fake.length;
*/
}
public int getRecordSize()
{
return 12 + getSheetnameLength();
// return 30;
return 12 + getRawSheetnameLength();
}
public short getSid()

View File

@ -70,6 +70,7 @@ import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
@ -82,6 +83,7 @@ import java.util.List;
* @see org.apache.poi.hssf.usermodel.HSSFSheet
* @author Andrew C. Oliver (acoliver at apache dot org)
* @author Glen Stampoultzis (glens at apache.org)
* @author Sergei Kozello (sergeikozello at mail.ru)
* @version 2.0-pre
*/
@ -201,6 +203,10 @@ public class HSSFWorkbook
// none currently
}
public final static byte ENCODING_COMPRESSED_UNICODE = 0;
public final static byte ENCODING_UTF_16 = 1;
/**
* set the sheet name.
* @param sheet number (0 based)
@ -208,12 +214,28 @@ public class HSSFWorkbook
*/
public void setSheetName(int sheet, String name)
{
    // Delegate to the encoding-aware overload of this class instead of
    // calling the low-level workbook directly, so that the
    // "Sheet out of bounds" check is applied on this path as well.
    // The name is stored as 8 bit compressed unicode, the default encoding.
    setSheetName( sheet, name, ENCODING_COMPRESSED_UNICODE );
}
public void setSheetName( int sheet, String name, short encoding )
{
if (sheet > (sheets.size() - 1))
{
throw new RuntimeException("Sheet out of bounds");
}
workbook.setSheetName(sheet, name);
switch ( encoding ) {
case ENCODING_COMPRESSED_UNICODE:
case ENCODING_UTF_16:
break;
default:
// TODO java.io.UnsupportedEncodingException
throw new RuntimeException( "Unsupported encoding" );
}
workbook.setSheetName( sheet, name, encoding );
}
/**

View File

@ -63,7 +63,13 @@ import java.text.FieldPosition;
/**
* Title: String Utility Description: Collection of string handling utilities
*
* The method names are confusing, so note which writer pairs with which
* reader (one method writes the data, the other reads it back):
* putUncompressedUnicodeHigh and getFromUnicode
* putUncompressedUnicode and getFromUnicodeHigh
*
*@author Andrew C. Oliver
*@author Sergei Kozello (sergeikozello at mail.ru)
*@created May 10, 2002
*@version 1.0
*/
@ -79,6 +85,8 @@ public class StringUtil {
* given a byte array of 16-bit unicode characters stored low byte first,
* return the string they encode
*
* { 0x16, 0x00 } -> 0x16
*
*@param string the byte array to be converted
*@param offset the initial offset into the
* byte array. it is assumed that string[ offset ] and string[ offset +
@ -103,23 +111,38 @@ public class StringUtil {
if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length");
}
byte[] bstring = new byte[len];
int index = offset;
// start with high bits.
for (int k = 0; k < len; k++) {
bstring[k] = string[index];
index += 2;
}
return new String(bstring);
char[] chars = new char[ len ];
for ( int i = 0; i < chars.length; i++ ) {
chars[i] = (char)( string[ offset + ( 2*i ) ] +
( string[ offset + ( 2*i+1 ) ] << 8 ) );
}
return new String( chars );
}
/**
 * given a byte array of 16-bit unicode characters stored low byte first,
 * return the string they encode. The whole array is converted, two bytes
 * per character.
 *
 * { 0x16, 0x00 } -> 0x16
 *
 *@param string the byte array to be converted
 *@return the converted string
 */
public static String getFromUnicodeHigh( final byte[] string ) {
    final int charCount = string.length / 2;

    return getFromUnicodeHigh( string, 0, charCount );
}
/**
* given a byte array of 16-bit unicode characters stored high byte first,
* return the string they encode
*
* { 0x00, 0x16 } -> 0x16
*
*@param string the byte array to be converted
*@param offset the initial offset into the
* byte array. it is assumed that string[ offset ] and string[ offset +
@ -144,15 +167,15 @@ public class StringUtil {
if ((len < 0) || (((string.length - offset) / 2) < len)) {
throw new IllegalArgumentException("Illegal length");
}
byte[] bstring = new byte[len];
int index = offset + 1;
// start with low bits.
for (int k = 0; k < len; k++) {
bstring[k] = string[index];
index += 2;
char[] chars = new char[ len ];
for ( int i = 0; i < chars.length; i++ ) {
chars[i] = (char)( ( string[ offset + ( 2*i ) ] << 8 ) +
string[ offset + ( 2*i+1 ) ] );
}
return new String(bstring);
return new String( chars );
}
@ -160,6 +183,8 @@ public class StringUtil {
* given a byte array of 16-bit unicode characters stored high byte first,
* return the string they encode
*
* { 0x00, 0x16 } -> 0x16
*
*@param string the byte array to be converted
*@return the converted string
*/

View File

@ -64,6 +64,7 @@ import java.text.NumberFormat;
*
* @author Marc Johnson (mjohnson at apache dot org
* @author Glen Stampoultzis (glens at apache.org)
* @author Sergei Kozello (sergeikozello at mail.ru)
*/
public class TestStringUtil
@ -99,6 +100,48 @@ public class TestStringUtil
StringUtil.getFromUnicode(test_data));
}
/**
 * test the simple form of getFromUnicode on input that mixes characters
 * whose codes are above 127 with characters whose codes are below it
 */
public void testGetFromUnicodeSymbolsWithCodesMoreThan127()
{
    // two bytes per character, high byte first
    final byte[] highByteFirst =
    {
        0x04, 0x22,   // U+0422
        0x04, 0x35,   // U+0435
        0x04, 0x41,   // U+0441
        0x04, 0x42,   // U+0442
        0x00, 0x20,   // space
        0x00, 0x74,   // t
        0x00, 0x65,   // e
        0x00, 0x73,   // s
        0x00, 0x74    // t
    };
    final String expected = "\u0422\u0435\u0441\u0442 test";

    assertEquals(expected, StringUtil.getFromUnicode(highByteFirst));
}
/**
 * test getFromUnicodeHigh on input that mixes characters whose codes are
 * above 127 with characters whose codes are below it
 */
public void testGetFromUnicodeHighSymbolsWithCodesMoreThan127()
{
    // two bytes per character, low byte first
    final byte[] lowByteFirst =
    {
        0x22, 0x04,   // U+0422
        0x35, 0x04,   // U+0435
        0x41, 0x04,   // U+0441
        0x42, 0x04,   // U+0442
        0x20, 0x00,   // space
        0x74, 0x00,   // t
        0x65, 0x00,   // e
        0x73, 0x00,   // s
        0x74, 0x00    // t
    };
    final String expected = "\u0422\u0435\u0441\u0442 test";

    assertEquals(expected, StringUtil.getFromUnicodeHigh( lowByteFirst ) );
}
/**
* Test more complex form of getFromUnicode
*/