Fix TIKA-1515 - Handle Excel 3 files with a 0x8001 codepage

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1651517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2015-01-13 22:13:24 +00:00
parent ce3cc5b519
commit 9ccc610f6a
3 changed files with 6 additions and 2 deletions

View File

@@ -75,7 +75,7 @@ public final class OldStringRecord {
protected static String getString(byte[] data, CodepageRecord codepage) {
int cp = CodePageUtil.CP_ISO_8859_1;
if (codepage != null) {
- cp = codepage.getCodepage();
+ cp = codepage.getCodepage() & 0xffff;
}
try {
return CodePageUtil.getStringFromCodePage(data, cp);

View File

@@ -53,6 +53,7 @@ public class CodePageUtil
/** <p>Codepage for Windows 1252</p> */
public static final int CP_WINDOWS_1252 = 1252;
+ public static final int CP_WINDOWS_1252_BIFF23 = 32769;
/** <p>Codepage for Windows 1253</p> */
public static final int CP_WINDOWS_1253 = 1253;
@@ -77,6 +78,7 @@ public class CodePageUtil
/** <p>Codepage for Macintosh Roman (Java: MacRoman)</p> */
public static final int CP_MAC_ROMAN = 10000;
+ public static final int CP_MAC_ROMAN_BIFF23 = 32768;
/** <p>Codepage for Macintosh Japan (Java: unknown - use SJIS, cp942 or
* cp943)</p> */
@@ -298,6 +300,7 @@ public class CodePageUtil
else
return "windows-1251";
case CP_WINDOWS_1252:
+ case CP_WINDOWS_1252_BIFF23:
if (javaLangFormat)
return "Cp1252";
else
@@ -335,6 +338,7 @@ public class CodePageUtil
case CP_JOHAB:
return "johab";
case CP_MAC_ROMAN:
+ case CP_MAC_ROMAN_BIFF23:
return "MacRoman";
case CP_MAC_JAPAN:
return "SJIS";

View File

@@ -38,7 +38,7 @@ public final class TestOldExcelExtractor extends POITestCase {
}
}
- public void DISABLEDtestSimpleExcel3() {
+ public void testSimpleExcel3() {
OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls");
// Check we can call getText without error