Fix TIKA-1515 - Handle Excel 3 files with a 0x8001 (decimal 32769) codepage
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1651517 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ce3cc5b519
commit
9ccc610f6a
@ -75,7 +75,7 @@ public final class OldStringRecord {
|
|||||||
protected static String getString(byte[] data, CodepageRecord codepage) {
|
protected static String getString(byte[] data, CodepageRecord codepage) {
|
||||||
int cp = CodePageUtil.CP_ISO_8859_1;
|
int cp = CodePageUtil.CP_ISO_8859_1;
|
||||||
if (codepage != null) {
|
if (codepage != null) {
|
||||||
cp = codepage.getCodepage();
|
cp = codepage.getCodepage() & 0xffff;
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
return CodePageUtil.getStringFromCodePage(data, cp);
|
return CodePageUtil.getStringFromCodePage(data, cp);
|
||||||
|
@ -53,6 +53,7 @@ public class CodePageUtil
|
|||||||
|
|
||||||
/** <p>Codepage for Windows 1252</p> */
|
/** <p>Codepage for Windows 1252</p> */
|
||||||
public static final int CP_WINDOWS_1252 = 1252;
|
public static final int CP_WINDOWS_1252 = 1252;
|
||||||
|
public static final int CP_WINDOWS_1252_BIFF23 = 32769;
|
||||||
|
|
||||||
/** <p>Codepage for Windows 1253</p> */
|
/** <p>Codepage for Windows 1253</p> */
|
||||||
public static final int CP_WINDOWS_1253 = 1253;
|
public static final int CP_WINDOWS_1253 = 1253;
|
||||||
@ -77,6 +78,7 @@ public class CodePageUtil
|
|||||||
|
|
||||||
/** <p>Codepage for Macintosh Roman (Java: MacRoman)</p> */
|
/** <p>Codepage for Macintosh Roman (Java: MacRoman)</p> */
|
||||||
public static final int CP_MAC_ROMAN = 10000;
|
public static final int CP_MAC_ROMAN = 10000;
|
||||||
|
public static final int CP_MAC_ROMAN_BIFF23 = 32768;
|
||||||
|
|
||||||
/** <p>Codepage for Macintosh Japan (Java: unknown - use SJIS, cp942 or
|
/** <p>Codepage for Macintosh Japan (Java: unknown - use SJIS, cp942 or
|
||||||
* cp943)</p> */
|
* cp943)</p> */
|
||||||
@ -298,6 +300,7 @@ public class CodePageUtil
|
|||||||
else
|
else
|
||||||
return "windows-1251";
|
return "windows-1251";
|
||||||
case CP_WINDOWS_1252:
|
case CP_WINDOWS_1252:
|
||||||
|
case CP_WINDOWS_1252_BIFF23:
|
||||||
if (javaLangFormat)
|
if (javaLangFormat)
|
||||||
return "Cp1252";
|
return "Cp1252";
|
||||||
else
|
else
|
||||||
@ -335,6 +338,7 @@ public class CodePageUtil
|
|||||||
case CP_JOHAB:
|
case CP_JOHAB:
|
||||||
return "johab";
|
return "johab";
|
||||||
case CP_MAC_ROMAN:
|
case CP_MAC_ROMAN:
|
||||||
|
case CP_MAC_ROMAN_BIFF23:
|
||||||
return "MacRoman";
|
return "MacRoman";
|
||||||
case CP_MAC_JAPAN:
|
case CP_MAC_JAPAN:
|
||||||
return "SJIS";
|
return "SJIS";
|
||||||
|
@ -38,7 +38,7 @@ public final class TestOldExcelExtractor extends POITestCase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void DISABLEDtestSimpleExcel3() {
|
public void testSimpleExcel3() {
|
||||||
OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls");
|
OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls");
|
||||||
|
|
||||||
// Check we can call getText without error
|
// Check we can call getText without error
|
||||||
|
Loading…
Reference in New Issue
Block a user