bug 50955 - add logging per Javen's recommendation. Thank you!
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1791021 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9412d467f3
commit
0ddc4da7e2
@ -44,6 +44,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
|
|||||||
import org.apache.poi.util.CodePageUtil;
|
import org.apache.poi.util.CodePageUtil;
|
||||||
import org.apache.poi.util.LittleEndian;
|
import org.apache.poi.util.LittleEndian;
|
||||||
import org.apache.poi.util.NotImplemented;
|
import org.apache.poi.util.NotImplemented;
|
||||||
|
import org.apache.poi.util.POILogFactory;
|
||||||
|
import org.apache.poi.util.POILogger;
|
||||||
import org.apache.poi.util.StringUtil;
|
import org.apache.poi.util.StringUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -52,6 +54,9 @@ import org.apache.poi.util.StringUtil;
|
|||||||
*/
|
*/
|
||||||
public class HWPFOldDocument extends HWPFDocumentCore {
|
public class HWPFOldDocument extends HWPFDocumentCore {
|
||||||
|
|
||||||
|
private static final POILogger logger = POILogFactory
|
||||||
|
.getLogger( HWPFOldDocument.class );
|
||||||
|
|
||||||
private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252;
|
private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252;
|
||||||
|
|
||||||
private OldTextPieceTable tpt;
|
private OldTextPieceTable tpt;
|
||||||
@ -110,6 +115,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
//if there was a problem with the guessed charset and the length of the
|
//if there was a problem with the guessed charset and the length of the
|
||||||
//textpiece, back off to win1252. This is effectively what we used to do.
|
//textpiece, back off to win1252. This is effectively what we used to do.
|
||||||
tp = buildTextPiece(StringUtil.WIN_1252);
|
tp = buildTextPiece(StringUtil.WIN_1252);
|
||||||
|
logger.log(POILogger.WARN, "Error with "+guessedCharset +". Backing off to Windows-1252");
|
||||||
}
|
}
|
||||||
tpt.add(tp);
|
tpt.add(tp);
|
||||||
|
|
||||||
@ -181,9 +187,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Take the first codepage that is not default, ansi or symbol.
|
* Try to get the code page from various areas of the document.
|
||||||
* Ideally, we'd want to track fonts with runs, but we don't yet
|
* Start with the DocumentSummaryInformation, back off to the section info,
|
||||||
* know how to do that.
|
* finally try the charset information from the font table.
|
||||||
*
|
*
|
||||||
* Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi
|
* Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi
|
||||||
* appears here.
|
* appears here.
|
||||||
@ -198,26 +204,30 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
CustomProperties customProperties = summaryInformation.getCustomProperties();
|
CustomProperties customProperties = summaryInformation.getCustomProperties();
|
||||||
if (customProperties != null) {
|
if (customProperties != null) {
|
||||||
int codePage = customProperties.getCodepage();
|
int codePage = customProperties.getCodepage();
|
||||||
try {
|
if (codePage > -1) {
|
||||||
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
try {
|
||||||
} catch (UnsupportedEncodingException e) {
|
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
||||||
//swallow
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
//swallow
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//for now, try to get first valid code page in a valid section
|
//If that didn't work, for now, try to get first valid code page in a valid section
|
||||||
for (Section section : summaryInformation.getSections()) {
|
for (Section section : summaryInformation.getSections()) {
|
||||||
if (section.getOffset() < 0) {
|
if (section.getOffset() < 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
int codePage = section.getCodepage();
|
int codePage = section.getCodepage();
|
||||||
try {
|
if (codePage > -1) {
|
||||||
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
try {
|
||||||
} catch (UnsupportedEncodingException e) {
|
return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
|
||||||
//swallow
|
} catch (UnsupportedEncodingException e) {
|
||||||
|
//swallow
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//if that still doesn't work, pick the first non-default non symbol charset
|
//if that still doesn't work, pick the first non-default, non-symbol charset
|
||||||
for (OldFfn oldFfn : fontTable.getFontNames()) {
|
for (OldFfn oldFfn : fontTable.getFontNames()) {
|
||||||
HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
|
HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
|
||||||
if (wmfCharset != null &&
|
if (wmfCharset != null &&
|
||||||
@ -227,6 +237,8 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
return wmfCharset.getCharset();
|
return wmfCharset.getCharset();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
logger.log(POILogger.WARN, "Couldn't find a defined charset; backing off to cp1252");
|
||||||
|
//if all else fails
|
||||||
return DEFAULT_CHARSET;
|
return DEFAULT_CHARSET;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -282,8 +294,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* As a rough heuristic (total hack), read through the font table
|
* As a rough heuristic (total hack), read through the HPSF,
|
||||||
* and take the first non-default, non-ansi, non-symbol
|
* then read through the font table, and take the first
|
||||||
|
* non-default, non-ansi, non-symbol
|
||||||
* font's charset and return that.
|
* font's charset and return that.
|
||||||
*
|
*
|
||||||
* Once we figure out how to link a font to a text piece, we should
|
* Once we figure out how to link a font to a text piece, we should
|
||||||
|
Loading…
Reference in New Issue
Block a user