bug 50955 - add logging per Javen's recommendation. Thank you!

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1791021 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Tim Allison 2017-04-11 20:00:12 +00:00
parent 9412d467f3
commit 0ddc4da7e2

View File

@ -44,6 +44,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.util.CodePageUtil; import org.apache.poi.util.CodePageUtil;
import org.apache.poi.util.LittleEndian; import org.apache.poi.util.LittleEndian;
import org.apache.poi.util.NotImplemented; import org.apache.poi.util.NotImplemented;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.util.StringUtil; import org.apache.poi.util.StringUtil;
/** /**
@ -52,6 +54,9 @@ import org.apache.poi.util.StringUtil;
*/ */
public class HWPFOldDocument extends HWPFDocumentCore { public class HWPFOldDocument extends HWPFDocumentCore {
private static final POILogger logger = POILogFactory
.getLogger( HWPFOldDocument.class );
private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252; private final static Charset DEFAULT_CHARSET = StringUtil.WIN_1252;
private OldTextPieceTable tpt; private OldTextPieceTable tpt;
@ -110,6 +115,7 @@ public class HWPFOldDocument extends HWPFDocumentCore {
//if there was a problem with the guessed charset and the length of the //if there was a problem with the guessed charset and the length of the
//textpiece, back off to win1252. This is effectively what we used to do. //textpiece, back off to win1252. This is effectively what we used to do.
tp = buildTextPiece(StringUtil.WIN_1252); tp = buildTextPiece(StringUtil.WIN_1252);
logger.log(POILogger.WARN, "Error with "+guessedCharset +". Backing off to Windows-1252");
} }
tpt.add(tp); tpt.add(tp);
@ -181,9 +187,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
/** /**
* Take the first codepage that is not default, ansi or symbol. * Try to get the code page from various areas of the document.
* Ideally, we'd want to track fonts with runs, but we don't yet * Start with the DocumentSummaryInformation, back off to the section info,
* know how to do that. * finally try the charset information from the font table.
* *
* Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi * Consider throwing an exception if > 1 unique codepage that is not default, symbol or ansi
* appears here. * appears here.
@ -198,26 +204,30 @@ public class HWPFOldDocument extends HWPFDocumentCore {
CustomProperties customProperties = summaryInformation.getCustomProperties(); CustomProperties customProperties = summaryInformation.getCustomProperties();
if (customProperties != null) { if (customProperties != null) {
int codePage = customProperties.getCodepage(); int codePage = customProperties.getCodepage();
try { if (codePage > -1) {
return Charset.forName(CodePageUtil.codepageToEncoding(codePage)); try {
} catch (UnsupportedEncodingException e) { return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
//swallow } catch (UnsupportedEncodingException e) {
//swallow
}
} }
} }
//for now, try to get first valid code page in a valid section //If that didn't work, for now, try to get first valid code page in a valid section
for (Section section : summaryInformation.getSections()) { for (Section section : summaryInformation.getSections()) {
if (section.getOffset() < 0) { if (section.getOffset() < 0) {
continue; continue;
} }
int codePage = section.getCodepage(); int codePage = section.getCodepage();
try { if (codePage > -1) {
return Charset.forName(CodePageUtil.codepageToEncoding(codePage)); try {
} catch (UnsupportedEncodingException e) { return Charset.forName(CodePageUtil.codepageToEncoding(codePage));
//swallow } catch (UnsupportedEncodingException e) {
//swallow
}
} }
} }
} }
//if that still doesn't work, pick the first non-default non symbol charset //if that still doesn't work, pick the first non-default, non-symbol charset
for (OldFfn oldFfn : fontTable.getFontNames()) { for (OldFfn oldFfn : fontTable.getFontNames()) {
HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff); HwmfFont.WmfCharset wmfCharset = HwmfFont.WmfCharset.valueOf(oldFfn.getChs()& 0xff);
if (wmfCharset != null && if (wmfCharset != null &&
@ -227,6 +237,8 @@ public class HWPFOldDocument extends HWPFDocumentCore {
return wmfCharset.getCharset(); return wmfCharset.getCharset();
} }
} }
logger.log(POILogger.WARN, "Couldn't find a defined charset; backing off to cp1252");
//if all else fails
return DEFAULT_CHARSET; return DEFAULT_CHARSET;
} }
@ -282,8 +294,9 @@ public class HWPFOldDocument extends HWPFDocumentCore {
} }
/** /**
* As a rough heuristic (total hack), read through the font table * As a rough heuristic (total hack), read through the HPSF,
* and take the first non-default, non-ansi, non-symbol * then read through the font table, and take the first
* non-default, non-ansi, non-symbol
* font's charset and return that. * font's charset and return that.
* *
* Once we figure out how to link a font to a text piece, we should * Once we figure out how to link a font to a text piece, we should