Add a new HSMF method, MAPIMessage.has7BitEncodingStrings(), to make it easier to decide when encoding guessing is needed

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1087746 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Nick Burch 2011-04-01 15:20:07 +00:00
parent a0b9fb350b
commit 002d0a5995
3 changed files with 45 additions and 0 deletions

View File

@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta3" date="2011-??-??">
<action dev="poi-developers" type="add">Add a new HSMF method, MAPIMessage.has7BitEncodingStrings(), to make it easier to decide when encoding guessing is needed</action>
<action dev="poi-developers" type="fix">OutlookTextExtractor now requests 7 bit encoding guessing</action>
<action dev="poi-developers" type="add">Improve HSMF encoding guessing for 7 bit fields in MAPIMessage</action>
<action dev="poi-developers" type="add">Allow HSMF access to the HTML body contents in MAPIMessage</action>

View File

@ -36,6 +36,7 @@ import org.apache.poi.hsmf.datatypes.ChunkGroup;
import org.apache.poi.hsmf.datatypes.Chunks;
import org.apache.poi.hsmf.datatypes.NameIdChunks;
import org.apache.poi.hsmf.datatypes.RecipientChunks;
import org.apache.poi.hsmf.datatypes.Types;
import org.apache.poi.hsmf.datatypes.RecipientChunks.RecipientChunksSorter;
import org.apache.poi.hsmf.datatypes.StringChunk;
import org.apache.poi.hsmf.exceptions.ChunkNotFoundException;
@ -396,6 +397,37 @@ public class MAPIMessage extends POIDocument {
}
}
/**
 * Does this file contain any strings that are stored as
 * 7 bit rather than unicode, i.e. any string chunk whose
 * type is {@link Types#ASCII_STRING}?
 * <p>The main chunks, the name-id chunks and the chunks of
 * every recipient are checked in that order, and the search
 * stops at the first match.</p>
 *
 * @return <code>true</code> if at least one such string chunk
 *  is found, <code>false</code> if none is
 */
public boolean has7BitEncodingStrings() {
   for(Chunk c : mainChunks.getAll()) {
      if(is7BitString(c)) {
         return true;
      }
   }

   // NOTE(review): assumes nameIdChunks may be left unset when the
   //  file carries no name-id group - confirm against the constructor.
   // A missing group simply contributes no strings, rather than
   //  failing the whole check with a NullPointerException.
   if(nameIdChunks != null) {
      for(Chunk c : nameIdChunks.getAll()) {
         if(is7BitString(c)) {
            return true;
         }
      }
   }

   for(RecipientChunks rc : recipientChunks) {
      for(Chunk c : rc.getAll()) {
         if(is7BitString(c)) {
            return true;
         }
      }
   }
   return false;
}

/**
 * Is the given chunk a {@link StringChunk} whose type
 * is {@link Types#ASCII_STRING}?
 */
private static boolean is7BitString(Chunk c) {
   return (c instanceof StringChunk)
      && ((StringChunk)c).getType() == Types.ASCII_STRING;
}
/**
* Returns all the headers, one entry per line
*/

View File

@ -34,6 +34,7 @@ public final class TestBasics extends TestCase {
private MAPIMessage outlook30;
private MAPIMessage attachments;
private MAPIMessage noRecipientAddress;
private MAPIMessage unicode;
private MAPIMessage cyrillic;
private MAPIMessage chinese;
@ -48,6 +49,7 @@ public final class TestBasics extends TestCase {
outlook30 = new MAPIMessage(samples.openResourceAsStream("outlook_30_msg.msg"));
attachments = new MAPIMessage(samples.openResourceAsStream("attachment_test_msg.msg"));
noRecipientAddress = new MAPIMessage(samples.openResourceAsStream("no_recipient_address.msg"));
unicode = new MAPIMessage(samples.openResourceAsStream("example_received_unicode.msg"));
cyrillic = new MAPIMessage(samples.openResourceAsStream("cyrillic_message.msg"));
chinese = new MAPIMessage(samples.openResourceAsStream("chinese-traditional.msg"));
}
@ -182,6 +184,16 @@ public final class TestBasics extends TestCase {
noRecipientAddress.setReturnNullOnMissingChunk(false);
}
/**
 * Checks has7BitEncodingStrings() against the sample messages:
 * the unicode sample must report no 7 bit strings, while the
 * simple, chinese and cyrillic samples must each report some.
 */
public void test7BitDetection() throws Exception {
   // The one sample expected to hold no 7 bit strings
   boolean unicodeHas7Bit = unicode.has7BitEncodingStrings();
   assertEquals(false, unicodeHas7Bit);

   // Samples expected to hold 7 bit strings, checked in a fixed order
   MAPIMessage[] sevenBitSamples = new MAPIMessage[] {
      simple, chinese, cyrillic
   };
   for(MAPIMessage sample : sevenBitSamples) {
      assertEquals(true, sample.has7BitEncodingStrings());
   }
}
/**
* We default to CP1252, but can sometimes do better
* if needed.