From d1a1b12aaee1f550b34d7c174e83a7119afc795f Mon Sep 17 00:00:00 2001
From: Jesse Vincent
Date: Fri, 14 Jan 2011 00:53:19 +0000
Subject: [PATCH] Support emoji in subjects.

Signed-off-by: HIRANO Takahito
---
 .../fsck/k9/mail/internet/DecoderUtil.java    | 229 ++++++++++++++++++
 .../fsck/k9/mail/internet/MimeMessage.java    |   2 +-
 .../fsck/k9/mail/internet/MimeUtility.java    |  20 +-
 3 files changed, 245 insertions(+), 6 deletions(-)
 create mode 100644 src/com/fsck/k9/mail/internet/DecoderUtil.java

diff --git a/src/com/fsck/k9/mail/internet/DecoderUtil.java b/src/com/fsck/k9/mail/internet/DecoderUtil.java
new file mode 100644
index 000000000..7d108bb18
--- /dev/null
+++ b/src/com/fsck/k9/mail/internet/DecoderUtil.java
@@ -0,0 +1,229 @@
+
+package com.fsck.k9.mail.internet;
+
+import android.util.Log;
+import com.fsck.k9.K9;
+import com.fsck.k9.mail.Message;
+import com.fsck.k9.mail.MessagingException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import org.apache.james.mime4j.decoder.Base64InputStream;
+import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
+import org.apache.james.mime4j.util.CharsetUtil;
+
+
+/**
+ * Static methods for decoding strings, byte arrays and encoded words.
+ *
+ * This class is copied from the org.apache.james.mime4j.decoder.DecoderUtil class. It's modified here in order to
+ * decode emoji characters in the Subject headers. The method to decode emoji depends on the MimeMessage class because
+ * it has to be determined with the sender address, the mailer and so on.
+ */
+public class DecoderUtil
+{
+    /**
+     * Decodes an encoded word encoded with the 'B' encoding (described in
+     * RFC 2047) found in a header field body.
+     *
+     * @param encodedWord the encoded word to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string, or null if the word cannot be read.
+     */
+    private static String decodeB(String encodedWord, String charset)
+    {
+        byte[] bytes;
+        try
+        {
+            bytes = encodedWord.getBytes("US-ASCII");
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            return null;
+        }
+
+        Base64InputStream is = new Base64InputStream(new ByteArrayInputStream(bytes));
+        try
+        {
+            return MimeUtility.readToString(is, charset);
+        }
+        catch (IOException e)
+        {
+            return null;
+        }
+    }
+
+    /**
+     * Decodes an encoded word encoded with the 'Q' encoding (described in
+     * RFC 2047) found in a header field body.
+     *
+     * @param encodedWord the encoded word to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string, or null if the word cannot be read.
+     */
+    private static String decodeQ(String encodedWord, String charset)
+    {
+
+        /*
+         * Replace _ with =20 so the quoted-printable decoder below turns it into a space.
+         */
+        StringBuffer sb = new StringBuffer();
+        for (int i = 0; i < encodedWord.length(); i++)
+        {
+            char c = encodedWord.charAt(i);
+            if (c == '_')
+            {
+                sb.append("=20");
+            }
+            else
+            {
+                sb.append(c);
+            }
+        }
+
+        byte[] bytes;
+        try
+        {
+            bytes = sb.toString().getBytes("US-ASCII"); // decode the rewritten text, not the raw input
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            return null;
+        }
+
+        QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
+        try
+        {
+            return MimeUtility.readToString(is, charset);
+        }
+        catch (IOException e)
+        {
+            return null;
+        }
+    }
+
+    /**
+     * Decodes a string containing encoded words as defined by RFC 2047.
+     * Encoded words have the form
+     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
+     * quoted-printable and 'B' or 'b' for Base64.
+     *
+     * ANDROID: COPIED FROM A NEWER VERSION OF MIME4J
+     *
+     * @param body the string to decode.
+     * @param message the message which has the string.
+     * @return the decoded string; encoded words that cannot be decoded are kept verbatim.
+     */
+    public static String decodeEncodedWords(String body, Message message)
+    {
+
+        // ANDROID: Most strings will not include "=?" so a quick test can prevent unneeded
+        // object creation. This could also be handled via lazy creation of the StringBuilder.
+        if (body.indexOf("=?") == -1)
+        {
+            return body;
+        }
+
+        int previousEnd = 0;
+        boolean previousWasEncoded = false;
+
+        StringBuilder sb = new StringBuilder();
+
+        while (true)
+        {
+            int begin = body.indexOf("=?", previousEnd);
+
+            // ANDROID: The mime4j original version has an error here. It gets confused if
+            // the encoded string begins with an '=' (just after "?Q?"). This patch seeks forward
+            // to find the two '?' in the "header", before looking for the final "?=".
+            int endScan = begin + 2;
+            if (begin != -1)
+            {
+                int qm1 = body.indexOf('?', endScan + 2);
+                int qm2 = qm1 == -1 ? -1 : body.indexOf('?', qm1 + 1); // qm1 + 1 == 0 would rescan from the start
+                if (qm2 != -1)
+                {
+                    endScan = qm2 + 1;
+                }
+            }
+
+            int end = begin == -1 ? -1 : body.indexOf("?=", endScan);
+            if (end == -1)
+            {
+                if (previousEnd == 0)
+                    return body;
+
+                sb.append(body.substring(previousEnd));
+                return sb.toString();
+            }
+            end += 2;
+
+            String sep = body.substring(previousEnd, begin);
+
+            String decoded = decodeEncodedWord(body, begin, end, message);
+            if (decoded == null)
+            {
+                sb.append(sep);
+                sb.append(body.substring(begin, end));
+            }
+            else
+            {
+                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep))
+                {
+                    sb.append(sep);
+                }
+                sb.append(decoded);
+            }
+
+            previousEnd = end;
+            previousWasEncoded = decoded != null;
+        }
+    }
+
+    // Returns null on error: missing '?' delimiters, charset fixup failure, empty text or unknown encoding.
+    private static String decodeEncodedWord(String body, int begin, int end, Message message)
+    {
+        int qm1 = body.indexOf('?', begin + 2);
+        if (qm1 == -1 || qm1 >= end - 2)
+            return null;
+
+        int qm2 = body.indexOf('?', qm1 + 1);
+        if (qm2 == -1 || qm2 >= end - 2)
+            return null;
+
+        String mimeCharset = body.substring(begin + 2, qm1);
+        String encoding = body.substring(qm1 + 1, qm2);
+        String encodedText = body.substring(qm2 + 1, end - 2);
+
+        String charset;
+        try
+        {
+            charset = MimeUtility.fixupCharset(mimeCharset, message);
+        }
+        catch (MessagingException e)
+        {
+            return null;
+        }
+
+        if (encodedText.length() == 0)
+        {
+            Log.w(K9.LOG_TAG, "Missing encoded text in encoded word: '" + body.substring(begin, end) + "'");
+            return null;
+        }
+
+        if (encoding.equalsIgnoreCase("Q"))
+        {
+            return DecoderUtil.decodeQ(encodedText, charset);
+        }
+        else if (encoding.equalsIgnoreCase("B"))
+        {
+            return DecoderUtil.decodeB(encodedText, charset);
+        }
+        else
+        {
+            Log.w(K9.LOG_TAG, "Warning: Unknown encoding in encoded word '" + body.substring(begin, end) + "'");
+            return null;
+        }
+    }
+}
diff --git a/src/com/fsck/k9/mail/internet/MimeMessage.java b/src/com/fsck/k9/mail/internet/MimeMessage.java
index dc04b1044..a5029baaa 100644
--- a/src/com/fsck/k9/mail/internet/MimeMessage.java
+++ b/src/com/fsck/k9/mail/internet/MimeMessage.java
@@ -257,7 +257,7 @@ public class MimeMessage extends Message
     @Override
     public String getSubject()
     {
-        return MimeUtility.unfoldAndDecode(getFirstHeader("Subject"));
+        return MimeUtility.unfoldAndDecode(getFirstHeader("Subject"), this);
     }
 
     @Override
diff --git a/src/com/fsck/k9/mail/internet/MimeUtility.java b/src/com/fsck/k9/mail/internet/MimeUtility.java
index 6b46a51cb..bf5b0182e 100644
--- a/src/com/fsck/k9/mail/internet/MimeUtility.java
+++ b/src/com/fsck/k9/mail/internet/MimeUtility.java
@@ -7,7 +7,6 @@ import com.fsck.k9.K9;
 import com.fsck.k9.mail.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mime4j.decoder.Base64InputStream;
-import org.apache.james.mime4j.decoder.DecoderUtil;
 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
 
 import java.io.IOException;
@@ -892,17 +891,28 @@ public class MimeUtility
     }
 
     public static String decode(String s)
+    {
+        return decode(s, null);
+    }
+
+    public static String decode(String s, Message message)
     {
         if (s == null)
         {
             return null;
         }
-        return DecoderUtil.decodeEncodedWords(s);
+
+        return DecoderUtil.decodeEncodedWords(s, message);
     }
 
     public static String unfoldAndDecode(String s)
     {
-        return decode(unfold(s));
+        return unfoldAndDecode(s, null);
+    }
+
+    public static String unfoldAndDecode(String s, Message message)
+    {
+        return decode(unfold(s), message);
     }
 
     // TODO implement proper foldAndEncode
@@ -1271,7 +1281,7 @@ public class MimeUtility
         return null;
     }
 
-    private static String fixupCharset(String charset, Message message) throws MessagingException
+    public static String fixupCharset(String charset, Message message) throws MessagingException
     {
         if (charset == null || "0".equals(charset))
             charset = "US-ASCII"; // No encoding, so use us-ascii, which is the standard.
@@ -1382,7 +1392,7 @@ public class MimeUtility
         return null;
     }
 
-    private static String readToString(InputStream in, String charset) throws IOException
+    public static String readToString(InputStream in, String charset) throws IOException
     {
         boolean isIphoneString = false;
 