Support emoji in subjects.

Signed-off-by: HIRANO Takahito <hiranotaka@zng.info>
2025-02-11 04:30:23 -05:00 · 2011-01-14 00:53:19 +00:00 · 2011-01-14 00:53:19 +00:00 · d1a1b12aae
commit d1a1b12aae
parent 94b76ff9d1
3 changed files with 245 additions and 6 deletions
--- a/src/com/fsck/k9/mail/internet/DecoderUtil.java
+++ b/src/com/fsck/k9/mail/internet/DecoderUtil.java
@ -0,0 +1,229 @@
+
+package com.fsck.k9.mail.internet;
+
+import android.util.Log;
+import com.fsck.k9.K9;
+import com.fsck.k9.mail.Message;
+import com.fsck.k9.mail.MessagingException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import org.apache.james.mime4j.decoder.Base64InputStream;
+import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;
+import org.apache.james.mime4j.util.CharsetUtil;
+
+
+/**
+ * Static methods for decoding strings, byte arrays and encoded words.
+ *
+ * This class is copied from the org.apache.james.mime4j.decoder.DecoderUtil class.  It is modified here in order to
+ * decode emoji characters in Subject headers.  How emoji are decoded has to be determined from the sender address,
+ * the mailer and so on, so decoding depends on the message (see the MimeMessage class) that the header belongs to.
+ */
+public class DecoderUtil
+{
+    /**
+     * Decodes the text of an encoded word that uses the 'B' encoding
+     * (described in RFC 2047) found in a header field body.
+     *
+     * @param encodedWord the encoded word to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string, or null if the input cannot be turned into
+     *         US-ASCII bytes or reading the decoded stream fails.
+     */
+    private static String decodeB(String encodedWord, String charset)
+    {
+        try
+        {
+            byte[] asciiBytes = encodedWord.getBytes("US-ASCII");
+            ByteArrayInputStream rawInput = new ByteArrayInputStream(asciiBytes);
+            return MimeUtility.readToString(new Base64InputStream(rawInput), charset);
+        }
+        catch (IOException e)
+        {
+            // UnsupportedEncodingException is an IOException, so this single handler covers both
+            // the byte conversion and the stream read; either failure yields null.
+            return null;
+        }
+    }
+
+    /**
+     * Decodes an encoded word encoded with the 'Q' encoding (described in
+     * RFC 2047) found in a header field body.
+     *
+     * The 'Q' encoding is quoted-printable with one addition: an underscore stands for a space.
+     * Each underscore is therefore rewritten to "=20" before the text is handed to the
+     * quoted-printable decoder.
+     *
+     * @param encodedWord the encoded word to decode.
+     * @param charset the Java charset to use.
+     * @return the decoded string, or null if the text cannot be turned into US-ASCII bytes or
+     *         reading the decoded stream fails.
+     */
+    private static String decodeQ(String encodedWord, String charset)
+    {
+        /*
+         * Replace _ with =20.  The substituted text (not the raw input) is what gets decoded,
+         * so that the underscore-means-space rule actually takes effect.
+         */
+        String quotedPrintable = encodedWord.replace("_", "=20");
+
+        byte[] bytes;
+        try
+        {
+            bytes = quotedPrintable.getBytes("US-ASCII");
+        }
+        catch (UnsupportedEncodingException e)
+        {
+            return null;
+        }
+
+        QuotedPrintableInputStream is = new QuotedPrintableInputStream(new ByteArrayInputStream(bytes));
+        try
+        {
+            return MimeUtility.readToString(is, charset);
+        }
+        catch (IOException e)
+        {
+            return null;
+        }
+    }
+
+    /**
+     * Decodes a string containing encoded words as defined by RFC 2047.
+     * Encoded words in have the form
+     * =?charset?enc?Encoded word?= where enc is either 'Q' or 'q' for
+     * quoted-printable and 'B' or 'b' for Base64.
+     *
+     * ANDROID:  COPIED FROM A NEWER VERSION OF MIME4J
+     *
+     * @param body the string to decode.
+     * @param message the message which has the string.
+     * @return the decoded string.
+     */
+    public static String decodeEncodedWords(String body, Message message)
+    {
+
+        // ANDROID:  Most strings will not include "=?" so a quick test can prevent unneeded
+        // object creation.  This could also be handled via lazy creation of the StringBuilder.
+        if (body.indexOf("=?") == -1)
+        {
+            return body;
+        }
+
+        int previousEnd = 0;
+        boolean previousWasEncoded = false;
+
+        StringBuilder sb = new StringBuilder();
+
+        while (true)
+        {
+            int begin = body.indexOf("=?", previousEnd);
+
+            // ANDROID:  The mime4j original version has an error here.  It gets confused if
+            // the encoded string begins with an '=' (just after "?Q?").  This patch seeks forward
+            // to find the two '?' in the "header", before looking for the final "?=".
+            int endScan = begin + 2;
+            if (begin != -1)
+            {
+                int qm1 = body.indexOf('?', endScan + 2);
+                int qm2 = body.indexOf('?', qm1 + 1);
+                if (qm2 != -1)
+                {
+                    endScan = qm2 + 1;
+                }
+            }
+
+            int end = begin == -1 ? -1 : body.indexOf("?=", endScan);
+            if (end == -1)
+            {
+                if (previousEnd == 0)
+                    return body;
+
+                sb.append(body.substring(previousEnd));
+                return sb.toString();
+            }
+            end += 2;
+
+            String sep = body.substring(previousEnd, begin);
+
+            String decoded = decodeEncodedWord(body, begin, end, message);
+            if (decoded == null)
+            {
+                sb.append(sep);
+                sb.append(body.substring(begin, end));
+            }
+            else
+            {
+                if (!previousWasEncoded || !CharsetUtil.isWhitespace(sep))
+                {
+                    sb.append(sep);
+                }
+                sb.append(decoded);
+            }
+
+            previousEnd = end;
+            previousWasEncoded = decoded != null;
+        }
+    }
+
+    // return null on error
+    private static String decodeEncodedWord(String body, int begin, int end, Message message)
+    {
+        int qm1 = body.indexOf('?', begin + 2);
+        if (qm1 == end - 2)
+            return null;
+
+        int qm2 = body.indexOf('?', qm1 + 1);
+        if (qm2 == end - 2)
+            return null;
+
+        String mimeCharset = body.substring(begin + 2, qm1);
+        String encoding = body.substring(qm1 + 1, qm2);
+        String encodedText = body.substring(qm2 + 1, end - 2);
+
+        String charset;
+        try
+        {
+            charset = MimeUtility.fixupCharset(mimeCharset, message);
+        }
+        catch (MessagingException e)
+        {
+            return null;
+        }
+
+        if (encodedText.length() == 0)
+        {
+            Log.w(K9.LOG_TAG, "Missing encoded text in encoded word: '" + body.substring(begin, end) + "'");
+            return null;
+        }
+
+        if (encoding.equalsIgnoreCase("Q"))
+        {
+            return DecoderUtil.decodeQ(encodedText, charset);
+        }
+        else if (encoding.equalsIgnoreCase("B"))
+        {
+            return DecoderUtil.decodeB(encodedText, charset);
+        }
+        else
+        {
+            Log.w(K9.LOG_TAG, "Warning: Unknown encoding in encoded word '" + body.substring(begin, end) + "'");
+            return null;
+        }
+    }
+}
--- a/src/com/fsck/k9/mail/internet/MimeMessage.java
+++ b/src/com/fsck/k9/mail/internet/MimeMessage.java
@ -257,7 +257,7 @@ public class MimeMessage extends Message
    @Override
    public String getSubject()
    {
-        return MimeUtility.unfoldAndDecode(getFirstHeader("Subject"));
+        return MimeUtility.unfoldAndDecode(getFirstHeader("Subject"), this);
    }

    @Override
--- a/src/com/fsck/k9/mail/internet/MimeUtility.java
+++ b/src/com/fsck/k9/mail/internet/MimeUtility.java
@ -7,7 +7,6 @@ import com.fsck.k9.K9;
 import com.fsck.k9.mail.*;
 import org.apache.commons.io.IOUtils;
 import org.apache.james.mime4j.decoder.Base64InputStream;
-import org.apache.james.mime4j.decoder.DecoderUtil;
 import org.apache.james.mime4j.decoder.QuotedPrintableInputStream;

 import java.io.IOException;
@ -892,17 +891,28 @@ public class MimeUtility
    }

    public static String decode(String s)
+    {
+        return decode(s, null);
+    }
+
+    public static String decode(String s, Message message)
    {
        if (s == null)
        {
            return null;
        }
-        return DecoderUtil.decodeEncodedWords(s);
+
+        return DecoderUtil.decodeEncodedWords(s, message);
    }

    public static String unfoldAndDecode(String s)
    {
-        return decode(unfold(s));
+        return unfoldAndDecode(s, null);
+    }
+
+    public static String unfoldAndDecode(String s, Message message)
+    {
+        return decode(unfold(s), message);
    }

    // TODO implement proper foldAndEncode
@ -1271,7 +1281,7 @@ public class MimeUtility
        return null;
    }

-    private static String fixupCharset(String charset, Message message) throws MessagingException
+    public static String fixupCharset(String charset, Message message) throws MessagingException
    {
        if (charset == null || "0".equals(charset))
            charset = "US-ASCII";  // No encoding, so use us-ascii, which is the standard.
@ -1382,7 +1392,7 @@ public class MimeUtility
        return null;
    }

-    private static String readToString(InputStream in, String charset) throws IOException
+    public static String readToString(InputStream in, String charset) throws IOException
    {
        boolean isIphoneString = false;