package com.fsck.k9.mail.internet;
import android.util.Log;
import com.fsck.k9.K9;
import com.fsck.k9.mail.Body;
import com.fsck.k9.mail.BodyPart;
import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;
import com.fsck.k9.mail.Multipart;
import com.fsck.k9.mail.Part;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static com.fsck.k9.mail.internet.CharsetSupport.fixupCharset;
import static com.fsck.k9.mail.internet.MimeUtility.getHeaderParameter;
import static com.fsck.k9.mail.internet.Viewable.Alternative;
import static com.fsck.k9.mail.internet.Viewable.Textual;
public class MessageExtractor {
public static String getTextFromPart(Part part) {
try {
if ((part != null) && (part.getBody() != null)) {
final Body body = part.getBody();
if (body instanceof TextBody) {
return ((TextBody)body).getText();
}
final String mimeType = part.getMimeType();
if ((mimeType != null) && MimeUtility.mimeTypeMatches(mimeType, "text/*")) {
/*
* We've got a text part, so let's see if it needs to be processed further.
*/
String charset = getHeaderParameter(part.getContentType(), "charset");
/*
* determine the charset from HTML message.
*/
if (mimeType.equalsIgnoreCase("text/html") && charset == null) {
InputStream in = part.getBody().getInputStream();
try {
byte[] buf = new byte[256];
in.read(buf, 0, buf.length);
String str = new String(buf, "US-ASCII");
if (str.isEmpty()) {
return "";
}
Pattern p = Pattern.compile("", Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher(str);
if (m.find()) {
charset = m.group(1);
}
} finally {
try {
if (in instanceof BinaryTempFileBody.BinaryTempFileBodyInputStream) {
/*
* If this is a BinaryTempFileBodyInputStream, calling close()
* will delete the file. But we can't let that happen because
* the file needs to be opened again by the code a few lines
* down.
*/
((BinaryTempFileBody.BinaryTempFileBodyInputStream) in).closeWithoutDeleting();
} else {
in.close();
}
} catch (Exception e) { /* ignore */ }
}
}
charset = fixupCharset(charset, getMessageFromPart(part));
/*
* Now we read the part into a buffer for further processing. Because
* the stream is now wrapped we'll remove any transfer encoding at this point.
*/
InputStream in = part.getBody().getInputStream();
try {
String text = CharsetSupport.readToString(in, charset);
// Replace the body with a TextBody that already contains the decoded text
part.setBody(new TextBody(text));
return text;
} finally {
try {
/*
* This time we don't care if it's a BinaryTempFileBodyInputStream. We
* replaced the body with a TextBody instance and hence don't need the
* file anymore.
*/
in.close();
} catch (IOException e) { /* Ignore */ }
}
}
}
} catch (OutOfMemoryError oom) {
/*
* If we are not able to process the body there's nothing we can do about it. Return
* null and let the upper layers handle the missing content.
*/
Log.e(K9.LOG_TAG, "Unable to getTextFromPart " + oom.toString());
} catch (Exception e) {
/*
* If we are not able to process the body there's nothing we can do about it. Return
* null and let the upper layers handle the missing content.
*/
Log.e(K9.LOG_TAG, "Unable to getTextFromPart", e);
}
return null;
}
/**
* Traverse the MIME tree of a message an extract viewable parts.
*
* @param part
* The message part to start from.
* @param attachments
* A list that will receive the parts that are considered attachments.
*
* @return A list of {@link Viewable}s.
*
* @throws MessagingException
* In case of an error.
*/
public static List getViewables(Part part, List attachments) throws MessagingException {
List viewables = new ArrayList();
Body body = part.getBody();
if (body instanceof Multipart) {
Multipart multipart = (Multipart) body;
if (part.getMimeType().equalsIgnoreCase("multipart/alternative")) {
/*
* For multipart/alternative parts we try to find a text/plain and a text/html
* child. Everything else we find is put into 'attachments'.
*/
List text = findTextPart(multipart, true);
Set knownTextParts = getParts(text);
List html = findHtmlPart(multipart, knownTextParts, attachments, true);
if (!text.isEmpty() || !html.isEmpty()) {
Alternative alternative = new Alternative(text, html);
viewables.add(alternative);
}
} else {
// For all other multipart parts we recurse to grab all viewable children.
for (Part bodyPart : multipart.getBodyParts()) {
viewables.addAll(getViewables(bodyPart, attachments));
}
}
} else if (body instanceof Message &&
!("attachment".equalsIgnoreCase(part.getContentDisposition()))) {
/*
* We only care about message/rfc822 parts whose Content-Disposition header has a value
* other than "attachment".
*/
Message message = (Message) body;
// We add the Message object so we can extract the filename later.
viewables.add(new Viewable.MessageHeader(part, message));
// Recurse to grab all viewable parts and attachments from that message.
viewables.addAll(getViewables(message, attachments));
} else if (isPartTextualBody(part)) {
/*
* Save text/plain and text/html
*/
String mimeType = part.getMimeType();
if (mimeType.equalsIgnoreCase("text/plain")) {
Viewable.Text text = new Viewable.Text(part);
viewables.add(text);
} else {
Viewable.Html html = new Viewable.Html(part);
viewables.add(html);
}
} else {
// Everything else is treated as attachment.
attachments.add(part);
}
return viewables;
}
public static Set getTextParts(Part part) throws MessagingException {
List attachments = new ArrayList();
return getParts(getViewables(part, attachments));
}
private static Message getMessageFromPart(Part part) {
while (part != null) {
if (part instanceof Message)
return (Message)part;
if (!(part instanceof BodyPart))
return null;
Multipart multipart = ((BodyPart)part).getParent();
if (multipart == null)
return null;
part = multipart.getParent();
}
return null;
}
/**
* Search the children of a {@link Multipart} for {@code text/plain} parts.
*
* @param multipart The {@code Multipart} to search through.
* @param directChild If {@code true}, this method will return after the first {@code text/plain} was
* found.
*
* @return A list of {@link Viewable.Text} viewables.
*
* @throws MessagingException
* In case of an error.
*/
private static List findTextPart(Multipart multipart, boolean directChild)
throws MessagingException {
List viewables = new ArrayList();
for (Part part : multipart.getBodyParts()) {
Body body = part.getBody();
if (body instanceof Multipart) {
Multipart innerMultipart = (Multipart) body;
/*
* Recurse to find text parts. Since this is a multipart that is a child of a
* multipart/alternative we don't want to stop after the first text/plain part
* we find. This will allow to get all text parts for constructions like this:
*
* 1. multipart/alternative
* 1.1. multipart/mixed
* 1.1.1. text/plain
* 1.1.2. text/plain
* 1.2. text/html
*/
List textViewables = findTextPart(innerMultipart, false);
if (!textViewables.isEmpty()) {
viewables.addAll(textViewables);
if (directChild) {
break;
}
}
} else if (isPartTextualBody(part) && part.getMimeType().equalsIgnoreCase("text/plain")) {
Viewable.Text text = new Viewable.Text(part);
viewables.add(text);
if (directChild) {
break;
}
}
}
return viewables;
}
/**
* Search the children of a {@link Multipart} for {@code text/html} parts.
* Every part that is not a {@code text/html} we want to display, we add to 'attachments'.
*
* @param multipart The {@code Multipart} to search through.
* @param knownTextParts A set of {@code text/plain} parts that shouldn't be added to 'attachments'.
* @param attachments A list that will receive the parts that are considered attachments.
* @param directChild If {@code true}, this method will add all {@code text/html} parts except the first
* found to 'attachments'.
*
* @return A list of {@link Viewable.Text} viewables.
*
* @throws MessagingException In case of an error.
*/
private static List findHtmlPart(Multipart multipart, Set knownTextParts,
List attachments, boolean directChild) throws MessagingException {
List viewables = new ArrayList();
boolean partFound = false;
for (Part part : multipart.getBodyParts()) {
Body body = part.getBody();
if (body instanceof Multipart) {
Multipart innerMultipart = (Multipart) body;
if (directChild && partFound) {
// We already found our text/html part. Now we're only looking for attachments.
findAttachments(innerMultipart, knownTextParts, attachments);
} else {
/*
* Recurse to find HTML parts. Since this is a multipart that is a child of a
* multipart/alternative we don't want to stop after the first text/html part
* we find. This will allow to get all text parts for constructions like this:
*
* 1. multipart/alternative
* 1.1. text/plain
* 1.2. multipart/mixed
* 1.2.1. text/html
* 1.2.2. text/html
* 1.3. image/jpeg
*/
List htmlViewables = findHtmlPart(innerMultipart, knownTextParts,
attachments, false);
if (!htmlViewables.isEmpty()) {
partFound = true;
viewables.addAll(htmlViewables);
}
}
} else if (!(directChild && partFound) && isPartTextualBody(part) &&
part.getMimeType().equalsIgnoreCase("text/html")) {
Viewable.Html html = new Viewable.Html(part);
viewables.add(html);
partFound = true;
} else if (!knownTextParts.contains(part)) {
// Only add this part as attachment if it's not a viewable text/plain part found
// earlier.
attachments.add(part);
}
}
return viewables;
}
/**
* Traverse the MIME tree and add everything that's not a known text part to 'attachments'.
*
* @param multipart
* The {@link Multipart} to start from.
* @param knownTextParts
* A set of known text parts we don't want to end up in 'attachments'.
* @param attachments
* A list that will receive the parts that are considered attachments.
*/
private static void findAttachments(Multipart multipart, Set knownTextParts,
List attachments) {
for (Part part : multipart.getBodyParts()) {
Body body = part.getBody();
if (body instanceof Multipart) {
Multipart innerMultipart = (Multipart) body;
findAttachments(innerMultipart, knownTextParts, attachments);
} else if (!knownTextParts.contains(part)) {
attachments.add(part);
}
}
}
/**
* Build a set of message parts for fast lookups.
*
* @param viewables
* A list of {@link Viewable}s containing references to the message parts to include in
* the set.
*
* @return The set of viewable {@code Part}s.
*
* @see MimeUtility#findHtmlPart(Multipart, Set, List, boolean)
* @see MimeUtility#findAttachments(Multipart, Set, List)
*/
private static Set getParts(List viewables) {
Set parts = new HashSet();
for (Viewable viewable : viewables) {
if (viewable instanceof Textual) {
parts.add(((Textual) viewable).getPart());
} else if (viewable instanceof Alternative) {
Alternative alternative = (Alternative) viewable;
parts.addAll(getParts(alternative.getText()));
parts.addAll(getParts(alternative.getHtml()));
}
}
return parts;
}
private static Boolean isPartTextualBody(Part part) throws MessagingException {
String disposition = part.getDisposition();
String dispositionType = null;
String dispositionFilename = null;
if (disposition != null) {
dispositionType = MimeUtility.getHeaderParameter(disposition, null);
dispositionFilename = MimeUtility.getHeaderParameter(disposition, "filename");
}
/*
* A best guess that this part is intended to be an attachment and not inline.
*/
boolean attachment = ("attachment".equalsIgnoreCase(dispositionType) || (dispositionFilename != null));
if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/html"))) {
return true;
}
/*
* If the part is plain text and it got this far it's part of a
* mixed (et al) and should be rendered inline.
*/
else if ((!attachment) && (part.getMimeType().equalsIgnoreCase("text/plain"))) {
return true;
}
/*
* Finally, if it's nothing else we will include it as an attachment.
*/
else {
return false;
}
}
}