Add MessagePreviewExtractor

This commit is contained in:
cketti 2015-01-21 00:56:52 +01:00
parent 23c9398c03
commit c9b2ec533c
4 changed files with 312 additions and 49 deletions

View File

@ -150,55 +150,6 @@ public abstract class Message implements Part, CompositeBody {
// Abstract: every concrete Message subclass has to supply its own implementation.
// NOTE(review): the unit of the returned size (presumably bytes) is not visible here - confirm.
public abstract int getSize();
/**
 * Builds a one-line summary of a plain text message body, suitable for showing in a message list.
 *
 * <p>Only the first 8192 characters of the input are examined. Signatures (delimited by
 * {@code "-- \n"}), lines of dashes, quoted lines, quote headers ("On ... wrote:" and any line
 * ending in {@code word:}) and horizontal rules are removed, URLs are replaced by {@code "..."},
 * and all newlines and runs of whitespace are collapsed into single spaces.</p>
 *
 * <p>The result is at most 512 characters long. Both cuts are made on a code point boundary so a
 * UTF-16 surrogate pair (e.g. an emoji) is never split in half; in that case the result is one
 * character shorter than the limit.</p>
 *
 * @param text
 *         The plain text message body. May be {@code null}.
 *
 * @return The preview text, or {@code null} if {@code text} was {@code null}.
 */
public static String calculateContentPreview(String text) {
    if (text == null) {
        return null;
    }

    // Only look at the first 8k of a message when calculating the preview.
    // This should avoid unnecessary memory usage on large messages.
    text = truncateAtCodePointBoundary(text, 8192);

    // Remove (correctly delimited by '-- \n') signatures
    text = text.replaceAll("(?ms)^-- [\\r\\n]+.*", "");
    // Try to remove lines of dashes in the preview
    text = text.replaceAll("(?m)^----.*?$", "");
    // Remove quoted text from the preview
    text = text.replaceAll("(?m)^[#>].*$", "");
    // Remove a common quote header from the preview
    text = text.replaceAll("(?m)^On .*wrote.?$", "");
    // Remove a more generic quote header from the preview
    text = text.replaceAll("(?m)^.*\\w+:$", "");
    // Remove horizontal rules
    text = text.replaceAll("\\s*([-=_]{30,}+)\\s*", " ");
    // URLs in the preview should just be shown as "..." - they're not
    // clickable and they usually overwhelm the preview
    text = text.replaceAll("https?://\\S+", "...");
    // Don't show newlines in the preview
    text = text.replaceAll("(\\r|\\n)+", " ");
    // Collapse whitespace in the preview
    text = text.replaceAll("\\s+", " ");
    // Remove any whitespace at the beginning and end of the string
    text = text.trim();

    return truncateAtCodePointBoundary(text, 512);
}

/**
 * Returns {@code text} cut down to at most {@code maxLength} chars without splitting a
 * surrogate pair. Strings that already fit are returned unchanged.
 */
private static String truncateAtCodePointBoundary(String text, int maxLength) {
    if (text.length() <= maxLength) {
        return text;
    }

    int end = maxLength;
    // text.charAt(end) is valid here because text.length() > maxLength.
    if (end > 0 && Character.isHighSurrogate(text.charAt(end - 1)) &&
            Character.isLowSurrogate(text.charAt(end))) {
        // The cut would fall between the two halves of a pair; drop the whole pair instead.
        end--;
    }

    return text.substring(0, end);
}
// Empty default implementation: in this class the call does nothing and trashFolderName is ignored.
// NOTE(review): presumably subclasses that support deletion override this - confirm.
public void delete(String trashFolderName) throws MessagingException {}
/*

View File

@ -0,0 +1,141 @@
package com.fsck.k9.mailstore;
import android.content.Context;
import android.support.test.InstrumentationRegistry;
import android.support.test.runner.AndroidJUnit4;
import com.fsck.k9.mail.MessagingException;
import com.fsck.k9.mail.internet.MimeBodyPart;
import com.fsck.k9.mail.internet.MimeMessage;
import com.fsck.k9.mail.internet.MimeMultipart;
import com.fsck.k9.mail.internet.TextBody;
import org.junit.Test;
import org.junit.runner.RunWith;
import static org.junit.Assert.assertEquals;
/**
 * Instrumentation tests for {@link MessagePreviewExtractor#extractPreview(Context, MimeMessage)}.
 *
 * <p>Each test assembles a small {@link MimeMessage} in memory and checks the preview string that
 * is produced for it.</p>
 */
@RunWith(AndroidJUnit4.class)
public class MessagePreviewExtractorTest {

    @Test
    public void shouldExtractPreviewFromSinglePlainTextPart() throws MessagingException {
        MimeMessage message = createSinglePartMessage("text/plain", "Message text ");

        assertEquals("Message text", previewOf(message));
    }

    @Test
    public void shouldLimitPreviewTo512Characters() throws MessagingException {
        // 600 characters of text, i.e. more than fits into a preview.
        String longText = "10--------20--------30--------40--------50--------" +
                "60--------70--------80--------90--------100-------" +
                "110-------120-------130-------140-------150-------" +
                "160-------170-------180-------190-------200-------" +
                "210-------220-------230-------240-------250-------" +
                "260-------270-------280-------290-------300-------" +
                "310-------320-------330-------340-------350-------" +
                "360-------370-------380-------390-------400-------" +
                "410-------420-------430-------440-------450-------" +
                "460-------470-------480-------490-------500-------" +
                "510-------520-------530-------540-------550-------" +
                "560-------570-------580-------590-------600-------";
        MimeMessage message = createSinglePartMessage("text/plain", longText);

        String preview = previewOf(message);

        assertEquals(512, preview.length());
        // The last character of a shortened preview is an ellipsis.
        assertEquals('…', preview.charAt(511));
    }

    @Test
    public void shouldExtractPreviewFromSingleHtmlPart() throws MessagingException {
        MimeMessage message = createSinglePartMessage("text/html",
                "<html><body><pre>Message text</pre></body></html>");

        assertEquals("Message text", previewOf(message));
    }

    @Test
    public void shouldExtractPreviewFromMultipartAlternative() throws MessagingException {
        MimeMessage message = new MimeMessage();
        MimeMultipart multipart = attachMultipart(message, "alternative");
        addTextPart(multipart, "text", "text/plain");
        addTextPart(multipart, "<html><body>html</body></html>", "text/html");

        // Only the plain text alternative shows up in the preview.
        assertEquals("text", previewOf(message));
    }

    @Test
    public void shouldExtractPreviewFromMultipartMixed() throws MessagingException {
        MimeMessage message = new MimeMessage();
        MimeMultipart multipart = attachMultipart(message, "mixed");
        addTextPart(multipart, "text", "text/plain");
        addTextPart(multipart, "<html><body>html</body></html>", "text/html");

        // Both parts show up, separated by a divider.
        assertEquals("text / html", previewOf(message));
    }

    @Test
    public void shouldExtractPreviewFromMultipartMixedWithInnerMesssage() throws MessagingException {
        MimeMessage message = new MimeMessage();
        MimeMultipart multipart = attachMultipart(message, "mixed");
        addTextPart(multipart, "text", "text/plain");

        MimeMessage innerMessage = new MimeMessage();
        innerMessage.addHeader("Content-Type", "text/html");
        innerMessage.addHeader("Subject", "inner message");
        innerMessage.setBody(new TextBody("<html><body>ht&#109;l</body></html>"));
        multipart.addBodyPart(new MimeBodyPart(innerMessage, "message/rfc822"));

        assertEquals("text / Includes message titled \"inner message\" containing: html",
                previewOf(message));
    }

    /**
     * Creates a message with the given Content-Type header whose body is a single text body.
     */
    private static MimeMessage createSinglePartMessage(String contentType, String content)
            throws MessagingException {
        MimeMessage message = new MimeMessage();
        message.addHeader("Content-Type", contentType);
        message.setBody(new TextBody(content));
        return message;
    }

    /**
     * Adds a "multipart/&lt;subType&gt;" Content-Type header to the message and sets a new, empty
     * multipart of that sub type as its body.
     */
    private static MimeMultipart attachMultipart(MimeMessage message, String subType)
            throws MessagingException {
        message.addHeader("Content-Type", "multipart/" + subType);
        MimeMultipart multipart = new MimeMultipart();
        multipart.setSubType(subType);
        message.setBody(multipart);
        return multipart;
    }

    /**
     * Wraps the content in a body part of the given MIME type and appends it to the multipart.
     */
    private static void addTextPart(MimeMultipart multipart, String content, String mimeType)
            throws MessagingException {
        TextBody body = new TextBody(content);
        multipart.addBodyPart(new MimeBodyPart(body, mimeType));
    }

    /**
     * Runs the preview extraction for the message using the target context of the instrumentation.
     */
    private static String previewOf(MimeMessage message) throws MessagingException {
        Context context = InstrumentationRegistry.getTargetContext();
        return MessagePreviewExtractor.extractPreview(context, message);
    }
}

View File

@ -0,0 +1,168 @@
package com.fsck.k9.mailstore;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import android.content.Context;
import android.text.TextUtils;
import com.fsck.k9.R;
import com.fsck.k9.helper.HtmlConverter;
import com.fsck.k9.mail.Message;
import com.fsck.k9.mail.MessagingException;
import com.fsck.k9.mail.Part;
import com.fsck.k9.mail.internet.MessageExtractor;
import com.fsck.k9.mail.internet.Viewable;
import com.fsck.k9.mail.internet.Viewable.Alternative;
import com.fsck.k9.mail.internet.Viewable.Html;
import com.fsck.k9.mail.internet.Viewable.MessageHeader;
import com.fsck.k9.mail.internet.Viewable.Textual;
/**
 * Creates the short text preview of a message that is shown in message lists.
 *
 * <p>The preview is assembled from the viewable parts of a message. Text of separate parts is
 * joined with {@code " / "}, an embedded message is introduced by a localized "Includes message
 * ..." phrase, and the result is limited to {@link #MAX_PREVIEW_LENGTH} characters. A preview that
 * reached the limit ends with an ellipsis character.</p>
 */
public class MessagePreviewExtractor {
    private static final int MAX_PREVIEW_LENGTH = 512;
    private static final int MAX_CHARACTERS_CHECKED_FOR_PREVIEW = 8192;

    // The regular expressions are compiled once instead of on every String.replaceAll() call.

    /** A (correctly delimited by '-- \n') signature and everything after it. */
    private static final Pattern SIGNATURE = Pattern.compile("(?ms)^-- [\\r\\n]+.*");
    /** A line starting with four dashes. */
    private static final Pattern DASH_LINE = Pattern.compile("(?m)^----.*?$");
    /** A quoted line, i.e. one starting with '#' or '>'. */
    private static final Pattern QUOTED_LINE = Pattern.compile("(?m)^[#>].*$");
    /** The common "On ... wrote:" quote header. */
    private static final Pattern COMMON_QUOTE_HEADER = Pattern.compile("(?m)^On .*wrote.?$");
    /** A more generic quote header: any line ending in a word followed by a colon. */
    private static final Pattern GENERIC_QUOTE_HEADER = Pattern.compile("(?m)^.*\\w+:$");
    /** A horizontal rule made of at least 30 '-', '=' or '_' characters. */
    private static final Pattern HORIZONTAL_RULE = Pattern.compile("\\s*([-=_]{30,}+)\\s*");
    /** An http or https URL. */
    private static final Pattern URL = Pattern.compile("https?://\\S+");
    /** A run of line breaks. */
    private static final Pattern LINE_BREAKS = Pattern.compile("(\\r|\\n)+");
    /** A run of whitespace. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");


    /**
     * Extracts the preview text for a message.
     *
     * @param context
     *         Used to load the localized strings describing embedded messages.
     * @param message
     *         The message to create the preview for.
     *
     * @return The preview text; at most {@link #MAX_PREVIEW_LENGTH} characters long.
     *
     * @throws MessagingException
     *         If anything goes wrong while extracting the viewable parts or building the preview.
     *         The original exception is available as the cause.
     */
    public static String extractPreview(Context context, Message message) throws MessagingException {
        try {
            // getViewables() also collects attachments; they are not needed for the preview.
            List<Part> attachments = new ArrayList<Part>();
            List<Viewable> viewables = MessageExtractor.getViewables(message, attachments);

            return buildPreview(context, viewables);
        } catch (Exception e) {
            throw new MessagingException("Couldn't extract viewable parts", e);
        }
    }

    /**
     * Concatenates the preview text of all viewables until the maximum preview length is reached.
     */
    private static String buildPreview(Context context, List<Viewable> viewables) throws MessagingException {
        StringBuilder text = new StringBuilder();
        boolean divider = false;

        for (Viewable viewable : viewables) {
            if (viewable instanceof Textual) {
                appendTextual(text, (Textual) viewable, divider);
                divider = true;
            } else if (viewable instanceof MessageHeader) {
                appendMessagePreview(context, text, (MessageHeader) viewable, divider);
                // The header text ends in "containing: ", so the content of the inner message
                // follows directly, without a divider.
                divider = false;
            } else if (viewable instanceof Alternative) {
                appendAlternative(text, (Alternative) viewable, divider);
                divider = true;
            }

            if (hasMaxPreviewLengthBeenReached(text)) {
                break;
            }
        }

        if (hasMaxPreviewLengthBeenReached(text)) {
            // Make room for the ellipsis. The cut must not separate the two halves of a surrogate
            // pair, otherwise the preview would contain a broken character.
            text.setLength(codePointSafeLength(text, MAX_PREVIEW_LENGTH - 1));
            text.append('…');
        }

        return text.toString();
    }

    /**
     * Appends a viewable that is either a {@link Textual} or an {@link Alternative}.
     *
     * @throws IllegalArgumentException
     *         If the viewable is of any other type.
     */
    private static void appendText(StringBuilder text, Viewable viewable, boolean prependDivider) {
        if (viewable instanceof Textual) {
            appendTextual(text, (Textual) viewable, prependDivider);
        } else if (viewable instanceof Alternative) {
            appendAlternative(text, (Alternative) viewable, prependDivider);
        } else {
            throw new IllegalArgumentException("Unknown Viewable");
        }
    }

    /**
     * Appends the stripped text of a single text part. HTML parts are converted to plain text
     * first; a part without text contributes an empty string.
     */
    private static void appendTextual(StringBuilder text, Textual textual, boolean prependDivider) {
        Part part = textual.getPart();

        if (prependDivider) {
            appendDivider(text);
        }

        String textFromPart = MessageExtractor.getTextFromPart(part);
        if (textFromPart == null) {
            textFromPart = "";
        } else if (textual instanceof Html) {
            textFromPart = HtmlConverter.htmlToText(textFromPart);
        }

        text.append(stripTextForPreview(textFromPart));
    }

    /**
     * Appends one representation of an alternative: the text version if there is one, the HTML
     * version otherwise.
     */
    private static void appendAlternative(StringBuilder text, Alternative alternative, boolean prependDivider) {
        List<Viewable> textAlternative = alternative.getText().isEmpty() ?
                alternative.getHtml() : alternative.getText();

        boolean divider = prependDivider;
        for (Viewable textViewable : textAlternative) {
            appendText(text, textViewable, divider);
            divider = true;

            if (hasMaxPreviewLengthBeenReached(text)) {
                break;
            }
        }
    }

    /**
     * Appends the localized phrase announcing an embedded message, including its subject if it
     * has one.
     */
    private static void appendMessagePreview(Context context, StringBuilder text, MessageHeader messageHeader,
            boolean divider) {
        if (divider) {
            appendDivider(text);
        }

        String subject = messageHeader.getMessage().getSubject();
        if (TextUtils.isEmpty(subject)) {
            text.append(context.getString(R.string.preview_untitled_inner_message));
        } else {
            text.append(context.getString(R.string.preview_inner_message, subject));
        }
    }

    private static void appendDivider(StringBuilder text) {
        text.append(" / ");
    }

    /**
     * Reduces plain text to what is worth showing in a preview: signatures, quoted text, quote
     * headers and horizontal rules are removed, URLs are replaced by "...", and all whitespace is
     * collapsed to single spaces.
     *
     * @return The stripped text, at most {@link #MAX_PREVIEW_LENGTH} characters long. Never
     *         {@code null}.
     */
    private static String stripTextForPreview(String text) {
        if (text == null) {
            return "";
        }

        // Only look at the first 8k of a message when calculating the preview.
        // This should avoid unnecessary memory usage on large messages.
        if (text.length() > MAX_CHARACTERS_CHECKED_FOR_PREVIEW) {
            text = text.substring(0, codePointSafeLength(text, MAX_CHARACTERS_CHECKED_FOR_PREVIEW));
        }

        text = SIGNATURE.matcher(text).replaceAll("");
        text = DASH_LINE.matcher(text).replaceAll("");
        text = QUOTED_LINE.matcher(text).replaceAll("");
        text = COMMON_QUOTE_HEADER.matcher(text).replaceAll("");
        text = GENERIC_QUOTE_HEADER.matcher(text).replaceAll("");
        text = HORIZONTAL_RULE.matcher(text).replaceAll(" ");

        // URLs in the preview should just be shown as "..." - they're not
        // clickable and they usually overwhelm the preview
        text = URL.matcher(text).replaceAll("...");

        // Don't show newlines in the preview and collapse all whitespace
        text = LINE_BREAKS.matcher(text).replaceAll(" ");
        text = WHITESPACE.matcher(text).replaceAll(" ");

        // Remove any whitespace at the beginning and end of the string.
        text = text.trim();

        if (text.length() <= MAX_PREVIEW_LENGTH) {
            return text;
        }

        return text.substring(0, codePointSafeLength(text, MAX_PREVIEW_LENGTH));
    }

    /**
     * Returns the length {@code text} should be cut to so that it has at most {@code maxLength}
     * chars and no surrogate pair is split. This is {@code maxLength - 1} if the char at
     * {@code maxLength - 1} is the first half of a pair that continues at {@code maxLength}, and
     * {@code maxLength} otherwise.
     */
    private static int codePointSafeLength(CharSequence text, int maxLength) {
        boolean cutsSurrogatePair = maxLength > 0 && maxLength < text.length() &&
                Character.isHighSurrogate(text.charAt(maxLength - 1)) &&
                Character.isLowSurrogate(text.charAt(maxLength));

        return cutsSurrogatePair ? maxLength - 1 : maxLength;
    }

    private static boolean hasMaxPreviewLengthBeenReached(StringBuilder text) {
        return text.length() >= MAX_PREVIEW_LENGTH;
    }
}

View File

@ -1124,4 +1124,7 @@ Please submit bug reports, contribute new features and ask questions at
<string name="client_certificate_retrieval_failure">"Failed to retrieve client certificate for alias \"<xliff:g id="alias">%s</xliff:g>\""</string>
<string name="client_certificate_advanced_options">Advanced options</string>
<string name="client_certificate_expired">"Client certificate \"<xliff:g id="certificate_alias">%1$s</xliff:g>\" has expired or is not yet valid (<xliff:g id="exception_message">%2$s</xliff:g>)"</string>
<string name="preview_inner_message">"Includes message titled \"%s\" containing: "</string>
<string name="preview_untitled_inner_message">"Includes untitled message containing: "</string>
</resources>