diff --git a/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java b/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java
new file mode 100644
index 000000000..1c04eb632
--- /dev/null
+++ b/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java
@@ -0,0 +1,81 @@
+package com.fsck.k9.helper;
+
+
+import org.htmlcleaner.CleanerProperties;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.HtmlSerializer;
+import org.htmlcleaner.SimpleHtmlSerializer;
+import org.htmlcleaner.TagNode;
+
+
+/**
+ * Static utility that runs HTML through HtmlCleaner and strips every
+ * {@code <meta>} element whose {@code http-equiv} attribute is "refresh".
+ */
+public class HtmlSanitizer {
+    private static final HtmlCleaner HTML_CLEANER;
+    private static final HtmlSerializer HTML_SERIALIZER;
+
+    static {
+        // The cleaner and the serializer are built from one shared configuration.
+        CleanerProperties sharedProperties = createCleanerProperties();
+        HTML_CLEANER = new HtmlCleaner(sharedProperties);
+        HTML_SERIALIZER = new SimpleHtmlSerializer(sharedProperties);
+    }
+
+
+    /** Utility class; only the static members are meant to be used. */
+    private HtmlSanitizer() {}
+
+    /**
+     * Parses {@code html}, removes all meta refresh elements from the resulting tree and
+     * serializes the tree back to a string.
+     *
+     * @param html the HTML markup to sanitize
+     * @return the serialized markup without meta refresh elements
+     */
+    public static String sanitize(String html) {
+        TagNode document = HTML_CLEANER.clean(html);
+        removeMetaRefresh(document);
+        return HTML_SERIALIZER.getAsString(document, "UTF8");
+    }
+
+    /**
+     * Builds the HtmlCleaner configuration used by both the cleaner and the serializer.
+     *
+     * @return a freshly created and configured {@link CleanerProperties} instance
+     */
+    private static CleanerProperties createCleanerProperties() {
+        CleanerProperties settings = new CleanerProperties();
+
+        // See http://htmlcleaner.sourceforge.net/parameters.php for descriptions
+        settings.setNamespacesAware(false);
+        settings.setAdvancedXmlEscape(false);
+        settings.setOmitXmlDeclaration(true);
+        settings.setOmitDoctypeDeclaration(false);
+        settings.setTranslateSpecialEntities(false);
+        settings.setRecognizeUnicodeChars(false);
+
+        return settings;
+    }
+
+    /**
+     * Detaches every {@code <meta>} element found through
+     * {@code getElementListByName("meta", true)} whose {@code http-equiv} value equals
+     * "refresh" after trimming, ignoring case.
+     *
+     * @param rootNode root of the parsed document; modified in place
+     */
+    private static void removeMetaRefresh(TagNode rootNode) {
+        for (TagNode metaTag : rootNode.getElementListByName("meta", true)) {
+            String httpEquiv = metaTag.getAttributeByName("http-equiv");
+            if (httpEquiv == null) {
+                // Without an http-equiv value there is nothing to compare against "refresh".
+                continue;
+            }
+
+            if ("refresh".equalsIgnoreCase(httpEquiv.trim())) {
+                metaTag.removeFromTree();
+            }
+        }
+    }
+}
diff --git a/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java b/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
index 
3198d4723..0f64194e2 100644
--- a/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
+++ b/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
@@ -11,6 +11,8 @@ import android.widget.Toast;
 import com.fsck.k9.K9;
 import com.fsck.k9.R;
 import com.fsck.k9.helper.HtmlConverter;
+import com.fsck.k9.helper.HtmlSanitizer;
+
 
 public class MessageWebView extends RigidWebView {
 
@@ -123,7 +125,9 @@ public class MessageWebView extends RigidWebView {
         }
         content += HtmlConverter.cssStylePre();
         content += "" + text + "";
-        loadDataWithBaseURL("http://", content, "text/html", "utf-8", null);
+
+        String sanitizedContent = HtmlSanitizer.sanitize(content);
+        loadDataWithBaseURL("http://", sanitizedContent, "text/html", "utf-8", null);
         resumeTimers();
     }
 
diff --git a/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java b/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java
new file mode 100644
index 000000000..ccff81412
--- /dev/null
+++ b/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java
@@ -0,0 +1,128 @@
+package com.fsck.k9.helper;
+
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+
+/**
+ * Tests for {@link HtmlSanitizer#sanitize(String)} covering removal of meta refresh elements.
+ *
+ * NOTE(review): several string literals below are empty where the test names suggest HTML
+ * markup -- presumably the markup was stripped before this patch text was captured; confirm
+ * against the repository before relying on these inputs and expected values.
+ */
+public class HtmlSanitizerTest {
+    @Test
+    public void shouldRemoveMetaRefreshInHead() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshBetweenHeadAndBody() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshInBody() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithUpperCaseAttributeValue() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithMixedCaseAttributeValue() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithoutQuotesAroundAttributeValue() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithSpacesInAttributeValue() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMultipleMetaRefreshTags() {
+        String input = "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("Message", sanitized);
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshButKeepOtherMetaTags() {
+        String input = "" +
+                "" +
+                "" +
+                "" +
+                "" +
+                "Message" +
+                "";
+
+        String sanitized = HtmlSanitizer.sanitize(input);
+
+        assertEquals("" +
+                "Message", sanitized);
+    }
+}