From 63abf05776ea04dd6a61661dea2a2177474344e3 Mon Sep 17 00:00:00 2001
From: cketti
Date: Fri, 9 Jan 2015 16:16:00 +0100
Subject: [PATCH] Sanitize HTML to remove meta refresh

Using <meta http-equiv="refresh"> in a HTML message causes WebView to
load the URL in the default browser.
Overriding WebViewClient.shouldOverrideUrlLoading() allows us to cancel
loading this URL. Sadly, I found no way to find out whether the method was
called because of a meta refresh or because the user clicked on a link.
So now we're using HtmlCleaner to parse the HTML and remove all "meta"
elements containing an "http-equiv" attribute with a value of "refresh".
---
NOTE(review): this copy of the patch arrived collapsed onto three lines with
everything that looked like an HTML tag stripped out (including the author
e-mail on the "From:" line, which is still missing). The line structure was
restored from the "+" markers and matches the hunk headers. The HTML markup
inside the Java string literals and in the commit message was reconstructed
from the test method names and the surviving text; the exact attribute values
(refresh URL, the non-refresh meta tag) are a best guess. Confirm against the
upstream commit before applying.

 .../com/fsck/k9/helper/HtmlSanitizer.java     | 54 +++++++++++
 .../java/com/fsck/k9/view/MessageWebView.java |  6 +-
 .../com/fsck/k9/helper/HtmlSanitizerTest.java | 94 +++++++++++++++++++
 3 files changed, 153 insertions(+), 1 deletion(-)
 create mode 100644 k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java
 create mode 100644 tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java

diff --git a/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java b/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java
new file mode 100644
index 000000000..1c04eb632
--- /dev/null
+++ b/k9mail/src/main/java/com/fsck/k9/helper/HtmlSanitizer.java
@@ -0,0 +1,54 @@
+package com.fsck.k9.helper;
+
+
+import org.htmlcleaner.CleanerProperties;
+import org.htmlcleaner.HtmlCleaner;
+import org.htmlcleaner.HtmlSerializer;
+import org.htmlcleaner.SimpleHtmlSerializer;
+import org.htmlcleaner.TagNode;
+
+
+public class HtmlSanitizer {
+    private static final HtmlCleaner HTML_CLEANER;
+    private static final HtmlSerializer HTML_SERIALIZER;
+
+    static {
+        CleanerProperties properties = createCleanerProperties();
+        HTML_CLEANER = new HtmlCleaner(properties);
+        HTML_SERIALIZER = new SimpleHtmlSerializer(properties);
+    }
+
+
+    private HtmlSanitizer() {}
+
+    public static String sanitize(String html) {
+        TagNode rootNode = HTML_CLEANER.clean(html);
+
+        removeMetaRefresh(rootNode);
+
+        return HTML_SERIALIZER.getAsString(rootNode, "UTF8");
+    }
+
+    private static CleanerProperties createCleanerProperties() {
+        CleanerProperties properties = new CleanerProperties();
+
+        // See http://htmlcleaner.sourceforge.net/parameters.php for descriptions
+        properties.setNamespacesAware(false);
+        properties.setAdvancedXmlEscape(false);
+        properties.setOmitXmlDeclaration(true);
+        properties.setOmitDoctypeDeclaration(false);
+        properties.setTranslateSpecialEntities(false);
+        properties.setRecognizeUnicodeChars(false);
+
+        return properties;
+    }
+
+    private static void removeMetaRefresh(TagNode rootNode) {
+        for (TagNode element : rootNode.getElementListByName("meta", true)) {
+            String httpEquiv = element.getAttributeByName("http-equiv");
+            if (httpEquiv != null && httpEquiv.trim().equalsIgnoreCase("refresh")) {
+                element.removeFromTree();
+            }
+        }
+    }
+}
diff --git a/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java b/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
index 3198d4723..0f64194e2 100644
--- a/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
+++ b/k9mail/src/main/java/com/fsck/k9/view/MessageWebView.java
@@ -11,6 +11,8 @@ import android.widget.Toast;
 import com.fsck.k9.K9;
 import com.fsck.k9.R;
 import com.fsck.k9.helper.HtmlConverter;
+import com.fsck.k9.helper.HtmlSanitizer;
+
 
 public class MessageWebView extends RigidWebView {
 
@@ -123,7 +125,9 @@ public class MessageWebView extends RigidWebView {
         }
         content += HtmlConverter.cssStylePre();
         content += "</head><body>" + text + "</body></html>";
-        loadDataWithBaseURL("http://", content, "text/html", "utf-8", null);
+
+        String sanitizedContent = HtmlSanitizer.sanitize(content);
+        loadDataWithBaseURL("http://", sanitizedContent, "text/html", "utf-8", null);
         resumeTimers();
     }
 
diff --git a/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java b/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java
new file mode 100644
index 000000000..ccff81412
--- /dev/null
+++ b/tests-on-jvm/src/test/java/com/fsck/k9/helper/HtmlSanitizerTest.java
@@ -0,0 +1,94 @@
+package com.fsck.k9.helper;
+
+
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+
+public class HtmlSanitizerTest {
+    @Test
+    public void shouldRemoveMetaRefreshInHead() {
+        String html = "<html>" +
+                "<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshBetweenHeadAndBody() {
+        String html = "<html>" +
+                "<head></head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshInBody() {
+        String html = "<html>" +
+                "<head></head>" +
+                "<body><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithUpperCaseAttributeValue() {
+        String html = "<html>" +
+                "<head><meta http-equiv=\"REFRESH\" content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithMixedCaseAttributeValue() {
+        String html = "<html>" +
+                "<head><meta http-equiv=\"Refresh\" content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithoutQuotesAroundAttributeValue() {
+        String html = "<html>" +
+                "<head><meta http-equiv=refresh content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshWithSpacesInAttributeValue() {
+        String html = "<html>" +
+                "<head><meta http-equiv=\"refresh \" content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMultipleMetaRefreshTags() {
+        String html = "<html>" +
+                "<head><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"><meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\"></head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head></head><body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+
+    @Test
+    public void shouldRemoveMetaRefreshButKeepOtherMetaTags() {
+        String html = "<html>" +
+                "<head>" +
+                "<meta name=\"title\" content=\"Title\">" +
+                "<meta http-equiv=\"refresh\" content=\"1; URL=http://example.com/\">" +
+                "</head>" +
+                "<body>Message</body>" +
+                "</html>";
+        assertEquals("<html><head><meta name=\"title\" content=\"Title\" /></head>" +
+                "<body>Message</body></html>", HtmlSanitizer.sanitize(html));
+    }
+}