/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.mail.utils; import android.os.Looper; import android.util.Log; import com.android.mail.perf.Timer; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.owasp.html.AttributePolicy; import org.owasp.html.CssSchema; import org.owasp.html.ElementPolicy; import org.owasp.html.FilterUrlByProtocolAttributePolicy; import org.owasp.html.Handler; import org.owasp.html.HtmlPolicyBuilder; import org.owasp.html.HtmlStreamRenderer; import org.owasp.html.PolicyFactory; import java.util.List; /** * This sanitizer is meant to strip all scripts and any malicious HTML from untrusted emails. It * uses the OWASP Java * HTML Sanitizer Project to whitelist the subset of HTML elements and attributes as well as CSS * properties that are considered safe. Any unmatched HTML or CSS is discarded. * * All URLS are scrubbed to ensure they match the blessed form of "http://the.url.here", * "https://the.url.here" or "mailto:address@server.com" and cannot resemble "javascript:badness()" * or comparable. */ public final class HtmlSanitizer { /** * This version number should be bumped each time a meaningful change is made to this sanitizer * configuration which influences its output. It is compared against a minimum target version * number. If it meets or exceeds the minimum target version, the result of the sanitizer is * free to be shown in a standard webview. If it does not meet the minimum target version then * the sanitized output is deemed untrustworthy and is shown in a sandboxed webview with * javascript execution disabled. */ public static final int VERSION = 1; private static final String LOG_TAG = LogTag.getLogTag(); /** * The following CSS properties do not appear in the default whitelist from OWASP, but they * improve the fidelity of the HTML display without unacceptable risk. */ private static final CssSchema ADDITIONAL_CSS = CssSchema.withProperties(ImmutableSet.of( "float", "display" )); /** * Translates the body tag into the div tag */ private static final ElementPolicy TRANSLATE_BODY_TO_DIV = new ElementPolicy() { public String apply(String elementName, List attrs) { return "div"; } }; /** * Translates
tags surrounding quoted text into
which allows * quoted text collapsing in ConversationViewFragment. */ private static final ElementPolicy TRANSLATE_DIV_CLASS = new ElementPolicy() { public String apply(String elementName, List attrs) { boolean showHideQuotedText = false; // check if the class attribute is listed final int classIndex = attrs.indexOf("class"); if (classIndex >= 0) { // remove the class attribute and its value final String value = attrs.remove(classIndex + 1); attrs.remove(classIndex); // gmail and yahoo use a specific div class name to indicate quoted text showHideQuotedText = "gmail_quote".equals(value) || "yahoo_quoted".equals(value); } // check if the id attribute is listed final int idIndex = attrs.indexOf("id"); if (idIndex >= 0) { // remove the id attribute and its value final String value = attrs.remove(idIndex + 1); attrs.remove(idIndex); // AOL uses a specific id value to indicate quoted text showHideQuotedText = value.startsWith("AOLMsgPart"); } // insert a class attribute with a value of "elided-text" to hide/show quoted text if (showHideQuotedText) { attrs.add("class"); attrs.add("elided-text"); } return "div"; } }; /** * Disallow "cid:" and "mailto:" urls on all tags not <a> or <img>. */ private static final AttributePolicy URL_PROTOCOLS = new FilterUrlByProtocolAttributePolicy(ImmutableList.of("http", "https")); /** * Disallow the "cid:" url on links. Do allow "mailto:" urls to support sending mail. */ private static final AttributePolicy A_HREF_PROTOCOLS = new FilterUrlByProtocolAttributePolicy(ImmutableList.of("mailto", "http", "https")); /** * Disallow the "mailto:" url on images so that "Show pictures" can't be used to start composing * a bajillion emails. Do allow "cid:" urls to support inline image attachments. */ private static final AttributePolicy IMG_SRC_PROTOCOLS = new FilterUrlByProtocolAttributePolicy(ImmutableList.of("cid", "http", "https")); /** * This sanitizer policy removes these elements and the content within: *
    *
  • APPLET
  • *
  • FRAMESET
  • *
  • OBJECT
  • *
  • SCRIPT
  • *
  • STYLE
  • *
  • TITLE
  • *
* * This sanitizer policy removes these elements but preserves the content within: *
    *
  • BASEFONT
  • *
  • FRAME
  • *
  • HEAD
  • *
  • IFRAME
  • *
  • ISINDEX
  • *
  • LINK
  • *
  • META
  • *
  • NOFRAMES
  • *
  • PARAM
  • *
  • NOSCRIPT
  • *
* * This sanitizer policy removes these attributes from all elements: *
    *
  • code
  • *
  • codebase
  • *
  • id
  • *
  • for
  • *
  • headers
  • *
  • onblur
  • *
  • onchange
  • *
  • onclick
  • *
  • ondblclick
  • *
  • onfocus
  • *
  • onkeydown
  • *
  • onkeypress
  • *
  • onkeyup
  • *
  • onload
  • *
  • onmousedown
  • *
  • onmousemove
  • *
  • onmouseout
  • *
  • onmouseover
  • *
  • onmouseup
  • *
  • onreset
  • *
  • onselect
  • *
  • onsubmit
  • *
  • onunload
  • *
  • tabindex
  • *
*/ private static final PolicyFactory POLICY_DEFINITION = new HtmlPolicyBuilder() .allowAttributes("dir").matching(true, "ltr", "rtl").globally() .allowUrlProtocols("cid", "http", "https", "mailto") .allowStyling(CssSchema.union(CssSchema.DEFAULT, ADDITIONAL_CSS)) .disallowTextIn("applet", "frameset", "object", "script", "style", "title") .allowElements("a") .allowAttributes("coords", "name", "shape").onElements("a") .allowAttributes("href").matching(A_HREF_PROTOCOLS).onElements("a") .allowElements("abbr").allowAttributes("title").onElements("abbr") .allowElements("acronym").allowAttributes("title").onElements("acronym") .allowElements("address") .allowElements("area") .allowAttributes("alt", "coords", "nohref", "name", "shape").onElements("area") .allowAttributes("href").matching(URL_PROTOCOLS).onElements("area") .allowElements("article") .allowElements("aside") .allowElements("b") .allowElements("base") .allowAttributes("href").matching(URL_PROTOCOLS).onElements("base") .allowElements("bdi").allowAttributes("dir").onElements("bdi") .allowElements("bdo").allowAttributes("dir").onElements("bdo") .allowElements("big") .allowElements("blockquote").allowAttributes("cite").onElements("blockquote") .allowElements(TRANSLATE_BODY_TO_DIV, "body") .allowElements("br").allowAttributes("clear").onElements("br") .allowElements("button") .allowAttributes("autofocus", "disabled", "form", "formaction", "formenctype", "formmethod", "formnovalidate", "formtarget", "name", "type", "value") .onElements("button") .allowElements("canvas").allowAttributes("width", "height").onElements("canvas") .allowElements("caption").allowAttributes("align").onElements("caption") .allowElements("center") .allowElements("cite") .allowElements("code") .allowElements("col") .allowAttributes("align", "bgcolor", "char", "charoff", "span", "valign", "width") .onElements("col") .allowElements("colgroup") .allowAttributes("align", "char", "charoff", "span", "valign", "width") .onElements("colgroup") .allowElements("datalist") .allowElements("dd") .allowElements("del").allowAttributes("cite", "datetime").onElements("del") .allowElements("details") .allowElements("dfn") .allowElements("dir").allowAttributes("compact").onElements("dir") .allowElements(TRANSLATE_DIV_CLASS, "div") .allowAttributes("align", "background", "class", "id") .onElements("div") .allowElements("dl") .allowElements("dt") .allowElements("em") .allowElements("fieldset") .allowAttributes("disabled", "form", "name") .onElements("fieldset") .allowElements("figcaption") .allowElements("figure") .allowElements("font").allowAttributes("color", "face", "size").onElements("font") .allowElements("footer") .allowElements("form") .allowAttributes("accept", "action", "accept-charset", "autocomplete", "enctype", "method", "name", "novalidate", "target") .onElements("form") .allowElements("header") .allowElements("h1").allowAttributes("align").onElements("h1") .allowElements("h2").allowAttributes("align").onElements("h2") .allowElements("h3").allowAttributes("align").onElements("h3") .allowElements("h4").allowAttributes("align").onElements("h4") .allowElements("h5").allowAttributes("align").onElements("h5") .allowElements("h6").allowAttributes("align").onElements("h6") .allowElements("hr") .allowAttributes("align", "noshade", "size", "width") .onElements("hr") .allowElements("i") .allowElements("img") .allowAttributes("src").matching(IMG_SRC_PROTOCOLS).onElements("img") .allowAttributes("longdesc").matching(URL_PROTOCOLS).onElements("img") .allowAttributes("align", "alt", "border", "crossorigin", "height", "hspace", "ismap", "usemap", "vspace", "width") .onElements("img") .allowElements("input") .allowAttributes("src").matching(URL_PROTOCOLS).onElements("input") .allowAttributes("formaction").matching(URL_PROTOCOLS).onElements("input") .allowAttributes("accept", "align", "alt", "autocomplete", "autofocus", "checked", "disabled", "form", "formenctype", "formmethod", "formnovalidate", "formtarget", "height", "list", "max", "maxlength", "min", "multiple", "name", "pattern", "placeholder", "readonly", "required", "size", "step", "type", "value", "width") .onElements("input") .allowElements("ins") .allowAttributes("cite").matching(URL_PROTOCOLS).onElements("ins") .allowAttributes("datetime").onElements("ins") .allowElements("kbd") .allowElements("keygen") .allowAttributes("autofocus", "challenge", "disabled", "form", "keytype", "name") .onElements("keygen") .allowElements("label").allowAttributes("form").onElements("label") .allowElements("legend").allowAttributes("align").onElements("legend") .allowElements("li").allowAttributes("type", "value").onElements("li") .allowElements("main") .allowElements("map").allowAttributes("name").onElements("map") .allowElements("mark") .allowElements("menu").allowAttributes("label", "type").onElements("menu") .allowElements("menuitem") .allowAttributes("icon").matching(URL_PROTOCOLS).onElements("menuitem") .allowAttributes("checked", "command", "default", "disabled", "label", "type", "radiogroup").onElements("menuitem") .allowElements("meter") .allowAttributes("form", "high", "low", "max", "min", "optimum", "value") .onElements("meter") .allowElements("nav") .allowElements("ol") .allowAttributes("compact", "reversed", "start", "type") .onElements("ol") .allowElements("optgroup").allowAttributes("disabled", "label").onElements("optgroup") .allowElements("option") .allowAttributes("disabled", "label", "selected", "value") .onElements("option") .allowElements("output").allowAttributes("form", "name").onElements("output") .allowElements("p").allowAttributes("align").onElements("p") .allowElements("pre").allowAttributes("width").onElements("pre") .allowElements("progress").allowAttributes("max", "value").onElements("progress") .allowElements("q").allowAttributes("cite").matching(URL_PROTOCOLS).onElements("q") .allowElements("rp") .allowElements("rt") .allowElements("ruby") .allowElements("s") .allowElements("samp") .allowElements("section") .allowElements("select") .allowAttributes("autofocus", "disabled", "form", "multiple", "name", "required", "size") .onElements("select") .allowElements("small") .allowElements("span") .allowElements("strike") .allowElements("strong") .allowElements("sub") .allowElements("summary") .allowElements("sup") .allowElements("table") .allowAttributes("align", "bgcolor", "border", "cellpadding", "cellspacing", "frame", "rules", "sortable", "summary", "width") .onElements("table") .allowElements("tbody") .allowAttributes("align", "char", "charoff", "valign").onElements("tbody") .allowElements("td") .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan", "height", "nowrap", "rowspan", "scope", "valign", "width") .onElements("td") .allowElements("textarea") .allowAttributes("autofocus", "cols", "disabled", "form", "maxlength", "name", "placeholder", "readonly", "required", "rows", "wrap") .onElements("textarea") .allowElements("tfoot") .allowAttributes("align", "char", "charoff", "valign").onElements("tfoot") .allowElements("th") .allowAttributes("abbr", "align", "axis", "bgcolor", "char", "charoff", "colspan", "height", "nowrap", "rowspan", "scope", "sorted", "valign", "width") .onElements("th") .allowElements("thead") .allowAttributes("align", "char", "charoff", "valign").onElements("thead") .allowElements("time").allowAttributes("datetime").onElements("time") .allowElements("tr") .allowAttributes("align", "bgcolor", "char", "charoff", "valign").onElements("tr") .allowElements("tt") .allowElements("u") .allowElements("ul").allowAttributes("compact", "type").onElements("ul") .allowElements("var") .allowElements("wbr") .toFactory(); private HtmlSanitizer() {} /** * Sanitizing email is treated as an expensive operation; this method should be called from * a background Thread. * * @param rawHtml the unsanitized, suspicious html * @return the sanitized form of the rawHtml; null if * rawHtml was null */ public static String sanitizeHtml(final String rawHtml) { if (Looper.getMainLooper() == Looper.myLooper()) { throw new IllegalStateException("sanitizing email should not occur on the main thread"); } if (rawHtml == null) { return null; } // create the builder into which the sanitized email will be written final StringBuilder htmlBuilder = new StringBuilder(rawHtml.length()); // create the renderer that will write the sanitized HTML to the builder final HtmlStreamRenderer renderer = HtmlStreamRenderer.create( htmlBuilder, Handler.PROPAGATE, // log errors resulting from exceptionally bizarre inputs new Handler() { public void handle(final String x) { Log.wtf(LOG_TAG, "Mangled HTML content cannot be parsed: " + x); throw new AssertionError(x); } } ); // create a thread-specific policy final org.owasp.html.HtmlSanitizer.Policy policy = POLICY_DEFINITION.apply(renderer); // run the html through the sanitizer Timer.startTiming("sanitizingHTMLEmail"); try { org.owasp.html.HtmlSanitizer.sanitize(rawHtml, policy); } finally { Timer.stopTiming("sanitizingHTMLEmail"); } // return the resulting HTML from the builder return htmlBuilder.toString(); } }