001    // Copyright (c) 2011, Mike Samuel
002    // All rights reserved.
003    //
004    // Redistribution and use in source and binary forms, with or without
005    // modification, are permitted provided that the following conditions
006    // are met:
007    //
008    // Redistributions of source code must retain the above copyright
009    // notice, this list of conditions and the following disclaimer.
010    // Redistributions in binary form must reproduce the above copyright
011    // notice, this list of conditions and the following disclaimer in the
012    // documentation and/or other materials provided with the distribution.
013    // Neither the name of the OWASP nor the names of its contributors may
014    // be used to endorse or promote products derived from this software
015    // without specific prior written permission.
016    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
017    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
018    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
019    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
020    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
021    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
022    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
024    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
025    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
026    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
027    // POSSIBILITY OF SUCH DAMAGE.
028    
029    package org.owasp.html;
030    
031    /**
032     * Pre-packaged HTML sanitizer policies.
033     *
034     * <p>
035     * These policies can be used to sanitize content.
036     * </p>
037     * <pre>
038     *   Sanitizers.FORMATTING.sanitize({@code "<b>Hello, World!</b>"})
039     * </pre>
040     * and can be chained
041     * <pre>
042     *   PolicyFactory sanitizer = Sanitizers.FORMATTING.and(Sanitizers.BLOCKS);
043     *   System.out.println(sanitizer.sanitize({@code "<p>Hello, <b>World!</b>"}));
044     * </pre>
045     *
046     * <p>
047     * For more fine-grained control over sanitization, use
048     * {@link HtmlPolicyBuilder}.
049     * </p>
050     *
051     * @author Mike Samuel <mikesamuel@gmail.com>
052     */
053    public final class Sanitizers {
054    
055      /**
056       * Allows common formatting elements including {@code <b>}, {@code <i>}, etc.
057       */
058      public static final PolicyFactory FORMATTING = new HtmlPolicyBuilder()
059          .allowCommonInlineFormattingElements().toFactory();
060    
061      /**
062       * Allows common block elements including <code>&lt;p&gt;</code>,
063       * <code>&lt;h1&gt;</code>, etc.
064       */
065      public static final PolicyFactory BLOCKS = new HtmlPolicyBuilder()
066          .allowCommonBlockElements().toFactory();
067    
068      /**
069       * Allows certain safe CSS properties in {@code style="..."} attributes.
070       */
071      public static final PolicyFactory STYLES = new HtmlPolicyBuilder()
072          .allowStyling().toFactory();
073    
074      /**
075       * Allows HTTP, HTTPS, MAILTO, and relative links.
076       */
077      public static final PolicyFactory LINKS = new HtmlPolicyBuilder()
078          .allowStandardUrlProtocols().allowElements("a")
079          .allowAttributes("href").onElements("a").requireRelNofollowOnLinks()
080          .toFactory();
081    
082      private static final AttributePolicy INTEGER = new AttributePolicy() {
083        public String apply(
084            String elementName, String attributeName, String value) {
085          int n = value.length();
086          if (n == 0) { return null; }
087          for (int i = 0; i < n; ++i) {
088            char ch = value.charAt(i);
089            if (ch == '.') {
090              if (i == 0) { return null; }
091              return value.substring(0, i);  // truncate to integer.
092            } else if (!('0' <= ch && ch <= '9')) {
093              return null;
094            }
095          }
096          return value;
097        }
098      };
099    
100      /**
101       * Allows {@code <img>} elements from HTTP, HTTPS, and relative sources.
102       */
103      public static final PolicyFactory IMAGES = new HtmlPolicyBuilder()
104          .allowUrlProtocols("http", "https").allowElements("img")
105          .allowAttributes("alt", "src").onElements("img")
106          .allowAttributes("border", "height", "width").matching(INTEGER)
107              .onElements("img")
108          .toFactory();
109    
110      private Sanitizers() {
111        // Uninstantiable.
112      }
113    }