// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package org.owasp.html;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import javax.annotation.Nullable;
import javax.annotation.concurrent.NotThreadSafe;

import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;


/**
 * Conveniences for configuring policies for the {@link HtmlSanitizer}.
 *
 * <h3>Usage</h3>
 * <p>
 * To create a policy, first construct an instance of this class; then call
 * <code>allow&hellip;</code> methods to turn on tags, attributes, and other
 * processing modes; and finally call <code>build(renderer)</code> or
 * <code>toFactory()</code>.
 * </p>
 * <pre class="prettyprint lang-java">
 * // Define the policy.
 * Function&lt;HtmlStreamEventReceiver, HtmlSanitizer.Policy&gt; policy
 *     = new HtmlPolicyBuilder()
 *         .allowElements("a", "p")
 *         .allowAttributes("href").onElements("a")
 *         .toFactory();
 *
 * // Sanitize your output.
 * HtmlSanitizer.sanitize(myHtml, policy.apply(myHtmlStreamRenderer));
 * </pre>
 *
 * <h3>Embedded Content</h3>
 * <p>
 * Embedded URLs are filtered by
 * {@link HtmlPolicyBuilder#allowUrlProtocols protocol}.
 * There is a {@link HtmlPolicyBuilder#allowStandardUrlProtocols canned policy}
 * so you can easily white-list widely used policies that don't violate the
 * current page's origin.  See "Customization" below for ways to do further
 * filtering.  If you allow links it might be worthwhile to
 * {@link HtmlPolicyBuilder#requireRelNofollowOnLinks() require}
 * {@code rel=nofollow}.
 * </p>
 * <p>
 * This class simply throws out all embedded JS.
 * Use a custom element or attribute policy to allow through
 * signed or otherwise known-safe code.
 * Check out the Caja project if you need a way to contain third-party JS.
 * </p>
 * <p>
 * This class does not attempt to faithfully parse and sanitize CSS.
 * It does provide {@link HtmlPolicyBuilder#allowStyling() one} styling option
 * that allows through a few CSS properties that allow textual styling, but that
 * disallow image loading, history stealing, layout breaking, code execution,
 * etc.
 * </p>
 *
 * <h3>Customization</h3>
 * <p>
 * You can easily do custom processing on tags and attributes by supplying your
 * own {@link ElementPolicy element policy} or
 * {@link AttributePolicy attribute policy} when calling
 * <code>allow&hellip;</code>.
 * E.g. to convert headers into {@code <div>}s, you could use an element policy
 * </p>
 * <pre class="prettyprint lang-java">
 * new HtmlPolicyBuilder()
 *     .allowElements(
 *         new ElementPolicy() {
 *           public String apply(String elementName, List&lt;String&gt; attributes) {
 *             attributes.add("class");
 *             attributes.add("header-" + elementName);
 *             return "div";
 *           }
 *         },
 *         "h1", "h2", "h3", "h4", "h5", "h6")
 *     .build(outputChannel)
 * </pre>
 *
 * <h3>Rules of Thumb</h3>
 * <p>
 * Throughout this class, several rules hold:
 * <ul>
 *   <li>Everything is denied by default.  There are
 *     <code>disallow&hellip;</code> methods, but those reverse
 *     allows instead of rolling back overly permissive defaults.
 *   <li>The order of allows and disallows does not matter.
 *     Disallows trump allows whether they occur before or after them.
 *     The only method that needs to be called in a particular place is
 *     {@link HtmlPolicyBuilder#build}.
 *     Allows or disallows after {@code build} is called have no
 *     effect on the already built policy.
 *   <li>Element and attribute policies are applied in the following order:
 *     element specific attribute policy, global attribute policy, element
 *     policy.
 *     Element policies come last so they can observe all the post-processed
 *     attributes, and so they can add attributes that are exempt from
 *     attribute policies.
 *     Element specific policies go first, so they can normalize content to
 *     a form that might be acceptable to a more simplistic global policy.
 * </ul>
 *
 * <h3>Thread safety and efficiency</h3>
 * <p>
 * This class is not thread-safe.  The resulting policy will not violate its
 * security guarantees as a result of race conditions, but is not thread safe
 * because it maintains state to track whether text inside disallowed elements
 * should be suppressed.
 * <p>
 * The resulting policy can be reused, but if you use the
 * {@link HtmlPolicyBuilder#toFactory()} method instead of {@link #build}, then
 * binding policies to output channels is cheap so there's no need.
 * </p>
 *
 * @author Mike Samuel &lt;mikesamuel@gmail.com&gt;
 */
@TCB
@NotThreadSafe
public class HtmlPolicyBuilder {
  /**
   * The default set of elements that are removed if they have no attributes.
   * Since {@code <img>} is in this set, by default, a policy will remove
   * {@code <img src=javascript:alert(1337)>} because its URL is not allowed
   * and it has no other attributes that would warrant it appearing in the
   * output.
   */
  public static final ImmutableSet<String> DEFAULT_SKIP_IF_EMPTY
      = ImmutableSet.of("a", "font", "img", "input", "span");

  /**
   * Separator between the tokens of a {@code rel} attribute value, which is a
   * list of link types separated by HTML space characters.
   */
  private static final Pattern HTML_SPACE = Pattern.compile("[ \\t\\n\\f\\r]+");

  private final Map<String, ElementPolicy> elPolicies = Maps.newLinkedHashMap();
  private final Map<String, Map<String, AttributePolicy>> attrPolicies
      = Maps.newLinkedHashMap();
  private final Map<String, AttributePolicy> globalAttrPolicies
      = Maps.newLinkedHashMap();
  private final Set<String> allowedProtocols = Sets.newLinkedHashSet();
  private final Set<String> skipIfEmpty = Sets.newLinkedHashSet(
      DEFAULT_SKIP_IF_EMPTY);
  /**
   * Maps canonical element names to whether text is allowed directly inside
   * them.  An explicit {@code false} records a {@link #disallowTextIn} call so
   * that a later {@link #allowElements} does not silently re-enable text.
   */
  private final Map<String, Boolean> textContainers = Maps.newLinkedHashMap();
  private boolean requireRelNofollowOnLinks;

  /**
   * Allows the named elements.
   */
  public HtmlPolicyBuilder allowElements(String... elementNames) {
    return allowElements(ElementPolicy.IDENTITY_ELEMENT_POLICY, elementNames);
  }

  /**
   * Disallows the named elements.  Elements are disallowed by default, so
   * there is no need to disallow elements, unless you are making an exception
   * based on an earlier allow.
   */
  public HtmlPolicyBuilder disallowElements(String... elementNames) {
    return allowElements(ElementPolicy.REJECT_ALL_ELEMENT_POLICY, elementNames);
  }

  /**
   * Allow the given elements with the given policy.
   *
   * @param policy May remove or add attributes, change the element name, or
   *   deny the element.
   */
  public HtmlPolicyBuilder allowElements(
      ElementPolicy policy, String... elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      elementName = HtmlLexer.canonicalName(elementName);
      ElementPolicy newPolicy = ElementPolicy.Util.join(
          elPolicies.get(elementName), policy);
      // Don't remove if newPolicy is the always reject policy since we want
      // that to infect later allowElement calls for this particular element
      // name.  rejects should have higher priority than allows.
      elPolicies.put(elementName, newPolicy);
      // Only default text handling when the policy author has not already
      // made an explicit allowTextIn/disallowTextIn decision.
      if (!textContainers.containsKey(elementName)
          && TagBalancingHtmlStreamEventReceiver
              .allowsPlainTextualContent(elementName)) {
        textContainers.put(elementName, true);
      }
    }
    return this;
  }

  /**
   * A canned policy that allows a number of common formatting elements.
   */
  public HtmlPolicyBuilder allowCommonInlineFormattingElements() {
    return allowElements(
        "b", "i", "font", "s", "u", "o", "sup", "sub", "ins", "del", "strong",
        "strike", "tt", "code", "big", "small", "br", "span");
  }

  /**
   * A canned policy that allows a number of common block elements.
   */
  public HtmlPolicyBuilder allowCommonBlockElements() {
    return allowElements(
        "p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "ul", "ol", "li",
        "blockquote");
  }

  /**
   * Allows text content in the named elements.
   * By default, text content is allowed in any
   * {@link #allowElements allowed elements} that can contain character data per
   * the HTML5 spec, but text content is not allowed by default in elements that
   * contain content of other kinds (like JavaScript in {@code <script>}
   * elements).
   * <p>
   * To write a policy that whitelists {@code <script>} or {@code <style>}
   * elements, first {@code allowTextIn("script")}.
   */
  public HtmlPolicyBuilder allowTextIn(String... elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      elementName = HtmlLexer.canonicalName(elementName);
      textContainers.put(elementName, true);
    }
    return this;
  }

  /**
   * Disallows text content in the named elements, reversing an earlier
   * {@link #allowTextIn} or the default applied by {@link #allowElements}.
   * The decision is remembered, so allowing the element afterwards does not
   * re-enable text inside it.
   */
  public HtmlPolicyBuilder disallowTextIn(String... elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      elementName = HtmlLexer.canonicalName(elementName);
      textContainers.put(elementName, false);
    }
    return this;
  }

  /**
   * Assuming the given elements are allowed, allows them to appear without
   * attributes.
   *
   * @see #DEFAULT_SKIP_IF_EMPTY
   * @see #disallowWithoutAttributes
   */
  public HtmlPolicyBuilder allowWithoutAttributes(String... elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      elementName = HtmlLexer.canonicalName(elementName);
      skipIfEmpty.remove(elementName);
    }
    return this;
  }

  /**
   * Disallows the given elements from appearing without attributes.
   *
   * @see #DEFAULT_SKIP_IF_EMPTY
   * @see #allowWithoutAttributes
   */
  public HtmlPolicyBuilder disallowWithoutAttributes(String... elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      elementName = HtmlLexer.canonicalName(elementName);
      skipIfEmpty.add(elementName);
    }
    return this;
  }

  /**
   * Returns an object that lets you associate policies with the given
   * attributes, and allow them globally or on specific elements.
   */
  public AttributeBuilder allowAttributes(String... attributeNames) {
    ImmutableList.Builder<String> b = ImmutableList.builder();
    for (String attributeName : attributeNames) {
      b.add(HtmlLexer.canonicalName(attributeName));
    }
    return new AttributeBuilder(b.build());
  }

  /**
   * Reverse an earlier attribute {@link #allowAttributes allow}.
   * <p>
   * For this to have an effect you must call at least one of
   * {@link AttributeBuilder#globally} and {@link AttributeBuilder#onElements}.
   * <p>
   * Attributes are disallowed by default, so there is no need to call this
   * with a laundry list of attribute/element pairs.
   */
  public AttributeBuilder disallowAttributes(String... attributeNames) {
    return this.allowAttributes(attributeNames)
        .matching(AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY);
  }


  /**
   * Joins the given policy onto the global policy of each named attribute.
   *
   * @param attributeNames canonical attribute names.
   */
  private HtmlPolicyBuilder allowAttributesGlobally(
      AttributePolicy policy, List<String> attributeNames) {
    invalidateCompiledState();
    for (String attributeName : attributeNames) {
      // We reinterpret the identity policy later via policy joining since its
      // the default passed from the policy-less method, but we don't do
      // anything here since we don't know until build() is called whether the
      // policy author wants to allow certain URL protocols or wants to deal
      // with styles.
      AttributePolicy oldPolicy = globalAttrPolicies.get(attributeName);
      globalAttrPolicies.put(
          attributeName, AttributePolicy.Util.join(oldPolicy, policy));
    }
    return this;
  }

  /**
   * Joins the given policy onto the element-specific policy of each named
   * attribute, for each named element.
   *
   * @param attributeNames canonical attribute names.
   * @param elementNames canonical element names.
   */
  private HtmlPolicyBuilder allowAttributesOnElements(
      AttributePolicy policy, List<String> attributeNames,
      List<String> elementNames) {
    invalidateCompiledState();
    for (String elementName : elementNames) {
      Map<String, AttributePolicy> policies = attrPolicies.get(elementName);
      if (policies == null) {
        policies = Maps.newLinkedHashMap();
        attrPolicies.put(elementName, policies);
      }
      for (String attributeName : attributeNames) {
        AttributePolicy oldPolicy = policies.get(attributeName);
        policies.put(
            attributeName,
            AttributePolicy.Util.join(oldPolicy, policy));
      }
    }
    return this;
  }

  /**
   * Adds <a href="http://en.wikipedia.org/wiki/Nofollow"><code>rel=nofollow</code></a>
   * to links.
   * If a link already carries a {@code rel} attribute that passed the
   * attribute policies, {@code nofollow} is merged into that attribute's value
   * rather than emitted as a second {@code rel} attribute.
   */
  public HtmlPolicyBuilder requireRelNofollowOnLinks() {
    invalidateCompiledState();
    this.requireRelNofollowOnLinks = true;
    return this;
  }

  /**
   * Adds to the set of protocols that are allowed in URL attributes.
   * For each URL attribute that is allowed, we further constrain it by
   * only allowing the value through if it specifies no protocol, or if it
   * specifies one in the allowedProtocols white-list.
   * This is done regardless of whether any protocols have been allowed, so
   * allowing the attribute "href" globally with the identity policy but
   * not white-listing any protocols, effectively disallows the "href"
   * attribute globally.
   * <p>
   * Do not allow any <code>*script</code> such as <code>javascript</code>
   * protocols if you might use this policy with untrusted code.
   */
  public HtmlPolicyBuilder allowUrlProtocols(String... protocols) {
    invalidateCompiledState();
    // If there is at least one allowed protocol, then allow URLs and
    // add a filter that checks href and src values.

    // Do not allow href and srcs through otherwise, and only allow on images
    // and links.
    for (String protocol : protocols) {
      protocol = Strings.toLowerCase(protocol);
      allowedProtocols.add(protocol);
    }
    return this;
  }

  /**
   * Reverses a decision made by {@link #allowUrlProtocols}.
   */
  public HtmlPolicyBuilder disallowUrlProtocols(String... protocols) {
    invalidateCompiledState();
    for (String protocol : protocols) {
      protocol = Strings.toLowerCase(protocol);
      allowedProtocols.remove(protocol);
    }
    return this;
  }

  /**
   * A canned URL protocol policy that allows <code>http</code>,
   * <code>https</code>, and <code>mailto</code>.
   */
  public HtmlPolicyBuilder allowStandardUrlProtocols() {
    return allowUrlProtocols("http", "https", "mailto");
  }

  /**
   * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
   * color, font-size, type-face, and other styling using the default schema;
   * but which does not allow content to escape its clipping context.
   */
  public HtmlPolicyBuilder allowStyling() {
    allowStyling(CssSchema.DEFAULT);
    return this;
  }

  /**
   * Convert <code>style="&lt;CSS&gt;"</code> to sanitized CSS which allows
   * color, font-size, type-face, and other styling using the given schema.
   */
  public HtmlPolicyBuilder allowStyling(CssSchema whitelist) {
    invalidateCompiledState();
    allowAttributesGlobally(
        new StylingPolicy(whitelist), ImmutableList.of("style"));
    return this;
  }

  /**
   * Names of HTML attributes whose values are URLs.
   * Other attributes, e.g. <code>style</code> may contain URLs even though
   * their values are not URLs.
   */
  private static final Set<String> URL_ATTRIBUTE_NAMES = ImmutableSet.of(
      "action", "archive", "background", "cite", "classid", "codebase", "data",
      "dsync", "formaction", "href", "icon", "longdesc", "manifest", "poster",
      "profile", "src", "srcset", "usemap");

  /**
   * Produces a policy based on the allow and disallow calls previously made.
   *
   * @param out receives calls to open only tags allowed by
   *      previous calls to this object.
   *      Typically a {@link HtmlStreamRenderer}.
   */
  public HtmlSanitizer.Policy build(HtmlStreamEventReceiver out) {
    return toFactory().apply(out);
  }

  /**
   * Produces a policy based on the allow and disallow calls previously made.
   *
   * @param out receives calls to open only tags allowed by
   *      previous calls to this object.
   *      Typically a {@link HtmlStreamRenderer}.
   * @param listener is notified of dropped tags and attributes so that
   *      intrusion detection systems can be alerted to questionable HTML.
   *      If {@code null} then no notifications are sent.
   * @param context if {@code (listener != null)} then the context value passed
   *      with alerts.  This can be used to let the listener know from which
   *      connection or request the questionable HTML was received.
   */
  public <CTX> HtmlSanitizer.Policy build(
      HtmlStreamEventReceiver out,
      @Nullable HtmlChangeListener<? super CTX> listener,
      @Nullable CTX context) {
    return toFactory().apply(out, listener, context);
  }

  /**
   * Like {@link #build} but can be reused to create many different policies
   * each backed by a different output channel.
   */
  public PolicyFactory toFactory() {
    // Only elements explicitly mapped to TRUE may contain text; a FALSE entry
    // is a remembered disallowTextIn decision.
    ImmutableSet.Builder<String> textContainerNames = ImmutableSet.builder();
    for (Map.Entry<String, Boolean> textContainer
         : this.textContainers.entrySet()) {
      if (Boolean.TRUE.equals(textContainer.getValue())) {
        textContainerNames.add(textContainer.getKey());
      }
    }
    return new PolicyFactory(compilePolicies(), textContainerNames.build(),
                             ImmutableMap.copyOf(globalAttrPolicies));
  }

  // Speed up subsequent builds by caching the compiled policies.
  private transient ImmutableMap<String, ElementAndAttributePolicies>
      compiledPolicies;

  /** Called by mutators to signal that any compiled policy is out-of-date. */
  private void invalidateCompiledState() {
    compiledPolicies = null;
  }

  /**
   * Ensures that an attribute list which contains an {@code href} also
   * carries the {@code nofollow} link type in exactly one {@code rel}
   * attribute.
   * <p>
   * If a {@code rel} attribute is already present, {@code nofollow} is merged
   * into its value instead of appending a second {@code rel} attribute.
   * HTML parsers keep only the first of several attributes with the same
   * name, so an appended duplicate would be ignored in favor of the
   * author-supplied value.
   *
   * @param attrs alternating attribute names and values; modified in place.
   */
  private static void ensureRelNofollow(List<String> attrs) {
    boolean hasHref = false;
    int relValueIndex = -1;
    for (int i = 0, n = attrs.size(); i < n; i += 2) {
      String attrName = attrs.get(i);
      if ("href".equals(attrName)) {
        hasHref = true;
      } else if (relValueIndex < 0 && "rel".equals(attrName) && i + 1 < n) {
        // Remember the first rel since that is the one a browser honors.
        relValueIndex = i + 1;
      }
    }
    if (!hasHref) { return; }
    if (relValueIndex < 0) {
      attrs.add("rel");
      attrs.add("nofollow");
      return;
    }
    String rel = attrs.get(relValueIndex);
    if (rel == null || rel.length() == 0) {
      attrs.set(relValueIndex, "nofollow");
      return;
    }
    // Link types are compared case-insensitively.
    for (String linkType : HTML_SPACE.split(rel)) {
      if ("nofollow".equalsIgnoreCase(linkType)) { return; }
    }
    attrs.set(relValueIndex, rel + " nofollow");
  }

  /**
   * Combines the element, per-element attribute, and global attribute
   * policies accumulated so far into one immutable map keyed by canonical
   * element name, applying the rel=nofollow requirement and the URL protocol
   * white-list along the way.
   * The result is cached until the next mutator call.
   */
  private ImmutableMap<String, ElementAndAttributePolicies> compilePolicies() {
    if (compiledPolicies != null) { return compiledPolicies; }

    // Copy maps before normalizing in case builder is reused.
    Map<String, ElementPolicy> elPolicies
        = Maps.newLinkedHashMap(this.elPolicies);
    Map<String, Map<String, AttributePolicy>> attrPolicies
        = Maps.newLinkedHashMap(this.attrPolicies);
    for (Map.Entry<String, Map<String, AttributePolicy>> e :
         attrPolicies.entrySet()) {
      e.setValue(Maps.newLinkedHashMap(e.getValue()));
    }
    Map<String, AttributePolicy> globalAttrPolicies
        = Maps.newLinkedHashMap(this.globalAttrPolicies);
    Set<String> allowedProtocols = ImmutableSet.copyOf(this.allowedProtocols);

    // Implement requireRelNofollowOnLinks
    if (requireRelNofollowOnLinks) {
      ElementPolicy linkPolicy = elPolicies.get("a");
      if (linkPolicy == null) {
        linkPolicy = ElementPolicy.REJECT_ALL_ELEMENT_POLICY;
      }
      elPolicies.put(
          "a",
          ElementPolicy.Util.join(
              linkPolicy,
              new ElementPolicy() {
                public String apply(String elementName, List<String> attrs) {
                  ensureRelNofollow(attrs);
                  return elementName;
                }
              }));
    }

    // Implement protocol policies.
    // For each URL attribute that is allowed, we further constrain it by
    // only allowing the value through if it specifies no protocol, or if it
    // specifies one in the allowedProtocols white-list.
    // This is done regardless of whether any protocols have been allowed, so
    // allowing the attribute "href" globally with the identity policy but
    // not white-listing any protocols, effectively disallows the "href"
    // attribute globally.
    {
      AttributePolicy urlAttributePolicy;
      if (allowedProtocols.size() == 3
          && allowedProtocols.contains("mailto")
          && allowedProtocols.contains("http")
          && allowedProtocols.contains("https")) {
        urlAttributePolicy = StandardUrlAttributePolicy.INSTANCE;
      } else {
        urlAttributePolicy = new FilterUrlByProtocolAttributePolicy(
            allowedProtocols);
      }
      Set<String> toGuard = Sets.newLinkedHashSet(URL_ATTRIBUTE_NAMES);
      for (String urlAttributeName : URL_ATTRIBUTE_NAMES) {
        if (globalAttrPolicies.containsKey(urlAttributeName)) {
          toGuard.remove(urlAttributeName);
          globalAttrPolicies.put(urlAttributeName, AttributePolicy.Util.join(
              urlAttributePolicy, globalAttrPolicies.get(urlAttributeName)));
        }
      }
      // Implement guards not implemented on global policies in the per-element
      // policy maps.
      for (Map.Entry<String, Map<String, AttributePolicy>> e
           : attrPolicies.entrySet()) {
        Map<String, AttributePolicy> policies = e.getValue();
        for (String urlAttributeName : toGuard) {
          if (policies.containsKey(urlAttributeName)) {
            policies.put(urlAttributeName, AttributePolicy.Util.join(
                urlAttributePolicy, policies.get(urlAttributeName)));
          }
        }
      }
    }

    ImmutableMap.Builder<String, ElementAndAttributePolicies> policiesBuilder
        = ImmutableMap.builder();
    for (Map.Entry<String, ElementPolicy> e : elPolicies.entrySet()) {
      String elementName = e.getKey();
      ElementPolicy elPolicy = e.getValue();
      if (ElementPolicy.REJECT_ALL_ELEMENT_POLICY.equals(elPolicy)) {
        continue;
      }

      Map<String, AttributePolicy> elAttrPolicies
          = attrPolicies.get(elementName);
      if (elAttrPolicies == null) { elAttrPolicies = ImmutableMap.of(); }
      ImmutableMap.Builder<String, AttributePolicy> attrs
          = ImmutableMap.builder();
      for (Map.Entry<String, AttributePolicy> ape : elAttrPolicies.entrySet()) {
        String attributeName = ape.getKey();
        // Handle below so we don't end up putting the same key into the map
        // twice.  ImmutableMap.Builder hates that.
        if (globalAttrPolicies.containsKey(attributeName)) { continue; }
        AttributePolicy policy = ape.getValue();
        if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
          attrs.put(attributeName, policy);
        }
      }
      for (Map.Entry<String, AttributePolicy> ape
           : globalAttrPolicies.entrySet()) {
        String attributeName = ape.getKey();
        // Element specific policy first, then the global one.
        AttributePolicy policy = AttributePolicy.Util.join(
            elAttrPolicies.get(attributeName), ape.getValue());
        if (!AttributePolicy.REJECT_ALL_ATTRIBUTE_POLICY.equals(policy)) {
          attrs.put(attributeName, policy);
        }
      }

      policiesBuilder.put(
          elementName,
          new ElementAndAttributePolicies(
              elementName,
              elPolicy, attrs.build(), skipIfEmpty.contains(elementName)));
    }
    return compiledPolicies = policiesBuilder.build();
  }

  /**
   * Builds the relationship between attributes, the values that they may have,
   * and the elements on which they may appear.
   *
   * @author Mike Samuel
   */
  public final class AttributeBuilder {
    private final List<String> attributeNames;
    private AttributePolicy policy = AttributePolicy.IDENTITY_ATTRIBUTE_POLICY;

    AttributeBuilder(List<? extends String> attributeNames) {
      this.attributeNames = ImmutableList.copyOf(attributeNames);
    }

    /**
     * Filters and/or transforms the attribute values
     * allowed by later {@code allow*} calls.
     * Multiple calls to {@code matching} are combined so that the policies
     * receive the value in order, each seeing the value after any
     * transformation by a previous policy.
     */
    public AttributeBuilder matching(AttributePolicy policy) {
      this.policy = AttributePolicy.Util.join(this.policy, policy);
      return this;
    }

    /**
     * Restrict the values allowed by later {@code allow*} calls to those
     * matching the pattern.
     * Multiple calls to {@code matching} are combined to restrict to the
     * intersection of possible matched values.
     */
    public AttributeBuilder matching(final Pattern pattern) {
      return matching(new AttributePolicy() {
        public @Nullable String apply(
            String elementName, String attributeName, String value) {
          return pattern.matcher(value).matches() ? value : null;
        }
      });
    }

    /**
     * Restrict the values allowed by later {@code allow*} calls to those
     * matching the given predicate.
     * Multiple calls to {@code matching} are combined to restrict to the
     * intersection of possible matched values.
     */
    public AttributeBuilder matching(
        final Predicate<? super String> filter) {
      return matching(new AttributePolicy() {
        public @Nullable String apply(
            String elementName, String attributeName, String value) {
          return filter.apply(value) ? value : null;
        }
      });
    }

    /**
     * Restrict the values allowed by later {@code allow*} calls to those
     * supplied.
     * Multiple calls to {@code matching} are combined to restrict to the
     * intersection of possible matched values.
     *
     * @param ignoreCase if true, values are compared ignoring case and a
     *     matching value is passed on in lower-case.
     */
    public AttributeBuilder matching(
        boolean ignoreCase, String... allowedValues) {
      return matching(ignoreCase, ImmutableSet.copyOf(allowedValues));
    }

    /**
     * Restrict the values allowed by later {@code allow*} calls to those
     * supplied.
     * Multiple calls to {@code matching} are combined to restrict to the
     * intersection of possible matched values.
     *
     * @param ignoreCase if true, values are compared ignoring case and a
     *     matching value is passed on in lower-case.
     */
    public AttributeBuilder matching(
        final boolean ignoreCase, Set<? extends String> allowedValues) {
      final ImmutableSet<String> allowed;
      if (ignoreCase) {
        // Candidate values are lower-cased before lookup, so the allowed set
        // must be lower-cased too or a mixed-case entry could never match.
        ImmutableSet.Builder<String> canonical = ImmutableSet.builder();
        for (String allowedValue : allowedValues) {
          canonical.add(Strings.toLowerCase(allowedValue));
        }
        allowed = canonical.build();
      } else {
        allowed = ImmutableSet.copyOf(allowedValues);
      }
      return matching(new AttributePolicy() {
        public @Nullable String apply(
            String elementName, String attributeName, String value) {
          String candidate = ignoreCase ? Strings.toLowerCase(value) : value;
          return allowed.contains(candidate) ? candidate : null;
        }
      });
    }

    /**
     * Allows the given attributes on any elements but filters the
     * attributes' values based on previous calls to {@code matching(...)}.
     * Global attribute policies are applied after element specific policies.
     * Be careful of using this with attributes like <code>type</code> which
     * have different meanings on different elements.
     * Also be careful of allowing globally attributes like <code>href</code>
     * which can have more far-reaching effects on tags like
     * <code>&lt;base&gt;</code> and <code>&lt;link&gt;</code> than on
     * <code>&lt;a&gt;</code> because in the former, they have an effect without
     * user interaction and can change the behavior of the current page.
     */
    public HtmlPolicyBuilder globally() {
      return HtmlPolicyBuilder.this.allowAttributesGlobally(
          policy, attributeNames);
    }

    /**
     * Allows the named attributes on the given elements but filters the
     * attributes' values based on previous calls to {@code matching(...)}.
     */
    public HtmlPolicyBuilder onElements(String... elementNames) {
      ImmutableList.Builder<String> b = ImmutableList.builder();
      for (String elementName : elementNames) {
        b.add(HtmlLexer.canonicalName(elementName));
      }
      return HtmlPolicyBuilder.this.allowAttributesOnElements(
          policy, attributeNames, b.build());
    }
  }
}