1// Copyright 2011 The Go Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style 3// license that can be found in the LICENSE file. 4 5package template 6 7import ( 8 "bytes" 9 "fmt" 10 "strings" 11 "unicode/utf8" 12) 13 14// htmlNospaceEscaper escapes for inclusion in unquoted attribute values. 15func htmlNospaceEscaper(args ...any) string { 16 s, t := stringify(args...) 17 if s == "" { 18 return filterFailsafe 19 } 20 if t == contentTypeHTML { 21 return htmlReplacer(stripTags(s), htmlNospaceNormReplacementTable, false) 22 } 23 return htmlReplacer(s, htmlNospaceReplacementTable, false) 24} 25 26// attrEscaper escapes for inclusion in quoted attribute values. 27func attrEscaper(args ...any) string { 28 s, t := stringify(args...) 29 if t == contentTypeHTML { 30 return htmlReplacer(stripTags(s), htmlNormReplacementTable, true) 31 } 32 return htmlReplacer(s, htmlReplacementTable, true) 33} 34 35// rcdataEscaper escapes for inclusion in an RCDATA element body. 36func rcdataEscaper(args ...any) string { 37 s, t := stringify(args...) 38 if t == contentTypeHTML { 39 return htmlReplacer(s, htmlNormReplacementTable, true) 40 } 41 return htmlReplacer(s, htmlReplacementTable, true) 42} 43 44// htmlEscaper escapes for inclusion in HTML text. 45func htmlEscaper(args ...any) string { 46 s, t := stringify(args...) 47 if t == contentTypeHTML { 48 return s 49 } 50 return htmlReplacer(s, htmlReplacementTable, true) 51} 52 53// htmlReplacementTable contains the runes that need to be escaped 54// inside a quoted attribute value or in a text node. 55var htmlReplacementTable = []string{ 56 // https://www.w3.org/TR/html5/syntax.html#attribute-value-(unquoted)-state 57 // U+0000 NULL Parse error. Append a U+FFFD REPLACEMENT 58 // CHARACTER character to the current attribute's value. 59 // " 60 // and similarly 61 // https://www.w3.org/TR/html5/syntax.html#before-attribute-value-state 62 0: "\uFFFD", 63 '"': """, 64 '&': "&", 65 '\'': "'", 66 '+': "+", 67 '<': "<", 68 '>': ">", 69} 70 71// htmlNormReplacementTable is like htmlReplacementTable but without '&' to 72// avoid over-encoding existing entities. 73var htmlNormReplacementTable = []string{ 74 0: "\uFFFD", 75 '"': """, 76 '\'': "'", 77 '+': "+", 78 '<': "<", 79 '>': ">", 80} 81 82// htmlNospaceReplacementTable contains the runes that need to be escaped 83// inside an unquoted attribute value. 84// The set of runes escaped is the union of the HTML specials and 85// those determined by running the JS below in browsers: 86// <div id=d></div> 87// <script>(function () { 88// var a = [], d = document.getElementById("d"), i, c, s; 89// for (i = 0; i < 0x10000; ++i) { 90// 91// c = String.fromCharCode(i); 92// d.innerHTML = "<span title=" + c + "lt" + c + "></span>" 93// s = d.getElementsByTagName("SPAN")[0]; 94// if (!s || s.title !== c + "lt" + c) { a.push(i.toString(16)); } 95// 96// } 97// document.write(a.join(", ")); 98// })()</script> 99var htmlNospaceReplacementTable = []string{ 100 0: "�", 101 '\t': "	", 102 '\n': "
", 103 '\v': "", 104 '\f': "", 105 '\r': "
", 106 ' ': " ", 107 '"': """, 108 '&': "&", 109 '\'': "'", 110 '+': "+", 111 '<': "<", 112 '=': "=", 113 '>': ">", 114 // A parse error in the attribute value (unquoted) and 115 // before attribute value states. 116 // Treated as a quoting character by IE. 117 '`': "`", 118} 119 120// htmlNospaceNormReplacementTable is like htmlNospaceReplacementTable but 121// without '&' to avoid over-encoding existing entities. 122var htmlNospaceNormReplacementTable = []string{ 123 0: "�", 124 '\t': "	", 125 '\n': "
", 126 '\v': "", 127 '\f': "", 128 '\r': "
", 129 ' ': " ", 130 '"': """, 131 '\'': "'", 132 '+': "+", 133 '<': "<", 134 '=': "=", 135 '>': ">", 136 // A parse error in the attribute value (unquoted) and 137 // before attribute value states. 138 // Treated as a quoting character by IE. 139 '`': "`", 140} 141 142// htmlReplacer returns s with runes replaced according to replacementTable 143// and when badRunes is true, certain bad runes are allowed through unescaped. 144func htmlReplacer(s string, replacementTable []string, badRunes bool) string { 145 written, b := 0, new(strings.Builder) 146 r, w := rune(0), 0 147 for i := 0; i < len(s); i += w { 148 // Cannot use 'for range s' because we need to preserve the width 149 // of the runes in the input. If we see a decoding error, the input 150 // width will not be utf8.Runelen(r) and we will overrun the buffer. 151 r, w = utf8.DecodeRuneInString(s[i:]) 152 if int(r) < len(replacementTable) { 153 if repl := replacementTable[r]; len(repl) != 0 { 154 if written == 0 { 155 b.Grow(len(s)) 156 } 157 b.WriteString(s[written:i]) 158 b.WriteString(repl) 159 written = i + w 160 } 161 } else if badRunes { 162 // No-op. 163 // IE does not allow these ranges in unquoted attrs. 164 } else if 0xfdd0 <= r && r <= 0xfdef || 0xfff0 <= r && r <= 0xffff { 165 if written == 0 { 166 b.Grow(len(s)) 167 } 168 fmt.Fprintf(b, "%s&#x%x;", s[written:i], r) 169 written = i + w 170 } 171 } 172 if written == 0 { 173 return s 174 } 175 b.WriteString(s[written:]) 176 return b.String() 177} 178 179// stripTags takes a snippet of HTML and returns only the text content. 180// For example, `<b>¡Hi!</b> <script>...</script>` -> `¡Hi! `. 181func stripTags(html string) string { 182 var b strings.Builder 183 s, c, i, allText := []byte(html), context{}, 0, true 184 // Using the transition funcs helps us avoid mangling 185 // `<div title="1>2">` or `I <3 Ponies!`. 186 for i != len(s) { 187 if c.delim == delimNone { 188 st := c.state 189 // Use RCDATA instead of parsing into JS or CSS styles. 190 if c.element != elementNone && !isInTag(st) { 191 st = stateRCDATA 192 } 193 d, nread := transitionFunc[st](c, s[i:]) 194 i1 := i + nread 195 if c.state == stateText || c.state == stateRCDATA { 196 // Emit text up to the start of the tag or comment. 197 j := i1 198 if d.state != c.state { 199 for j1 := j - 1; j1 >= i; j1-- { 200 if s[j1] == '<' { 201 j = j1 202 break 203 } 204 } 205 } 206 b.Write(s[i:j]) 207 } else { 208 allText = false 209 } 210 c, i = d, i1 211 continue 212 } 213 i1 := i + bytes.IndexAny(s[i:], delimEnds[c.delim]) 214 if i1 < i { 215 break 216 } 217 if c.delim != delimSpaceOrTagEnd { 218 // Consume any quote. 219 i1++ 220 } 221 c, i = context{state: stateTag, element: c.element}, i1 222 } 223 if allText { 224 return html 225 } else if c.state == stateText || c.state == stateRCDATA { 226 b.Write(s[i:]) 227 } 228 return b.String() 229} 230 231// htmlNameFilter accepts valid parts of an HTML attribute or tag name or 232// a known-safe HTML attribute. 233func htmlNameFilter(args ...any) string { 234 s, t := stringify(args...) 235 if t == contentTypeHTMLAttr { 236 return s 237 } 238 if len(s) == 0 { 239 // Avoid violation of structure preservation. 240 // <input checked {{.K}}={{.V}}>. 241 // Without this, if .K is empty then .V is the value of 242 // checked, but otherwise .V is the value of the attribute 243 // named .K. 244 return filterFailsafe 245 } 246 s = strings.ToLower(s) 247 if t := attrType(s); t != contentTypePlain { 248 // TODO: Split attr and element name part filters so we can recognize known attributes. 249 return filterFailsafe 250 } 251 for _, r := range s { 252 switch { 253 case '0' <= r && r <= '9': 254 case 'a' <= r && r <= 'z': 255 default: 256 return filterFailsafe 257 } 258 } 259 return s 260} 261 262// commentEscaper returns the empty string regardless of input. 263// Comment content does not correspond to any parsed structure or 264// human-readable content, so the simplest and most secure policy is to drop 265// content interpolated into comments. 266// This approach is equally valid whether or not static comment content is 267// removed from the template. 268func commentEscaper(args ...any) string { 269 return "" 270} 271