1 // Copyright (c) 2011, Mike Samuel 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions 6 // are met: 7 // 8 // Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // Redistributions in binary form must reproduce the above copyright 11 // notice, this list of conditions and the following disclaimer in the 12 // documentation and/or other materials provided with the distribution. 13 // Neither the name of the OWASP nor the names of its contributors may 14 // be used to endorse or promote products derived from this software 15 // without specific prior written permission. 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 29 package org.owasp.html; 30 31 import com.google.common.collect.ImmutableMap; 32 33 /** 34 * Utilities for decoding HTML entities, e.g., {@code &}. 35 */ 36 class HtmlEntities { 37 38 /** 39 * Decodes any HTML entity at the given location. This handles both named and 40 * numeric entities. 41 * 42 * @param html HTML text. 43 * @param offset the position of the sequence to decode. 44 * @param limit the last position in chars that could be part of the sequence 45 * to decode. 46 * @return The offset after the end of the decoded sequence and the decoded 47 * code-point or code-unit packed into a long. 48 * The first 32 bits are the offset, and the second 32 bits are a 49 * code-point or a code-unit. 50 */ decodeEntityAt(String html, int offset, int limit)51 public static long decodeEntityAt(String html, int offset, int limit) { 52 char ch = html.charAt(offset); 53 if ('&' != ch) { 54 return ((offset + 1L) << 32) | ch; 55 } 56 57 int entityLimit = Math.min(limit, offset + 10); 58 int end = -1; 59 int tail = -1; 60 if (entityLimit == limit) { 61 // Assume a broken entity that ends at the end until shown otherwise. 62 end = tail = entityLimit; 63 } 64 entityloop: 65 for (int i = offset + 1; i < entityLimit; ++i) { 66 switch (html.charAt(i)) { 67 case ';': // An unbroken entity. 68 end = i; 69 tail = end + 1; 70 break entityloop; 71 case '#': 72 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': 73 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L': 74 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': 75 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X': 76 case 'Y': case 'Z': 77 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': 78 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l': 79 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': 80 case 's': case 't': case 'u': case 'v': case 'w': case 'x': 81 case 'y': case 'z': 82 case '0': case '1': case '2': case '3': case '4': case '5': 83 case '6': case '7': case '8': case '9': 84 break; 85 case '=': 86 // An equal sign after an entity missing a closing semicolon should 87 // never have the semicolon inserted since that causes trouble with 88 // parameters in partially encoded URLs. 89 return ((offset + 1L) << 32) | '&'; 90 default: // A possible broken entity. 91 end = i; 92 tail = i; 93 break entityloop; 94 } 95 } 96 if (end < 0 || offset + 2 >= end) { 97 return ((offset + 1L) << 32) | '&'; 98 } 99 // Now we know where the entity ends, and that there is at least one 100 // character in the entity name 101 char ch1 = html.charAt(offset + 1); 102 char ch2 = html.charAt(offset + 2); 103 int codepoint = -1; 104 if ('#' == ch1) { 105 // numeric entity 106 if ('x' == ch2 || 'X' == ch2) { 107 if (end == offset + 3) { // No digits 108 return ((offset + 1L) << 32) | '&'; 109 } 110 codepoint = 0; 111 // hex literal 112 digloop: 113 for (int i = offset + 3; i < end; ++i) { 114 char digit = html.charAt(i); 115 switch (digit & 0xfff8) { 116 case 0x30: case 0x38: // ASCII 48-57 are '0'-'9' 117 int decDig = digit & 0xf; 118 if (decDig < 10) { 119 codepoint = (codepoint << 4) | decDig; 120 } else { 121 codepoint = -1; 122 break digloop; 123 } 124 break; 125 // ASCII 65-70 and 97-102 are 'A'-'Z' && 'a'-'z' 126 case 0x40: case 0x60: 127 int hexDig = (digit & 0x7); 128 if (hexDig != 0 && hexDig < 7) { 129 codepoint = (codepoint << 4) | (hexDig + 9); 130 } else { 131 codepoint = -1; 132 break digloop; 133 } 134 break; 135 default: 136 codepoint = -1; 137 break digloop; 138 } 139 } 140 if (codepoint > Character.MAX_CODE_POINT) { 141 codepoint = 0xfffd; // Unknown. 142 } 143 } else { 144 codepoint = 0; 145 // decimal literal 146 digloop: 147 for (int i = offset + 2; i < end; ++i) { 148 char digit = html.charAt(i); 149 switch (digit & 0xfff8) { 150 case 0x30: case 0x38: // ASCII 48-57 are '0'-'9' 151 int decDig = digit - '0'; 152 if (decDig < 10) { 153 codepoint = (codepoint * 10) + decDig; 154 } else { 155 codepoint = -1; 156 break digloop; 157 } 158 break; 159 default: 160 codepoint = -1; 161 break digloop; 162 } 163 } 164 if (codepoint > Character.MAX_CODE_POINT) { 165 codepoint = 0xfffd; // Unknown. 166 } 167 } 168 } else { 169 Trie t = ENTITY_TRIE; 170 for (int i = offset + 1; i < end; ++i) { 171 char nameChar = html.charAt(i); 172 t = t.lookup(nameChar); 173 if (t == null) { break; } 174 } 175 if (t == null) { 176 t = ENTITY_TRIE; 177 for (int i = offset + 1; i < end; ++i) { 178 char nameChar = html.charAt(i); 179 if ('Z' >= nameChar && nameChar >= 'A') { nameChar |= 32; } 180 t = t.lookup(nameChar); 181 if (t == null) { break; } 182 } 183 } 184 if (t != null && t.isTerminal()) { 185 codepoint = t.getValue(); 186 } 187 } 188 if (codepoint < 0) { 189 return ((offset + 1L) << 32) | '&'; 190 } else { 191 return (((long) tail) << 32) | codepoint; 192 } 193 } 194 195 // /** A possible entity name like "amp" or "gt". */ 196 // public static boolean isEntityName(String name) { 197 // Trie t = ENTITY_TRIE; 198 // int n = name.length(); 199 // 200 // // Treat AMP the same amp, but not Amp. 201 // boolean isUcase = true; 202 // for (int i = 0; i < n; ++i) { 203 // char ch = name.charAt(i); 204 // if (!('A' <= ch && ch <= 'Z')) { 205 // isUcase = false; 206 // break; 207 // } 208 // } 209 // 210 // if (isUcase) { name = Strings.toLowerCase(name); } 211 // 212 // for (int i = 0; i < n; ++i) { 213 // t = t.lookup(name.charAt(i)); 214 // if (t == null) { return false; } 215 // } 216 // return t.isTerminal(); 217 // } 218 219 /** A trie that maps entity names to codepoints. */ 220 public static final Trie ENTITY_TRIE = new Trie( 221 ImmutableMap.<String, Integer>builder() 222 // C0 Controls and Basic Latin 223 .put("quot", Integer.valueOf('"')) 224 .put("amp", Integer.valueOf('&')) 225 .put("lt", Integer.valueOf('<')) 226 .put("gt", Integer.valueOf('>')) 227 228 // XML 1.0 229 .put("apos", Integer.valueOf('\'')) 230 231 // HTML4 entities 232 .put("nbsp", Integer.valueOf('\u00a0')) 233 .put("iexcl", Integer.valueOf('\u00a1')) 234 .put("cent", Integer.valueOf('\u00a2')) 235 .put("pound", Integer.valueOf('\u00a3')) 236 .put("curren", Integer.valueOf('\u00a4')) 237 .put("yen", Integer.valueOf('\u00a5')) 238 .put("brvbar", Integer.valueOf('\u00a6')) 239 .put("sect", Integer.valueOf('\u00a7')) 240 .put("uml", Integer.valueOf('\u00a8')) 241 .put("copy", Integer.valueOf('\u00a9')) 242 .put("ordf", Integer.valueOf('\u00aa')) 243 .put("laquo", Integer.valueOf('\u00ab')) 244 .put("not", Integer.valueOf('\u00ac')) 245 .put("shy", Integer.valueOf('\u00ad')) 246 .put("reg", Integer.valueOf('\u00ae')) 247 .put("macr", Integer.valueOf('\u00af')) 248 .put("deg", Integer.valueOf('\u00b0')) 249 .put("plusmn", Integer.valueOf('\u00b1')) 250 .put("sup2", Integer.valueOf('\u00b2')) 251 .put("sup3", Integer.valueOf('\u00b3')) 252 .put("acute", Integer.valueOf('\u00b4')) 253 .put("micro", Integer.valueOf('\u00b5')) 254 .put("para", Integer.valueOf('\u00b6')) 255 .put("middot", Integer.valueOf('\u00b7')) 256 .put("cedil", Integer.valueOf('\u00b8')) 257 .put("sup1", Integer.valueOf('\u00b9')) 258 .put("ordm", Integer.valueOf('\u00ba')) 259 .put("raquo", Integer.valueOf('\u00bb')) 260 .put("frac14", Integer.valueOf('\u00bc')) 261 .put("frac12", Integer.valueOf('\u00bd')) 262 .put("frac34", Integer.valueOf('\u00be')) 263 .put("iquest", Integer.valueOf('\u00bf')) 264 .put("Agrave", Integer.valueOf('\u00c0')) 265 .put("Aacute", Integer.valueOf('\u00c1')) 266 .put("Acirc", Integer.valueOf('\u00c2')) 267 .put("Atilde", Integer.valueOf('\u00c3')) 268 .put("Auml", Integer.valueOf('\u00c4')) 269 .put("Aring", Integer.valueOf('\u00c5')) 270 .put("AElig", Integer.valueOf('\u00c6')) 271 .put("Ccedil", Integer.valueOf('\u00c7')) 272 .put("Egrave", Integer.valueOf('\u00c8')) 273 .put("Eacute", Integer.valueOf('\u00c9')) 274 .put("Ecirc", Integer.valueOf('\u00ca')) 275 .put("Euml", Integer.valueOf('\u00cb')) 276 .put("Igrave", Integer.valueOf('\u00cc')) 277 .put("Iacute", Integer.valueOf('\u00cd')) 278 .put("Icirc", Integer.valueOf('\u00ce')) 279 .put("Iuml", Integer.valueOf('\u00cf')) 280 .put("ETH", Integer.valueOf('\u00d0')) 281 .put("Ntilde", Integer.valueOf('\u00d1')) 282 .put("Ograve", Integer.valueOf('\u00d2')) 283 .put("Oacute", Integer.valueOf('\u00d3')) 284 .put("Ocirc", Integer.valueOf('\u00d4')) 285 .put("Otilde", Integer.valueOf('\u00d5')) 286 .put("Ouml", Integer.valueOf('\u00d6')) 287 .put("times", Integer.valueOf('\u00d7')) 288 .put("Oslash", Integer.valueOf('\u00d8')) 289 .put("Ugrave", Integer.valueOf('\u00d9')) 290 .put("Uacute", Integer.valueOf('\u00da')) 291 .put("Ucirc", Integer.valueOf('\u00db')) 292 .put("Uuml", Integer.valueOf('\u00dc')) 293 .put("Yacute", Integer.valueOf('\u00dd')) 294 .put("THORN", Integer.valueOf('\u00de')) 295 .put("szlig", Integer.valueOf('\u00df')) 296 .put("agrave", Integer.valueOf('\u00e0')) 297 .put("aacute", Integer.valueOf('\u00e1')) 298 .put("acirc", Integer.valueOf('\u00e2')) 299 .put("atilde", Integer.valueOf('\u00e3')) 300 .put("auml", Integer.valueOf('\u00e4')) 301 .put("aring", Integer.valueOf('\u00e5')) 302 .put("aelig", Integer.valueOf('\u00e6')) 303 .put("ccedil", Integer.valueOf('\u00e7')) 304 .put("egrave", Integer.valueOf('\u00e8')) 305 .put("eacute", Integer.valueOf('\u00e9')) 306 .put("ecirc", Integer.valueOf('\u00ea')) 307 .put("euml", Integer.valueOf('\u00eb')) 308 .put("igrave", Integer.valueOf('\u00ec')) 309 .put("iacute", Integer.valueOf('\u00ed')) 310 .put("icirc", Integer.valueOf('\u00ee')) 311 .put("iuml", Integer.valueOf('\u00ef')) 312 .put("eth", Integer.valueOf('\u00f0')) 313 .put("ntilde", Integer.valueOf('\u00f1')) 314 .put("ograve", Integer.valueOf('\u00f2')) 315 .put("oacute", Integer.valueOf('\u00f3')) 316 .put("ocirc", Integer.valueOf('\u00f4')) 317 .put("otilde", Integer.valueOf('\u00f5')) 318 .put("ouml", Integer.valueOf('\u00f6')) 319 .put("divide", Integer.valueOf('\u00f7')) 320 .put("oslash", Integer.valueOf('\u00f8')) 321 .put("ugrave", Integer.valueOf('\u00f9')) 322 .put("uacute", Integer.valueOf('\u00fa')) 323 .put("ucirc", Integer.valueOf('\u00fb')) 324 .put("uuml", Integer.valueOf('\u00fc')) 325 .put("yacute", Integer.valueOf('\u00fd')) 326 .put("thorn", Integer.valueOf('\u00fe')) 327 .put("yuml", Integer.valueOf('\u00ff')) 328 329 // Latin Extended-B 330 .put("fnof", Integer.valueOf('\u0192')) 331 332 // Greek 333 .put("Alpha", Integer.valueOf('\u0391')) 334 .put("Beta", Integer.valueOf('\u0392')) 335 .put("Gamma", Integer.valueOf('\u0393')) 336 .put("Delta", Integer.valueOf('\u0394')) 337 .put("Epsilon", Integer.valueOf('\u0395')) 338 .put("Zeta", Integer.valueOf('\u0396')) 339 .put("Eta", Integer.valueOf('\u0397')) 340 .put("Theta", Integer.valueOf('\u0398')) 341 .put("Iota", Integer.valueOf('\u0399')) 342 .put("Kappa", Integer.valueOf('\u039a')) 343 .put("Lambda", Integer.valueOf('\u039b')) 344 .put("Mu", Integer.valueOf('\u039c')) 345 .put("Nu", Integer.valueOf('\u039d')) 346 .put("Xi", Integer.valueOf('\u039e')) 347 .put("Omicron", Integer.valueOf('\u039f')) 348 .put("Pi", Integer.valueOf('\u03a0')) 349 .put("Rho", Integer.valueOf('\u03a1')) 350 .put("Sigma", Integer.valueOf('\u03a3')) 351 .put("Tau", Integer.valueOf('\u03a4')) 352 .put("Upsilon", Integer.valueOf('\u03a5')) 353 .put("Phi", Integer.valueOf('\u03a6')) 354 .put("Chi", Integer.valueOf('\u03a7')) 355 .put("Psi", Integer.valueOf('\u03a8')) 356 .put("Omega", Integer.valueOf('\u03a9')) 357 358 .put("alpha", Integer.valueOf('\u03b1')) 359 .put("beta", Integer.valueOf('\u03b2')) 360 .put("gamma", Integer.valueOf('\u03b3')) 361 .put("delta", Integer.valueOf('\u03b4')) 362 .put("epsilon", Integer.valueOf('\u03b5')) 363 .put("zeta", Integer.valueOf('\u03b6')) 364 .put("eta", Integer.valueOf('\u03b7')) 365 .put("theta", Integer.valueOf('\u03b8')) 366 .put("iota", Integer.valueOf('\u03b9')) 367 .put("kappa", Integer.valueOf('\u03ba')) 368 .put("lambda", Integer.valueOf('\u03bb')) 369 .put("mu", Integer.valueOf('\u03bc')) 370 .put("nu", Integer.valueOf('\u03bd')) 371 .put("xi", Integer.valueOf('\u03be')) 372 .put("omicron", Integer.valueOf('\u03bf')) 373 .put("pi", Integer.valueOf('\u03c0')) 374 .put("rho", Integer.valueOf('\u03c1')) 375 .put("sigmaf", Integer.valueOf('\u03c2')) 376 .put("sigma", Integer.valueOf('\u03c3')) 377 .put("tau", Integer.valueOf('\u03c4')) 378 .put("upsilon", Integer.valueOf('\u03c5')) 379 .put("phi", Integer.valueOf('\u03c6')) 380 .put("chi", Integer.valueOf('\u03c7')) 381 .put("psi", Integer.valueOf('\u03c8')) 382 .put("omega", Integer.valueOf('\u03c9')) 383 .put("thetasym", Integer.valueOf('\u03d1')) 384 .put("upsih", Integer.valueOf('\u03d2')) 385 .put("piv", Integer.valueOf('\u03d6')) 386 387 // General Punctuation 388 .put("bull", Integer.valueOf('\u2022')) 389 .put("hellip", Integer.valueOf('\u2026')) 390 .put("prime", Integer.valueOf('\u2032')) 391 .put("Prime", Integer.valueOf('\u2033')) 392 .put("oline", Integer.valueOf('\u203e')) 393 .put("frasl", Integer.valueOf('\u2044')) 394 395 // Letterlike Symbols 396 .put("weierp", Integer.valueOf('\u2118')) 397 .put("image", Integer.valueOf('\u2111')) 398 .put("real", Integer.valueOf('\u211c')) 399 .put("trade", Integer.valueOf('\u2122')) 400 .put("alefsym", Integer.valueOf('\u2135')) 401 402 // Arrows 403 .put("larr", Integer.valueOf('\u2190')) 404 .put("uarr", Integer.valueOf('\u2191')) 405 .put("rarr", Integer.valueOf('\u2192')) 406 .put("darr", Integer.valueOf('\u2193')) 407 .put("harr", Integer.valueOf('\u2194')) 408 .put("crarr", Integer.valueOf('\u21b5')) 409 .put("lArr", Integer.valueOf('\u21d0')) 410 .put("uArr", Integer.valueOf('\u21d1')) 411 .put("rArr", Integer.valueOf('\u21d2')) 412 .put("dArr", Integer.valueOf('\u21d3')) 413 .put("hArr", Integer.valueOf('\u21d4')) 414 415 // Mathematical Operators 416 .put("forall", Integer.valueOf('\u2200')) 417 .put("part", Integer.valueOf('\u2202')) 418 .put("exist", Integer.valueOf('\u2203')) 419 .put("empty", Integer.valueOf('\u2205')) 420 .put("nabla", Integer.valueOf('\u2207')) 421 .put("isin", Integer.valueOf('\u2208')) 422 .put("notin", Integer.valueOf('\u2209')) 423 .put("ni", Integer.valueOf('\u220b')) 424 .put("prod", Integer.valueOf('\u220f')) 425 .put("sum", Integer.valueOf('\u2211')) 426 .put("minus", Integer.valueOf('\u2212')) 427 .put("lowast", Integer.valueOf('\u2217')) 428 .put("radic", Integer.valueOf('\u221a')) 429 .put("prop", Integer.valueOf('\u221d')) 430 .put("infin", Integer.valueOf('\u221e')) 431 .put("ang", Integer.valueOf('\u2220')) 432 .put("and", Integer.valueOf('\u2227')) 433 .put("or", Integer.valueOf('\u2228')) 434 .put("cap", Integer.valueOf('\u2229')) 435 .put("cup", Integer.valueOf('\u222a')) 436 .put("int", Integer.valueOf('\u222b')) 437 .put("there4", Integer.valueOf('\u2234')) 438 .put("sim", Integer.valueOf('\u223c')) 439 .put("cong", Integer.valueOf('\u2245')) 440 .put("asymp", Integer.valueOf('\u2248')) 441 .put("ne", Integer.valueOf('\u2260')) 442 .put("equiv", Integer.valueOf('\u2261')) 443 .put("le", Integer.valueOf('\u2264')) 444 .put("ge", Integer.valueOf('\u2265')) 445 .put("sub", Integer.valueOf('\u2282')) 446 .put("sup", Integer.valueOf('\u2283')) 447 .put("nsub", Integer.valueOf('\u2284')) 448 .put("sube", Integer.valueOf('\u2286')) 449 .put("supe", Integer.valueOf('\u2287')) 450 .put("oplus", Integer.valueOf('\u2295')) 451 .put("otimes", Integer.valueOf('\u2297')) 452 .put("perp", Integer.valueOf('\u22a5')) 453 .put("sdot", Integer.valueOf('\u22c5')) 454 455 // Miscellaneous Technical 456 .put("lceil", Integer.valueOf('\u2308')) 457 .put("rceil", Integer.valueOf('\u2309')) 458 .put("lfloor", Integer.valueOf('\u230a')) 459 .put("rfloor", Integer.valueOf('\u230b')) 460 .put("lang", Integer.valueOf('\u2329')) 461 .put("rang", Integer.valueOf('\u232a')) 462 463 // Geometric Shapes 464 .put("loz", Integer.valueOf('\u25ca')) 465 466 // Miscellaneous Symbols 467 .put("spades", Integer.valueOf('\u2660')) 468 .put("clubs", Integer.valueOf('\u2663')) 469 .put("hearts", Integer.valueOf('\u2665')) 470 .put("diams", Integer.valueOf('\u2666')) 471 472 // Latin Extended-A 473 .put("OElig", Integer.valueOf('\u0152')) 474 .put("oelig", Integer.valueOf('\u0153')) 475 .put("Scaron", Integer.valueOf('\u0160')) 476 .put("scaron", Integer.valueOf('\u0161')) 477 .put("Yuml", Integer.valueOf('\u0178')) 478 479 // Spacing Modifier Letters 480 .put("circ", Integer.valueOf('\u02c6')) 481 .put("tilde", Integer.valueOf('\u02dc')) 482 483 // General Punctuation 484 .put("ensp", Integer.valueOf('\u2002')) 485 .put("emsp", Integer.valueOf('\u2003')) 486 .put("thinsp", Integer.valueOf('\u2009')) 487 .put("zwnj", Integer.valueOf('\u200c')) 488 .put("zwj", Integer.valueOf('\u200d')) 489 .put("lrm", Integer.valueOf('\u200e')) 490 .put("rlm", Integer.valueOf('\u200f')) 491 .put("ndash", Integer.valueOf('\u2013')) 492 .put("mdash", Integer.valueOf('\u2014')) 493 .put("lsquo", Integer.valueOf('\u2018')) 494 .put("rsquo", Integer.valueOf('\u2019')) 495 .put("sbquo", Integer.valueOf('\u201a')) 496 .put("ldquo", Integer.valueOf('\u201c')) 497 .put("rdquo", Integer.valueOf('\u201d')) 498 .put("bdquo", Integer.valueOf('\u201e')) 499 .put("dagger", Integer.valueOf('\u2020')) 500 .put("Dagger", Integer.valueOf('\u2021')) 501 .put("permil", Integer.valueOf('\u2030')) 502 .put("lsaquo", Integer.valueOf('\u2039')) 503 .put("rsaquo", Integer.valueOf('\u203a')) 504 .put("euro", Integer.valueOf('\u20ac')) 505 .build()); 506 HtmlEntities()507 private HtmlEntities() { /* uninstantiable */ } 508 } 509