// Copyright (c) 2011, Mike Samuel
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// Neither the name of the OWASP nor the names of its contributors may
// be used to endorse or promote products derived from this software
// without specific prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package org.owasp.html;

import com.google.common.collect.ImmutableMap;

33 /**
34  * Utilities for decoding HTML entities, e.g., {@code &}.
35  */
36 class HtmlEntities {
37 
38   /**
39    * Decodes any HTML entity at the given location.  This handles both named and
40    * numeric entities.
41    *
42    * @param html HTML text.
43    * @param offset the position of the sequence to decode.
44    * @param limit the last position in chars that could be part of the sequence
45    *    to decode.
46    * @return The offset after the end of the decoded sequence and the decoded
47    *    code-point or code-unit packed into a long.
48    *    The first 32 bits are the offset, and the second 32 bits are a
49    *    code-point or a code-unit.
50    */
decodeEntityAt(String html, int offset, int limit)51   public static long decodeEntityAt(String html, int offset, int limit) {
52     char ch = html.charAt(offset);
53     if ('&' != ch) {
54       return ((offset + 1L) << 32) | ch;
55     }
56 
57     int entityLimit = Math.min(limit, offset + 10);
58     int end = -1;
59     int tail = -1;
60     if (entityLimit == limit) {
61       // Assume a broken entity that ends at the end until shown otherwise.
62       end = tail = entityLimit;
63     }
64     entityloop:
65     for (int i = offset + 1; i < entityLimit; ++i) {
66       switch (html.charAt(i)) {
67         case ';':  // An unbroken entity.
68           end = i;
69           tail = end + 1;
70           break entityloop;
71         case '#':
72         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
73         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
74         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
75         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
76         case 'Y': case 'Z':
77         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
78         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
79         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
80         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
81         case 'y': case 'z':
82         case '0': case '1': case '2': case '3': case '4': case '5':
83         case '6': case '7': case '8': case '9':
84           break;
85         case '=':
86           // An equal sign after an entity missing a closing semicolon should
87           // never have the semicolon inserted since that causes trouble with
88           // parameters in partially encoded URLs.
89           return ((offset + 1L) << 32) | '&';
90         default:  // A possible broken entity.
91           end = i;
92           tail = i;
93           break entityloop;
94       }
95     }
96     if (end < 0 || offset + 2 >= end) {
97       return ((offset + 1L) << 32) | '&';
98     }
99     // Now we know where the entity ends, and that there is at least one
100     // character in the entity name
101     char ch1 = html.charAt(offset + 1);
102     char ch2 = html.charAt(offset + 2);
103     int codepoint = -1;
104     if ('#' == ch1) {
105       // numeric entity
106       if ('x' == ch2 || 'X' == ch2) {
107         if (end == offset + 3) {  // No digits
108           return ((offset + 1L) << 32) | '&';
109         }
110         codepoint = 0;
111         // hex literal
112         digloop:
113         for (int i = offset + 3; i < end; ++i) {
114           char digit = html.charAt(i);
115           switch (digit & 0xfff8) {
116             case 0x30: case 0x38: // ASCII 48-57 are '0'-'9'
117               int decDig = digit & 0xf;
118               if (decDig < 10) {
119                 codepoint = (codepoint << 4) | decDig;
120               } else {
121                 codepoint = -1;
122                 break digloop;
123               }
124               break;
125             // ASCII 65-70 and 97-102 are 'A'-'Z' && 'a'-'z'
126             case 0x40: case 0x60:
127               int hexDig = (digit & 0x7);
128               if (hexDig != 0 && hexDig < 7) {
129                 codepoint = (codepoint << 4) | (hexDig + 9);
130               } else {
131                 codepoint = -1;
132                 break digloop;
133               }
134               break;
135             default:
136               codepoint = -1;
137               break digloop;
138           }
139         }
140         if (codepoint > Character.MAX_CODE_POINT) {
141           codepoint = 0xfffd;  // Unknown.
142         }
143       } else {
144         codepoint = 0;
145         // decimal literal
146         digloop:
147         for (int i = offset + 2; i < end; ++i) {
148           char digit = html.charAt(i);
149           switch (digit & 0xfff8) {
150             case 0x30: case 0x38: // ASCII 48-57 are '0'-'9'
151               int decDig = digit - '0';
152               if (decDig < 10) {
153                 codepoint = (codepoint * 10) + decDig;
154               } else {
155                 codepoint = -1;
156                 break digloop;
157               }
158               break;
159             default:
160               codepoint = -1;
161               break digloop;
162           }
163         }
164         if (codepoint > Character.MAX_CODE_POINT) {
165           codepoint = 0xfffd;  // Unknown.
166         }
167       }
168     } else {
169       Trie t = ENTITY_TRIE;
170       for (int i = offset + 1; i < end; ++i) {
171         char nameChar = html.charAt(i);
172         t = t.lookup(nameChar);
173         if (t == null) { break; }
174       }
175       if (t == null) {
176         t = ENTITY_TRIE;
177         for (int i = offset + 1; i < end; ++i) {
178           char nameChar = html.charAt(i);
179           if ('Z' >= nameChar && nameChar >= 'A') { nameChar |= 32; }
180           t = t.lookup(nameChar);
181           if (t == null) { break; }
182         }
183       }
184       if (t != null && t.isTerminal()) {
185         codepoint = t.getValue();
186       }
187     }
188     if (codepoint < 0) {
189       return ((offset + 1L) << 32) | '&';
190     } else {
191       return (((long) tail) << 32) | codepoint;
192     }
193   }
194 
195 //  /** A possible entity name like "amp" or "gt". */
196 //  public static boolean isEntityName(String name) {
197 //    Trie t = ENTITY_TRIE;
198 //    int n = name.length();
199 //
200 //    // Treat AMP the same amp, but not Amp.
201 //    boolean isUcase = true;
202 //    for (int i = 0; i < n; ++i) {
203 //      char ch = name.charAt(i);
204 //      if (!('A' <= ch && ch <= 'Z')) {
205 //        isUcase = false;
206 //        break;
207 //      }
208 //    }
209 //
210 //    if (isUcase) { name = Strings.toLowerCase(name); }
211 //
212 //    for (int i = 0; i < n; ++i) {
213 //      t = t.lookup(name.charAt(i));
214 //      if (t == null) { return false; }
215 //    }
216 //    return t.isTerminal();
217 //  }
218 
219   /** A trie that maps entity names to codepoints. */
220   public static final Trie ENTITY_TRIE = new Trie(
221       ImmutableMap.<String, Integer>builder()
222     // C0 Controls and Basic Latin
223       .put("quot", Integer.valueOf('"'))
224       .put("amp", Integer.valueOf('&'))
225       .put("lt", Integer.valueOf('<'))
226       .put("gt", Integer.valueOf('>'))
227 
228     // XML 1.0
229       .put("apos", Integer.valueOf('\''))
230 
231     // HTML4 entities
232       .put("nbsp", Integer.valueOf('\u00a0'))
233       .put("iexcl", Integer.valueOf('\u00a1'))
234       .put("cent", Integer.valueOf('\u00a2'))
235       .put("pound", Integer.valueOf('\u00a3'))
236       .put("curren", Integer.valueOf('\u00a4'))
237       .put("yen", Integer.valueOf('\u00a5'))
238       .put("brvbar", Integer.valueOf('\u00a6'))
239       .put("sect", Integer.valueOf('\u00a7'))
240       .put("uml", Integer.valueOf('\u00a8'))
241       .put("copy", Integer.valueOf('\u00a9'))
242       .put("ordf", Integer.valueOf('\u00aa'))
243       .put("laquo", Integer.valueOf('\u00ab'))
244       .put("not", Integer.valueOf('\u00ac'))
245       .put("shy", Integer.valueOf('\u00ad'))
246       .put("reg", Integer.valueOf('\u00ae'))
247       .put("macr", Integer.valueOf('\u00af'))
248       .put("deg", Integer.valueOf('\u00b0'))
249       .put("plusmn", Integer.valueOf('\u00b1'))
250       .put("sup2", Integer.valueOf('\u00b2'))
251       .put("sup3", Integer.valueOf('\u00b3'))
252       .put("acute", Integer.valueOf('\u00b4'))
253       .put("micro", Integer.valueOf('\u00b5'))
254       .put("para", Integer.valueOf('\u00b6'))
255       .put("middot", Integer.valueOf('\u00b7'))
256       .put("cedil", Integer.valueOf('\u00b8'))
257       .put("sup1", Integer.valueOf('\u00b9'))
258       .put("ordm", Integer.valueOf('\u00ba'))
259       .put("raquo", Integer.valueOf('\u00bb'))
260       .put("frac14", Integer.valueOf('\u00bc'))
261       .put("frac12", Integer.valueOf('\u00bd'))
262       .put("frac34", Integer.valueOf('\u00be'))
263       .put("iquest", Integer.valueOf('\u00bf'))
264       .put("Agrave", Integer.valueOf('\u00c0'))
265       .put("Aacute", Integer.valueOf('\u00c1'))
266       .put("Acirc", Integer.valueOf('\u00c2'))
267       .put("Atilde", Integer.valueOf('\u00c3'))
268       .put("Auml", Integer.valueOf('\u00c4'))
269       .put("Aring", Integer.valueOf('\u00c5'))
270       .put("AElig", Integer.valueOf('\u00c6'))
271       .put("Ccedil", Integer.valueOf('\u00c7'))
272       .put("Egrave", Integer.valueOf('\u00c8'))
273       .put("Eacute", Integer.valueOf('\u00c9'))
274       .put("Ecirc", Integer.valueOf('\u00ca'))
275       .put("Euml", Integer.valueOf('\u00cb'))
276       .put("Igrave", Integer.valueOf('\u00cc'))
277       .put("Iacute", Integer.valueOf('\u00cd'))
278       .put("Icirc", Integer.valueOf('\u00ce'))
279       .put("Iuml", Integer.valueOf('\u00cf'))
280       .put("ETH", Integer.valueOf('\u00d0'))
281       .put("Ntilde", Integer.valueOf('\u00d1'))
282       .put("Ograve", Integer.valueOf('\u00d2'))
283       .put("Oacute", Integer.valueOf('\u00d3'))
284       .put("Ocirc", Integer.valueOf('\u00d4'))
285       .put("Otilde", Integer.valueOf('\u00d5'))
286       .put("Ouml", Integer.valueOf('\u00d6'))
287       .put("times", Integer.valueOf('\u00d7'))
288       .put("Oslash", Integer.valueOf('\u00d8'))
289       .put("Ugrave", Integer.valueOf('\u00d9'))
290       .put("Uacute", Integer.valueOf('\u00da'))
291       .put("Ucirc", Integer.valueOf('\u00db'))
292       .put("Uuml", Integer.valueOf('\u00dc'))
293       .put("Yacute", Integer.valueOf('\u00dd'))
294       .put("THORN", Integer.valueOf('\u00de'))
295       .put("szlig", Integer.valueOf('\u00df'))
296       .put("agrave", Integer.valueOf('\u00e0'))
297       .put("aacute", Integer.valueOf('\u00e1'))
298       .put("acirc", Integer.valueOf('\u00e2'))
299       .put("atilde", Integer.valueOf('\u00e3'))
300       .put("auml", Integer.valueOf('\u00e4'))
301       .put("aring", Integer.valueOf('\u00e5'))
302       .put("aelig", Integer.valueOf('\u00e6'))
303       .put("ccedil", Integer.valueOf('\u00e7'))
304       .put("egrave", Integer.valueOf('\u00e8'))
305       .put("eacute", Integer.valueOf('\u00e9'))
306       .put("ecirc", Integer.valueOf('\u00ea'))
307       .put("euml", Integer.valueOf('\u00eb'))
308       .put("igrave", Integer.valueOf('\u00ec'))
309       .put("iacute", Integer.valueOf('\u00ed'))
310       .put("icirc", Integer.valueOf('\u00ee'))
311       .put("iuml", Integer.valueOf('\u00ef'))
312       .put("eth", Integer.valueOf('\u00f0'))
313       .put("ntilde", Integer.valueOf('\u00f1'))
314       .put("ograve", Integer.valueOf('\u00f2'))
315       .put("oacute", Integer.valueOf('\u00f3'))
316       .put("ocirc", Integer.valueOf('\u00f4'))
317       .put("otilde", Integer.valueOf('\u00f5'))
318       .put("ouml", Integer.valueOf('\u00f6'))
319       .put("divide", Integer.valueOf('\u00f7'))
320       .put("oslash", Integer.valueOf('\u00f8'))
321       .put("ugrave", Integer.valueOf('\u00f9'))
322       .put("uacute", Integer.valueOf('\u00fa'))
323       .put("ucirc", Integer.valueOf('\u00fb'))
324       .put("uuml", Integer.valueOf('\u00fc'))
325       .put("yacute", Integer.valueOf('\u00fd'))
326       .put("thorn", Integer.valueOf('\u00fe'))
327       .put("yuml", Integer.valueOf('\u00ff'))
328 
329     // Latin Extended-B
330       .put("fnof", Integer.valueOf('\u0192'))
331 
332     // Greek
333       .put("Alpha", Integer.valueOf('\u0391'))
334       .put("Beta", Integer.valueOf('\u0392'))
335       .put("Gamma", Integer.valueOf('\u0393'))
336       .put("Delta", Integer.valueOf('\u0394'))
337       .put("Epsilon", Integer.valueOf('\u0395'))
338       .put("Zeta", Integer.valueOf('\u0396'))
339       .put("Eta", Integer.valueOf('\u0397'))
340       .put("Theta", Integer.valueOf('\u0398'))
341       .put("Iota", Integer.valueOf('\u0399'))
342       .put("Kappa", Integer.valueOf('\u039a'))
343       .put("Lambda", Integer.valueOf('\u039b'))
344       .put("Mu", Integer.valueOf('\u039c'))
345       .put("Nu", Integer.valueOf('\u039d'))
346       .put("Xi", Integer.valueOf('\u039e'))
347       .put("Omicron", Integer.valueOf('\u039f'))
348       .put("Pi", Integer.valueOf('\u03a0'))
349       .put("Rho", Integer.valueOf('\u03a1'))
350       .put("Sigma", Integer.valueOf('\u03a3'))
351       .put("Tau", Integer.valueOf('\u03a4'))
352       .put("Upsilon", Integer.valueOf('\u03a5'))
353       .put("Phi", Integer.valueOf('\u03a6'))
354       .put("Chi", Integer.valueOf('\u03a7'))
355       .put("Psi", Integer.valueOf('\u03a8'))
356       .put("Omega", Integer.valueOf('\u03a9'))
357 
358       .put("alpha", Integer.valueOf('\u03b1'))
359       .put("beta", Integer.valueOf('\u03b2'))
360       .put("gamma", Integer.valueOf('\u03b3'))
361       .put("delta", Integer.valueOf('\u03b4'))
362       .put("epsilon", Integer.valueOf('\u03b5'))
363       .put("zeta", Integer.valueOf('\u03b6'))
364       .put("eta", Integer.valueOf('\u03b7'))
365       .put("theta", Integer.valueOf('\u03b8'))
366       .put("iota", Integer.valueOf('\u03b9'))
367       .put("kappa", Integer.valueOf('\u03ba'))
368       .put("lambda", Integer.valueOf('\u03bb'))
369       .put("mu", Integer.valueOf('\u03bc'))
370       .put("nu", Integer.valueOf('\u03bd'))
371       .put("xi", Integer.valueOf('\u03be'))
372       .put("omicron", Integer.valueOf('\u03bf'))
373       .put("pi", Integer.valueOf('\u03c0'))
374       .put("rho", Integer.valueOf('\u03c1'))
375       .put("sigmaf", Integer.valueOf('\u03c2'))
376       .put("sigma", Integer.valueOf('\u03c3'))
377       .put("tau", Integer.valueOf('\u03c4'))
378       .put("upsilon", Integer.valueOf('\u03c5'))
379       .put("phi", Integer.valueOf('\u03c6'))
380       .put("chi", Integer.valueOf('\u03c7'))
381       .put("psi", Integer.valueOf('\u03c8'))
382       .put("omega", Integer.valueOf('\u03c9'))
383       .put("thetasym", Integer.valueOf('\u03d1'))
384       .put("upsih", Integer.valueOf('\u03d2'))
385       .put("piv", Integer.valueOf('\u03d6'))
386 
387     // General Punctuation
388       .put("bull", Integer.valueOf('\u2022'))
389       .put("hellip", Integer.valueOf('\u2026'))
390       .put("prime", Integer.valueOf('\u2032'))
391       .put("Prime", Integer.valueOf('\u2033'))
392       .put("oline", Integer.valueOf('\u203e'))
393       .put("frasl", Integer.valueOf('\u2044'))
394 
395     // Letterlike Symbols
396       .put("weierp", Integer.valueOf('\u2118'))
397       .put("image", Integer.valueOf('\u2111'))
398       .put("real", Integer.valueOf('\u211c'))
399       .put("trade", Integer.valueOf('\u2122'))
400       .put("alefsym", Integer.valueOf('\u2135'))
401 
402     // Arrows
403       .put("larr", Integer.valueOf('\u2190'))
404       .put("uarr", Integer.valueOf('\u2191'))
405       .put("rarr", Integer.valueOf('\u2192'))
406       .put("darr", Integer.valueOf('\u2193'))
407       .put("harr", Integer.valueOf('\u2194'))
408       .put("crarr", Integer.valueOf('\u21b5'))
409       .put("lArr", Integer.valueOf('\u21d0'))
410       .put("uArr", Integer.valueOf('\u21d1'))
411       .put("rArr", Integer.valueOf('\u21d2'))
412       .put("dArr", Integer.valueOf('\u21d3'))
413       .put("hArr", Integer.valueOf('\u21d4'))
414 
415     // Mathematical Operators
416       .put("forall", Integer.valueOf('\u2200'))
417       .put("part", Integer.valueOf('\u2202'))
418       .put("exist", Integer.valueOf('\u2203'))
419       .put("empty", Integer.valueOf('\u2205'))
420       .put("nabla", Integer.valueOf('\u2207'))
421       .put("isin", Integer.valueOf('\u2208'))
422       .put("notin", Integer.valueOf('\u2209'))
423       .put("ni", Integer.valueOf('\u220b'))
424       .put("prod", Integer.valueOf('\u220f'))
425       .put("sum", Integer.valueOf('\u2211'))
426       .put("minus", Integer.valueOf('\u2212'))
427       .put("lowast", Integer.valueOf('\u2217'))
428       .put("radic", Integer.valueOf('\u221a'))
429       .put("prop", Integer.valueOf('\u221d'))
430       .put("infin", Integer.valueOf('\u221e'))
431       .put("ang", Integer.valueOf('\u2220'))
432       .put("and", Integer.valueOf('\u2227'))
433       .put("or", Integer.valueOf('\u2228'))
434       .put("cap", Integer.valueOf('\u2229'))
435       .put("cup", Integer.valueOf('\u222a'))
436       .put("int", Integer.valueOf('\u222b'))
437       .put("there4", Integer.valueOf('\u2234'))
438       .put("sim", Integer.valueOf('\u223c'))
439       .put("cong", Integer.valueOf('\u2245'))
440       .put("asymp", Integer.valueOf('\u2248'))
441       .put("ne", Integer.valueOf('\u2260'))
442       .put("equiv", Integer.valueOf('\u2261'))
443       .put("le", Integer.valueOf('\u2264'))
444       .put("ge", Integer.valueOf('\u2265'))
445       .put("sub", Integer.valueOf('\u2282'))
446       .put("sup", Integer.valueOf('\u2283'))
447       .put("nsub", Integer.valueOf('\u2284'))
448       .put("sube", Integer.valueOf('\u2286'))
449       .put("supe", Integer.valueOf('\u2287'))
450       .put("oplus", Integer.valueOf('\u2295'))
451       .put("otimes", Integer.valueOf('\u2297'))
452       .put("perp", Integer.valueOf('\u22a5'))
453       .put("sdot", Integer.valueOf('\u22c5'))
454 
455     // Miscellaneous Technical
456       .put("lceil", Integer.valueOf('\u2308'))
457       .put("rceil", Integer.valueOf('\u2309'))
458       .put("lfloor", Integer.valueOf('\u230a'))
459       .put("rfloor", Integer.valueOf('\u230b'))
460       .put("lang", Integer.valueOf('\u2329'))
461       .put("rang", Integer.valueOf('\u232a'))
462 
463     // Geometric Shapes
464       .put("loz", Integer.valueOf('\u25ca'))
465 
466     // Miscellaneous Symbols
467       .put("spades", Integer.valueOf('\u2660'))
468       .put("clubs", Integer.valueOf('\u2663'))
469       .put("hearts", Integer.valueOf('\u2665'))
470       .put("diams", Integer.valueOf('\u2666'))
471 
472     // Latin Extended-A
473       .put("OElig", Integer.valueOf('\u0152'))
474       .put("oelig", Integer.valueOf('\u0153'))
475       .put("Scaron", Integer.valueOf('\u0160'))
476       .put("scaron", Integer.valueOf('\u0161'))
477       .put("Yuml", Integer.valueOf('\u0178'))
478 
479     // Spacing Modifier Letters
480       .put("circ", Integer.valueOf('\u02c6'))
481       .put("tilde", Integer.valueOf('\u02dc'))
482 
483     // General Punctuation
484       .put("ensp", Integer.valueOf('\u2002'))
485       .put("emsp", Integer.valueOf('\u2003'))
486       .put("thinsp", Integer.valueOf('\u2009'))
487       .put("zwnj", Integer.valueOf('\u200c'))
488       .put("zwj", Integer.valueOf('\u200d'))
489       .put("lrm", Integer.valueOf('\u200e'))
490       .put("rlm", Integer.valueOf('\u200f'))
491       .put("ndash", Integer.valueOf('\u2013'))
492       .put("mdash", Integer.valueOf('\u2014'))
493       .put("lsquo", Integer.valueOf('\u2018'))
494       .put("rsquo", Integer.valueOf('\u2019'))
495       .put("sbquo", Integer.valueOf('\u201a'))
496       .put("ldquo", Integer.valueOf('\u201c'))
497       .put("rdquo", Integer.valueOf('\u201d'))
498       .put("bdquo", Integer.valueOf('\u201e'))
499       .put("dagger", Integer.valueOf('\u2020'))
500       .put("Dagger", Integer.valueOf('\u2021'))
501       .put("permil", Integer.valueOf('\u2030'))
502       .put("lsaquo", Integer.valueOf('\u2039'))
503       .put("rsaquo", Integer.valueOf('\u203a'))
504       .put("euro", Integer.valueOf('\u20ac'))
505       .build());
506 
HtmlEntities()507   private HtmlEntities() { /* uninstantiable */ }
508 }
509