/** * Copyright (c) 2006, Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.mail.common.base; import static com.google.android.mail.common.base.Preconditions.checkNotNull; import java.io.IOException; /** * Utility functions for dealing with {@code CharEscaper}s, and some commonly * used {@code CharEscaper} instances. * * @author sven@google.com (Sven Mawson) * @author laurence@google.com (Laurence Gonsalves) */ public final class CharEscapers { private CharEscapers() {} // TODO(matevossian): To implementors of escapers -- // For each xxxEscaper method, please add links to external // reference pages that we consider authoritative for what // that escaper should exactly be doing. /** * Performs no escaping. */ private static final CharEscaper NULL_ESCAPER = new CharEscaper() { @Override public String escape(String string) { checkNotNull(string); return string; } @Override public Appendable escape(final Appendable out) { checkNotNull(out); // we can't simply return out because the CharEscaper contract says that // the returned Appendable will throw a NullPointerException if asked to // append null. 
return new Appendable() { @Override public Appendable append(CharSequence csq) throws IOException { checkNotNull(csq); out.append(csq); return this; } @Override public Appendable append(CharSequence csq, int start, int end) throws IOException { checkNotNull(csq); out.append(csq, start, end); return this; } @Override public Appendable append(char c) throws IOException { out.append(c); return this; } }; } @Override protected char[] escape(char c) { return null; } }; /** * Returns a {@link CharEscaper} that does no escaping. */ public static CharEscaper nullEscaper() { return NULL_ESCAPER; } /** * Returns a {@link CharEscaper} instance that escapes special characters in a * string so it can safely be included in an XML document in either element * content or attribute values. * *

<p><b>Note</b>: silently removes null-characters and control
   * characters, as there is no way to represent them in XML.
   */
  public static CharEscaper xmlEscaper() {
    return XML_ESCAPER;
  }

  /**
   * Escapes special characters from a string so it can safely be included in an
   * XML document in either element content or attribute values. Also removes
   * null-characters and control characters, as there is no way to represent
   * them in XML.
   *
   * <p>Starts from the builder returned by {@code newBasicXmlEscapeBuilder()}
   * and adds escapes for the double and single quote characters, which is
   * what makes the result usable inside attribute values.
   */
  private static final CharEscaper XML_ESCAPER = newBasicXmlEscapeBuilder()
      // The replacements must be the literal entity references; mapping a
      // quote character to itself would leave attribute values unescaped.
      .addEscape('"', "&quot;")
      .addEscape('\'', "&apos;")
      .toEscaper();

  /**
   * Returns a {@link CharEscaper} instance that escapes special characters in a
   * string so it can safely be included in an XML document in element content.
   *

<p><b>Note</b>: double and single quotes are not escaped, so it is not
   * safe to use this escaper to escape attribute values. Use the
   * {@link #xmlEscaper()} escaper to escape attribute values or if you are
   * unsure. Also silently removes non-whitespace control characters, as there
   * is no way to represent them in XML.
   */
  public static CharEscaper xmlContentEscaper() {
    return XML_CONTENT_ESCAPER;
  }

  /**
   * Escaper for text placed in XML element content. It is built directly
   * from {@code newBasicXmlEscapeBuilder()} with no additional escapes, so
   * quote characters pass through untouched and the result must not be used
   * for attribute values; {@link #XML_ESCAPER} is the one to use there, or
   * whenever the target context is uncertain. Non-whitespace control
   * characters are dropped because XML has no way to represent them.
   */
  private static final CharEscaper XML_CONTENT_ESCAPER =
      newBasicXmlEscapeBuilder()
          .toEscaper();

  /**
   * Returns a {@link CharEscaper} instance that escapes special characters in a
   * string so it can safely be included in an HTML document in either element
   * content or attribute values.
   *

Note

: alters non-ASCII and control characters. * * The entity list was taken from: * here */ public static CharEscaper htmlEscaper() { return HtmlEscaperHolder.HTML_ESCAPER; } /** * A lazy initialization holder for HTML_ESCAPER. */ private static class HtmlEscaperHolder { private static final CharEscaper HTML_ESCAPER = new HtmlCharEscaper(new CharEscaperBuilder() .addEscape('"', """) .addEscape('\'', "'") .addEscape('&', "&") .addEscape('<', "<") .addEscape('>', ">") .addEscape('\u00A0', " ") .addEscape('\u00A1', "¡") .addEscape('\u00A2', "¢") .addEscape('\u00A3', "£") .addEscape('\u00A4', "¤") .addEscape('\u00A5', "¥") .addEscape('\u00A6', "¦") .addEscape('\u00A7', "§") .addEscape('\u00A8', "¨") .addEscape('\u00A9', "©") .addEscape('\u00AA', "ª") .addEscape('\u00AB', "«") .addEscape('\u00AC', "¬") .addEscape('\u00AD', "­") .addEscape('\u00AE', "®") .addEscape('\u00AF', "¯") .addEscape('\u00B0', "°") .addEscape('\u00B1', "±") .addEscape('\u00B2', "²") .addEscape('\u00B3', "³") .addEscape('\u00B4', "´") .addEscape('\u00B5', "µ") .addEscape('\u00B6', "¶") .addEscape('\u00B7', "·") .addEscape('\u00B8', "¸") .addEscape('\u00B9', "¹") .addEscape('\u00BA', "º") .addEscape('\u00BB', "»") .addEscape('\u00BC', "¼") .addEscape('\u00BD', "½") .addEscape('\u00BE', "¾") .addEscape('\u00BF', "¿") .addEscape('\u00C0', "À") .addEscape('\u00C1', "Á") .addEscape('\u00C2', "Â") .addEscape('\u00C3', "Ã") .addEscape('\u00C4', "Ä") .addEscape('\u00C5', "Å") .addEscape('\u00C6', "Æ") .addEscape('\u00C7', "Ç") .addEscape('\u00C8', "È") .addEscape('\u00C9', "É") .addEscape('\u00CA', "Ê") .addEscape('\u00CB', "Ë") .addEscape('\u00CC', "Ì") .addEscape('\u00CD', "Í") .addEscape('\u00CE', "Î") .addEscape('\u00CF', "Ï") .addEscape('\u00D0', "Ð") .addEscape('\u00D1', "Ñ") .addEscape('\u00D2', "Ò") .addEscape('\u00D3', "Ó") .addEscape('\u00D4', "Ô") .addEscape('\u00D5', "Õ") .addEscape('\u00D6', "Ö") .addEscape('\u00D7', "×") .addEscape('\u00D8', "Ø") .addEscape('\u00D9', "Ù") .addEscape('\u00DA', 
"Ú") .addEscape('\u00DB', "Û") .addEscape('\u00DC', "Ü") .addEscape('\u00DD', "Ý") .addEscape('\u00DE', "Þ") .addEscape('\u00DF', "ß") .addEscape('\u00E0', "à") .addEscape('\u00E1', "á") .addEscape('\u00E2', "â") .addEscape('\u00E3', "ã") .addEscape('\u00E4', "ä") .addEscape('\u00E5', "å") .addEscape('\u00E6', "æ") .addEscape('\u00E7', "ç") .addEscape('\u00E8', "è") .addEscape('\u00E9', "é") .addEscape('\u00EA', "ê") .addEscape('\u00EB', "ë") .addEscape('\u00EC', "ì") .addEscape('\u00ED', "í") .addEscape('\u00EE', "î") .addEscape('\u00EF', "ï") .addEscape('\u00F0', "ð") .addEscape('\u00F1', "ñ") .addEscape('\u00F2', "ò") .addEscape('\u00F3', "ó") .addEscape('\u00F4', "ô") .addEscape('\u00F5', "õ") .addEscape('\u00F6', "ö") .addEscape('\u00F7', "÷") .addEscape('\u00F8', "ø") .addEscape('\u00F9', "ù") .addEscape('\u00FA', "ú") .addEscape('\u00FB', "û") .addEscape('\u00FC', "ü") .addEscape('\u00FD', "ý") .addEscape('\u00FE', "þ") .addEscape('\u00FF', "ÿ") .addEscape('\u0152', "Œ") .addEscape('\u0153', "œ") .addEscape('\u0160', "Š") .addEscape('\u0161', "š") .addEscape('\u0178', "Ÿ") .addEscape('\u0192', "ƒ") .addEscape('\u02C6', "ˆ") .addEscape('\u02DC', "˜") .addEscape('\u0391', "Α") .addEscape('\u0392', "Β") .addEscape('\u0393', "Γ") .addEscape('\u0394', "Δ") .addEscape('\u0395', "Ε") .addEscape('\u0396', "Ζ") .addEscape('\u0397', "Η") .addEscape('\u0398', "Θ") .addEscape('\u0399', "Ι") .addEscape('\u039A', "Κ") .addEscape('\u039B', "Λ") .addEscape('\u039C', "Μ") .addEscape('\u039D', "Ν") .addEscape('\u039E', "Ξ") .addEscape('\u039F', "Ο") .addEscape('\u03A0', "Π") .addEscape('\u03A1', "Ρ") .addEscape('\u03A3', "Σ") .addEscape('\u03A4', "Τ") .addEscape('\u03A5', "Υ") .addEscape('\u03A6', "Φ") .addEscape('\u03A7', "Χ") .addEscape('\u03A8', "Ψ") .addEscape('\u03A9', "Ω") .addEscape('\u03B1', "α") .addEscape('\u03B2', "β") .addEscape('\u03B3', "γ") .addEscape('\u03B4', "δ") .addEscape('\u03B5', "ε") .addEscape('\u03B6', "ζ") .addEscape('\u03B7', "η") 
.addEscape('\u03B8', "θ") .addEscape('\u03B9', "ι") .addEscape('\u03BA', "κ") .addEscape('\u03BB', "λ") .addEscape('\u03BC', "μ") .addEscape('\u03BD', "ν") .addEscape('\u03BE', "ξ") .addEscape('\u03BF', "ο") .addEscape('\u03C0', "π") .addEscape('\u03C1', "ρ") .addEscape('\u03C2', "ς") .addEscape('\u03C3', "σ") .addEscape('\u03C4', "τ") .addEscape('\u03C5', "υ") .addEscape('\u03C6', "φ") .addEscape('\u03C7', "χ") .addEscape('\u03C8', "ψ") .addEscape('\u03C9', "ω") .addEscape('\u03D1', "ϑ") .addEscape('\u03D2', "ϒ") .addEscape('\u03D6', "ϖ") .addEscape('\u2002', " ") .addEscape('\u2003', " ") .addEscape('\u2009', " ") .addEscape('\u200C', "‌") .addEscape('\u200D', "‍") .addEscape('\u200E', "‎") .addEscape('\u200F', "‏") .addEscape('\u2013', "–") .addEscape('\u2014', "—") .addEscape('\u2018', "‘") .addEscape('\u2019', "’") .addEscape('\u201A', "‚") .addEscape('\u201C', "“") .addEscape('\u201D', "”") .addEscape('\u201E', "„") .addEscape('\u2020', "†") .addEscape('\u2021', "‡") .addEscape('\u2022', "•") .addEscape('\u2026', "…") .addEscape('\u2030', "‰") .addEscape('\u2032', "′") .addEscape('\u2033', "″") .addEscape('\u2039', "‹") .addEscape('\u203A', "›") .addEscape('\u203E', "‾") .addEscape('\u2044', "⁄") .addEscape('\u20AC', "€") .addEscape('\u2111', "ℑ") .addEscape('\u2118', "℘") .addEscape('\u211C', "ℜ") .addEscape('\u2122', "™") .addEscape('\u2135', "ℵ") .addEscape('\u2190', "←") .addEscape('\u2191', "↑") .addEscape('\u2192', "→") .addEscape('\u2193', "↓") .addEscape('\u2194', "↔") .addEscape('\u21B5', "↵") .addEscape('\u21D0', "⇐") .addEscape('\u21D1', "⇑") .addEscape('\u21D2', "⇒") .addEscape('\u21D3', "⇓") .addEscape('\u21D4', "⇔") .addEscape('\u2200', "∀") .addEscape('\u2202', "∂") .addEscape('\u2203', "∃") .addEscape('\u2205', "∅") .addEscape('\u2207', "∇") .addEscape('\u2208', "∈") .addEscape('\u2209', "∉") .addEscape('\u220B', "∋") .addEscape('\u220F', "∏") .addEscape('\u2211', "∑") .addEscape('\u2212', "−") .addEscape('\u2217', "∗") .addEscape('\u221A', 
"√") .addEscape('\u221D', "∝") .addEscape('\u221E', "∞") .addEscape('\u2220', "∠") .addEscape('\u2227', "∧") .addEscape('\u2228', "∨") .addEscape('\u2229', "∩") .addEscape('\u222A', "∪") .addEscape('\u222B', "∫") .addEscape('\u2234', "∴") .addEscape('\u223C', "∼") .addEscape('\u2245', "≅") .addEscape('\u2248', "≈") .addEscape('\u2260', "≠") .addEscape('\u2261', "≡") .addEscape('\u2264', "≤") .addEscape('\u2265', "≥") .addEscape('\u2282', "⊂") .addEscape('\u2283', "⊃") .addEscape('\u2284', "⊄") .addEscape('\u2286', "⊆") .addEscape('\u2287', "⊇") .addEscape('\u2295', "⊕") .addEscape('\u2297', "⊗") .addEscape('\u22A5', "⊥") .addEscape('\u22C5', "⋅") .addEscape('\u2308', "⌈") .addEscape('\u2309', "⌉") .addEscape('\u230A', "⌊") .addEscape('\u230B', "⌋") .addEscape('\u2329', "⟨") .addEscape('\u232A', "⟩") .addEscape('\u25CA', "◊") .addEscape('\u2660', "♠") .addEscape('\u2663', "♣") .addEscape('\u2665', "♥") .addEscape('\u2666', "♦") .toArray()); } /** * Returns a {@link CharEscaper} instance that escapes special characters in a * string so it can safely be included in an HTML document in either element * content or attribute values. * *

<p><b>Note</b>: does not alter non-ASCII and control characters.
   */
  public static CharEscaper asciiHtmlEscaper() {
    return ASCII_HTML_ESCAPER;
  }

  /**
   * Escapes special characters from a string so it can safely be included in an
   * HTML document in either element content or attribute values. Does
   * <em>not</em> alter non-ASCII characters or control characters.
   *
   * <p>Only the five markup-significant ASCII characters registered below
   * are given a replacement.
   */
  private static final CharEscaper ASCII_HTML_ESCAPER = new CharEscaperBuilder()
      // Replacements are literal entity references. The apostrophe uses the
      // numeric form because HTML 4 does not define a named apostrophe entity.
      .addEscape('"', "&quot;")
      .addEscape('\'', "&#39;")
      .addEscape('&', "&amp;")
      .addEscape('<', "&lt;")
      .addEscape('>', "&gt;")
      .toEscaper();

  /**
   * Returns an {@link Escaper} instance that escapes Java chars so they can be
   * safely included in URIs. For details on escaping URIs, see section 2.4 of
   * RFC 2396.
   *

When encoding a String, the following rules apply: *