/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package android.databinding.tool.util;

import com.google.common.escape.ArrayBasedCharEscaper;
import com.google.common.escape.CharEscaper;
import com.google.common.escape.Escaper;

import java.util.HashMap;
import java.util.Map;

/*
 * This file has been copied from the google internal implementation of guava. Some unused parts of
 * the file have been removed.
 */

/**
 * Static factory of {@link Escaper} instances that make arbitrary text safe
 * to embed in source code of common programming languages.
 *
 * <p>All escapers handed out by this class are shared singletons created
 * once during class initialization.
 *
 * @author Alex Matevossian
 * @author David Beaumont
 */
public final class SourceCodeEscapers {

  /** Not instantiable: this class only exposes static factory methods. */
  private SourceCodeEscapers() {}

  // For each xxxEscaper() method, please add links to external reference pages
  // that are considered authoritative for the behavior of that escaper.

  // Bounds of the printable ASCII range, see:
  // http://en.wikipedia.org/wiki/ASCII#ASCII_printable_characters
  private static final char PRINTABLE_ASCII_MIN = 0x20; // ' '
  private static final char PRINTABLE_ASCII_MAX = 0x7E; // '~'

  // Digit lookup table. The octal helper reuses its first eight entries.
  private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

  /**
   * Returns the {@link Escaper} to use for text that will be placed inside a
   * Java character literal or a Java string literal. This is the preferred
   * escaper for both kinds of literal.
   *
   * <p>For more details, see <a href="http://goo.gl/NsGW7">Escape Sequences for
   * Character and String Literals</a> in The Java Language Specification.
   */
  public static Escaper javaCharEscaper() {
    return JAVA_CHAR_ESCAPER;
  }

  /**
   * Returns an {@link Escaper} for text that will be placed inside a Java
   * character literal or string literal. It behaves like
   * {@link #javaStringEscaperWithOctal()} but additionally escapes single
   * quotes.
   *
   * <p>In contrast to {@link #javaCharEscaper}, characters with values below
   * 256 are emitted as octal escape sequences ({@literal \}nnn). That output
   * can be shorter than the standard Unicode form ({@literal \}uxxxx), but the
   * Java Language Specification discourages octal escapes in Java strings, so
   * {@code javaCharEscaper()} is strongly recommended whenever it can be used
   * instead.
   *
   * <p>For more details, see <a href="http://goo.gl/NsGW7">Escape Sequences for
   * Character and String Literals</a> in The Java Language Specification.
   */
  public static Escaper javaCharEscaperWithOctal() {
    return JAVA_CHAR_ESCAPER_WITH_OCTAL;
  }

  /**
   * Returns an {@link Escaper} for text that will be placed inside a Java
   * string literal.
   *
   * <p><b>Note:</b> single quotes pass through unescaped, so this escaper is
   * <b>not safe</b> for character literals.
   *
   * <p>In contrast to {@link #javaCharEscaper}, characters with values below
   * 256 are emitted as octal escape sequences ({@literal \}nnn). That output
   * can be shorter than the standard Unicode form ({@literal \}uxxxx), but the
   * Java Language Specification discourages octal escapes in Java strings, so
   * {@code javaCharEscaper()} is strongly recommended whenever it can be used
   * instead.
   *
   * <p>For more details, see <a href="http://goo.gl/NsGW7">Escape Sequences for
   * Character and String Literals</a> in The Java Language Specification.
   */
  public static Escaper javaStringEscaperWithOctal() {
    return JAVA_STRING_ESCAPER_WITH_OCTAL;
  }

  private static final Escaper JAVA_CHAR_ESCAPER;
  private static final Escaper JAVA_CHAR_ESCAPER_WITH_OCTAL;
  private static final Escaper JAVA_STRING_ESCAPER_WITH_OCTAL;

  static {
    Map<Character, String> replacements = new HashMap<Character, String>();
    replacements.put('\b', "\\b");
    replacements.put('\f', "\\f");
    replacements.put('\n', "\\n");
    replacements.put('\r', "\\r");
    replacements.put('\t', "\\t");
    replacements.put('\"', "\\\"");
    replacements.put('\\', "\\\\");

    // Order matters: the string escaper is built before the single quote entry
    // is added, so it must not see that entry.
    // NOTE(review): this presumes the escaper captures the map contents at
    // construction time - confirm against the Guava version in use.
    JAVA_STRING_ESCAPER_WITH_OCTAL = new JavaCharEscaperWithOctal(replacements);

    // The char escapers differ from the string escaper only by also escaping
    // single quotes.
    replacements.put('\'', "\\'");
    JAVA_CHAR_ESCAPER = new JavaCharEscaper(replacements);
    JAVA_CHAR_ESCAPER_WITH_OCTAL = new JavaCharEscaperWithOctal(replacements);
  }

  // Escaper variant that never emits octal escape sequences. See
  // http://goo.gl/NsGW7, which notes that octal escapes exist for compatibility
  // with C but can only express values U+0000 through U+00FF, so Unicode
  // escapes are usually preferred.
  private static class JavaCharEscaper extends ArrayBasedCharEscaper {

    JavaCharEscaper(Map<Character, String> replacements) {
      super(replacements, PRINTABLE_ASCII_MIN, PRINTABLE_ASCII_MAX);
    }

    // Fallback for characters not covered by the replacement map or the
    // printable-ASCII range handed to the superclass.
    @Override
    protected char[] escapeUnsafe(char c) {
      return asUnicodeHexEscape(c);
    }
  }

  // Escaper variant that prefers the shorter octal form for values below 256
  // and falls back to the Unicode hex form for everything else.
  private static class JavaCharEscaperWithOctal extends ArrayBasedCharEscaper {

    JavaCharEscaperWithOctal(Map<Character, String> replacements) {
      super(replacements, PRINTABLE_ASCII_MIN, PRINTABLE_ASCII_MAX);
    }

    @Override
    protected char[] escapeUnsafe(char c) {
      return (c < 0x100) ? asOctalEscape(c) : asUnicodeHexEscape(c);
    }
  }

  /**
   * Returns an {@link Escaper} that rewrites every non-ASCII character of a
   * string as its Unicode escape sequence ({@literal \}uxxxx, with
   * {@code xxxx} a hex number). Escape sequences already present in the input
   * are left untouched.
   *
   * <p>Because existing escape sequences are not escaped again, this escaper
   * is idempotent. The flip side is that no well defined inverse function can
   * exist for it.
   *
   * <p><b>Note:</b> the returned escaper is still a {@code CharEscaper}, so it
   * does not merge surrogate pairs into a single code point before escaping.
   */
  public static Escaper javaStringUnicodeEscaper() {
    return JAVA_STRING_UNICODE_ESCAPER;
  }

  private static final Escaper JAVA_STRING_UNICODE_ESCAPER =
      new CharEscaper() {
        @Override
        protected char[] escape(char c) {
          // A null result is returned for values below 0x80; everything else
          // gets the Unicode hex form.
          return (c < 0x80) ? null : asUnicodeHexEscape(c);
        }
      };

  // Builds the six-character Unicode escape for a single char.
  private static char[] asUnicodeHexEscape(char c) {
    // Equivalent to String.format("\\u%04x", (int) c);
    return new char[] {
      '\\',
      'u',
      HEX_DIGITS[(c >>> 12) & 0xF],
      HEX_DIGITS[(c >>> 8) & 0xF],
      HEX_DIGITS[(c >>> 4) & 0xF],
      HEX_DIGITS[c & 0xF]
    };
  }

  // Builds the backward compatible, always three-digit octal escape (c < 256).
  private static char[] asOctalEscape(char c) {
    return new char[] {
      '\\',
      HEX_DIGITS[(c >>> 6) & 0x3],
      HEX_DIGITS[(c >>> 3) & 0x7],
      HEX_DIGITS[c & 0x7]
    };
  }
}