1 /* GENERATED SOURCE. DO NOT MODIFY. */ 2 // © 2016 and later: Unicode, Inc. and others. 3 // License & terms of use: http://www.unicode.org/copyright.html 4 /* 5 ******************************************************************************* 6 * Copyright (C) 2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 ******************************************************************************* 9 * created on: 2011feb25 10 * created by: Markus W. Scherer 11 */ 12 13 package android.icu.impl; 14 15 /** 16 * Implements the immutable Unicode properties Pattern_Syntax and Pattern_White_Space. 17 * Hardcodes these properties, does not load data, does not depend on other ICU classes. 18 * <p> 19 * Note: Both properties include ASCII as well as non-ASCII, non-Latin-1 code points, 20 * and both properties only include BMP code points (no supplementary ones). 21 * Pattern_Syntax includes some unassigned code points. 22 * <p> 23 * [:Pattern_White_Space:] = 24 * [\u0009-\u000D\ \u0020\u0085\u200E\u200F\u2028\u2029] 25 * <p> 26 * [:Pattern_Syntax:] = 27 * [!-/\:-@\[-\^`\{-~\u00A1-\u00A7\u00A9\u00AB\u00AC\u00AE 28 * \u00B0\u00B1\u00B6\u00BB\u00BF\u00D7\u00F7 29 * \u2010-\u2027\u2030-\u203E\u2041-\u2053\u2055-\u205E 30 * \u2190-\u245F\u2500-\u2775\u2794-\u2BFF\u2E00-\u2E7F 31 * \u3001-\u3003\u3008-\u3020\u3030\uFD3E\uFD3F\uFE45\uFE46] 32 * @author mscherer 33 * @hide Only a subset of ICU is exposed in Android 34 */ 35 public final class PatternProps { 36 /** 37 * @return true if c is a Pattern_Syntax code point. 38 */ isSyntax(int c)39 public static boolean isSyntax(int c) { 40 if(c<0) { 41 return false; 42 } else if(c<=0xff) { 43 return latin1[c]==3; 44 } else if(c<0x2010) { 45 return false; 46 } else if(c<=0x3030) { 47 int bits=syntax2000[index2000[(c-0x2000)>>5]]; 48 return ((bits>>(c&0x1f))&1)!=0; 49 } else if(0xfd3e<=c && c<=0xfe46) { 50 return c<=0xfd3f || 0xfe45<=c; 51 } else { 52 return false; 53 } 54 } 55 56 /** 57 * @return true if c is a Pattern_Syntax or Pattern_White_Space code point. 58 */ isSyntaxOrWhiteSpace(int c)59 public static boolean isSyntaxOrWhiteSpace(int c) { 60 if(c<0) { 61 return false; 62 } else if(c<=0xff) { 63 return latin1[c]!=0; 64 } else if(c<0x200e) { 65 return false; 66 } else if(c<=0x3030) { 67 int bits=syntaxOrWhiteSpace2000[index2000[(c-0x2000)>>5]]; 68 return ((bits>>(c&0x1f))&1)!=0; 69 } else if(0xfd3e<=c && c<=0xfe46) { 70 return c<=0xfd3f || 0xfe45<=c; 71 } else { 72 return false; 73 } 74 } 75 76 /** 77 * @return true if c is a Pattern_White_Space character. 78 */ isWhiteSpace(int c)79 public static boolean isWhiteSpace(int c) { 80 if(c<0) { 81 return false; 82 } else if(c<=0xff) { 83 return latin1[c]==5; 84 } else if(0x200e<=c && c<=0x2029) { 85 return c<=0x200f || 0x2028<=c; 86 } else { 87 return false; 88 } 89 } 90 91 /** 92 * Skips over Pattern_White_Space starting at index i of the CharSequence. 93 * @return The smallest index at or after i with a non-white space character. 94 */ skipWhiteSpace(CharSequence s, int i)95 public static int skipWhiteSpace(CharSequence s, int i) { 96 while(i<s.length() && isWhiteSpace(s.charAt(i))) { 97 ++i; 98 } 99 return i; 100 } 101 102 /** 103 * @return s except with leading and trailing Pattern_White_Space removed. 104 */ trimWhiteSpace(String s)105 public static String trimWhiteSpace(String s) { 106 if(s.length()==0 || (!isWhiteSpace(s.charAt(0)) && !isWhiteSpace(s.charAt(s.length()-1)))) { 107 return s; 108 } 109 int start=0; 110 int limit=s.length(); 111 while(start<limit && isWhiteSpace(s.charAt(start))) { 112 ++start; 113 } 114 if(start<limit) { 115 // There is non-white space at start; we will not move limit below that, 116 // so we need not test start<limit in the loop. 117 while(isWhiteSpace(s.charAt(limit-1))) { 118 --limit; 119 } 120 } 121 return s.substring(start, limit); 122 } 123 124 /** 125 * @return s except with leading and trailing SpaceChar characters removed. 126 */ trimSpaceChar(String s)127 public static String trimSpaceChar(String s) { 128 if (s.length() == 0 || 129 (!Character.isSpaceChar(s.charAt(0)) && !Character.isSpaceChar(s.charAt(s.length() - 1)))) { 130 return s; 131 } 132 int start = 0; 133 int limit = s.length(); 134 while (start < limit && Character.isSpaceChar(s.charAt(start))) { 135 ++start; 136 } 137 if (start < limit) { 138 // There is non-SpaceChar at start; we will not move limit below that, 139 // so we need not test start<limit in the loop. 140 while (isWhiteSpace(s.charAt(limit - 1))) { 141 --limit; 142 } 143 } 144 return s.substring(start, limit); 145 } 146 147 /** 148 * Tests whether the CharSequence contains a "pattern identifier", that is, 149 * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. 150 * @return true if there are no Pattern_White_Space or Pattern_Syntax characters in s. 151 */ isIdentifier(CharSequence s)152 public static boolean isIdentifier(CharSequence s) { 153 int limit=s.length(); 154 if(limit==0) { 155 return false; 156 } 157 int start=0; 158 do { 159 if(isSyntaxOrWhiteSpace(s.charAt(start++))) { 160 return false; 161 } 162 } while(start<limit); 163 return true; 164 } 165 166 /** 167 * Tests whether the CharSequence contains a "pattern identifier", that is, 168 * whether it contains only non-Pattern_White_Space, non-Pattern_Syntax characters. 169 * @return true if there are no Pattern_White_Space or Pattern_Syntax characters 170 * in s between start and (exclusive) limit. 171 */ isIdentifier(CharSequence s, int start, int limit)172 public static boolean isIdentifier(CharSequence s, int start, int limit) { 173 if(start>=limit) { 174 return false; 175 } 176 do { 177 if(isSyntaxOrWhiteSpace(s.charAt(start++))) { 178 return false; 179 } 180 } while(start<limit); 181 return true; 182 } 183 184 /** 185 * Skips over a "pattern identifier" starting at index i of the CharSequence. 186 * @return The smallest index at or after i with 187 * a Pattern_White_Space or Pattern_Syntax character. 188 */ skipIdentifier(CharSequence s, int i)189 public static int skipIdentifier(CharSequence s, int i) { 190 while(i<s.length() && !isSyntaxOrWhiteSpace(s.charAt(i))) { 191 ++i; 192 } 193 return i; 194 } 195 196 /* 197 * One byte per Latin-1 character. 198 * Bit 0 is set if either Pattern property is true, 199 * bit 1 if Pattern_Syntax is true, 200 * bit 2 if Pattern_White_Space is true. 201 * That is, Pattern_Syntax is encoded as 3 and Pattern_White_Space as 5. 202 */ 203 private static final byte latin1[]=new byte[] { // 256 204 // WS: 9..D 205 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 0, 0, 206 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 207 // WS: 20 Syntax: 21..2F 208 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 209 // Syntax: 3A..40 210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 3, 3, 211 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212 // Syntax: 5B..5E 213 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 214 // Syntax: 60 215 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 216 // Syntax: 7B..7E 217 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 3, 3, 3, 0, 218 // WS: 85 219 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 220 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 221 // Syntax: A1..A7, A9, AB, AC, AE 222 0, 3, 3, 3, 3, 3, 3, 3, 0, 3, 0, 3, 3, 0, 3, 0, 223 // Syntax: B0, B1, B6, BB, BF 224 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 0, 3, 0, 0, 0, 3, 225 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 226 // Syntax: D7 227 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 228 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 229 // Syntax: F7 230 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0 231 }; 232 233 /* 234 * One byte per 32 characters from U+2000..U+303F indexing into 235 * a small table of 32-bit data words. 236 * The first two data words are all-zeros and all-ones. 237 */ 238 private static final byte index2000[]=new byte[] { // 130 239 2, 3, 4, 0, 0, 0, 0, 0, // 20xx 240 0, 0, 0, 0, 5, 1, 1, 1, // 21xx 241 1, 1, 1, 1, 1, 1, 1, 1, // 22xx 242 1, 1, 1, 1, 1, 1, 1, 1, // 23xx 243 1, 1, 1, 0, 0, 0, 0, 0, // 24xx 244 1, 1, 1, 1, 1, 1, 1, 1, // 25xx 245 1, 1, 1, 1, 1, 1, 1, 1, // 26xx 246 1, 1, 1, 6, 7, 1, 1, 1, // 27xx 247 1, 1, 1, 1, 1, 1, 1, 1, // 28xx 248 1, 1, 1, 1, 1, 1, 1, 1, // 29xx 249 1, 1, 1, 1, 1, 1, 1, 1, // 2Axx 250 1, 1, 1, 1, 1, 1, 1, 1, // 2Bxx 251 0, 0, 0, 0, 0, 0, 0, 0, // 2Cxx 252 0, 0, 0, 0, 0, 0, 0, 0, // 2Dxx 253 1, 1, 1, 1, 0, 0, 0, 0, // 2Exx 254 0, 0, 0, 0, 0, 0, 0, 0, // 2Fxx 255 8, 9 // 3000..303F 256 }; 257 258 /* 259 * One 32-bit integer per 32 characters. Ranges of all-false and all-true 260 * are mapped to the first two values, other ranges map to appropriate bit patterns. 261 */ 262 private static final int syntax2000[]=new int[] { 263 0, 264 -1, 265 0xffff0000, // 2: 2010..201F 266 0x7fff00ff, // 3: 2020..2027, 2030..203E 267 0x7feffffe, // 4: 2041..2053, 2055..205E 268 0xffff0000, // 5: 2190..219F 269 0x003fffff, // 6: 2760..2775 270 0xfff00000, // 7: 2794..279F 271 0xffffff0e, // 8: 3001..3003, 3008..301F 272 0x00010001 // 9: 3020, 3030 273 }; 274 275 /* 276 * Same as syntax2000, but with additional bits set for the 277 * Pattern_White_Space characters 200E 200F 2028 2029. 278 */ 279 private static final int syntaxOrWhiteSpace2000[]=new int[] { 280 0, 281 -1, 282 0xffffc000, // 2: 200E..201F 283 0x7fff03ff, // 3: 2020..2029, 2030..203E 284 0x7feffffe, // 4: 2041..2053, 2055..205E 285 0xffff0000, // 5: 2190..219F 286 0x003fffff, // 6: 2760..2775 287 0xfff00000, // 7: 2794..279F 288 0xffffff0e, // 8: 3001..3003, 3008..301F 289 0x00010001 // 9: 3020, 3030 290 }; 291 } 292