/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package java.lang;

import java.util.Locale;
import libcore.icu.ICU;
import libcore.icu.Transliterator;

/**
 * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
 *
 * <p>Simple one-to-one mappings are handled here; locale-sensitive languages, supplementary
 * characters and a few other awkward inputs are delegated to {@link ICU} or to a
 * {@link Transliterator}.
 */
class CaseMapper {
    /**
     * Multi-character upper-case expansions, stored as consecutive triples. An expansion that
     * needs only two characters has U+0000 in the third slot of its triple.
     */
    private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray();

    /**
     * Lookup table for code points 0x1f50 through 0x1ffc: each element is the triple number in
     * {@link #upperValues} for the corresponding code point, or 0 when there is no expansion.
     */
    private static final char[] upperValues2 = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray();

    private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
    private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
    private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';

    /** Per-thread transliterator used for upper-casing when the locale language is "el". */
    private static final ThreadLocal<Transliterator> EL_UPPER = new ThreadLocal<Transliterator>() {
        @Override protected Transliterator initialValue() {
            return new Transliterator("el-Upper");
        }
    };

    /**
     * Not instantiable; every operation is static.
     *
     * <p>Our current GC makes short-lived objects more expensive than we'd like. Once that is
     * fixed, this class should instead be instantiated with the String together with its value,
     * offset, and count fields.
     */
    private CaseMapper() {
    }

    /**
     * Implements String.toLowerCase.
     *
     * @param locale the locale whose language decides whether ICU must do the work
     * @param s the original String, returned as-is when no character changes
     * @param value the backing characters of {@code s}, not otherwise accessible from here
     * @param offset index in {@code value} of the first character of {@code s}
     * @param count number of characters of {@code s} in {@code value}
     * @return the lower-cased text, or {@code s} itself if nothing changed
     */
    public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
        // Hard cases go to ICU4C. Greek is only hard for toUpperCase, not for toLowerCase.
        if (isSpecialCasedLanguage(locale.getLanguage())) {
            return ICU.toLowerCase(s, locale);
        }

        char[] lowered = null; // Stays null until the first character that actually changes.
        int written = 0;
        final int limit = offset + count;
        for (int pos = offset; pos < limit; pos++) {
            final char original = value[pos];
            if (original == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(original)) {
                // Too awkward to handle here; let ICU redo the whole string.
                return ICU.toLowerCase(s, locale);
            }

            final char mapped;
            if (original == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, pos)) {
                mapped = GREEK_SMALL_FINAL_SIGMA;
            } else {
                mapped = Character.toLowerCase(original);
            }

            if (lowered == null) {
                if (mapped == original) {
                    continue;
                }
                // First difference: start a buffer seeded with the unchanged prefix. Every
                // mapping on this path is one char to one char, so 'count' slots are enough.
                lowered = new char[count];
                written = pos - offset;
                System.arraycopy(value, offset, lowered, 0, written);
            }
            lowered[written++] = mapped;
        }

        if (lowered == null) {
            return s;
        }
        return new String(0, written, lowered);
    }

    /**
     * Returns true if {@code languageCode} is one of the languages ("tr", "az", "lt") whose case
     * mapping is always handed to ICU.
     */
    private static boolean isSpecialCasedLanguage(String languageCode) {
        return languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt");
    }

    /**
     * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable
     * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
     * then a cased letter.
     *
     * <p>This implementation only inspects the characters immediately before and after
     * {@code index}.
     */
    private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
        // TODO: we don't skip case-ignorable sequences like we should.
        if (index <= offset) {
            return false; // Nothing precedes the sigma.
        }
        if (!isCased(value[index - 1])) {
            return false;
        }
        final int following = index + 1;
        // Final if the sigma is the last character, or the next character is not cased.
        return following >= offset + count || !isCased(value[following]);
    }

    /**
     * Returns true if {@code c} is a lower-case, upper-case or title-case character.
     */
    private static boolean isCased(char c) {
        // TODO: we should add a more direct way to test for a cased letter.
        return Character.isLowerCase(c) || Character.isUpperCase(c) || Character.isTitleCase(c);
    }

    /**
     * Return the index of the specified character into the upperValues table.
     * The upperValues table contains three entries at each position. These
     * three characters are the upper case conversion. If only two characters
     * are used, the third character in the table is U+0000.
     *
     * @param ch the character to look up
     * @return the index into the upperValues table (to be multiplied by three by the caller),
     *         or -1 if {@code ch} has no entry
     */
    private static int upperIndex(int ch) {
        if (ch < 0xdf) {
            return -1;
        }
        if (ch <= 0x587) {
            // Isolated entries scattered through the low ranges.
            switch (ch) {
                case 0xdf: return 0;
                case 0x149: return 1;
                case 0x1f0: return 2;
                case 0x390: return 3;
                case 0x3b0: return 4;
                case 0x587: return 5;
                default: return -1;
            }
        }
        if (ch < 0x1e96) {
            return -1;
        }
        if (ch <= 0x1e9a) {
            return 6 + ch - 0x1e96;
        }
        if (ch >= 0x1f50 && ch <= 0x1ffc) {
            // Sparse range: go through the secondary table, where 0 means "no entry".
            final int slot = upperValues2[ch - 0x1f50];
            return slot == 0 ? -1 : slot;
        }
        if (ch >= 0xfb00 && ch <= 0xfb06) {
            return 90 + ch - 0xfb00;
        }
        if (ch >= 0xfb13 && ch <= 0xfb17) {
            return 97 + ch - 0xfb13;
        }
        return -1;
    }

    /**
     * Upper-case counterpart of {@link #toLowerCase}; takes the same arguments for the same
     * reasons.
     *
     * @param locale the locale whose language decides whether ICU or a transliterator is used
     * @param s the original String, returned as-is when no character changes
     * @param value the backing characters of {@code s}
     * @param offset index in {@code value} of the first character of {@code s}
     * @param count number of characters of {@code s} in {@code value}
     * @return the upper-cased text, or {@code s} itself if nothing changed
     */
    public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) {
        final String languageCode = locale.getLanguage();
        if (isSpecialCasedLanguage(languageCode)) {
            return ICU.toUpperCase(s, locale);
        }
        if (languageCode.equals("el")) {
            return EL_UPPER.get().transliterate(s);
        }

        char[] output = null; // Stays null until the first character that actually changes.
        int written = 0;
        final int limit = offset + count;
        for (int pos = offset; pos < limit; pos++) {
            final char ch = value[pos];
            if (Character.isHighSurrogate(ch)) {
                return ICU.toUpperCase(s, locale);
            }

            final int index = upperIndex(ch);
            if (index == -1) {
                // Plain one-to-one mapping.
                final char upper = Character.toUpperCase(ch);
                if (output == null) {
                    if (upper == ch) {
                        continue;
                    }
                    output = new char[count];
                    written = pos - offset;
                    System.arraycopy(value, offset, output, 0, written);
                } else if (written >= output.length) {
                    output = grow(output, (count / 6) + 2);
                }
                output[written++] = upper;
                continue;
            }

            // One character expands to two or three characters taken from upperValues.
            final int base = index * 3;
            final char third = upperValues[base + 2];
            if (output == null) {
                // Leave some headroom, since expansions make the result longer than the input.
                output = new char[count + (count / 6) + 2];
                written = pos - offset;
                System.arraycopy(value, offset, output, 0, written);
            } else if (written + (third == 0 ? 1 : 2) >= output.length) {
                output = grow(output, (count / 6) + 3);
            }
            output[written++] = upperValues[base];
            output[written++] = upperValues[base + 1];
            if (third != 0) {
                output[written++] = third;
            }
        }

        if (output == null) {
            return s;
        }
        // With fewer than 8 unused slots the (offset, count, char[]) constructor is used,
        // otherwise the (char[], offset, count) one. NOTE(review): presumably the former adopts
        // the array without copying while the latter copies to trim the slack; confirm in String.
        final int unused = output.length - written;
        if (unused < 8) {
            return new String(0, written, output);
        }
        return new String(output, 0, written);
    }

    /**
     * Returns a new array holding the contents of {@code buffer} followed by {@code extra}
     * additional zeroed slots.
     */
    private static char[] grow(char[] buffer, int extra) {
        final char[] bigger = new char[buffer.length + extra];
        System.arraycopy(buffer, 0, bigger, 0, buffer.length);
        return bigger;
    }
}