• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package java.lang;
18 
19 import java.util.Locale;
20 import libcore.icu.ICU;
21 import libcore.icu.Transliterator;
22 
23 /**
24  * Performs case operations as described by http://unicode.org/reports/tr21/tr21-5.html.
25  */
26 class CaseMapper {
27     private static final char[] upperValues = "SS\u0000\u02bcN\u0000J\u030c\u0000\u0399\u0308\u0301\u03a5\u0308\u0301\u0535\u0552\u0000H\u0331\u0000T\u0308\u0000W\u030a\u0000Y\u030a\u0000A\u02be\u0000\u03a5\u0313\u0000\u03a5\u0313\u0300\u03a5\u0313\u0301\u03a5\u0313\u0342\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f08\u0399\u0000\u1f09\u0399\u0000\u1f0a\u0399\u0000\u1f0b\u0399\u0000\u1f0c\u0399\u0000\u1f0d\u0399\u0000\u1f0e\u0399\u0000\u1f0f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f28\u0399\u0000\u1f29\u0399\u0000\u1f2a\u0399\u0000\u1f2b\u0399\u0000\u1f2c\u0399\u0000\u1f2d\u0399\u0000\u1f2e\u0399\u0000\u1f2f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1f68\u0399\u0000\u1f69\u0399\u0000\u1f6a\u0399\u0000\u1f6b\u0399\u0000\u1f6c\u0399\u0000\u1f6d\u0399\u0000\u1f6e\u0399\u0000\u1f6f\u0399\u0000\u1fba\u0399\u0000\u0391\u0399\u0000\u0386\u0399\u0000\u0391\u0342\u0000\u0391\u0342\u0399\u0391\u0399\u0000\u1fca\u0399\u0000\u0397\u0399\u0000\u0389\u0399\u0000\u0397\u0342\u0000\u0397\u0342\u0399\u0397\u0399\u0000\u0399\u0308\u0300\u0399\u0308\u0301\u0399\u0342\u0000\u0399\u0308\u0342\u03a5\u0308\u0300\u03a5\u0308\u0301\u03a1\u0313\u0000\u03a5\u0342\u0000\u03a5\u0308\u0342\u1ffa\u0399\u0000\u03a9\u0399\u0000\u038f\u0399\u0000\u03a9\u0342\u0000\u03a9\u0342\u0399\u03a9\u0399\u0000FF\u0000FI\u0000FL\u0000FFIFFLST\u0000ST\u0000\u0544\u0546\u0000\u0544\u0535\u0000\u0544\u053b\u0000\u054e\u0546\u0000\u0544\u053d\u0000".toCharArray();
28     private static final char[] upperValues2 = "\u000b\u0000\f\u0000\r\u0000\u000e\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>\u0000\u0000?@A\u0000BC\u0000\u0000\u0000\u0000D\u0000\u0000\u0000\u0000\u0000EFG\u0000HI\u0000\u0000\u0000\u0000J\u0000\u0000\u0000\u0000\u0000KL\u0000\u0000MN\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000OPQ\u0000RS\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000TUV\u0000WX\u0000\u0000\u0000\u0000Y".toCharArray();
29 
30     private static final char LATIN_CAPITAL_I_WITH_DOT = '\u0130';
31     private static final char GREEK_CAPITAL_SIGMA = '\u03a3';
32     private static final char GREEK_SMALL_FINAL_SIGMA = '\u03c2';
33 
34     /**
35      * Our current GC makes short-lived objects more expensive than we'd like. When that's fixed,
36      * this class should be changed so that you instantiate it with the String and its value,
37      * offset, and count fields.
38      */
CaseMapper()39     private CaseMapper() {
40     }
41 
42     /**
43      * Implements String.toLowerCase. We need 's' so that we can return the original String instance
44      * if nothing changes. We need 'value', 'offset', and 'count' because they're not otherwise
45      * accessible.
46      */
toLowerCase(Locale locale, String s, char[] value, int offset, int count)47     public static String toLowerCase(Locale locale, String s, char[] value, int offset, int count) {
48         // Punt hard cases to ICU4C.
49         // Note that Greek isn't a particularly hard case for toLowerCase, only toUpperCase.
50         String languageCode = locale.getLanguage();
51         if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
52             return ICU.toLowerCase(s, locale);
53         }
54 
55         char[] newValue = null;
56         int newCount = 0;
57         for (int i = offset, end = offset + count; i < end; ++i) {
58             char ch = value[i];
59             char newCh;
60             if (ch == LATIN_CAPITAL_I_WITH_DOT || Character.isHighSurrogate(ch)) {
61                 // Punt these hard cases.
62                 return ICU.toLowerCase(s, locale);
63             } else if (ch == GREEK_CAPITAL_SIGMA && isFinalSigma(value, offset, count, i)) {
64                 newCh = GREEK_SMALL_FINAL_SIGMA;
65             } else {
66                 newCh = Character.toLowerCase(ch);
67             }
68             if (newValue == null && ch != newCh) {
69                 newValue = new char[count]; // The result can't be longer than the input.
70                 newCount = i - offset;
71                 System.arraycopy(value, offset, newValue, 0, newCount);
72             }
73             if (newValue != null) {
74                 newValue[newCount++] = newCh;
75             }
76         }
77         return newValue != null ? new String(0, newCount, newValue) : s;
78     }
79 
80     /**
81      * True if 'index' is preceded by a sequence consisting of a cased letter and a case-ignorable
82      * sequence, and 'index' is not followed by a sequence consisting of an ignorable sequence and
83      * then a cased letter.
84      */
isFinalSigma(char[] value, int offset, int count, int index)85     private static boolean isFinalSigma(char[] value, int offset, int count, int index) {
86         // TODO: we don't skip case-ignorable sequences like we should.
87         // TODO: we should add a more direct way to test for a cased letter.
88         if (index <= offset) {
89             return false;
90         }
91         char previous = value[index - 1];
92         if (!(Character.isLowerCase(previous) || Character.isUpperCase(previous) || Character.isTitleCase(previous))) {
93             return false;
94         }
95         if (index + 1 >= offset + count) {
96             return true;
97         }
98         char next = value[index + 1];
99         if (Character.isLowerCase(next) || Character.isUpperCase(next) || Character.isTitleCase(next)) {
100             return false;
101         }
102         return true;
103     }
104 
105     /**
106      * Return the index of the specified character into the upperValues table.
107      * The upperValues table contains three entries at each position. These
108      * three characters are the upper case conversion. If only two characters
109      * are used, the third character in the table is \u0000.
110      * @return the index into the upperValues table, or -1
111      */
upperIndex(int ch)112     private static int upperIndex(int ch) {
113         int index = -1;
114         if (ch >= 0xdf) {
115             if (ch <= 0x587) {
116                 switch (ch) {
117                 case 0xdf: return 0;
118                 case 0x149: return 1;
119                 case 0x1f0: return 2;
120                 case 0x390: return 3;
121                 case 0x3b0: return 4;
122                 case 0x587: return 5;
123                 }
124             } else if (ch >= 0x1e96) {
125                 if (ch <= 0x1e9a) {
126                     index = 6 + ch - 0x1e96;
127                 } else if (ch >= 0x1f50 && ch <= 0x1ffc) {
128                     index = upperValues2[ch - 0x1f50];
129                     if (index == 0) {
130                         index = -1;
131                     }
132                 } else if (ch >= 0xfb00) {
133                     if (ch <= 0xfb06) {
134                         index = 90 + ch - 0xfb00;
135                     } else if (ch >= 0xfb13 && ch <= 0xfb17) {
136                         index = 97 + ch - 0xfb13;
137                     }
138                 }
139             }
140         }
141         return index;
142     }
143 
144     private static final ThreadLocal<Transliterator> EL_UPPER = new ThreadLocal<Transliterator>() {
145         @Override protected Transliterator initialValue() {
146             return new Transliterator("el-Upper");
147         }
148     };
149 
toUpperCase(Locale locale, String s, char[] value, int offset, int count)150     public static String toUpperCase(Locale locale, String s, char[] value, int offset, int count) {
151         String languageCode = locale.getLanguage();
152         if (languageCode.equals("tr") || languageCode.equals("az") || languageCode.equals("lt")) {
153             return ICU.toUpperCase(s, locale);
154         }
155         if (languageCode.equals("el")) {
156             return EL_UPPER.get().transliterate(s);
157         }
158 
159         char[] output = null;
160         int i = 0;
161         for (int o = offset, end = offset + count; o < end; o++) {
162             char ch = value[o];
163             if (Character.isHighSurrogate(ch)) {
164                 return ICU.toUpperCase(s, locale);
165             }
166             int index = upperIndex(ch);
167             if (index == -1) {
168                 if (output != null && i >= output.length) {
169                     char[] newoutput = new char[output.length + (count / 6) + 2];
170                     System.arraycopy(output, 0, newoutput, 0, output.length);
171                     output = newoutput;
172                 }
173                 char upch = Character.toUpperCase(ch);
174                 if (ch != upch) {
175                     if (output == null) {
176                         output = new char[count];
177                         i = o - offset;
178                         System.arraycopy(value, offset, output, 0, i);
179                     }
180                     output[i++] = upch;
181                 } else if (output != null) {
182                     output[i++] = ch;
183                 }
184             } else {
185                 int target = index * 3;
186                 char val3 = upperValues[target + 2];
187                 if (output == null) {
188                     output = new char[count + (count / 6) + 2];
189                     i = o - offset;
190                     System.arraycopy(value, offset, output, 0, i);
191                 } else if (i + (val3 == 0 ? 1 : 2) >= output.length) {
192                     char[] newoutput = new char[output.length + (count / 6) + 3];
193                     System.arraycopy(output, 0, newoutput, 0, output.length);
194                     output = newoutput;
195                 }
196 
197                 char val = upperValues[target];
198                 output[i++] = val;
199                 val = upperValues[target + 1];
200                 output[i++] = val;
201                 if (val3 != 0) {
202                     output[i++] = val3;
203                 }
204             }
205         }
206         if (output == null) {
207             return s;
208         }
209         return output.length == i || output.length - i < 8 ? new String(0, i, output) : new String(output, 0, i);
210     }
211 }
212