1 /* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <utils/AndroidUnicode.h> 18 #include "CharacterData.h" 19 20 #define LOG_TAG "Unicode" 21 #include <utils/Log.h> 22 23 // ICU headers for using macros 24 #include <unicode/utf16.h> 25 26 #define MIN_RADIX 2 27 #define MAX_RADIX 36 28 29 #define TYPE_SHIFT 0 30 #define TYPE_MASK ((1<<5)-1) 31 32 #define DIRECTION_SHIFT (TYPE_SHIFT+5) 33 #define DIRECTION_MASK ((1<<5)-1) 34 35 #define MIRRORED_SHIFT (DIRECTION_SHIFT+5) 36 #define MIRRORED_MASK ((1<<1)-1) 37 38 #define TOUPPER_SHIFT (MIRRORED_SHIFT+1) 39 #define TOUPPER_MASK ((1<<6)-1) 40 41 #define TOLOWER_SHIFT (TOUPPER_SHIFT+6) 42 #define TOLOWER_MASK ((1<<6)-1) 43 44 #define TOTITLE_SHIFT (TOLOWER_SHIFT+6) 45 #define TOTITLE_MASK ((1<<2)-1) 46 47 #define MIRROR_SHIFT (TOTITLE_SHIFT+2) 48 #define MIRROR_MASK ((1<<5)-1) 49 50 #define NUMERIC_SHIFT (TOTITLE_SHIFT+2) 51 #define NUMERIC_MASK ((1<<7)-1) 52 53 #define DECOMPOSITION_SHIFT (11) 54 #define DECOMPOSITION_MASK ((1<<5)-1) 55 56 /* 57 * Returns the value stored in the CharacterData tables that contains 58 * an index into the packed data table and the decomposition type. 59 */ findCharacterValue(UChar32 c)60static uint16_t findCharacterValue(UChar32 c) 61 { 62 LOG_ASSERT(c >= 0 && c <= 0x10FFFF, "findCharacterValue received an invalid codepoint"); 63 if (c < 256) 64 return CharacterData::LATIN1_DATA[c]; 65 66 // Rotate the bits because the tables are separated into even and odd codepoints 67 c = (c >> 1) | ((c & 1) << 20); 68 69 CharacterData::Range search = CharacterData::FULL_DATA[c >> 16]; 70 const uint32_t* array = search.array; 71 72 // This trick is so that that compare in the while loop does not 73 // need to shift the array entry down by 16 74 c <<= 16; 75 c |= 0xFFFF; 76 77 int high = (int)search.length - 1; 78 int low = 0; 79 80 if (high < 0) 81 return 0; 82 83 while (low < high - 1) 84 { 85 int probe = (high + low) >> 1; 86 87 // The entries contain the codepoint in the high 16 bits and the index 88 // into PACKED_DATA in the low 16. 89 if (array[probe] > (unsigned)c) 90 high = probe; 91 else 92 low = probe; 93 } 94 95 LOG_ASSERT((array[low] <= (unsigned)c), "A suitable range was not found"); 96 return array[low] & 0xFFFF; 97 } 98 getPackedData(UChar32 c)99uint32_t android::Unicode::getPackedData(UChar32 c) 100 { 101 // findCharacterValue returns a 16-bit value with the top 5 bits containing a decomposition type 102 // and the remaining bits containing an index. 103 return CharacterData::PACKED_DATA[findCharacterValue(c) & 0x7FF]; 104 } 105 getType(UChar32 c)106android::Unicode::CharType android::Unicode::getType(UChar32 c) 107 { 108 if (c < 0 || c >= 0x10FFFF) 109 return CHARTYPE_UNASSIGNED; 110 return (CharType)((getPackedData(c) >> TYPE_SHIFT) & TYPE_MASK); 111 } 112 getDecompositionType(UChar32 c)113android::Unicode::DecompositionType android::Unicode::getDecompositionType(UChar32 c) 114 { 115 // findCharacterValue returns a 16-bit value with the top 5 bits containing a decomposition type 116 // and the remaining bits containing an index. 117 return (DecompositionType)((findCharacterValue(c) >> DECOMPOSITION_SHIFT) & DECOMPOSITION_MASK); 118 } 119 getDigitValue(UChar32 c,int radix)120int android::Unicode::getDigitValue(UChar32 c, int radix) 121 { 122 if (radix < MIN_RADIX || radix > MAX_RADIX) 123 return -1; 124 125 int tempValue = radix; 126 127 if (c >= '0' && c <= '9') 128 tempValue = c - '0'; 129 else if (c >= 'a' && c <= 'z') 130 tempValue = c - 'a' + 10; 131 else if (c >= 'A' && c <= 'Z') 132 tempValue = c - 'A' + 10; 133 134 return tempValue < radix ? tempValue : -1; 135 } 136 getNumericValue(UChar32 c)137int android::Unicode::getNumericValue(UChar32 c) 138 { 139 if (isMirrored(c)) 140 return -1; 141 142 return (int) CharacterData::NUMERICS[((getPackedData(c) >> NUMERIC_SHIFT) & NUMERIC_MASK)]; 143 } 144 toLower(UChar32 c)145UChar32 android::Unicode::toLower(UChar32 c) 146 { 147 return c + CharacterData::LCDIFF[(getPackedData(c) >> TOLOWER_SHIFT) & TOLOWER_MASK]; 148 } 149 toUpper(UChar32 c)150UChar32 android::Unicode::toUpper(UChar32 c) 151 { 152 return c + CharacterData::UCDIFF[(getPackedData(c) >> TOUPPER_SHIFT) & TOUPPER_MASK]; 153 } 154 getDirectionality(UChar32 c)155android::Unicode::Direction android::Unicode::getDirectionality(UChar32 c) 156 { 157 uint32_t data = getPackedData(c); 158 159 if (0 == data) 160 return DIRECTIONALITY_UNDEFINED; 161 162 Direction d = (Direction) ((data >> DIRECTION_SHIFT) & DIRECTION_MASK); 163 164 if (DIRECTION_MASK == d) 165 return DIRECTIONALITY_UNDEFINED; 166 167 return d; 168 } 169 isMirrored(UChar32 c)170bool android::Unicode::isMirrored(UChar32 c) 171 { 172 return ((getPackedData(c) >> MIRRORED_SHIFT) & MIRRORED_MASK) != 0; 173 } 174 toMirror(UChar32 c)175UChar32 android::Unicode::toMirror(UChar32 c) 176 { 177 if (!isMirrored(c)) 178 return c; 179 180 return c + CharacterData::MIRROR_DIFF[(getPackedData(c) >> MIRROR_SHIFT) & MIRROR_MASK]; 181 } 182 toTitle(UChar32 c)183UChar32 android::Unicode::toTitle(UChar32 c) 184 { 185 int32_t diff = CharacterData::TCDIFF[(getPackedData(c) >> TOTITLE_SHIFT) & TOTITLE_MASK]; 186 187 if (TOTITLE_MASK == diff) 188 return toUpper(c); 189 190 return c + diff; 191 } 192 193 194