• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LATINIME_CHAR_UTILS_H
18 #define LATINIME_CHAR_UTILS_H
19 
20 #include <cctype>
21 
22 #include "defines.h"
23 
24 namespace latinime {
25 
/**
 * Returns true if c is one of the ASCII capital letters 'A' through 'Z',
 * and false for every other value (including negative values).
 */
inline static bool isAsciiUpper(int c) {
    // Note: isupper(...) reports false positives for some Cyrillic characters, causing them to
    // be incorrectly lower-cased using toAsciiLower(...) rather than latin_tolower(...).
    // An explicit range check is used instead.
    if (c < 'A') {
        return false;
    }
    return c <= 'Z';
}
31 
/**
 * Shifts c by the distance between 'A' and 'a'.
 *
 * No range check is performed: the result is only a lower-case letter when
 * c is already known to be in 'A'..'Z' (see isAsciiUpper(...)). Any other
 * value is shifted by the same fixed offset.
 */
inline static int toAsciiLower(int c) {
    const int offsetFromUpperA = c - 'A';
    return offsetFromUpperA + 'a';
}
35 
/**
 * Returns true if c fits in 7 bits, i.e. 0 <= c <= 0x7F.
 *
 * Implemented as a bit test rather than isascii(...): isascii is a POSIX
 * extension and is not guaranteed to be declared by standard <cctype>.
 * The mask also rejects negative values, because their high bits are set.
 */
inline static bool isAscii(int c) {
    return (c & ~0x7F) == 0;
}
39 
// Declared here and defined in another translation unit.
// NOTE(review): judging by its name and its use in toLowerCase(...), this maps a 16-bit
// character to its lower-case form -- confirm against the definition.
unsigned short latin_tolower(const unsigned short c);
41 
/**
 * Table mapping most combined Latin, Greek, and Cyrillic characters
 * to their base characters.  If c is in range, BASE_CHARS[c] == c
 * if c is not a combined character, or the base character if it
 * is combined.
 *
 * BASE_CHARS_SIZE is the number of entries in the table, so the valid
 * indices are 0 .. BASE_CHARS_SIZE - 1.  The table itself is defined in
 * another translation unit.
 */
static const int BASE_CHARS_SIZE = 0x0500;
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
50 
toBaseCodePoint(int c)51 inline static int toBaseCodePoint(int c) {
52     if (c < BASE_CHARS_SIZE) {
53         return static_cast<int>(BASE_CHARS[c]);
54     }
55     return c;
56 }
57 
toLowerCase(const int c)58 AK_FORCE_INLINE static int toLowerCase(const int c) {
59     if (isAsciiUpper(c)) {
60         return toAsciiLower(c);
61     }
62     if (isAscii(c)) {
63         return c;
64     }
65     return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
66 }
67 
toBaseLowerCase(const int c)68 AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
69     return toLowerCase(toBaseCodePoint(c));
70 }
71 
isIntentionalOmissionCodePoint(const int codePoint)72 inline static bool isIntentionalOmissionCodePoint(const int codePoint) {
73     // TODO: Do not hardcode here
74     return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
75 }
76 
/**
 * Counts the code points stored at the front of codePoints.
 *
 * Scanning stops at the first element equal to 0 or after arraySize
 * elements, whichever comes first; the terminating 0 is not counted.
 *
 * @param arraySize  maximum number of elements to inspect; values <= 0 yield 0
 * @param codePoints array to scan; a null pointer yields 0
 * @return number of elements before the first 0, at most arraySize
 */
inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
    if (!codePoints) {
        return 0;
    }
    int count = 0;
    // The bound is checked first so that no element at or past arraySize is read.
    while (count < arraySize && codePoints[count] != 0) {
        ++count;
    }
    return count;
}
86 
87 } // namespace latinime
88 #endif // LATINIME_CHAR_UTILS_H
89