1 /*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef LATINIME_CHAR_UTILS_H
18 #define LATINIME_CHAR_UTILS_H
19
20 #include <cctype>
21
22 #include "defines.h"
23
24 namespace latinime {
25
/**
 * Tells whether c is one of the 26 ASCII capital letters 'A'..'Z'.
 *
 * The range is tested by hand instead of calling isupper(...): isupper(...) reports false
 * positives for some Cyrillic characters, which would then be lower-cased with
 * toAsciiLower(...) instead of latin_tolower(...).
 */
inline static bool isAsciiUpper(int c) {
    const bool beforeRange = c < 'A';
    const bool afterRange = c > 'Z';
    return !(beforeRange || afterRange);
}
31
/**
 * Shifts c by the distance between 'A' and 'a'.
 *
 * No range check is performed here; the result is only a lower-case letter when c is an
 * ASCII capital letter (see isAsciiUpper()).
 */
inline static int toAsciiLower(int c) {
    const int upperToLowerOffset = 'a' - 'A';
    return c + upperToLowerOffset;
}
35
/**
 * Tells whether c lies in the 7-bit ASCII range [0x00, 0x7F].
 *
 * The range is checked explicitly rather than through isascii(), which is a POSIX
 * extension and not guaranteed by the standard <cctype> header. Negative values and
 * values above 0x7F yield false.
 */
inline static bool isAscii(int c) {
    return c >= 0 && c <= 0x7F;
}
39
/**
 * Lower-casing routine used by toLowerCase() for characters outside the ASCII range.
 * Only declared in this header; the definition lives in another translation unit.
 * NOTE(review): which scripts/characters are actually covered is not visible from here --
 * confirm against the implementation.
 */
unsigned short latin_tolower(const unsigned short c);

/**
 * Table mapping most combined Latin, Greek, and Cyrillic characters
 * to their base characters. If c is in range, BASE_CHARS[c] == c
 * if c is not a combined character, or the base character if it
 * is combined.
 *
 * BASE_CHARS_SIZE is the number of entries, so valid indices are 0x0000..0x04FF.
 * The table itself is only declared here (extern); its contents are defined elsewhere.
 */
static const int BASE_CHARS_SIZE = 0x0500;
extern const unsigned short BASE_CHARS[BASE_CHARS_SIZE];
50
toBaseCodePoint(int c)51 inline static int toBaseCodePoint(int c) {
52 if (c < BASE_CHARS_SIZE) {
53 return static_cast<int>(BASE_CHARS[c]);
54 }
55 return c;
56 }
57
toLowerCase(const int c)58 AK_FORCE_INLINE static int toLowerCase(const int c) {
59 if (isAsciiUpper(c)) {
60 return toAsciiLower(c);
61 }
62 if (isAscii(c)) {
63 return c;
64 }
65 return static_cast<int>(latin_tolower(static_cast<unsigned short>(c)));
66 }
67
toBaseLowerCase(const int c)68 AK_FORCE_INLINE static int toBaseLowerCase(const int c) {
69 return toLowerCase(toBaseCodePoint(c));
70 }
71
isIntentionalOmissionCodePoint(const int codePoint)72 inline static bool isIntentionalOmissionCodePoint(const int codePoint) {
73 // TODO: Do not hardcode here
74 return codePoint == KEYCODE_SINGLE_QUOTE || codePoint == KEYCODE_HYPHEN_MINUS;
75 }
76
/**
 * Counts the leading code points in codePoints that precede the first zero entry.
 *
 * @param arraySize maximum number of entries to inspect
 * @param codePoints array of code points; entries at indices below arraySize may be read
 * @return index of the first zero entry, or arraySize when none of the first arraySize
 *         entries is zero (0 when arraySize is not positive).
 */
inline static int getCodePointCount(const int arraySize, const int *const codePoints) {
    int count = 0;
    // Stop at the capacity limit or at the terminating zero, whichever comes first.
    while (count < arraySize && codePoints[count] != '\0') {
        ++count;
    }
    return count;
}
86
87 } // namespace latinime
88 #endif // LATINIME_CHAR_UTILS_H
89