1 /*
2 * Copyright (C) 2008 The Android Open Source Project
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * * Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in
12 * the documentation and/or other materials provided with the
13 * distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
18 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
19 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
22 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
25 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28
29 #include <wctype.h>
30
31 #include <ctype.h>
32 #include <errno.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <wchar.h>
36
37 #include "private/icu.h"
38
// Numeric identifiers for the character classes handled in this file.
// The values are consecutive starting at 0, with WC_TYPE_INVALID as the
// zero value and WC_TYPE_MAX one past the last real class. wctype() returns
// these values (as an index into its name table) and iswctype() switches on
// them, so the numbering must stay dense and in this order.
enum {
  WC_TYPE_INVALID = 0,
  WC_TYPE_ALNUM = 1,
  WC_TYPE_ALPHA = 2,
  WC_TYPE_BLANK = 3,
  WC_TYPE_CNTRL = 4,
  WC_TYPE_DIGIT = 5,
  WC_TYPE_GRAPH = 6,
  WC_TYPE_LOWER = 7,
  WC_TYPE_PRINT = 8,
  WC_TYPE_PUNCT = 9,
  WC_TYPE_SPACE = 10,
  WC_TYPE_UPPER = 11,
  WC_TYPE_XDIGIT = 12,
  WC_TYPE_MAX = 13
};
55
iswalnum(wint_t wc)56 int iswalnum(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_ALNUM, isalnum); }
iswalpha(wint_t wc)57 int iswalpha(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_ALPHABETIC, isalpha); }
iswblank(wint_t wc)58 int iswblank(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_BLANK, isblank); }
iswgraph(wint_t wc)59 int iswgraph(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_GRAPH, isgraph); }
iswlower(wint_t wc)60 int iswlower(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_LOWERCASE, islower); }
iswprint(wint_t wc)61 int iswprint(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_PRINT, isprint); }
iswspace(wint_t wc)62 int iswspace(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_WHITE_SPACE, isspace); }
iswupper(wint_t wc)63 int iswupper(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_UPPERCASE, isupper); }
iswxdigit(wint_t wc)64 int iswxdigit(wint_t wc) { return __icu_hasBinaryProperty(wc, UCHAR_POSIX_XDIGIT, isxdigit); }
65
iswcntrl(wint_t wc)66 int iswcntrl(wint_t wc) {
67 typedef int8_t (*FnT)(UChar32);
68 static auto u_charType = reinterpret_cast<FnT>(__find_icu_symbol("u_charType"));
69 return u_charType ? (u_charType(wc) == U_CONTROL_CHAR) : iscntrl(wc);
70 }
71
iswdigit(wint_t wc)72 int iswdigit(wint_t wc) {
73 typedef UBool (*FnT)(UChar32);
74 static auto u_isdigit = reinterpret_cast<FnT>(__find_icu_symbol("u_isdigit"));
75 return u_isdigit ? u_isdigit(wc) : isdigit(wc);
76 }
77
iswpunct(wint_t wc)78 int iswpunct(wint_t wc) {
79 typedef UBool (*FnT)(UChar32);
80 static auto u_ispunct = reinterpret_cast<FnT>(__find_icu_symbol("u_ispunct"));
81 return u_ispunct ? u_ispunct(wc) : ispunct(wc);
82 }
83
// Locale-taking form of iswalnum(); the locale argument is ignored.
int iswalnum_l(wint_t c, locale_t /*unused*/) {
  return iswalnum(c);
}
// Locale-taking form of iswalpha(); the locale argument is ignored.
int iswalpha_l(wint_t c, locale_t /*unused*/) {
  return iswalpha(c);
}
// Locale-taking form of iswblank(); the locale argument is ignored.
int iswblank_l(wint_t c, locale_t /*unused*/) {
  return iswblank(c);
}
// Locale-taking form of iswcntrl(); the locale argument is ignored.
int iswcntrl_l(wint_t c, locale_t /*unused*/) {
  return iswcntrl(c);
}
// Locale-taking form of iswdigit(); the locale argument is ignored.
int iswdigit_l(wint_t c, locale_t /*unused*/) {
  return iswdigit(c);
}
// Locale-taking form of iswgraph(); the locale argument is ignored.
int iswgraph_l(wint_t c, locale_t /*unused*/) {
  return iswgraph(c);
}
// Locale-taking form of iswlower(); the locale argument is ignored.
int iswlower_l(wint_t c, locale_t /*unused*/) {
  return iswlower(c);
}
// Locale-taking form of iswprint(); the locale argument is ignored.
int iswprint_l(wint_t c, locale_t /*unused*/) {
  return iswprint(c);
}
// Locale-taking form of iswpunct(); the locale argument is ignored.
int iswpunct_l(wint_t c, locale_t /*unused*/) {
  return iswpunct(c);
}
// Locale-taking form of iswspace(); the locale argument is ignored.
int iswspace_l(wint_t c, locale_t /*unused*/) {
  return iswspace(c);
}
// Locale-taking form of iswupper(); the locale argument is ignored.
int iswupper_l(wint_t c, locale_t /*unused*/) {
  return iswupper(c);
}
// Locale-taking form of iswxdigit(); the locale argument is ignored.
int iswxdigit_l(wint_t c, locale_t /*unused*/) {
  return iswxdigit(c);
}
96
iswctype(wint_t wc,wctype_t char_class)97 int iswctype(wint_t wc, wctype_t char_class) {
98 switch (char_class) {
99 case WC_TYPE_ALNUM: return iswalnum(wc);
100 case WC_TYPE_ALPHA: return iswalpha(wc);
101 case WC_TYPE_BLANK: return iswblank(wc);
102 case WC_TYPE_CNTRL: return iswcntrl(wc);
103 case WC_TYPE_DIGIT: return iswdigit(wc);
104 case WC_TYPE_GRAPH: return iswgraph(wc);
105 case WC_TYPE_LOWER: return iswlower(wc);
106 case WC_TYPE_PRINT: return iswprint(wc);
107 case WC_TYPE_PUNCT: return iswpunct(wc);
108 case WC_TYPE_SPACE: return iswspace(wc);
109 case WC_TYPE_UPPER: return iswupper(wc);
110 case WC_TYPE_XDIGIT: return iswxdigit(wc);
111 default: return 0;
112 }
113 }
114
// Locale-taking form of iswctype(); the locale argument is ignored.
int iswctype_l(wint_t wc, wctype_t char_class, locale_t /*unused*/) {
  const int in_class = iswctype(wc, char_class);
  return in_class;
}
118
towlower(wint_t wc)119 wint_t towlower(wint_t wc) {
120 if (wc < 0x80) {
121 if (wc >= 'A' && wc <= 'Z') return wc | 0x20;
122 return wc;
123 }
124
125 typedef UChar32 (*FnT)(UChar32);
126 static auto u_tolower = reinterpret_cast<FnT>(__find_icu_symbol("u_tolower"));
127 return u_tolower ? u_tolower(wc) : tolower(wc);
128 }
129
towupper(wint_t wc)130 wint_t towupper(wint_t wc) {
131 if (wc < 0x80) {
132 // Using EOR rather than AND makes no difference on arm, but saves an
133 // instruction on arm64.
134 if (wc >= 'a' && wc <= 'z') return wc ^ 0x20;
135 return wc;
136 }
137
138 typedef UChar32 (*FnT)(UChar32);
139 static auto u_toupper = reinterpret_cast<FnT>(__find_icu_symbol("u_toupper"));
140 return u_toupper ? u_toupper(wc) : toupper(wc);
141 }
142
// Locale-taking form of towupper(); the locale argument is ignored.
wint_t towupper_l(wint_t c, locale_t /*unused*/) {
  return towupper(c);
}
// Locale-taking form of towlower(); the locale argument is ignored.
wint_t towlower_l(wint_t c, locale_t /*unused*/) {
  return towlower(c);
}
145
wctype(const char * property)146 wctype_t wctype(const char* property) {
147 static const char* const properties[WC_TYPE_MAX] = {
148 "<invalid>",
149 "alnum", "alpha", "blank", "cntrl", "digit", "graph",
150 "lower", "print", "punct", "space", "upper", "xdigit"
151 };
152 for (size_t i = 0; i < WC_TYPE_MAX; ++i) {
153 if (!strcmp(properties[i], property)) {
154 return static_cast<wctype_t>(i);
155 }
156 }
157 return static_cast<wctype_t>(0);
158 }
159
// Locale-taking form of wctype(); the locale argument is ignored.
wctype_t wctype_l(const char* property, locale_t /*unused*/) {
  const wctype_t char_class = wctype(property);
  return char_class;
}
163
// Opaque handles for the two supported mappings. They are only ever
// compared for equality and never dereferenced; the values are distinct
// and non-null so that a null wctrans_t can signal "no such mapping".
static wctrans_t wctrans_tolower = wctrans_t(1);
static wctrans_t wctrans_toupper = wctrans_t(2);

// Looks up the mapping called `name`.
//
// Returns the handle for "tolower" or "toupper". For any other name,
// sets errno to EINVAL and returns null, matching how towctrans() reports
// an invalid handle.
wctrans_t wctrans(const char* name) {
  if (strcmp(name, "tolower") == 0) return wctrans_tolower;
  if (strcmp(name, "toupper") == 0) return wctrans_toupper;
  errno = EINVAL;
  return nullptr;
}

// Locale-taking form of wctrans(); the locale argument is ignored.
wctrans_t wctrans_l(const char* name, locale_t) {
  return wctrans(name);
}

// Applies the mapping `t` (a handle obtained from wctrans()) to `c`.
//
// Returns towlower(c) or towupper(c) for the corresponding handle. For any
// other handle, sets errno to EINVAL and returns `c` unchanged, as POSIX
// specifies; returning 0 here would turn every character into a NUL for
// callers that do not check the handle first.
wint_t towctrans(wint_t c, wctrans_t t) {
  if (t == wctrans_tolower) return towlower(c);
  if (t == wctrans_toupper) return towupper(c);
  errno = EINVAL;
  return c;
}

// Locale-taking form of towctrans(); the locale argument is ignored.
wint_t towctrans_l(wint_t c, wctrans_t t, locale_t) {
  return towctrans(c, t);
}
187