1 /** @file
2 Character classification and case conversion tables, and functions,
3 for the C Standard Library as required to implement ctype.h.
4
5 These are the default, C locale, tables.
6
7 Copyright (c) 2010 - 2011, Intel Corporation. All rights reserved.<BR>
8 This program and the accompanying materials are licensed and made available under
9 the terms and conditions of the BSD License that accompanies this distribution.
10 The full text of the license may be found at
11 http://opensource.org/licenses/bsd-license.
12
13 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
14 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
15 **/
16 #include <LibConfig.h>
17 #include <ctype.h>
18
19 /// ASCII-7 Character Classification Table
20 const UINT16 _C_CharClassTable[128] = {
21 /* 00 NUL */ ( _CC ),
22 /* 01 SOH */ ( _CC ),
23 /* 02 STX */ ( _CC ),
24 /* 03 ETX */ ( _CC ),
25 /* 04 EOT */ ( _CC ),
26 /* 05 ENQ */ ( _CC ),
27 /* 06 ACK */ ( _CC ),
28 /* 07 BEL */ ( _CC ),
29 /* 08 BS */ ( _CC ),
30 /* 09 TAB */ ( _CC | _CW | _CB ),
31 /* 0A LF */ ( _CC | _CW ),
32 /* 0B VT */ ( _CC | _CW ),
33 /* 0C FF */ ( _CC | _CW ),
34 /* 0D CR */ ( _CC | _CW ),
35 /* 0E SO */ ( _CC ),
36 /* 0F SI */ ( _CC ),
37 /* 10 DLE */ ( _CC ),
38 /* 11 DC1 */ ( _CC ),
39 /* 12 DC2 */ ( _CC ),
40 /* 13 DC3 */ ( _CC ),
41 /* 14 DC4 */ ( _CC ),
42 /* 15 NAK */ ( _CC ),
43 /* 16 SYN */ ( _CC ),
44 /* 17 ETB */ ( _CC ),
45 /* 18 CAN */ ( _CC ),
46 /* 19 EM */ ( _CC ),
47 /* 1A SUB */ ( _CC ),
48 /* 1B ESC */ ( _CC ),
49 /* 1C FS */ ( _CC ),
50 /* 1D GS */ ( _CC ),
51 /* 1E RS */ ( _CC ),
52 /* 1F US */ ( _CC ),
53 /* 20 ' ' */ ( _CW | _CS | _CB ),
54 /* 21 '!' */ ( _CP | _CG ),
55 /* 22 '"' */ ( _CP | _CG ),
56 /* 23 '#' */ ( _CP | _CG ),
57 /* 24 '$' */ ( _CP | _CG ),
58 /* 25 '%' */ ( _CP | _CG ),
59 /* 26 '&' */ ( _CP | _CG ),
60 /* 27 '\''*/ ( _CP | _CG | _C0 ),
61 /* 28 '(' */ ( _CP | _CG ),
62 /* 29 ')' */ ( _CP | _CG ),
63 /* 2A '*' */ ( _CP | _CG ),
64 /* 2B '+' */ ( _CP | _CG ),
65 /* 2C ',' */ ( _CP | _CG ),
66 /* 2D '-' */ ( _CP | _CG ),
67 /* 2E '.' */ ( _CP | _CG ),
68 /* 2F '/' */ ( _CP | _CG | _C0 ),
69 /* 30 '0' */ ( _CD | _CG ),
70 /* 31 '1' */ ( _CD | _CG ),
71 /* 32 '2' */ ( _CD | _CG ),
72 /* 33 '3' */ ( _CD | _CG ),
73 /* 34 '4' */ ( _CD | _CG ),
74 /* 35 '5' */ ( _CD | _CG ),
75 /* 36 '6' */ ( _CD | _CG ),
76 /* 37 '7' */ ( _CD | _CG ),
77 /* 38 '8' */ ( _CD | _CG ),
78 /* 39 '9' */ ( _CD | _CG ),
79 /* 3A ':' */ ( _CP | _CG ),
80 /* 3B ';' */ ( _CP | _CG ),
81 /* 3C '<' */ ( _CP | _CG ),
82 /* 3D '=' */ ( _CP | _CG ),
83 /* 3E '>' */ ( _CP | _CG ),
84 /* 3F '?' */ ( _CP | _CG ),
85 /* 40 '@' */ ( _CP | _CG ),
86 /* 41 'A' */ ( _CU | _CX | _CG ),
87 /* 42 'B' */ ( _CU | _CX | _CG ),
88 /* 43 'C' */ ( _CU | _CX | _CG ),
89 /* 44 'D' */ ( _CU | _CX | _CG ),
90 /* 45 'E' */ ( _CU | _CX | _CG ),
91 /* 46 'F' */ ( _CU | _CX | _CG ),
92 /* 47 'G' */ ( _CU | _CG ),
93 /* 48 'H' */ ( _CU | _CG ),
94 /* 49 'I' */ ( _CU | _CG ),
95 /* 4A 'J' */ ( _CU | _CG ),
96 /* 4B 'K' */ ( _CU | _CG ),
97 /* 4C 'L' */ ( _CU | _CG ),
98 /* 4D 'M' */ ( _CU | _CG ),
99 /* 4E 'N' */ ( _CU | _CG ),
100 /* 4F 'O' */ ( _CU | _CG ),
101 /* 50 'P' */ ( _CU | _CG ),
102 /* 51 'Q' */ ( _CU | _CG ),
103 /* 52 'R' */ ( _CU | _CG ),
104 /* 53 'S' */ ( _CU | _CG ),
105 /* 54 'T' */ ( _CU | _CG ),
106 /* 55 'U' */ ( _CU | _CG ),
107 /* 56 'V' */ ( _CU | _CG ),
108 /* 57 'W' */ ( _CU | _CG ),
109 /* 58 'X' */ ( _CU | _CG ),
110 /* 59 'Y' */ ( _CU | _CG ),
111 /* 5A 'Z' */ ( _CU | _CG ),
112 /* 5B '[' */ ( _CP | _CG ),
113 /* 5C '\' */ ( _CP | _CG ),
114 /* 5D ']' */ ( _CP | _CG ),
115 /* 5E '^' */ ( _CP | _CG ),
116 /* 5F '_' */ ( _CP | _CG ),
117 /* 60 '`' */ ( _CP | _CG ),
118 /* 61 'a' */ ( _CL | _CX | _CG ),
119 /* 62 'b' */ ( _CL | _CX | _CG ),
120 /* 63 'c' */ ( _CL | _CX | _CG ),
121 /* 64 'd' */ ( _CL | _CX | _CG ),
122 /* 65 'e' */ ( _CL | _CX | _CG ),
123 /* 66 'f' */ ( _CL | _CX | _CG ),
124 /* 67 'g' */ ( _CL | _CG ),
125 /* 68 'h' */ ( _CL | _CG ),
126 /* 69 'i' */ ( _CL | _CG ),
127 /* 6A 'j' */ ( _CL | _CG ),
128 /* 6B 'k' */ ( _CL | _CG ),
129 /* 6C 'l' */ ( _CL | _CG ),
130 /* 6D 'm' */ ( _CL | _CG ),
131 /* 6E 'n' */ ( _CL | _CG ),
132 /* 6F 'o' */ ( _CL | _CG ),
133 /* 70 'p' */ ( _CL | _CG ),
134 /* 71 'q' */ ( _CL | _CG ),
135 /* 72 'r' */ ( _CL | _CG ),
136 /* 73 's' */ ( _CL | _CG ),
137 /* 74 't' */ ( _CL | _CG ),
138 /* 75 'u' */ ( _CL | _CG ),
139 /* 76 'v' */ ( _CL | _CG ),
140 /* 77 'w' */ ( _CL | _CG ),
141 /* 78 'x' */ ( _CL | _CG ),
142 /* 79 'y' */ ( _CL | _CG ),
143 /* 7A 'z' */ ( _CL | _CG ),
144 /* 7B '{' */ ( _CP | _CG ),
145 /* 7C '|' */ ( _CP | _CG ),
146 /* 7D '}' */ ( _CP | _CG ),
147 /* 7E '~' */ ( _CP | _CG ),
148 /* 7F DEL */ ( _CC )
149 };
150
151 /// ASCII-7 Upper case to Lower case character conversion table
152 const UINT8 _C_ToLowerTable[128] = {
153 /* 00 NUL */ 0x00, /* 01 SOH */ 0x01,
154 /* 02 STX */ 0x02, /* 03 ETX */ 0x03,
155 /* 04 EOT */ 0x04, /* 05 ENQ */ 0x05,
156 /* 06 ACK */ 0x06, /* 07 BEL */ 0x07,
157 /* 08 BS */ 0x08, /* 09 TAB */ 0x09,
158 /* 0A LF */ 0x0A, /* 0B VT */ 0x0B,
159 /* 0C FF */ 0x0C, /* 0D CR */ 0x0D,
160 /* 0E SO */ 0x0E, /* 0F SI */ 0x0F,
161 /* 10 DLE */ 0x10, /* 11 DC1 */ 0x11,
162 /* 12 DC2 */ 0x12, /* 13 DC3 */ 0x13,
163 /* 14 DC4 */ 0x14, /* 15 NAK */ 0x15,
164 /* 16 SYN */ 0x16, /* 17 ETB */ 0x17,
165 /* 18 CAN */ 0x18, /* 19 EM */ 0x19,
166 /* 1A SUB */ 0x1A, /* 1B ESC */ 0x1B,
167 /* 1C FS */ 0x1C, /* 1D GS */ 0x1D,
168 /* 1E RS */ 0x1E, /* 1F US */ 0x1F,
169 /* 20 ' ' */ 0x20, /* 21 '!' */ 0x21,
170 /* 22 '"' */ 0x22, /* 23 '#' */ 0x23,
171 /* 24 '$' */ 0x24, /* 25 '%' */ 0x25,
172 /* 26 '&' */ 0x26, /* 27 '\''*/ 0x27,
173 /* 28 '(' */ 0x28, /* 29 ')' */ 0x29,
174 /* 2A '*' */ 0x2A, /* 2B '+' */ 0x2B,
175 /* 2C ',' */ 0x2C, /* 2D '-' */ 0x2D,
176 /* 2E '.' */ 0x2E, /* 2F '/' */ 0x2F,
177 /* 30 '0' */ 0x30, /* 31 '1' */ 0x31,
178 /* 32 '2' */ 0x32, /* 33 '3' */ 0x33,
179 /* 34 '4' */ 0x34, /* 35 '5' */ 0x35,
180 /* 36 '6' */ 0x36, /* 37 '7' */ 0x37,
181 /* 38 '8' */ 0x38, /* 39 '9' */ 0x39,
182 /* 3A ':' */ 0x3A, /* 3B ';' */ 0x3B,
183 /* 3C '<' */ 0x3C, /* 3D '=' */ 0x3D,
184 /* 3E '>' */ 0x3E, /* 3F '?' */ 0x3F,
185 /* 40 '@' */ 0x40, /* 41 'A' */ 0x61,
186 /* 42 'B' */ 0x62, /* 43 'C' */ 0x63,
187 /* 44 'D' */ 0x64, /* 45 'E' */ 0x65,
188 /* 46 'F' */ 0x66, /* 47 'G' */ 0x67,
189 /* 48 'H' */ 0x68, /* 49 'I' */ 0x69,
190 /* 4A 'J' */ 0x6A, /* 4B 'K' */ 0x6B,
191 /* 4C 'L' */ 0x6C, /* 4D 'M' */ 0x6D,
192 /* 4E 'N' */ 0x6E, /* 4F 'O' */ 0x6F,
193 /* 50 'P' */ 0x70, /* 51 'Q' */ 0x71,
194 /* 52 'R' */ 0x72, /* 53 'S' */ 0x73,
195 /* 54 'T' */ 0x74, /* 55 'U' */ 0x75,
196 /* 56 'V' */ 0x76, /* 57 'W' */ 0x77,
197 /* 58 'X' */ 0x78, /* 59 'Y' */ 0x79,
198 /* 5A 'Z' */ 0x7A, /* 5B '[' */ 0x5B,
199 /* 5C '\' */ 0x5C, /* 5D ']' */ 0x5D,
200 /* 5E '^' */ 0x5E, /* 5F '_' */ 0x5F,
201 /* 60 '`' */ 0x60, /* 61 'a' */ 0x61,
202 /* 62 'b' */ 0x62, /* 63 'c' */ 0x63,
203 /* 64 'd' */ 0x64, /* 65 'e' */ 0x65,
204 /* 66 'f' */ 0x66, /* 67 'g' */ 0x67,
205 /* 68 'h' */ 0x68, /* 69 'i' */ 0x69,
206 /* 6A 'j' */ 0x6A, /* 6B 'k' */ 0x6B,
207 /* 6C 'l' */ 0x6C, /* 6D 'm' */ 0x6D,
208 /* 6E 'n' */ 0x6E, /* 6F 'o' */ 0x6F,
209 /* 70 'p' */ 0x70, /* 71 'q' */ 0x71,
210 /* 72 'r' */ 0x72, /* 73 's' */ 0x73,
211 /* 74 't' */ 0x74, /* 75 'u' */ 0x75,
212 /* 76 'v' */ 0x76, /* 77 'w' */ 0x77,
213 /* 78 'x' */ 0x78, /* 79 'y' */ 0x79,
214 /* 7A 'z' */ 0x7A, /* 7B '{' */ 0x7B,
215 /* 7C '|' */ 0x7C, /* 7D '}' */ 0x7D,
216 /* 7E '~' */ 0x7E, /* 7F DEL */ 0x7F
217 };
218
219 /// ASCII-7 Lower case to Upper case character conversion table
220 const UINT8 _C_ToUpperTable[128] = {
221 /* 00 NUL */ 0x00, /* 01 SOH */ 0x01,
222 /* 02 STX */ 0x02, /* 03 ETX */ 0x03,
223 /* 04 EOT */ 0x04, /* 05 ENQ */ 0x05,
224 /* 06 ACK */ 0x06, /* 07 BEL */ 0x07,
225 /* 08 BS */ 0x08, /* 09 TAB */ 0x09,
226 /* 0A LF */ 0x0A, /* 0B VT */ 0x0B,
227 /* 0C FF */ 0x0C, /* 0D CR */ 0x0D,
228 /* 0E SO */ 0x0E, /* 0F SI */ 0x0F,
229 /* 10 DLE */ 0x10, /* 11 DC1 */ 0x11,
230 /* 12 DC2 */ 0x12, /* 13 DC3 */ 0x13,
231 /* 14 DC4 */ 0x14, /* 15 NAK */ 0x15,
232 /* 16 SYN */ 0x16, /* 17 ETB */ 0x17,
233 /* 18 CAN */ 0x18, /* 19 EM */ 0x19,
234 /* 1A SUB */ 0x1A, /* 1B ESC */ 0x1B,
235 /* 1C FS */ 0x1C, /* 1D GS */ 0x1D,
236 /* 1E RS */ 0x1E, /* 1F US */ 0x1F,
237 /* 20 ' ' */ 0x20, /* 21 '!' */ 0x21,
238 /* 22 '"' */ 0x22, /* 23 '#' */ 0x23,
239 /* 24 '$' */ 0x24, /* 25 '%' */ 0x25,
240 /* 26 '&' */ 0x26, /* 27 '\''*/ 0x27,
241 /* 28 '(' */ 0x28, /* 29 ')' */ 0x29,
242 /* 2A '*' */ 0x2A, /* 2B '+' */ 0x2B,
243 /* 2C ',' */ 0x2C, /* 2D '-' */ 0x2D,
244 /* 2E '.' */ 0x2E, /* 2F '/' */ 0x2F,
245 /* 30 '0' */ 0x30, /* 31 '1' */ 0x31,
246 /* 32 '2' */ 0x32, /* 33 '3' */ 0x33,
247 /* 34 '4' */ 0x34, /* 35 '5' */ 0x35,
248 /* 36 '6' */ 0x36, /* 37 '7' */ 0x37,
249 /* 38 '8' */ 0x38, /* 39 '9' */ 0x39,
250 /* 3A ':' */ 0x3A, /* 3B ';' */ 0x3B,
251 /* 3C '<' */ 0x3C, /* 3D '=' */ 0x3D,
252 /* 3E '>' */ 0x3E, /* 3F '?' */ 0x3F,
253 /* 40 '@' */ 0x40, /* 41 'A' */ 0x41,
254 /* 42 'B' */ 0x42, /* 43 'C' */ 0x43,
255 /* 44 'D' */ 0x44, /* 45 'E' */ 0x45,
256 /* 46 'F' */ 0x46, /* 47 'G' */ 0x47,
257 /* 48 'H' */ 0x48, /* 49 'I' */ 0x49,
258 /* 4A 'J' */ 0x4A, /* 4B 'K' */ 0x4B,
259 /* 4C 'L' */ 0x4C, /* 4D 'M' */ 0x4D,
260 /* 4E 'N' */ 0x4E, /* 4F 'O' */ 0x4F,
261 /* 50 'P' */ 0x50, /* 51 'Q' */ 0x51,
262 /* 52 'R' */ 0x52, /* 53 'S' */ 0x53,
263 /* 54 'T' */ 0x54, /* 55 'U' */ 0x55,
264 /* 56 'V' */ 0x56, /* 57 'W' */ 0x57,
265 /* 58 'X' */ 0x58, /* 59 'Y' */ 0x59,
266 /* 5A 'Z' */ 0x5A, /* 5B '[' */ 0x5B,
267 /* 5C '\' */ 0x5C, /* 5D ']' */ 0x5D,
268 /* 5E '^' */ 0x5E, /* 5F '_' */ 0x5F,
269 /* 60 '`' */ 0x60, /* 61 'a' */ 0x41,
270 /* 62 'b' */ 0x42, /* 63 'c' */ 0x43,
271 /* 64 'd' */ 0x44, /* 65 'e' */ 0x45,
272 /* 66 'f' */ 0x46, /* 67 'g' */ 0x47,
273 /* 68 'h' */ 0x48, /* 69 'i' */ 0x49,
274 /* 6A 'j' */ 0x4A, /* 6B 'k' */ 0x4B,
275 /* 6C 'l' */ 0x4C, /* 6D 'm' */ 0x4D,
276 /* 6E 'n' */ 0x4E, /* 6F 'o' */ 0x4F,
277 /* 70 'p' */ 0x50, /* 71 'q' */ 0x51,
278 /* 72 'r' */ 0x52, /* 73 's' */ 0x53,
279 /* 74 't' */ 0x54, /* 75 'u' */ 0x55,
280 /* 76 'v' */ 0x56, /* 77 'w' */ 0x57,
281 /* 78 'x' */ 0x58, /* 79 'y' */ 0x59,
282 /* 7A 'z' */ 0x5A, /* 7B '{' */ 0x7B,
283 /* 7C '|' */ 0x7C, /* 7D '}' */ 0x7D,
284 /* 7E '~' */ 0x7E, /* 7F DEL */ 0x7F
285 };
286
287 /// Default character classification table is 7-bit ASCII
288 const UINT16 *_cClass = _C_CharClassTable;
289
290 /// Default upper to lower conversion table is 7-bit ASCII
291 const UINT8 *_lConvT = _C_ToLowerTable;
292
293 /// Default lower to upper conversion table is 7-bit ASCII
294 const UINT8 *_uConvT = _C_ToUpperTable;
295
296 /** Sets the character classification and case conversion tables for the 'C' locale.
297
298 A set of locale-independent pointers are used to point to the classification and
299 conversion tables for the currently specified locale. This function is used to
300 establish the tables for the 'C' locale.
301 **/
302 void
__set_C_locale(void)303 __set_C_locale( void )
304 {
305 _cClass = _C_CharClassTable;
306 _lConvT = _C_ToLowerTable;
307 _uConvT = _C_ToUpperTable;
308 }
309