1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1997-2011, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 *
11 * File CSTRING.C
12 *
13 * @author Helena Shih
14 *
15 * Modification History:
16 *
17 * Date Name Description
18 * 6/18/98 hshih Created
19 * 09/08/98 stephen Added include for ctype, for Mac Port
20 * 11/15/99 helena Integrated S/390 IEEE changes.
21 ******************************************************************************
22 */
23
24
25
26 #include <stdlib.h>
27 #include <stdio.h>
28 #include "unicode/utypes.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "uassert.h"
32
33 /*
34 * We hardcode case conversion for invariant characters to match our expectation
35 * and the compiler execution charset.
36 * This prevents problems on systems
37 * - with non-default casing behavior, like Turkish system locales where
38 * tolower('I') maps to dotless i and toupper('i') maps to dotted I
39 * - where there are no lowercase Latin characters at all, or using different
40 * codes (some old EBCDIC codepages)
41 *
42 * This works because the compiler usually runs on a platform where the execution
43 * charset includes all of the invariant characters at their expected
44 * code positions, so that the char * string literals in ICU code match
45 * the char literals here.
46 *
47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48 * and the set of uppercase Latin letters is discontiguous as well.
49 */
50
51 U_CAPI UBool U_EXPORT2
uprv_isASCIILetter(char c)52 uprv_isASCIILetter(char c) {
53 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
54 return
55 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
56 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
57 #else
58 return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
59 #endif
60 }
61
62 U_CAPI char U_EXPORT2
uprv_toupper(char c)63 uprv_toupper(char c) {
64 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
65 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
66 c=(char)(c+('A'-'a'));
67 }
68 #else
69 if('a'<=c && c<='z') {
70 c=(char)(c+('A'-'a'));
71 }
72 #endif
73 return c;
74 }
75
76
77 #if 0
78 /*
79 * Commented out because cstring.h defines uprv_tolower() to be
80 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
81 * to reduce the amount of code to cover with tests.
82 *
83 * Note that this uprv_tolower() definition is likely to work for most
84 * charset families, not just ASCII and EBCDIC, because its #else branch
85 * is written generically.
86 */
87 U_CAPI char U_EXPORT2
88 uprv_tolower(char c) {
89 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
90 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) {
91 c=(char)(c+('a'-'A'));
92 }
93 #else
94 if('A'<=c && c<='Z') {
95 c=(char)(c+('a'-'A'));
96 }
97 #endif
98 return c;
99 }
100 #endif
101
102 U_CAPI char U_EXPORT2
uprv_asciitolower(char c)103 uprv_asciitolower(char c) {
104 if(0x41<=c && c<=0x5a) {
105 c=(char)(c+0x20);
106 }
107 return c;
108 }
109
110 U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c)111 uprv_ebcdictolower(char c) {
112 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
113 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
114 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
115 ) {
116 c=(char)(c-0x40);
117 }
118 return c;
119 }
120
121
122 U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char * str)123 T_CString_toLowerCase(char* str)
124 {
125 char* origPtr = str;
126
127 if (str) {
128 do
129 *str = (char)uprv_tolower(*str);
130 while (*(str++));
131 }
132
133 return origPtr;
134 }
135
136 U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char * str)137 T_CString_toUpperCase(char* str)
138 {
139 char* origPtr = str;
140
141 if (str) {
142 do
143 *str = (char)uprv_toupper(*str);
144 while (*(str++));
145 }
146
147 return origPtr;
148 }
149
150 /*
151 * Takes a int32_t and fills in a char* string with that number "radix"-based.
152 * Does not handle negative values (makes an empty string for them).
153 * Writes at most 12 chars ("-2147483647" plus NUL).
154 * Returns the length of the string (not including the NUL).
155 */
156 U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char * buffer,int32_t v,int32_t radix)157 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158 {
159 char tbuf[30];
160 int32_t tbx = sizeof(tbuf);
161 uint8_t digit;
162 int32_t length = 0;
163 uint32_t uval;
164
165 U_ASSERT(radix>=2 && radix<=16);
166 uval = (uint32_t) v;
167 if(v<0 && radix == 10) {
168 /* Only in base 10 do we conside numbers to be signed. */
169 uval = (uint32_t)(-v);
170 buffer[length++] = '-';
171 }
172
173 tbx = sizeof(tbuf)-1;
174 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
175 do {
176 digit = (uint8_t)(uval % radix);
177 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178 uval = uval / radix;
179 } while (uval != 0);
180
181 /* copy converted number into user buffer */
182 uprv_strcpy(buffer+length, tbuf+tbx);
183 length += sizeof(tbuf) - tbx -1;
184 return length;
185 }
186
187
188
189 /*
190 * Takes a int64_t and fills in a char* string with that number "radix"-based.
191 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
192 * Returns the length of the string, not including the terminating NUL.
193 */
194 U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char * buffer,int64_t v,uint32_t radix)195 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196 {
197 char tbuf[30];
198 int32_t tbx = sizeof(tbuf);
199 uint8_t digit;
200 int32_t length = 0;
201 uint64_t uval;
202
203 U_ASSERT(radix>=2 && radix<=16);
204 uval = (uint64_t) v;
205 if(v<0 && radix == 10) {
206 /* Only in base 10 do we conside numbers to be signed. */
207 uval = (uint64_t)(-v);
208 buffer[length++] = '-';
209 }
210
211 tbx = sizeof(tbuf)-1;
212 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
213 do {
214 digit = (uint8_t)(uval % radix);
215 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216 uval = uval / radix;
217 } while (uval != 0);
218
219 /* copy converted number into user buffer */
220 uprv_strcpy(buffer+length, tbuf+tbx);
221 length += sizeof(tbuf) - tbx -1;
222 return length;
223 }
224
225
226 U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char * integerString,int32_t radix)227 T_CString_stringToInteger(const char *integerString, int32_t radix)
228 {
229 char *end;
230 return uprv_strtoul(integerString, &end, radix);
231
232 }
233
234 U_CAPI int U_EXPORT2
uprv_stricmp(const char * str1,const char * str2)235 uprv_stricmp(const char *str1, const char *str2) {
236 if(str1==nullptr) {
237 if(str2==nullptr) {
238 return 0;
239 } else {
240 return -1;
241 }
242 } else if(str2==nullptr) {
243 return 1;
244 } else {
245 /* compare non-nullptr strings lexically with lowercase */
246 int rc;
247 unsigned char c1, c2;
248
249 for(;;) {
250 c1=(unsigned char)*str1;
251 c2=(unsigned char)*str2;
252 if(c1==0) {
253 if(c2==0) {
254 return 0;
255 } else {
256 return -1;
257 }
258 } else if(c2==0) {
259 return 1;
260 } else {
261 /* compare non-zero characters with lowercase */
262 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263 if(rc!=0) {
264 return rc;
265 }
266 }
267 ++str1;
268 ++str2;
269 }
270 }
271 }
272
273 U_CAPI int U_EXPORT2
uprv_strnicmp(const char * str1,const char * str2,uint32_t n)274 uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
275 if(str1==nullptr) {
276 if(str2==nullptr) {
277 return 0;
278 } else {
279 return -1;
280 }
281 } else if(str2==nullptr) {
282 return 1;
283 } else {
284 /* compare non-nullptr strings lexically with lowercase */
285 int rc;
286 unsigned char c1, c2;
287
288 for(; n--;) {
289 c1=(unsigned char)*str1;
290 c2=(unsigned char)*str2;
291 if(c1==0) {
292 if(c2==0) {
293 return 0;
294 } else {
295 return -1;
296 }
297 } else if(c2==0) {
298 return 1;
299 } else {
300 /* compare non-zero characters with lowercase */
301 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302 if(rc!=0) {
303 return rc;
304 }
305 }
306 ++str1;
307 ++str2;
308 }
309 }
310
311 return 0;
312 }
313
314 U_CAPI char* U_EXPORT2
uprv_strdup(const char * src)315 uprv_strdup(const char *src) {
316 size_t len = uprv_strlen(src) + 1;
317 char *dup = (char *) uprv_malloc(len);
318
319 if (dup) {
320 uprv_memcpy(dup, src, len);
321 }
322
323 return dup;
324 }
325
326 U_CAPI char* U_EXPORT2
uprv_strndup(const char * src,int32_t n)327 uprv_strndup(const char *src, int32_t n) {
328 char *dup;
329
330 if(n < 0) {
331 dup = uprv_strdup(src);
332 } else {
333 dup = (char*)uprv_malloc(n+1);
334 if (dup) {
335 uprv_memcpy(dup, src, n);
336 dup[n] = 0;
337 }
338 }
339
340 return dup;
341 }
342