1 /*
2 ******************************************************************************
3 *
4 * Copyright (C) 1997-2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 *
9 * File CSTRING.C
10 *
11 * @author Helena Shih
12 *
13 * Modification History:
14 *
15 * Date Name Description
16 * 6/18/98 hshih Created
17 * 09/08/98 stephen Added include for ctype, for Mac Port
18 * 11/15/99 helena Integrated S/390 IEEE changes.
19 ******************************************************************************
20 */
21
22
23
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include "unicode/utypes.h"
27 #include "cmemory.h"
28 #include "cstring.h"
29 #include "uassert.h"
30
31 /*
32 * We hardcode case conversion for invariant characters to match our expectation
33 * and the compiler execution charset.
34 * This prevents problems on systems
35 * - with non-default casing behavior, like Turkish system locales where
36 * tolower('I') maps to dotless i and toupper('i') maps to dotted I
37 * - where there are no lowercase Latin characters at all, or using different
38 * codes (some old EBCDIC codepages)
39 *
40 * This works because the compiler usually runs on a platform where the execution
41 * charset includes all of the invariant characters at their expected
42 * code positions, so that the char * string literals in ICU code match
43 * the char literals here.
44 *
45 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
46 * and the set of uppercase Latin letters is discontiguous as well.
47 */
48
49 U_CAPI char U_EXPORT2
uprv_toupper(char c)50 uprv_toupper(char c) {
51 #if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
52 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
53 c=(char)(c+('A'-'a'));
54 }
55 #else
56 if('a'<=c && c<='z') {
57 c=(char)(c+('A'-'a'));
58 }
59 #endif
60 return c;
61 }
62
63
#if 0
/*
 * Disabled: cstring.h defines uprv_tolower() to be the same as either
 * uprv_asciitolower() or uprv_ebcdictolower(), which reduces the amount
 * of code that has to be covered with tests.
 *
 * This definition is likely to work for most charset families, not just
 * ASCII and EBCDIC, because its #else branch is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
    /* Distance between the two cases in the compiler's execution charset. */
    const int caseDelta = 'a' - 'A';
    int isUppercase;

#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* In EBCDIC the uppercase letters form three separate runs of codes. */
    isUppercase = ('A' <= c && c <= 'I') ||
                  ('J' <= c && c <= 'R') ||
                  ('S' <= c && c <= 'Z');
#else
    isUppercase = ('A' <= c && c <= 'Z');
#endif

    return isUppercase ? (char)(c + caseDelta) : c;
}
#endif
88
89 U_CAPI char U_EXPORT2
uprv_asciitolower(char c)90 uprv_asciitolower(char c) {
91 if(0x41<=c && c<=0x5a) {
92 c=(char)(c+0x20);
93 }
94 return c;
95 }
96
97 U_CAPI char U_EXPORT2
uprv_ebcdictolower(char c)98 uprv_ebcdictolower(char c) {
99 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
100 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
101 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
102 ) {
103 c=(char)(c-0x40);
104 }
105 return c;
106 }
107
108
109 U_CAPI char* U_EXPORT2
T_CString_toLowerCase(char * str)110 T_CString_toLowerCase(char* str)
111 {
112 char* origPtr = str;
113
114 if (str) {
115 do
116 *str = (char)uprv_tolower(*str);
117 while (*(str++));
118 }
119
120 return origPtr;
121 }
122
123 U_CAPI char* U_EXPORT2
T_CString_toUpperCase(char * str)124 T_CString_toUpperCase(char* str)
125 {
126 char* origPtr = str;
127
128 if (str) {
129 do
130 *str = (char)uprv_toupper(*str);
131 while (*(str++));
132 }
133
134 return origPtr;
135 }
136
137 /*
138 * Takes a int32_t and fills in a char* string with that number "radix"-based.
139 * Does not handle negative values (makes an empty string for them).
140 * Writes at most 12 chars ("-2147483647" plus NUL).
141 * Returns the length of the string (not including the NUL).
142 */
143 U_CAPI int32_t U_EXPORT2
T_CString_integerToString(char * buffer,int32_t v,int32_t radix)144 T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
145 {
146 char tbuf[30];
147 int32_t tbx = sizeof(tbuf);
148 uint8_t digit;
149 int32_t length = 0;
150 uint32_t uval;
151
152 U_ASSERT(radix>=2 && radix<=16);
153 uval = (uint32_t) v;
154 if(v<0 && radix == 10) {
155 /* Only in base 10 do we conside numbers to be signed. */
156 uval = (uint32_t)(-v);
157 buffer[length++] = '-';
158 }
159
160 tbx = sizeof(tbuf)-1;
161 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
162 do {
163 digit = (uint8_t)(uval % radix);
164 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
165 uval = uval / radix;
166 } while (uval != 0);
167
168 /* copy converted number into user buffer */
169 uprv_strcpy(buffer+length, tbuf+tbx);
170 length += sizeof(tbuf) - tbx -1;
171 return length;
172 }
173
174
175
176 /*
177 * Takes a int64_t and fills in a char* string with that number "radix"-based.
178 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
179 * Returns the length of the string, not including the terminating NULL.
180 */
181 U_CAPI int32_t U_EXPORT2
T_CString_int64ToString(char * buffer,int64_t v,uint32_t radix)182 T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
183 {
184 char tbuf[30];
185 int32_t tbx = sizeof(tbuf);
186 uint8_t digit;
187 int32_t length = 0;
188 uint64_t uval;
189
190 U_ASSERT(radix>=2 && radix<=16);
191 uval = (uint64_t) v;
192 if(v<0 && radix == 10) {
193 /* Only in base 10 do we conside numbers to be signed. */
194 uval = (uint64_t)(-v);
195 buffer[length++] = '-';
196 }
197
198 tbx = sizeof(tbuf)-1;
199 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */
200 do {
201 digit = (uint8_t)(uval % radix);
202 tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
203 uval = uval / radix;
204 } while (uval != 0);
205
206 /* copy converted number into user buffer */
207 uprv_strcpy(buffer+length, tbuf+tbx);
208 length += sizeof(tbuf) - tbx -1;
209 return length;
210 }
211
212
213 U_CAPI int32_t U_EXPORT2
T_CString_stringToInteger(const char * integerString,int32_t radix)214 T_CString_stringToInteger(const char *integerString, int32_t radix)
215 {
216 char *end;
217 return uprv_strtoul(integerString, &end, radix);
218
219 }
220
221 U_CAPI int U_EXPORT2
T_CString_stricmp(const char * str1,const char * str2)222 T_CString_stricmp(const char *str1, const char *str2) {
223 if(str1==NULL) {
224 if(str2==NULL) {
225 return 0;
226 } else {
227 return -1;
228 }
229 } else if(str2==NULL) {
230 return 1;
231 } else {
232 /* compare non-NULL strings lexically with lowercase */
233 int rc;
234 unsigned char c1, c2;
235
236 for(;;) {
237 c1=(unsigned char)*str1;
238 c2=(unsigned char)*str2;
239 if(c1==0) {
240 if(c2==0) {
241 return 0;
242 } else {
243 return -1;
244 }
245 } else if(c2==0) {
246 return 1;
247 } else {
248 /* compare non-zero characters with lowercase */
249 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
250 if(rc!=0) {
251 return rc;
252 }
253 }
254 ++str1;
255 ++str2;
256 }
257 }
258 }
259
260 U_CAPI int U_EXPORT2
T_CString_strnicmp(const char * str1,const char * str2,uint32_t n)261 T_CString_strnicmp(const char *str1, const char *str2, uint32_t n) {
262 if(str1==NULL) {
263 if(str2==NULL) {
264 return 0;
265 } else {
266 return -1;
267 }
268 } else if(str2==NULL) {
269 return 1;
270 } else {
271 /* compare non-NULL strings lexically with lowercase */
272 int rc;
273 unsigned char c1, c2;
274
275 for(; n--;) {
276 c1=(unsigned char)*str1;
277 c2=(unsigned char)*str2;
278 if(c1==0) {
279 if(c2==0) {
280 return 0;
281 } else {
282 return -1;
283 }
284 } else if(c2==0) {
285 return 1;
286 } else {
287 /* compare non-zero characters with lowercase */
288 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
289 if(rc!=0) {
290 return rc;
291 }
292 }
293 ++str1;
294 ++str2;
295 }
296 }
297
298 return 0;
299 }
300
301 U_CAPI char* U_EXPORT2
uprv_strdup(const char * src)302 uprv_strdup(const char *src) {
303 size_t len = uprv_strlen(src) + 1;
304 char *dup = (char *) uprv_malloc(len);
305
306 if (dup) {
307 uprv_memcpy(dup, src, len);
308 }
309
310 return dup;
311 }
312
313 U_CAPI char* U_EXPORT2
uprv_strndup(const char * src,int32_t n)314 uprv_strndup(const char *src, int32_t n) {
315 char *dup;
316
317 if(n < 0) {
318 dup = uprv_strdup(src);
319 } else {
320 dup = (char*)uprv_malloc(n+1);
321 if (dup) {
322 uprv_memcpy(dup, src, n);
323 dup[n] = 0;
324 }
325 }
326
327 return dup;
328 }
329