1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 1999-2010, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: uinvchar.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:2
14 *
15 * created on: 2004sep14
16 * created by: Markus W. Scherer
17 *
18 * Functions for handling invariant characters, moved here from putil.c
19 * for better modularization.
20 */
21
22 #include "unicode/utypes.h"
23 #include "unicode/ustring.h"
24 #include "udataswp.h"
25 #include "cstring.h"
26 #include "cmemory.h"
27 #include "uassert.h"
28 #include "uinvchar.h"
29
30 /* invariant-character handling --------------------------------------------- */
31
32 /*
33 * These maps for ASCII to/from EBCDIC map invariant characters (see utypes.h)
34 * appropriately for most EBCDIC codepages.
35 *
36 * They currently also map most other ASCII graphic characters,
37 * appropriately for codepages 37 and 1047.
38 * Exceptions: The characters for []^ have different codes in 37 & 1047.
39 * Both versions are mapped to ASCII.
40 *
41 * ASCII 37 1047
42 * [ 5B BA AD
43 * ] 5D BB BD
44 * ^ 5E B0 5F
45 *
46 * There are no mappings for variant characters from Unicode to EBCDIC.
47 *
48 * Currently, C0 control codes are also included in these maps.
49 * Exceptions: S/390 Open Edition swaps LF and NEL codes compared with other
50 * EBCDIC platforms; both codes (15 and 25) are mapped to ASCII LF (0A),
51 * but there is no mapping for ASCII LF back to EBCDIC.
52 *
53 * ASCII EBCDIC S/390-OE
54 * LF 0A 25 15
55 * NEL 85 15 25
56 *
57 * The maps below explicitly exclude the variant
58 * control and graphical characters that are in ASCII-based
59 * codepages at 0x80 and above.
60 * "No mapping" is expressed by mapping to a 00 byte.
61 *
62 * These tables do not establish a converter or a codepage.
63 */
64
/*
 * EBCDIC -> ASCII byte map, indexed by the EBCDIC code.
 * An entry of 0x00 at any index other than 0x00 means "no mapping".
 * Each row below covers 16 consecutive EBCDIC codes; the label gives the
 * code of the row's first entry.
 */
static const uint8_t asciiFromEbcdic[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* b0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0 */ 0x7b, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x7d, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, 0x50, 0x51, 0x52, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x5c, 0x00, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
86
/*
 * ASCII -> EBCDIC byte map, indexed by the ASCII code.
 * An entry of 0x00 at any index other than 0x00 means "no mapping";
 * this includes every index from 0x80 upwards.
 * Each row below covers 16 consecutive ASCII codes; the label gives the
 * code of the row's first entry.
 */
static const uint8_t ebcdicFromAscii[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x37, 0x2d, 0x2e, 0x2f, 0x16, 0x05, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x3c, 0x3d, 0x32, 0x26, 0x18, 0x19, 0x3f, 0x27, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x40, 0x00, 0x7f, 0x00, 0x00, 0x6c, 0x50, 0x7d, 0x4d, 0x5d, 0x5c, 0x4e, 0x6b, 0x60, 0x4b, 0x61,
    /* 30 */ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0x7a, 0x5e, 0x4c, 0x7e, 0x6e, 0x6f,

    /* 40 */ 0x00, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6,
    /* 50 */ 0xd7, 0xd8, 0xd9, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0x00, 0x00, 0x00, 0x00, 0x6d,
    /* 60 */ 0x00, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    /* 70 */ 0x97, 0x98, 0x99, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x07,

    /* 80 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* b0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

    /* c0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
108
/*
 * Same as asciiFromEbcdic[] except that the uppercase letters A-Z
 * (EBCDIC c1..c9, d1..d9, e2..e9) map to lowercase ASCII a-z.
 * All other entries are identical to asciiFromEbcdic[]; in particular the
 * backslash at EBCDIC e0 stays 5c and is not folded onto 7c ('|'),
 * which is already the mapping of EBCDIC 4f.
 * An entry of 0x00 at any index other than 0x00 means "no mapping".
 */
static const uint8_t lowercaseAsciiFromEbcdic[256]={
    /* 00 */ 0x00, 0x01, 0x02, 0x03, 0x00, 0x09, 0x00, 0x7f, 0x00, 0x00, 0x00, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    /* 10 */ 0x10, 0x11, 0x12, 0x13, 0x00, 0x0a, 0x08, 0x00, 0x18, 0x19, 0x00, 0x00, 0x1c, 0x1d, 0x1e, 0x1f,
    /* 20 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x17, 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x06, 0x07,
    /* 30 */ 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x14, 0x15, 0x00, 0x1a,

    /* 40 */ 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2e, 0x3c, 0x28, 0x2b, 0x7c,
    /* 50 */ 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x21, 0x24, 0x2a, 0x29, 0x3b, 0x5e,
    /* 60 */ 0x2d, 0x2f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c, 0x25, 0x5f, 0x3e, 0x3f,
    /* 70 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0x3a, 0x23, 0x40, 0x27, 0x3d, 0x22,

    /* 80 */ 0x00, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 90 */ 0x00, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* a0 */ 0x00, 0x7e, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x5b, 0x00, 0x00,
    /* b0 */ 0x5e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x5b, 0x5d, 0x00, 0x5d, 0x00, 0x00,

    /* c0 */ 0x7b, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* d0 */ 0x7d, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* e0 */ 0x5c, 0x00, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* f0 */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
};
131
/*
 * Bit sets indicating which characters of the ASCII repertoire
 * (by ASCII/Unicode code) are "invariant".
 * See utypes.h for more details.
 *
 * Word i covers the codes 0x20*i .. 0x20*i+0x1f; bit (c&0x1f) of word (c>>5)
 * is set when code c is invariant.
 *
 * The characters of the ASCII repertoire are considered invariant,
 * except for the following:
 * 0a      <line feed>
 *
 * 21 '!'  <exclamation mark>
 * 23 '#'  <number sign>
 * 24 '$'  <dollar sign>
 *
 * 40 '@'  <commercial at>
 *
 * 5b '['  <left bracket>
 * 5c '\'  <backslash>
 * 5d ']'  <right bracket>
 * 5e '^'  <circumflex>
 *
 * 60 '`'  <grave accent>
 *
 * 7b '{'  <left brace>
 * 7c '|'  <vertical line>
 * 7d '}'  <right brace>
 * 7e '~'  <tilde>
 *
 * Each word is written as the complement of the mask of its excluded codes.
 */
static const uint32_t invariantChars[4]={
    ~(uint32_t)0x00000400, /* 0xfffffbff: 00..1f but not 0a */
    ~(uint32_t)0x0000001a, /* 0xffffffe5: 20..3f but not 21 23 24 */
    ~(uint32_t)0x78000001, /* 0x87fffffe: 40..5f but not 40 5b..5e */
    ~(uint32_t)0x78000001  /* 0x87fffffe: 60..7f but not 60 7b..7e */
};
163
/*
 * Invariant-character test for unsigned operands (or values known to be
 * non-negative) that hold ASCII-family character codes.
 * Codes above 0x7f are never invariant; for the others, bit (c&0x1f) of
 * invariantChars[c>>5] decides.
 * The argument is evaluated more than once, so it must be free of side effects.
 */
#define UCHAR_IS_INVARIANT(c) (((c)<=0x7f) && ((invariantChars[(c)>>5]>>((c)&0x1f))&1)!=0)

/* Same test for signed operands: negative values are rejected before the table lookup. */
#define SCHAR_IS_INVARIANT(c) (((c)>=0) && UCHAR_IS_INVARIANT(c))
172
/*
 * Conversion between a platform-charset byte and its ASCII/Unicode code.
 * On ASCII-family platforms both directions are the identity; on
 * EBCDIC-family platforms they are lookups in the tables defined in this file.
 * Arguments and expansions are parenthesized so that expression arguments and
 * surrounding operators (casts, arithmetic) bind as the caller expects.
 */
#if U_CHARSET_FAMILY==U_ASCII_FAMILY
#define CHAR_TO_UCHAR(c) (c)
#define UCHAR_TO_CHAR(c) (c)
#elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
#define CHAR_TO_UCHAR(u) (asciiFromEbcdic[(u)])
#define UCHAR_TO_CHAR(u) (ebcdicFromAscii[(u)])
#else
#   error U_CHARSET_FAMILY is not valid
#endif
182
183
184 U_CAPI void U_EXPORT2
u_charsToUChars(const char * cs,UChar * us,int32_t length)185 u_charsToUChars(const char *cs, UChar *us, int32_t length) {
186 UChar u;
187 uint8_t c;
188
189 /*
190 * Allow the entire ASCII repertoire to be mapped _to_ Unicode.
191 * For EBCDIC systems, this works for characters with codes from
192 * codepages 37 and 1047 or compatible.
193 */
194 while(length>0) {
195 c=(uint8_t)(*cs++);
196 u=(UChar)CHAR_TO_UCHAR(c);
197 U_ASSERT((u!=0 || c==0)); /* only invariant chars converted? */
198 *us++=u;
199 --length;
200 }
201 }
202
203 U_CAPI void U_EXPORT2
u_UCharsToChars(const UChar * us,char * cs,int32_t length)204 u_UCharsToChars(const UChar *us, char *cs, int32_t length) {
205 UChar u;
206
207 while(length>0) {
208 u=*us++;
209 if(!UCHAR_IS_INVARIANT(u)) {
210 U_ASSERT(false); /* Variant characters were used. These are not portable in ICU. */
211 u=0;
212 }
213 *cs++=(char)UCHAR_TO_CHAR(u);
214 --length;
215 }
216 }
217
218 U_CAPI UBool U_EXPORT2
uprv_isInvariantString(const char * s,int32_t length)219 uprv_isInvariantString(const char *s, int32_t length) {
220 uint8_t c;
221
222 for(;;) {
223 if(length<0) {
224 /* NUL-terminated */
225 c=(uint8_t)*s++;
226 if(c==0) {
227 break;
228 }
229 } else {
230 /* count length */
231 if(length==0) {
232 break;
233 }
234 --length;
235 c=(uint8_t)*s++;
236 if(c==0) {
237 continue; /* NUL is invariant */
238 }
239 }
240 /* c!=0 now, one branch below checks c==0 for variant characters */
241
242 /*
243 * no assertions here because these functions are legitimately called
244 * for strings with variant characters
245 */
246 #if U_CHARSET_FAMILY==U_ASCII_FAMILY
247 if(!UCHAR_IS_INVARIANT(c)) {
248 return false; /* found a variant char */
249 }
250 #elif U_CHARSET_FAMILY==U_EBCDIC_FAMILY
251 c=CHAR_TO_UCHAR(c);
252 if(c==0 || !UCHAR_IS_INVARIANT(c)) {
253 return false; /* found a variant char */
254 }
255 #else
256 # error U_CHARSET_FAMILY is not valid
257 #endif
258 }
259 return true;
260 }
261
262 U_CAPI UBool U_EXPORT2
uprv_isInvariantUString(const UChar * s,int32_t length)263 uprv_isInvariantUString(const UChar *s, int32_t length) {
264 UChar c;
265
266 for(;;) {
267 if(length<0) {
268 /* NUL-terminated */
269 c=*s++;
270 if(c==0) {
271 break;
272 }
273 } else {
274 /* count length */
275 if(length==0) {
276 break;
277 }
278 --length;
279 c=*s++;
280 }
281
282 /*
283 * no assertions here because these functions are legitimately called
284 * for strings with variant characters
285 */
286 if(!UCHAR_IS_INVARIANT(c)) {
287 return false; /* found a variant char */
288 }
289 }
290 return true;
291 }
292
293 /* UDataSwapFn implementations used in udataswp.c ------- */
294
295 /* convert ASCII to EBCDIC and verify that all characters are invariant */
296 U_CAPI int32_t U_EXPORT2
uprv_ebcdicFromAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)297 uprv_ebcdicFromAscii(const UDataSwapper *ds,
298 const void *inData, int32_t length, void *outData,
299 UErrorCode *pErrorCode) {
300 const uint8_t *s;
301 uint8_t *t;
302 uint8_t c;
303
304 int32_t count;
305
306 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
307 return 0;
308 }
309 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
310 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
311 return 0;
312 }
313
314 /* setup and swapping */
315 s=(const uint8_t *)inData;
316 t=(uint8_t *)outData;
317 count=length;
318 while(count>0) {
319 c=*s++;
320 if(!UCHAR_IS_INVARIANT(c)) {
321 udata_printError(ds, "uprv_ebcdicFromAscii() string[%d] contains a variant character in position %d\n",
322 length, length-count);
323 *pErrorCode=U_INVALID_CHAR_FOUND;
324 return 0;
325 }
326 *t++=ebcdicFromAscii[c];
327 --count;
328 }
329
330 return length;
331 }
332
333 /* this function only checks and copies ASCII strings without conversion */
334 U_CFUNC int32_t
uprv_copyAscii(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)335 uprv_copyAscii(const UDataSwapper *ds,
336 const void *inData, int32_t length, void *outData,
337 UErrorCode *pErrorCode) {
338 const uint8_t *s;
339 uint8_t c;
340
341 int32_t count;
342
343 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
344 return 0;
345 }
346 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
347 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
348 return 0;
349 }
350
351 /* setup and checking */
352 s=(const uint8_t *)inData;
353 count=length;
354 while(count>0) {
355 c=*s++;
356 if(!UCHAR_IS_INVARIANT(c)) {
357 udata_printError(ds, "uprv_copyFromAscii() string[%d] contains a variant character in position %d\n",
358 length, length-count);
359 *pErrorCode=U_INVALID_CHAR_FOUND;
360 return 0;
361 }
362 --count;
363 }
364
365 if(length>0 && inData!=outData) {
366 uprv_memcpy(outData, inData, length);
367 }
368
369 return length;
370 }
371
372 /* convert EBCDIC to ASCII and verify that all characters are invariant */
373 U_CFUNC int32_t
uprv_asciiFromEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)374 uprv_asciiFromEbcdic(const UDataSwapper *ds,
375 const void *inData, int32_t length, void *outData,
376 UErrorCode *pErrorCode) {
377 const uint8_t *s;
378 uint8_t *t;
379 uint8_t c;
380
381 int32_t count;
382
383 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
384 return 0;
385 }
386 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
387 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
388 return 0;
389 }
390
391 /* setup and swapping */
392 s=(const uint8_t *)inData;
393 t=(uint8_t *)outData;
394 count=length;
395 while(count>0) {
396 c=*s++;
397 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
398 udata_printError(ds, "uprv_asciiFromEbcdic() string[%d] contains a variant character in position %d\n",
399 length, length-count);
400 *pErrorCode=U_INVALID_CHAR_FOUND;
401 return 0;
402 }
403 *t++=c;
404 --count;
405 }
406
407 return length;
408 }
409
410 /* this function only checks and copies EBCDIC strings without conversion */
411 U_CFUNC int32_t
uprv_copyEbcdic(const UDataSwapper * ds,const void * inData,int32_t length,void * outData,UErrorCode * pErrorCode)412 uprv_copyEbcdic(const UDataSwapper *ds,
413 const void *inData, int32_t length, void *outData,
414 UErrorCode *pErrorCode) {
415 const uint8_t *s;
416 uint8_t c;
417
418 int32_t count;
419
420 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
421 return 0;
422 }
423 if(ds==NULL || inData==NULL || length<0 || (length>0 && outData==NULL)) {
424 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
425 return 0;
426 }
427
428 /* setup and checking */
429 s=(const uint8_t *)inData;
430 count=length;
431 while(count>0) {
432 c=*s++;
433 if(c!=0 && ((c=asciiFromEbcdic[c])==0 || !UCHAR_IS_INVARIANT(c))) {
434 udata_printError(ds, "uprv_copyEbcdic() string[%] contains a variant character in position %d\n",
435 length, length-count);
436 *pErrorCode=U_INVALID_CHAR_FOUND;
437 return 0;
438 }
439 --count;
440 }
441
442 if(length>0 && inData!=outData) {
443 uprv_memcpy(outData, inData, length);
444 }
445
446 return length;
447 }
448
449 U_CFUNC UBool
uprv_isEbcdicAtSign(char c)450 uprv_isEbcdicAtSign(char c) {
451 static const uint8_t ebcdicAtSigns[] = {
452 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
453 return c != 0 && uprv_strchr((const char *)ebcdicAtSigns, c) != nullptr;
454 }
455
456 /* compare invariant strings; variant characters compare less than others and unlike each other */
457 U_CFUNC int32_t
uprv_compareInvAscii(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)458 uprv_compareInvAscii(const UDataSwapper *ds,
459 const char *outString, int32_t outLength,
460 const UChar *localString, int32_t localLength) {
461 (void)ds;
462 int32_t minLength;
463 UChar32 c1, c2;
464 uint8_t c;
465
466 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
467 return 0;
468 }
469
470 if(outLength<0) {
471 outLength=(int32_t)uprv_strlen(outString);
472 }
473 if(localLength<0) {
474 localLength=u_strlen(localString);
475 }
476
477 minLength= outLength<localLength ? outLength : localLength;
478
479 while(minLength>0) {
480 c=(uint8_t)*outString++;
481 if(UCHAR_IS_INVARIANT(c)) {
482 c1=c;
483 } else {
484 c1=-1;
485 }
486
487 c2=*localString++;
488 if(!UCHAR_IS_INVARIANT(c2)) {
489 c2=-2;
490 }
491
492 if((c1-=c2)!=0) {
493 return c1;
494 }
495
496 --minLength;
497 }
498
499 /* strings start with same prefix, compare lengths */
500 return outLength-localLength;
501 }
502
503 U_CFUNC int32_t
uprv_compareInvEbcdic(const UDataSwapper * ds,const char * outString,int32_t outLength,const UChar * localString,int32_t localLength)504 uprv_compareInvEbcdic(const UDataSwapper *ds,
505 const char *outString, int32_t outLength,
506 const UChar *localString, int32_t localLength) {
507 (void)ds;
508 int32_t minLength;
509 UChar32 c1, c2;
510 uint8_t c;
511
512 if(outString==NULL || outLength<-1 || localString==NULL || localLength<-1) {
513 return 0;
514 }
515
516 if(outLength<0) {
517 outLength=(int32_t)uprv_strlen(outString);
518 }
519 if(localLength<0) {
520 localLength=u_strlen(localString);
521 }
522
523 minLength= outLength<localLength ? outLength : localLength;
524
525 while(minLength>0) {
526 c=(uint8_t)*outString++;
527 if(c==0) {
528 c1=0;
529 } else if((c1=asciiFromEbcdic[c])!=0 && UCHAR_IS_INVARIANT(c1)) {
530 /* c1 is set */
531 } else {
532 c1=-1;
533 }
534
535 c2=*localString++;
536 if(!UCHAR_IS_INVARIANT(c2)) {
537 c2=-2;
538 }
539
540 if((c1-=c2)!=0) {
541 return c1;
542 }
543
544 --minLength;
545 }
546
547 /* strings start with same prefix, compare lengths */
548 return outLength-localLength;
549 }
550
551 U_CAPI int32_t U_EXPORT2
uprv_compareInvEbcdicAsAscii(const char * s1,const char * s2)552 uprv_compareInvEbcdicAsAscii(const char *s1, const char *s2) {
553 int32_t c1, c2;
554
555 for(;; ++s1, ++s2) {
556 c1=(uint8_t)*s1;
557 c2=(uint8_t)*s2;
558 if(c1!=c2) {
559 if(c1!=0 && ((c1=asciiFromEbcdic[c1])==0 || !UCHAR_IS_INVARIANT(c1))) {
560 c1=-(int32_t)(uint8_t)*s1;
561 }
562 if(c2!=0 && ((c2=asciiFromEbcdic[c2])==0 || !UCHAR_IS_INVARIANT(c2))) {
563 c2=-(int32_t)(uint8_t)*s2;
564 }
565 return c1-c2;
566 } else if(c1==0) {
567 return 0;
568 }
569 }
570 }
571
572 U_CAPI char U_EXPORT2
uprv_ebcdicToAscii(char c)573 uprv_ebcdicToAscii(char c) {
574 return (char)asciiFromEbcdic[(uint8_t)c];
575 }
576
577 U_CAPI char U_EXPORT2
uprv_ebcdicToLowercaseAscii(char c)578 uprv_ebcdicToLowercaseAscii(char c) {
579 return (char)lowercaseAsciiFromEbcdic[(uint8_t)c];
580 }
581
582 U_CAPI uint8_t* U_EXPORT2
uprv_aestrncpy(uint8_t * dst,const uint8_t * src,int32_t n)583 uprv_aestrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
584 {
585 uint8_t *orig_dst = dst;
586
587 if(n==-1) {
588 n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
589 }
590 /* copy non-null */
591 while(*src && n>0) {
592 *(dst++) = asciiFromEbcdic[*(src++)];
593 n--;
594 }
595 /* pad */
596 while(n>0) {
597 *(dst++) = 0;
598 n--;
599 }
600 return orig_dst;
601 }
602
603 U_CAPI uint8_t* U_EXPORT2
uprv_eastrncpy(uint8_t * dst,const uint8_t * src,int32_t n)604 uprv_eastrncpy(uint8_t *dst, const uint8_t *src, int32_t n)
605 {
606 uint8_t *orig_dst = dst;
607
608 if(n==-1) {
609 n = static_cast<int32_t>(uprv_strlen((const char*)src)+1); /* copy NUL */
610 }
611 /* copy non-null */
612 while(*src && n>0) {
613 char ch = ebcdicFromAscii[*(src++)];
614 if(ch == 0) {
615 ch = ebcdicFromAscii[0x3f]; /* questionmark (subchar) */
616 }
617 *(dst++) = ch;
618 n--;
619 }
620 /* pad */
621 while(n>0) {
622 *(dst++) = 0;
623 n--;
624 }
625 return orig_dst;
626 }
627
628