1 /*
2 SDL - Simple DirectMedia Layer
3 Copyright (C) 1997-2012 Sam Lantinga
4
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 This library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with this library; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18
19 Sam Lantinga
20 slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23
24 /* This file contains portable iconv functions for SDL */
25
26 #include "SDL_stdinc.h"
27 #include "SDL_endian.h"
28
29 #ifdef HAVE_ICONV
30
/* Depending on which standard the iconv() was implemented with,
   iconv() may or may not use const char ** for the inbuf param.
   If we get this wrong, it's just a warning, so no big deal.

   When ICONV_INBUF_NONCONST is defined, the SDL_iconv() wrapper in this
   file casts inbuf to (char **) before passing it to iconv(); otherwise
   inbuf is passed through unchanged as (const char **).

   NOTE(review): `_XGP6` looks like it may be a misspelling of `_XPG6` --
   confirm which platform macro this test is meant to match.
 */
#if defined(_XGP6) || \
    defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
#define ICONV_INBUF_NONCONST
#endif
39
40 #include <errno.h>
41
/*
 * Thin wrapper around the system iconv().
 *
 * Forwards all arguments to iconv() and returns its result on success.
 * When iconv() reports failure ((size_t)-1), errno is translated into
 * one of the SDL_ICONV_* codes:
 *   E2BIG  -> SDL_ICONV_E2BIG
 *   EILSEQ -> SDL_ICONV_EILSEQ
 *   EINVAL -> SDL_ICONV_EINVAL
 *   other  -> SDL_ICONV_ERROR
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    size_t converted;

#ifdef ICONV_INBUF_NONCONST
    /* This iconv() prototype wants a non-const input pointer */
    converted = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
#else
    converted = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
#endif

    /* Success: hand the count straight back to the caller */
    if ( converted != (size_t)-1 ) {
        return converted;
    }

    /* Failure: map errno onto the SDL error codes */
    if ( errno == E2BIG ) {
        return SDL_ICONV_E2BIG;
    }
    if ( errno == EILSEQ ) {
        return SDL_ICONV_EILSEQ;
    }
    if ( errno == EINVAL ) {
        return SDL_ICONV_EINVAL;
    }
    return SDL_ICONV_ERROR;
}
66
67 #else
68
69 /* Lots of useful information on Unicode at:
70 http://www.cl.cam.ac.uk/~mgk25/unicode.html
71 */
72
/* Byte order marker code point; SDL_iconv() stores it in host byte order
   at the start of the output when the destination is plain UTF-16/UTF-32 */
#define UNICODE_BOM 0xFEFF

/* Replacement stored by SDL_iconv() when a character does not fit the
   ASCII or Latin-1 destination */
#define UNKNOWN_ASCII '?'
/* Replacement code point used by SDL_iconv() for illegal input sequences
   and for values outside the range the destination format allows */
#define UNKNOWN_UNICODE 0xFFFD
77
/* Identifiers for the encodings handled by the built-in converter.
   ENCODING_UNKNOWN is the "no match" value used by SDL_iconv_open(). */
enum {
    ENCODING_UNKNOWN,
    ENCODING_ASCII,
    ENCODING_LATIN1,
    ENCODING_UTF8,
    ENCODING_UTF16, /* Needs byte order marker */
    ENCODING_UTF16BE,
    ENCODING_UTF16LE,
    ENCODING_UTF32, /* Needs byte order marker */
    ENCODING_UTF32BE,
    ENCODING_UTF32LE,
    ENCODING_UCS2, /* Native byte order assumed */
    ENCODING_UCS4, /* Native byte order assumed */
};
/* Aliases that select the UTF-16/UTF-32 variant matching the host byte
   order, as reported by SDL_BYTEORDER */
#if SDL_BYTEORDER == SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE ENCODING_UTF32BE
#else
#define ENCODING_UTF16NATIVE ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE ENCODING_UTF32LE
#endif
99
/* Conversion descriptor for the built-in converter: a pair of ENCODING_*
   values.  SDL_iconv() rewrites a plain UTF16/UTF32 value to a concrete
   byte-order variant the first time it processes data. */
struct _SDL_iconv_t
{
    int src_fmt; /* ENCODING_* of the input */
    int dst_fmt; /* ENCODING_* of the output */
};
105
/* Table mapping encoding names to ENCODING_* identifiers.
   SDL_iconv_open() searches it with SDL_strcasecmp(), so matching is
   case-insensitive.  Most encodings are listed with and without a hyphen. */
static struct {
    const char *name; /* Name accepted by SDL_iconv_open() */
    int format;       /* Matching ENCODING_* value */
} encodings[] = {
    { "ASCII", ENCODING_ASCII },
    { "US-ASCII", ENCODING_ASCII },
    { "8859-1", ENCODING_LATIN1 },
    { "ISO-8859-1", ENCODING_LATIN1 },
    { "UTF8", ENCODING_UTF8 },
    { "UTF-8", ENCODING_UTF8 },
    { "UTF16", ENCODING_UTF16 },
    { "UTF-16", ENCODING_UTF16 },
    { "UTF16BE", ENCODING_UTF16BE },
    { "UTF-16BE", ENCODING_UTF16BE },
    { "UTF16LE", ENCODING_UTF16LE },
    { "UTF-16LE", ENCODING_UTF16LE },
    { "UTF32", ENCODING_UTF32 },
    { "UTF-32", ENCODING_UTF32 },
    { "UTF32BE", ENCODING_UTF32BE },
    { "UTF-32BE", ENCODING_UTF32BE },
    { "UTF32LE", ENCODING_UTF32LE },
    { "UTF-32LE", ENCODING_UTF32LE },
    { "UCS2", ENCODING_UCS2 },
    { "UCS-2", ENCODING_UCS2 },
    { "UCS4", ENCODING_UCS4 },
    { "UCS-4", ENCODING_UCS4 },
};
133
/*
 * Derive a character-set name from the locale environment variables.
 *
 * Checks LC_ALL, LC_CTYPE, LC_MESSAGES and LANG in that order and uses
 * the first one that is set to a non-empty value.  An empty value is
 * treated the same as an unset variable so that, for example,
 * LC_ALL="" does not hide a usable LANG.  If nothing usable is found,
 * or the value is "C", the name "ASCII" is used.
 *
 * The value is then trimmed: everything up to and including the first
 * '.' is dropped, and anything from '@' onwards is cut off, so
 * "en_US.UTF-8@blah" becomes "UTF-8".
 *
 * buffer/bufsize: caller-provided storage that receives the result
 *                 (copied with SDL_strlcpy, so it is truncated to fit).
 * Returns buffer.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
    static const char *const variables[] = {
        "LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
    };
    const char *lang = NULL;
    char *ptr;
    int i;

    for ( i = 0; i < (int)(sizeof(variables) / sizeof(variables[0])); ++i ) {
        const char *value = SDL_getenv(variables[i]);
        if ( value && *value ) {
            lang = value;
            break;
        }
    }
    if ( !lang || SDL_strcmp(lang, "C") == 0 ) {
        lang = "ASCII";
    }

    /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */
    ptr = SDL_strchr(lang, '.');
    if (ptr != NULL) {
        lang = ptr + 1;
    }

    SDL_strlcpy(buffer, lang, bufsize);
    ptr = SDL_strchr(buffer, '@');
    if (ptr != NULL) {
        *ptr = '\0';  /* chop end of string. */
    }

    return buffer;
}
167
SDL_iconv_open(const char * tocode,const char * fromcode)168 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
169 {
170 int src_fmt = ENCODING_UNKNOWN;
171 int dst_fmt = ENCODING_UNKNOWN;
172 int i;
173 char fromcode_buffer[64];
174 char tocode_buffer[64];
175
176 if ( !fromcode || !*fromcode ) {
177 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
178 }
179 if ( !tocode || !*tocode ) {
180 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
181 }
182 for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
183 if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
184 src_fmt = encodings[i].format;
185 if ( dst_fmt != ENCODING_UNKNOWN ) {
186 break;
187 }
188 }
189 if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
190 dst_fmt = encodings[i].format;
191 if ( src_fmt != ENCODING_UNKNOWN ) {
192 break;
193 }
194 }
195 }
196 if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
197 SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
198 if ( cd ) {
199 cd->src_fmt = src_fmt;
200 cd->dst_fmt = dst_fmt;
201 return cd;
202 }
203 }
204 return (SDL_iconv_t)-1;
205 }
206
/*
 * Built-in replacement for iconv(), used when the system has none.
 *
 * Converts characters one at a time from *inbuf (format cd->src_fmt) to
 * *outbuf (format cd->dst_fmt), going through a UCS-4 code point.
 *
 * cd           : descriptor from SDL_iconv_open().  A plain UTF16/UTF32
 *                source or destination format is replaced in the
 *                descriptor by a concrete byte-order variant on first use.
 * inbuf        : address of the input pointer; advanced past every
 *                character that was fully converted.  If inbuf or *inbuf
 *                is NULL the call is a no-op "reset" and returns 0.
 * inbytesleft  : bytes available at *inbuf; decremented accordingly.
 *                A NULL pointer is treated as zero bytes of input.
 * outbuf       : address of the output pointer; advanced past the bytes
 *                written.
 * outbytesleft : room available at *outbuf; decremented accordingly.
 *
 * Returns the number of characters converted, or
 *   SDL_ICONV_E2BIG  - the output buffer is missing or too small for the
 *                      next character (or for the byte order marker);
 *   SDL_ICONV_EINVAL - the input ends in the middle of a character.
 * Illegal input sequences do not fail the call; they are replaced by
 * UNKNOWN_UNICODE (or UNKNOWN_ASCII for 8-bit destinations).
 *
 * On an error return the in/out pointers and counters reflect the last
 * character that was converted completely.
 *
 * NOTE(review): the UCS-2/UCS-4 paths and the byte order marker store
 * access the buffers through Uint16/Uint32 pointers, so they presumably
 * rely on suitably aligned buffers -- confirm for strict-alignment targets.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
    /* For simplicity, we'll convert everything to and from UCS-4 */
    const char *src;
    char *dst;
    size_t srclen, dstlen;
    Uint32 ch = 0;
    size_t total;

    if ( !inbuf || !*inbuf ) {
        /* Reset the context */
        return 0;
    }
    if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
        return SDL_ICONV_E2BIG;
    }
    src = *inbuf;
    srclen = (inbytesleft ? *inbytesleft : 0);
    dst = *outbuf;
    dstlen = *outbytesleft;

    switch ( cd->src_fmt ) {
        case ENCODING_UTF16:
            /* Scan for a byte order marker.  U+FEFF is stored as FE FF
               in big-endian data and as FF FE in little-endian data. */
            {
                Uint8 *p = (Uint8 *)src;
                size_t n = srclen / 2;
                while ( n ) {
                    if ( p[0] == 0xFE && p[1] == 0xFF ) {
                        cd->src_fmt = ENCODING_UTF16BE;
                        break;
                    } else if ( p[0] == 0xFF && p[1] == 0xFE ) {
                        cd->src_fmt = ENCODING_UTF16LE;
                        break;
                    }
                    p += 2;
                    --n;
                }
                if ( n == 0 ) {
                    /* We can't tell, default to host order */
                    cd->src_fmt = ENCODING_UTF16NATIVE;
                }
            }
            break;
        case ENCODING_UTF32:
            /* Scan for a byte order marker.  U+0000FEFF is stored as
               00 00 FE FF in big-endian data and as FF FE 00 00 in
               little-endian data. */
            {
                Uint8 *p = (Uint8 *)src;
                size_t n = srclen / 4;
                while ( n ) {
                    if ( p[0] == 0x00 && p[1] == 0x00 &&
                         p[2] == 0xFE && p[3] == 0xFF ) {
                        cd->src_fmt = ENCODING_UTF32BE;
                        break;
                    } else if ( p[0] == 0xFF && p[1] == 0xFE &&
                                p[2] == 0x00 && p[3] == 0x00 ) {
                        cd->src_fmt = ENCODING_UTF32LE;
                        break;
                    }
                    p += 4;
                    --n;
                }
                if ( n == 0 ) {
                    /* We can't tell, default to host order */
                    cd->src_fmt = ENCODING_UTF32NATIVE;
                }
            }
            break;
    }

    switch ( cd->dst_fmt ) {
        case ENCODING_UTF16:
            /* Default to host order, need to add byte order marker */
            if ( dstlen < 2 ) {
                return SDL_ICONV_E2BIG;
            }
            *(Uint16 *)dst = UNICODE_BOM;
            dst += 2;
            dstlen -= 2;
            cd->dst_fmt = ENCODING_UTF16NATIVE;
            /* The marker is only ever written once (dst_fmt has just
               changed), so commit it now: otherwise an error on the
               first character would leave *outbuf pointing at the
               marker and the next call would overwrite it. */
            *outbuf = dst;
            *outbytesleft = dstlen;
            break;
        case ENCODING_UTF32:
            /* Default to host order, need to add byte order marker */
            if ( dstlen < 4 ) {
                return SDL_ICONV_E2BIG;
            }
            *(Uint32 *)dst = UNICODE_BOM;
            dst += 4;
            dstlen -= 4;
            cd->dst_fmt = ENCODING_UTF32NATIVE;
            /* Commit the marker immediately, see the UTF-16 case */
            *outbuf = dst;
            *outbytesleft = dstlen;
            break;
    }

    total = 0;
    while ( srclen > 0 ) {
        /* Decode a character */
        switch ( cd->src_fmt ) {
            case ENCODING_ASCII:
                {
                    Uint8 *p = (Uint8 *)src;
                    /* The high bit is simply masked off */
                    ch = (Uint32)(p[0] & 0x7F);
                    ++src;
                    --srclen;
                }
                break;
            case ENCODING_LATIN1:
                {
                    Uint8 *p = (Uint8 *)src;
                    ch = (Uint32)p[0];
                    ++src;
                    --srclen;
                }
                break;
            case ENCODING_UTF8: /* RFC 3629 */
                {
                    Uint8 *p = (Uint8 *)src;
                    size_t left = 0;    /* continuation bytes expected */
                    SDL_bool overlong = SDL_FALSE;

                    /*
                     * Illegal lead bytes are not reported as EILSEQ;
                     * they are replaced by UNKNOWN_UNICODE and skipped.
                     *
                     * A multi-byte sequence is overlong when the lead
                     * byte carries no payload bits AND the first
                     * continuation byte is below the minimum for that
                     * length, so the second byte has to be inspected
                     * (srclen still counts the lead byte here, hence
                     * "srclen > 1" guarantees p[1] is readable).
                     */
                    if ( p[0] >= 0xFC ) {
                        if ( (p[0] & 0xFE) != 0xFC ) {
                            ch = UNKNOWN_UNICODE;
                        } else {
                            if ( p[0] == 0xFC && srclen > 1 &&
                                 (p[1] & 0xFC) == 0x80 ) {
                                overlong = SDL_TRUE;
                            }
                            ch = (Uint32)(p[0] & 0x01);
                            left = 5;
                        }
                    } else if ( p[0] >= 0xF8 ) {
                        if ( (p[0] & 0xFC) != 0xF8 ) {
                            ch = UNKNOWN_UNICODE;
                        } else {
                            if ( p[0] == 0xF8 && srclen > 1 &&
                                 (p[1] & 0xF8) == 0x80 ) {
                                overlong = SDL_TRUE;
                            }
                            ch = (Uint32)(p[0] & 0x03);
                            left = 4;
                        }
                    } else if ( p[0] >= 0xF0 ) {
                        if ( (p[0] & 0xF8) != 0xF0 ) {
                            ch = UNKNOWN_UNICODE;
                        } else {
                            if ( p[0] == 0xF0 && srclen > 1 &&
                                 (p[1] & 0xF0) == 0x80 ) {
                                overlong = SDL_TRUE;
                            }
                            ch = (Uint32)(p[0] & 0x07);
                            left = 3;
                        }
                    } else if ( p[0] >= 0xE0 ) {
                        if ( (p[0] & 0xF0) != 0xE0 ) {
                            ch = UNKNOWN_UNICODE;
                        } else {
                            if ( p[0] == 0xE0 && srclen > 1 &&
                                 (p[1] & 0xE0) == 0x80 ) {
                                overlong = SDL_TRUE;
                            }
                            ch = (Uint32)(p[0] & 0x0F);
                            left = 2;
                        }
                    } else if ( p[0] >= 0xC0 ) {
                        if ( (p[0] & 0xE0) != 0xC0 ) {
                            ch = UNKNOWN_UNICODE;
                        } else {
                            /* Lead bytes C0 and C1 are always overlong */
                            if ( (p[0] & 0xDE) == 0xC0 ) {
                                overlong = SDL_TRUE;
                            }
                            ch = (Uint32)(p[0] & 0x1F);
                            left = 1;
                        }
                    } else {
                        if ( (p[0] & 0x80) != 0x00 ) {
                            /* Stray continuation byte */
                            ch = UNKNOWN_UNICODE;
                        } else {
                            ch = (Uint32)p[0];
                        }
                    }
                    ++src;
                    --srclen;
                    if ( srclen < left ) {
                        /* Input ends inside this character */
                        return SDL_ICONV_EINVAL;
                    }
                    while ( left-- ) {
                        ++p;
                        if ( (p[0] & 0xC0) != 0x80 ) {
                            /* Not a continuation byte: substitute and
                               let the next iteration re-read this byte */
                            ch = UNKNOWN_UNICODE;
                            break;
                        }
                        ch <<= 6;
                        ch |= (p[0] & 0x3F);
                        ++src;
                        --srclen;
                    }
                    if ( overlong ) {
                        /* Potential security risk: never accept an
                           overlong encoding as the character it names */
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
                         (ch == 0xFFFE || ch == 0xFFFF) ||
                         ch > 0x10FFFF ) {
                        /* Surrogates, non-characters and out-of-range
                           values are replaced as well */
                        ch = UNKNOWN_UNICODE;
                    }
                }
                break;
            case ENCODING_UTF16BE: /* RFC 2781 */
                {
                    Uint8 *p = (Uint8 *)src;
                    Uint16 W1, W2;
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    W1 = ((Uint16)p[0] << 8) |
                          (Uint16)p[1];
                    src += 2;
                    srclen -= 2;
                    if ( W1 < 0xD800 || W1 > 0xDFFF ) {
                        ch = (Uint32)W1;
                        break;
                    }
                    if ( W1 > 0xDBFF ) {
                        /* Low surrogate without a preceding high one */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    p = (Uint8 *)src;
                    W2 = ((Uint16)p[0] << 8) |
                          (Uint16)p[1];
                    src += 2;
                    srclen -= 2;
                    if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
                        /* High surrogate not followed by a low one */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    ch = (((Uint32)(W1 & 0x3FF) << 10) |
                          (Uint32)(W2 & 0x3FF)) + 0x10000;
                }
                break;
            case ENCODING_UTF16LE: /* RFC 2781 */
                {
                    Uint8 *p = (Uint8 *)src;
                    Uint16 W1, W2;
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    W1 = ((Uint16)p[1] << 8) |
                          (Uint16)p[0];
                    src += 2;
                    srclen -= 2;
                    if ( W1 < 0xD800 || W1 > 0xDFFF ) {
                        ch = (Uint32)W1;
                        break;
                    }
                    if ( W1 > 0xDBFF ) {
                        /* Low surrogate without a preceding high one */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    p = (Uint8 *)src;
                    W2 = ((Uint16)p[1] << 8) |
                          (Uint16)p[0];
                    src += 2;
                    srclen -= 2;
                    if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
                        /* High surrogate not followed by a low one */
                        ch = UNKNOWN_UNICODE;
                        break;
                    }
                    ch = (((Uint32)(W1 & 0x3FF) << 10) |
                          (Uint32)(W2 & 0x3FF)) + 0x10000;
                }
                break;
            case ENCODING_UTF32BE:
                {
                    Uint8 *p = (Uint8 *)src;
                    if ( srclen < 4 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = ((Uint32)p[0] << 24) |
                         ((Uint32)p[1] << 16) |
                         ((Uint32)p[2] << 8) |
                          (Uint32)p[3];
                    src += 4;
                    srclen -= 4;
                }
                break;
            case ENCODING_UTF32LE:
                {
                    Uint8 *p = (Uint8 *)src;
                    if ( srclen < 4 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = ((Uint32)p[3] << 24) |
                         ((Uint32)p[2] << 16) |
                         ((Uint32)p[1] << 8) |
                          (Uint32)p[0];
                    src += 4;
                    srclen -= 4;
                }
                break;
            case ENCODING_UCS2:
                {
                    /* Read in host byte order */
                    Uint16 *p = (Uint16 *)src;
                    if ( srclen < 2 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = *p;
                    src += 2;
                    srclen -= 2;
                }
                break;
            case ENCODING_UCS4:
                {
                    /* Read in host byte order */
                    Uint32 *p = (Uint32 *)src;
                    if ( srclen < 4 ) {
                        return SDL_ICONV_EINVAL;
                    }
                    ch = *p;
                    src += 4;
                    srclen -= 4;
                }
                break;
        }

        /* Encode a character */
        switch ( cd->dst_fmt ) {
            case ENCODING_ASCII:
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( dstlen < 1 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    if ( ch > 0x7F ) {
                        *p = UNKNOWN_ASCII;
                    } else {
                        *p = (Uint8)ch;
                    }
                    ++dst;
                    --dstlen;
                }
                break;
            case ENCODING_LATIN1:
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( dstlen < 1 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    if ( ch > 0xFF ) {
                        *p = UNKNOWN_ASCII;
                    } else {
                        *p = (Uint8)ch;
                    }
                    ++dst;
                    --dstlen;
                }
                break;
            case ENCODING_UTF8: /* RFC 3629 */
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( ch <= 0x7F ) {
                        if ( dstlen < 1 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        *p = (Uint8)ch;
                        ++dst;
                        --dstlen;
                    } else if ( ch <= 0x7FF ) {
                        if ( dstlen < 2 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
                        p[1] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 2;
                        dstlen -= 2;
                    } else if ( ch <= 0xFFFF ) {
                        if ( dstlen < 3 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
                        p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[2] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 3;
                        dstlen -= 3;
                    } else if ( ch <= 0x1FFFFF ) {
                        if ( dstlen < 4 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
                        p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[3] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 4;
                        dstlen -= 4;
                    } else if ( ch <= 0x3FFFFFF ) {
                        /* Not reachable after the 0x10FFFF clamp above;
                           kept for completeness */
                        if ( dstlen < 5 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xF8 | (Uint8)((ch >> 24) & 0x03);
                        p[1] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[3] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[4] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 5;
                        dstlen -= 5;
                    } else {
                        if ( dstlen < 6 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = 0xFC | (Uint8)((ch >> 30) & 0x01);
                        p[1] = 0x80 | (Uint8)((ch >> 24) & 0x3F);
                        p[2] = 0x80 | (Uint8)((ch >> 18) & 0x3F);
                        p[3] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
                        p[4] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
                        p[5] = 0x80 | (Uint8)(ch & 0x3F);
                        dst += 6;
                        dstlen -= 6;
                    }
                }
                break;
            case ENCODING_UTF16BE: /* RFC 2781 */
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( ch < 0x10000 ) {
                        if ( dstlen < 2 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[0] = (Uint8)(ch >> 8);
                        p[1] = (Uint8)ch;
                        dst += 2;
                        dstlen -= 2;
                    } else {
                        /* Needs a surrogate pair */
                        Uint16 W1, W2;
                        if ( dstlen < 4 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        ch = ch - 0x10000;
                        W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                        W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                        p[0] = (Uint8)(W1 >> 8);
                        p[1] = (Uint8)W1;
                        p[2] = (Uint8)(W2 >> 8);
                        p[3] = (Uint8)W2;
                        dst += 4;
                        dstlen -= 4;
                    }
                }
                break;
            case ENCODING_UTF16LE: /* RFC 2781 */
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( ch < 0x10000 ) {
                        if ( dstlen < 2 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        p[1] = (Uint8)(ch >> 8);
                        p[0] = (Uint8)ch;
                        dst += 2;
                        dstlen -= 2;
                    } else {
                        /* Needs a surrogate pair */
                        Uint16 W1, W2;
                        if ( dstlen < 4 ) {
                            return SDL_ICONV_E2BIG;
                        }
                        ch = ch - 0x10000;
                        W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
                        W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
                        p[1] = (Uint8)(W1 >> 8);
                        p[0] = (Uint8)W1;
                        p[3] = (Uint8)(W2 >> 8);
                        p[2] = (Uint8)W2;
                        dst += 4;
                        dstlen -= 4;
                    }
                }
                break;
            case ENCODING_UTF32BE:
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 4 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[0] = (Uint8)(ch >> 24);
                    p[1] = (Uint8)(ch >> 16);
                    p[2] = (Uint8)(ch >> 8);
                    p[3] = (Uint8)ch;
                    dst += 4;
                    dstlen -= 4;
                }
                break;
            case ENCODING_UTF32LE:
                {
                    Uint8 *p = (Uint8 *)dst;
                    if ( ch > 0x10FFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 4 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    p[3] = (Uint8)(ch >> 24);
                    p[2] = (Uint8)(ch >> 16);
                    p[1] = (Uint8)(ch >> 8);
                    p[0] = (Uint8)ch;
                    dst += 4;
                    dstlen -= 4;
                }
                break;
            case ENCODING_UCS2:
                {
                    /* Stored in host byte order */
                    Uint16 *p = (Uint16 *)dst;
                    if ( ch > 0xFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 2 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    *p = (Uint16)ch;
                    dst += 2;
                    dstlen -= 2;
                }
                break;
            case ENCODING_UCS4:
                {
                    /* Stored in host byte order */
                    Uint32 *p = (Uint32 *)dst;
                    if ( ch > 0x7FFFFFFF ) {
                        ch = UNKNOWN_UNICODE;
                    }
                    if ( dstlen < 4 ) {
                        return SDL_ICONV_E2BIG;
                    }
                    *p = ch;
                    dst += 4;
                    dstlen -= 4;
                }
                break;
        }

        /* Update state: only now is the character considered converted.
           inbytesleft is non-NULL here, otherwise srclen would be 0 and
           the loop would not have been entered. */
        *inbuf = src;
        *inbytesleft = srclen;
        *outbuf = dst;
        *outbytesleft = dstlen;
        ++total;
    }
    return total;
}
804
/*
 * Release a descriptor obtained from SDL_iconv_open().
 *
 * A NULL descriptor and the failure value (SDL_iconv_t)-1 are accepted
 * and ignored.  Always returns 0.
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
    /* Nothing was allocated for these two values */
    if ( !cd || cd == (SDL_iconv_t)-1 ) {
        return 0;
    }

    SDL_free(cd);
    return 0;
}
812
813 #endif /* !HAVE_ICONV */
814
SDL_iconv_string(const char * tocode,const char * fromcode,const char * inbuf,size_t inbytesleft)815 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
816 {
817 SDL_iconv_t cd;
818 char *string;
819 size_t stringsize;
820 char *outbuf;
821 size_t outbytesleft;
822 size_t retCode = 0;
823
824 cd = SDL_iconv_open(tocode, fromcode);
825 if ( cd == (SDL_iconv_t)-1 ) {
826 /* See if we can recover here (fixes iconv on Solaris 11) */
827 if ( !tocode || !*tocode ) {
828 tocode = "UTF-8";
829 }
830 if ( !fromcode || !*fromcode ) {
831 fromcode = "UTF-8";
832 }
833 cd = SDL_iconv_open(tocode, fromcode);
834 }
835 if ( cd == (SDL_iconv_t)-1 ) {
836 return NULL;
837 }
838
839 stringsize = inbytesleft > 4 ? inbytesleft : 4;
840 string = SDL_malloc(stringsize);
841 if ( !string ) {
842 SDL_iconv_close(cd);
843 return NULL;
844 }
845 outbuf = string;
846 outbytesleft = stringsize;
847 SDL_memset(outbuf, 0, 4);
848
849 while ( inbytesleft > 0 ) {
850 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
851 switch (retCode) {
852 case SDL_ICONV_E2BIG:
853 {
854 char *oldstring = string;
855 stringsize *= 2;
856 string = SDL_realloc(string, stringsize);
857 if ( !string ) {
858 SDL_iconv_close(cd);
859 return NULL;
860 }
861 outbuf = string + (outbuf - oldstring);
862 outbytesleft = stringsize - (outbuf - string);
863 SDL_memset(outbuf, 0, 4);
864 }
865 break;
866 case SDL_ICONV_EILSEQ:
867 /* Try skipping some input data - not perfect, but... */
868 ++inbuf;
869 --inbytesleft;
870 break;
871 case SDL_ICONV_EINVAL:
872 case SDL_ICONV_ERROR:
873 /* We can't continue... */
874 inbytesleft = 0;
875 break;
876 }
877 }
878 SDL_iconv_close(cd);
879
880 return string;
881 }
882