• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2     SDL - Simple DirectMedia Layer
3     Copyright (C) 1997-2012 Sam Lantinga
4 
5     This library is free software; you can redistribute it and/or
6     modify it under the terms of the GNU Lesser General Public
7     License as published by the Free Software Foundation; either
8     version 2.1 of the License, or (at your option) any later version.
9 
10     This library is distributed in the hope that it will be useful,
11     but WITHOUT ANY WARRANTY; without even the implied warranty of
12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13     Lesser General Public License for more details.
14 
15     You should have received a copy of the GNU Lesser General Public
16     License along with this library; if not, write to the Free Software
17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
18 
19     Sam Lantinga
20     slouken@libsdl.org
21 */
22 #include "SDL_config.h"
23 
24 /* This file contains portable iconv functions for SDL */
25 
26 #include "SDL_stdinc.h"
27 #include "SDL_endian.h"
28 
29 #ifdef HAVE_ICONV
30 
31 /* Depending on which standard the iconv() was implemented with,
32    iconv() may or may not use const char ** for the inbuf param.
33    If we get this wrong, it's just a warning, so no big deal.
34 */
35 #if defined(_XGP6) || \
36     defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2))
37 #define ICONV_INBUF_NONCONST
38 #endif
39 
40 #include <errno.h>
41 
/*
 * Wrapper around the system iconv() that reports failures in-band.
 *
 * The arguments are handed to iconv() unchanged (apart from the const cast
 * needed when ICONV_INBUF_NONCONST is defined).  On success the value
 * returned by iconv() is passed through.  When iconv() returns (size_t)-1
 * the errno value is translated:
 *   E2BIG  -> SDL_ICONV_E2BIG
 *   EILSEQ -> SDL_ICONV_EILSEQ
 *   EINVAL -> SDL_ICONV_EINVAL
 *   other  -> SDL_ICONV_ERROR
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
	size_t converted;

#ifdef ICONV_INBUF_NONCONST
	converted = iconv(cd, (char **)inbuf, inbytesleft, outbuf, outbytesleft);
#else
	converted = iconv(cd, inbuf, inbytesleft, outbuf, outbytesleft);
#endif
	if ( converted != (size_t)-1 ) {
		return converted;
	}

	/* iconv() failed; errno says why */
	if ( errno == E2BIG ) {
		return SDL_ICONV_E2BIG;
	}
	if ( errno == EILSEQ ) {
		return SDL_ICONV_EILSEQ;
	}
	if ( errno == EINVAL ) {
		return SDL_ICONV_EINVAL;
	}
	return SDL_ICONV_ERROR;
}
66 
67 #else
68 
69 /* Lots of useful information on Unicode at:
70 	http://www.cl.cam.ac.uk/~mgk25/unicode.html
71 */
72 
/* Code point of the byte order mark */
#define UNICODE_BOM	0xFEFF

/* Substitutes stored in place of characters that cannot be converted */
#define UNKNOWN_ASCII	'?'
#define UNKNOWN_UNICODE	0xFFFD

/* Identifiers for the encodings handled by the built-in converter */
enum {
	ENCODING_UNKNOWN = 0,
	ENCODING_ASCII   = 1,
	ENCODING_LATIN1  = 2,
	ENCODING_UTF8    = 3,
	ENCODING_UTF16   = 4,	/* Needs byte order marker */
	ENCODING_UTF16BE = 5,
	ENCODING_UTF16LE = 6,
	ENCODING_UTF32   = 7,	/* Needs byte order marker */
	ENCODING_UTF32BE = 8,
	ENCODING_UTF32LE = 9,
	ENCODING_UCS2    = 10,	/* Native byte order assumed */
	ENCODING_UCS4    = 11	/* Native byte order assumed */
};

/* Byte-order-specific UTF-16/UTF-32 variants that match the host */
#if SDL_BYTEORDER != SDL_BIG_ENDIAN
#define ENCODING_UTF16NATIVE	ENCODING_UTF16LE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32LE
#else
#define ENCODING_UTF16NATIVE	ENCODING_UTF16BE
#define ENCODING_UTF32NATIVE	ENCODING_UTF32BE
#endif
99 
/* Conversion descriptor: the pair of ENCODING_* values a conversion runs
   between.  SDL_iconv() may replace a BOM-dependent value (UTF-16/UTF-32)
   with the concrete byte-order variant it settles on. */
struct _SDL_iconv_t
{
	int src_fmt;	/* encoding the input is decoded from */
	int dst_fmt;	/* encoding the output is written in */
};
105 
106 static struct {
107 	const char *name;
108 	int format;
109 } encodings[] = {
110 	{ "ASCII",	ENCODING_ASCII },
111 	{ "US-ASCII",	ENCODING_ASCII },
112 	{ "8859-1",	ENCODING_LATIN1 },
113 	{ "ISO-8859-1",	ENCODING_LATIN1 },
114 	{ "UTF8",	ENCODING_UTF8 },
115 	{ "UTF-8",	ENCODING_UTF8 },
116 	{ "UTF16",	ENCODING_UTF16 },
117 	{ "UTF-16",	ENCODING_UTF16 },
118 	{ "UTF16BE",	ENCODING_UTF16BE },
119 	{ "UTF-16BE",	ENCODING_UTF16BE },
120 	{ "UTF16LE",	ENCODING_UTF16LE },
121 	{ "UTF-16LE",	ENCODING_UTF16LE },
122 	{ "UTF32",	ENCODING_UTF32 },
123 	{ "UTF-32",	ENCODING_UTF32 },
124 	{ "UTF32BE",	ENCODING_UTF32BE },
125 	{ "UTF-32BE",	ENCODING_UTF32BE },
126 	{ "UTF32LE",	ENCODING_UTF32LE },
127 	{ "UTF-32LE",	ENCODING_UTF32LE },
128 	{ "UCS2",	ENCODING_UCS2 },
129 	{ "UCS-2",	ENCODING_UCS2 },
130 	{ "UCS4",	ENCODING_UCS4 },
131 	{ "UCS-4",	ENCODING_UCS4 },
132 };
133 
/*
 * Derive a charset name from the locale environment variables.
 *
 * LC_ALL, LC_CTYPE, LC_MESSAGES and LANG are consulted in that order and
 * the first one that is set wins.  If none is set, or the value is empty
 * or "C", "ASCII" is used.  Whatever follows the first '.' is taken as the
 * charset (the whole value if there is no '.'), and anything from '@'
 * onwards is dropped, so "en_US.UTF-8@blah" yields "UTF-8".
 *
 * The result is copied into buffer (at most bufsize bytes, via
 * SDL_strlcpy) and buffer is returned.
 */
static const char *getlocale(char *buffer, size_t bufsize)
{
	static const char *const envvars[] = {
		"LC_ALL", "LC_CTYPE", "LC_MESSAGES", "LANG"
	};
	const char *locale = NULL;
	const char *charset;
	char *modifier;
	size_t i;

	/* First variable that is set decides */
	for ( i = 0; !locale && i < SDL_arraysize(envvars); ++i ) {
		locale = SDL_getenv(envvars[i]);
	}
	if ( !locale || *locale == '\0' || SDL_strcmp(locale, "C") == 0 ) {
		locale = "ASCII";
	}

	/* Keep only what follows the language/territory part */
	charset = SDL_strchr(locale, '.');
	charset = charset ? charset + 1 : locale;

	SDL_strlcpy(buffer, charset, bufsize);

	/* Cut off a trailing "@modifier" */
	modifier = SDL_strchr(buffer, '@');
	if ( modifier ) {
		*modifier = '\0';
	}

	return buffer;
}
167 
SDL_iconv_open(const char * tocode,const char * fromcode)168 SDL_iconv_t SDL_iconv_open(const char *tocode, const char *fromcode)
169 {
170 	int src_fmt = ENCODING_UNKNOWN;
171 	int dst_fmt = ENCODING_UNKNOWN;
172 	int i;
173 	char fromcode_buffer[64];
174 	char tocode_buffer[64];
175 
176 	if ( !fromcode || !*fromcode ) {
177 		fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer));
178 	}
179 	if ( !tocode || !*tocode ) {
180 		tocode = getlocale(tocode_buffer, sizeof(tocode_buffer));
181 	}
182 	for ( i = 0; i < SDL_arraysize(encodings); ++i ) {
183 		if ( SDL_strcasecmp(fromcode, encodings[i].name) == 0 ) {
184 			src_fmt = encodings[i].format;
185 			if ( dst_fmt != ENCODING_UNKNOWN ) {
186 				break;
187 			}
188 		}
189 		if ( SDL_strcasecmp(tocode, encodings[i].name) == 0 ) {
190 			dst_fmt = encodings[i].format;
191 			if ( src_fmt != ENCODING_UNKNOWN ) {
192 				break;
193 			}
194 		}
195 	}
196 	if ( src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN ) {
197 		SDL_iconv_t cd = (SDL_iconv_t)SDL_malloc(sizeof(*cd));
198 		if ( cd ) {
199 			cd->src_fmt = src_fmt;
200 			cd->dst_fmt = dst_fmt;
201 			return cd;
202 		}
203 	}
204 	return (SDL_iconv_t)-1;
205 }
206 
/*
 * Built-in character set converter.
 *
 * Each input character is decoded from cd->src_fmt into a 32-bit code point
 * and then encoded in cd->dst_fmt.  After every completely converted
 * character *inbuf, *inbytesleft, *outbuf and *outbytesleft are updated, so
 * on an error return they describe the state after the last character that
 * was fully converted.
 *
 * Returns:
 *   - 0 if inbuf or *inbuf is NULL (nothing to reset in this converter);
 *   - SDL_ICONV_E2BIG if there is no output buffer or it is too small for
 *     the next character (or for the byte order mark);
 *   - SDL_ICONV_EINVAL if the input ends in the middle of a character;
 *   - otherwise the number of characters converted.
 *
 * Malformed input and characters that do not fit the destination encoding
 * are not errors: they are replaced by UNKNOWN_UNICODE / UNKNOWN_ASCII.
 *
 * For a BOM-dependent source (UTF-16 / UTF-32) the input is scanned for a
 * byte order mark; the byte order it indicates (or host order if none is
 * found) is stored in cd->src_fmt.  For a BOM-dependent destination a byte
 * order mark is emitted in host order and cd->dst_fmt is switched to the
 * native variant.
 */
size_t SDL_iconv(SDL_iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft)
{
	/* For simplicity, we'll convert everything to and from UCS-4 */
	const char *src;
	char *dst;
	size_t srclen, dstlen;
	Uint32 ch = 0;
	size_t total;

	if ( !inbuf || !*inbuf ) {
		/* Reset the context */
		return 0;
	}
	if ( !outbuf || !*outbuf || !outbytesleft || !*outbytesleft ) {
		return SDL_ICONV_E2BIG;
	}
	src = *inbuf;
	srclen = (inbytesleft ? *inbytesleft : 0);
	dst = *outbuf;
	dstlen = *outbytesleft;

	switch ( cd->src_fmt ) {
	    case ENCODING_UTF16:
		/* Scan for a byte order marker */
		{
			Uint8 *p = (Uint8 *)src;
			size_t n = srclen / 2;
			while ( n ) {
				if ( p[0] == 0xFE && p[1] == 0xFF ) {
					/* U+FEFF with the high byte first */
					cd->src_fmt = ENCODING_UTF16BE;
					break;
				} else if ( p[0] == 0xFF && p[1] == 0xFE ) {
					/* U+FEFF with the low byte first */
					cd->src_fmt = ENCODING_UTF16LE;
					break;
				}
				p += 2;
				--n;
			}
			if ( n == 0 ) {
				/* We can't tell, default to host order */
				cd->src_fmt = ENCODING_UTF16NATIVE;
			}
		}
		break;
	    case ENCODING_UTF32:
		/* Scan for a byte order marker */
		{
			Uint8 *p = (Uint8 *)src;
			size_t n = srclen / 4;
			while ( n ) {
				if ( p[0] == 0x00 && p[1] == 0x00 &&
				     p[2] == 0xFE && p[3] == 0xFF ) {
					/* U+0000FEFF with the high byte first */
					cd->src_fmt = ENCODING_UTF32BE;
					break;
				} else if ( p[0] == 0xFF && p[1] == 0xFE &&
				            p[2] == 0x00 && p[3] == 0x00 ) {
					/* U+0000FEFF with the low byte first */
					cd->src_fmt = ENCODING_UTF32LE;
					break;
				}
				p += 4;
				--n;
			}
			if ( n == 0 ) {
				/* We can't tell, default to host order */
				cd->src_fmt = ENCODING_UTF32NATIVE;
			}
		}
		break;
	}

	switch ( cd->dst_fmt ) {
	    case ENCODING_UTF16:
		/* Default to host order, need to add byte order marker */
		if ( dstlen < 2 ) {
			return SDL_ICONV_E2BIG;
		}
		*(Uint16 *)dst = UNICODE_BOM;
		dst += 2;
		dstlen -= 2;
		cd->dst_fmt = ENCODING_UTF16NATIVE;
		/* The marker is only written once, so record it as output
		   right away; otherwise it is lost if we return early */
		*outbuf = dst;
		*outbytesleft = dstlen;
		break;
	    case ENCODING_UTF32:
		/* Default to host order, need to add byte order marker */
		if ( dstlen < 4 ) {
			return SDL_ICONV_E2BIG;
		}
		*(Uint32 *)dst = UNICODE_BOM;
		dst += 4;
		dstlen -= 4;
		cd->dst_fmt = ENCODING_UTF32NATIVE;
		/* See the UTF-16 case: commit the marker immediately */
		*outbuf = dst;
		*outbytesleft = dstlen;
		break;
	}

	total = 0;
	while ( srclen > 0 ) {
		/* Decode a character */
		switch ( cd->src_fmt ) {
		    case ENCODING_ASCII:
			{
				Uint8 *p = (Uint8 *)src;
				ch = (Uint32)(p[0] & 0x7F);
				++src;
				--srclen;
			}
			break;
		    case ENCODING_LATIN1:
			{
				Uint8 *p = (Uint8 *)src;
				ch = (Uint32)p[0];
				++src;
				--srclen;
			}
			break;
		    case ENCODING_UTF8: /* RFC 3629 */
			{
				/* Illegal sequences are replaced by
				   UNKNOWN_UNICODE instead of failing with
				   SDL_ICONV_EILSEQ.

				   A sequence is overlong when its value
				   would fit in a shorter form.  For the
				   lead bytes E0/F0/F8/FC that depends on
				   the second byte, so it is examined too
				   (srclen > 1 guarantees it is readable). */
				Uint8 *p = (Uint8 *)src;
				size_t left = 0;
				SDL_bool overlong = SDL_FALSE;
				if ( p[0] >= 0xFC ) {
					if ( (p[0] & 0xFE) != 0xFC ) {
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xFC && srclen > 1 &&
						     (p[1] & 0xFC) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x01);
						left = 5;
					}
				} else if ( p[0] >= 0xF8 ) {
					if ( (p[0] & 0xFC) != 0xF8 ) {
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xF8 && srclen > 1 &&
						     (p[1] & 0xF8) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x03);
						left = 4;
					}
				} else if ( p[0] >= 0xF0 ) {
					if ( (p[0] & 0xF8) != 0xF0 ) {
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xF0 && srclen > 1 &&
						     (p[1] & 0xF0) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x07);
						left = 3;
					}
				} else if ( p[0] >= 0xE0 ) {
					if ( (p[0] & 0xF0) != 0xE0 ) {
						ch = UNKNOWN_UNICODE;
					} else {
						if ( p[0] == 0xE0 && srclen > 1 &&
						     (p[1] & 0xE0) == 0x80 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x0F);
						left = 2;
					}
				} else if ( p[0] >= 0xC0 ) {
					if ( (p[0] & 0xE0) != 0xC0 ) {
						ch = UNKNOWN_UNICODE;
					} else {
						/* C0 and C1 can only start
						   overlong sequences */
						if ( (p[0] & 0xDE) == 0xC0 ) {
							overlong = SDL_TRUE;
						}
						ch = (Uint32)(p[0] & 0x1F);
						left = 1;
					}
				} else {
					if ( (p[0] & 0x80) != 0x00 ) {
						/* Stray continuation byte */
						ch = UNKNOWN_UNICODE;
					} else {
						ch = (Uint32)p[0];
					}
				}
				++src;
				--srclen;
				if ( srclen < left ) {
					return SDL_ICONV_EINVAL;
				}
				while ( left-- ) {
					++p;
					if ( (p[0] & 0xC0) != 0x80 ) {
						/* Not a continuation byte;
						   leave it for the next
						   character */
						ch = UNKNOWN_UNICODE;
						break;
					}
					ch <<= 6;
					ch |= (p[0] & 0x3F);
					++src;
					--srclen;
				}
				if ( overlong ) {
					/* Potential security risk */
					ch = UNKNOWN_UNICODE;
				}
				if ( (ch >= 0xD800 && ch <= 0xDFFF) ||
				     (ch == 0xFFFE || ch == 0xFFFF) ||
				     ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
			}
			break;
		    case ENCODING_UTF16BE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)src;
				Uint16 W1, W2;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				W1 = ((Uint16)p[0] << 8) |
				      (Uint16)p[1];
				src += 2;
				srclen -= 2;
				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
					ch = (Uint32)W1;
					break;
				}
				if ( W1 > 0xDBFF ) {
					/* Low surrogate without a high one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				p = (Uint8 *)src;
				W2 = ((Uint16)p[0] << 8) |
				      (Uint16)p[1];
				src += 2;
				srclen -= 2;
				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
					/* High surrogate not followed by a
					   low one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				ch = (((Uint32)(W1 & 0x3FF) << 10) |
				      (Uint32)(W2 & 0x3FF)) + 0x10000;
			}
			break;
		    case ENCODING_UTF16LE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)src;
				Uint16 W1, W2;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				W1 = ((Uint16)p[1] << 8) |
				      (Uint16)p[0];
				src += 2;
				srclen -= 2;
				if ( W1 < 0xD800 || W1 > 0xDFFF ) {
					ch = (Uint32)W1;
					break;
				}
				if ( W1 > 0xDBFF ) {
					/* Low surrogate without a high one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				p = (Uint8 *)src;
				W2 = ((Uint16)p[1] << 8) |
				      (Uint16)p[0];
				src += 2;
				srclen -= 2;
				if ( W2 < 0xDC00 || W2 > 0xDFFF ) {
					/* High surrogate not followed by a
					   low one */
					ch = UNKNOWN_UNICODE;
					break;
				}
				ch = (((Uint32)(W1 & 0x3FF) << 10) |
				      (Uint32)(W2 & 0x3FF)) + 0x10000;
			}
			break;
		    case ENCODING_UTF32BE:
			{
				Uint8 *p = (Uint8 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = ((Uint32)p[0] << 24) |
				     ((Uint32)p[1] << 16) |
				     ((Uint32)p[2] << 8) |
				      (Uint32)p[3];
				src += 4;
				srclen -= 4;
			}
			break;
		    case ENCODING_UTF32LE:
			{
				Uint8 *p = (Uint8 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = ((Uint32)p[3] << 24) |
				     ((Uint32)p[2] << 16) |
				     ((Uint32)p[1] << 8) |
				      (Uint32)p[0];
				src += 4;
				srclen -= 4;
			}
			break;
		    case ENCODING_UCS2:
			{
				Uint16 *p = (Uint16 *)src;
				if ( srclen < 2 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = *p;
				src += 2;
				srclen -= 2;
			}
			break;
		    case ENCODING_UCS4:
			{
				Uint32 *p = (Uint32 *)src;
				if ( srclen < 4 ) {
					return SDL_ICONV_EINVAL;
				}
				ch = *p;
				src += 4;
				srclen -= 4;
			}
			break;
		}

		/* Encode a character */
		switch ( cd->dst_fmt ) {
		    case ENCODING_ASCII:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( dstlen < 1 ) {
					return SDL_ICONV_E2BIG;
				}
				if ( ch > 0x7F ) {
					*p = UNKNOWN_ASCII;
				} else {
					*p = (Uint8)ch;
				}
				++dst;
				--dstlen;
			}
			break;
		    case ENCODING_LATIN1:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( dstlen < 1 ) {
					return SDL_ICONV_E2BIG;
				}
				if ( ch > 0xFF ) {
					*p = UNKNOWN_ASCII;
				} else {
					*p = (Uint8)ch;
				}
				++dst;
				--dstlen;
			}
			break;
		    case ENCODING_UTF8: /* RFC 3629 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( ch <= 0x7F ) {
					if ( dstlen < 1 ) {
						return SDL_ICONV_E2BIG;
					}
					*p = (Uint8)ch;
					++dst;
					--dstlen;
				} else if ( ch <= 0x7FF ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xC0 | (Uint8)((ch >> 6) & 0x1F);
					p[1] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 2;
					dstlen -= 2;
				} else if ( ch <= 0xFFFF ) {
					if ( dstlen < 3 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xE0 | (Uint8)((ch >> 12) & 0x0F);
					p[1] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[2] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 3;
					dstlen -= 3;
				} else {
					/* ch is at most 0x10FFFF here, so
					   four bytes always suffice */
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = 0xF0 | (Uint8)((ch >> 18) & 0x07);
					p[1] = 0x80 | (Uint8)((ch >> 12) & 0x3F);
					p[2] = 0x80 | (Uint8)((ch >> 6) & 0x3F);
					p[3] = 0x80 | (Uint8)(ch & 0x3F);
					dst += 4;
					dstlen -= 4;
				}
			}
			break;
		    case ENCODING_UTF16BE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( ch < 0x10000 ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[0] = (Uint8)(ch >> 8);
					p[1] = (Uint8)ch;
					dst += 2;
					dstlen -= 2;
				} else {
					/* Needs a surrogate pair */
					Uint16 W1, W2;
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					ch = ch - 0x10000;
					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
					p[0] = (Uint8)(W1 >> 8);
					p[1] = (Uint8)W1;
					p[2] = (Uint8)(W2 >> 8);
					p[3] = (Uint8)W2;
					dst += 4;
					dstlen -= 4;
				}
			}
			break;
		    case ENCODING_UTF16LE: /* RFC 2781 */
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( ch < 0x10000 ) {
					if ( dstlen < 2 ) {
						return SDL_ICONV_E2BIG;
					}
					p[1] = (Uint8)(ch >> 8);
					p[0] = (Uint8)ch;
					dst += 2;
					dstlen -= 2;
				} else {
					/* Needs a surrogate pair */
					Uint16 W1, W2;
					if ( dstlen < 4 ) {
						return SDL_ICONV_E2BIG;
					}
					ch = ch - 0x10000;
					W1 = 0xD800 | (Uint16)((ch >> 10) & 0x3FF);
					W2 = 0xDC00 | (Uint16)(ch & 0x3FF);
					p[1] = (Uint8)(W1 >> 8);
					p[0] = (Uint8)W1;
					p[3] = (Uint8)(W2 >> 8);
					p[2] = (Uint8)W2;
					dst += 4;
					dstlen -= 4;
				}
			}
			break;
		    case ENCODING_UTF32BE:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				p[0] = (Uint8)(ch >> 24);
				p[1] = (Uint8)(ch >> 16);
				p[2] = (Uint8)(ch >> 8);
				p[3] = (Uint8)ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		    case ENCODING_UTF32LE:
			{
				Uint8 *p = (Uint8 *)dst;
				if ( ch > 0x10FFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				p[3] = (Uint8)(ch >> 24);
				p[2] = (Uint8)(ch >> 16);
				p[1] = (Uint8)(ch >> 8);
				p[0] = (Uint8)ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		    case ENCODING_UCS2:
			{
				Uint16 *p = (Uint16 *)dst;
				if ( ch > 0xFFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 2 ) {
					return SDL_ICONV_E2BIG;
				}
				*p = (Uint16)ch;
				dst += 2;
				dstlen -= 2;
			}
			break;
		    case ENCODING_UCS4:
			{
				Uint32 *p = (Uint32 *)dst;
				if ( ch > 0x7FFFFFFF ) {
					ch = UNKNOWN_UNICODE;
				}
				if ( dstlen < 4 ) {
					return SDL_ICONV_E2BIG;
				}
				*p = ch;
				dst += 4;
				dstlen -= 4;
			}
			break;
		}

		/* Update state */
		*inbuf = src;
		*inbytesleft = srclen;
		*outbuf = dst;
		*outbytesleft = dstlen;
		++total;
	}
	return total;
}
804 
/*
 * Release a conversion descriptor.
 *
 * A NULL descriptor and the (SDL_iconv_t)-1 value that SDL_iconv_open()
 * returns on failure are accepted and ignored; anything else is passed to
 * SDL_free().  Always returns 0.
 */
int SDL_iconv_close(SDL_iconv_t cd)
{
	if ( !cd || cd == (SDL_iconv_t)-1 ) {
		/* Nothing was allocated for these */
		return 0;
	}
	SDL_free(cd);
	return 0;
}
812 
813 #endif /* !HAVE_ICONV */
814 
SDL_iconv_string(const char * tocode,const char * fromcode,const char * inbuf,size_t inbytesleft)815 char *SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, size_t inbytesleft)
816 {
817 	SDL_iconv_t cd;
818 	char *string;
819 	size_t stringsize;
820 	char *outbuf;
821 	size_t outbytesleft;
822 	size_t retCode = 0;
823 
824 	cd = SDL_iconv_open(tocode, fromcode);
825 	if ( cd == (SDL_iconv_t)-1 ) {
826 		/* See if we can recover here (fixes iconv on Solaris 11) */
827 		if ( !tocode || !*tocode ) {
828 			tocode = "UTF-8";
829 		}
830 		if ( !fromcode || !*fromcode ) {
831 			fromcode = "UTF-8";
832 		}
833 		cd = SDL_iconv_open(tocode, fromcode);
834 	}
835 	if ( cd == (SDL_iconv_t)-1 ) {
836 		return NULL;
837 	}
838 
839 	stringsize = inbytesleft > 4 ? inbytesleft : 4;
840 	string = SDL_malloc(stringsize);
841 	if ( !string ) {
842 		SDL_iconv_close(cd);
843 		return NULL;
844 	}
845 	outbuf = string;
846 	outbytesleft = stringsize;
847 	SDL_memset(outbuf, 0, 4);
848 
849 	while ( inbytesleft > 0 ) {
850 		retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
851 		switch (retCode) {
852 		    case SDL_ICONV_E2BIG:
853 			{
854 				char *oldstring = string;
855 				stringsize *= 2;
856 				string = SDL_realloc(string, stringsize);
857 				if ( !string ) {
858 					SDL_iconv_close(cd);
859 					return NULL;
860 				}
861 				outbuf = string + (outbuf - oldstring);
862 				outbytesleft = stringsize - (outbuf - string);
863 				SDL_memset(outbuf, 0, 4);
864 			}
865 			break;
866 		    case SDL_ICONV_EILSEQ:
867 			/* Try skipping some input data - not perfect, but... */
868 			++inbuf;
869 			--inbytesleft;
870 			break;
871 		    case SDL_ICONV_EINVAL:
872 		    case SDL_ICONV_ERROR:
873 			/* We can't continue... */
874 			inbytesleft = 0;
875 			break;
876 		}
877 	}
878 	SDL_iconv_close(cd);
879 
880 	return string;
881 }
882