1 /*
2 * Internationalization test for CUPS.
3 *
4 * Copyright 2007-2014 by Apple Inc.
5 * Copyright 1997-2006 by Easy Software Products.
6 *
7 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
8 */
9
10 /*
11 * Include necessary headers...
12 */
13
14 #include "string-private.h"
15 #include "language-private.h"
16 #include <stdlib.h>
17 #include <time.h>
18 #include <unistd.h>
19
20
21 /*
22 * Local globals...
23 */
24
/*
 * Character set names, ordered so that the array index of a name can be
 * cast directly to a cups_encoding_t value (see main()).  Slots without a
 * name are padded with "unknown" so the named entries keep their positions;
 * do not insert, remove, or reorder entries.
 */

static const char * const lang_encodings[] =
{
  /* 0 - 29 */
  "us-ascii",     "iso-8859-1",   "iso-8859-2",         /*   0 -   2 */
  "iso-8859-3",   "iso-8859-4",   "iso-8859-5",         /*   3 -   5 */
  "iso-8859-6",   "iso-8859-7",   "iso-8859-8",         /*   6 -   8 */
  "iso-8859-9",   "iso-8859-10",  "utf-8",              /*   9 -  11 */
  "iso-8859-13",  "iso-8859-14",  "iso-8859-15",        /*  12 -  14 */
  "windows-874",  "windows-1250", "windows-1251",       /*  15 -  17 */
  "windows-1252", "windows-1253", "windows-1254",       /*  18 -  20 */
  "windows-1255", "windows-1256", "windows-1257",       /*  21 -  23 */
  "windows-1258", "koi8-r",       "koi8-u",             /*  24 -  26 */
  "iso-8859-11",  "iso-8859-16",  "mac-roman",          /*  27 -  29 */

  /* 30 - 63: padding (34 entries) */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  30 -  37 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  38 -  45 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  46 -  53 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  54 -  61 */
  "unknown", "unknown",                                 /*  62 -  63 */

  /* 64 - 68 */
  "windows-932",                                        /*  64 */
  "windows-936",                                        /*  65 */
  "windows-949",                                        /*  66 */
  "windows-950",                                        /*  67 */
  "windows-1361",                                       /*  68 */

  /* 69 - 127: padding (59 entries) */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  69 -  76 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  77 -  84 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  85 -  92 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /*  93 - 100 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /* 101 - 108 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /* 109 - 116 */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",           /* 117 - 124 */
  "unknown", "unknown", "unknown",                      /* 125 - 127 */

  /* 128 - 132 */
  "euc-cn",                                             /* 128 */
  "euc-jp",                                             /* 129 */
  "euc-kr",                                             /* 130 */
  "euc-tw",                                             /* 131 */
  "jis-x0213"                                           /* 132 */
};
95
96
97 /*
98 * Local functions...
99 */
100
101 static void print_utf8(const char *msg, const cups_utf8_t *src);
102
103
104 /*
105 * 'main()' - Main entry for internationalization test module.
106 */
107
108 int /* O - Exit code */
main(int argc,char * argv[])109 main(int argc, /* I - Argument Count */
110 char *argv[]) /* I - Arguments */
111 {
112 FILE *fp; /* File pointer */
113 int count; /* File line counter */
114 int status, /* Status of current test */
115 errors; /* Error count */
116 char line[1024]; /* File line source string */
117 int len; /* Length (count) of string */
118 char legsrc[1024], /* Legacy source string */
119 legdest[1024], /* Legacy destination string */
120 *legptr; /* Pointer into legacy string */
121 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */
122 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
123 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
124 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */
125 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
126 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
127 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */
128 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
129 /* "A != <ALPHA>." - use ISO 8859-7 */
130 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */
131 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
132 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
133 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */
134 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
135 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
136 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */
137 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */
138
139
140 if (argc > 1)
141 {
142 int i; /* Looping var */
143 cups_encoding_t encoding; /* Source encoding */
144
145
146 if (argc != 3)
147 {
148 puts("Usage: ./testi18n [filename charset]");
149 return (1);
150 }
151
152 if ((fp = fopen(argv[1], "rb")) == NULL)
153 {
154 perror(argv[1]);
155 return (1);
156 }
157
158 for (i = 0, encoding = CUPS_AUTO_ENCODING;
159 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
160 i ++)
161 if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
162 {
163 encoding = (cups_encoding_t)i;
164 break;
165 }
166
167 if (encoding == CUPS_AUTO_ENCODING)
168 {
169 fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
170 return (1);
171 }
172
173 while (fgets(line, sizeof(line), fp))
174 {
175 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
176 {
177 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
178 return (1);
179 }
180
181 fputs((char *)utf8dest, stdout);
182 }
183
184 fclose(fp);
185 return (0);
186 }
187
188 /*
189 * Start with some conversion tests from a UTF-8 test file.
190 */
191
192 errors = 0;
193
194 if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
195 {
196 perror("utf8demo.txt");
197 return (1);
198 }
199
200 /*
201 * cupsUTF8ToUTF32
202 */
203
204 fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
205
206 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
207 {
208 count ++;
209
210 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
211 {
212 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
213 errors ++;
214 status = 1;
215 break;
216 }
217 }
218
219 if (!status)
220 puts("PASS");
221
222 /*
223 * cupsUTF8ToCharset(CUPS_EUC_JP)
224 */
225
226 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
227
228 rewind(fp);
229
230 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
231 {
232 count ++;
233
234 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
235 if (len < 0)
236 {
237 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
238 errors ++;
239 status = 1;
240 break;
241 }
242 }
243
244 if (!status)
245 puts("PASS");
246
247 fclose(fp);
248
249 /*
250 * Test UTF-8 to legacy charset (ISO 8859-1)...
251 */
252
253 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
254
255 legdest[0] = 0;
256
257 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
258 if (len < 0)
259 {
260 printf("FAIL (len=%d)\n", len);
261 errors ++;
262 }
263 else
264 puts("PASS");
265
266 /*
267 * cupsCharsetToUTF8
268 */
269
270 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
271
272 strlcpy(legsrc, legdest, sizeof(legsrc));
273
274 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
275 if ((size_t)len != strlen((char *)utf8latin))
276 {
277 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
278 print_utf8(" utf8latin", utf8latin);
279 print_utf8(" utf8dest", utf8dest);
280 errors ++;
281 }
282 else if (memcmp(utf8latin, utf8dest, (size_t)len))
283 {
284 puts("FAIL (results do not match)");
285 print_utf8(" utf8latin", utf8latin);
286 print_utf8(" utf8dest", utf8dest);
287 errors ++;
288 }
289 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
290 {
291 puts("FAIL (replacement characters do not work!)");
292 errors ++;
293 }
294 else
295 puts("PASS");
296
297 /*
298 * Test UTF-8 to/from legacy charset (ISO 8859-7)...
299 */
300
301 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
302
303 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
304 {
305 puts("FAIL");
306 errors ++;
307 }
308 else
309 {
310 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
311
312 if (*legptr)
313 {
314 puts("FAIL (unknown character)");
315 errors ++;
316 }
317 else
318 puts("PASS");
319 }
320
321 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
322
323 strlcpy(legsrc, legdest, sizeof(legsrc));
324
325 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
326 if ((size_t)len != strlen((char *)utf8greek))
327 {
328 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
329 print_utf8(" utf8greek", utf8greek);
330 print_utf8(" utf8dest", utf8dest);
331 errors ++;
332 }
333 else if (memcmp(utf8greek, utf8dest, (size_t)len))
334 {
335 puts("FAIL (results do not match)");
336 print_utf8(" utf8greek", utf8greek);
337 print_utf8(" utf8dest", utf8dest);
338 errors ++;
339 }
340 else
341 puts("PASS");
342
343 /*
344 * Test UTF-8 to/from legacy charset (Windows 932)...
345 */
346
347 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
348
349 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
350 {
351 puts("FAIL");
352 errors ++;
353 }
354 else
355 {
356 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
357
358 if (*legptr)
359 {
360 puts("FAIL (unknown character)");
361 errors ++;
362 }
363 else
364 puts("PASS");
365 }
366
367 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
368
369 strlcpy(legsrc, legdest, sizeof(legsrc));
370
371 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
372 if ((size_t)len != strlen((char *)utf8japan))
373 {
374 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
375 print_utf8(" utf8japan", utf8japan);
376 print_utf8(" utf8dest", utf8dest);
377 errors ++;
378 }
379 else if (memcmp(utf8japan, utf8dest, (size_t)len))
380 {
381 puts("FAIL (results do not match)");
382 print_utf8(" utf8japan", utf8japan);
383 print_utf8(" utf8dest", utf8dest);
384 errors ++;
385 }
386 else
387 puts("PASS");
388
389 /*
390 * Test UTF-8 to/from legacy charset (EUC-JP)...
391 */
392
393 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
394
395 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
396 {
397 puts("FAIL");
398 errors ++;
399 }
400 else
401 {
402 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
403
404 if (*legptr)
405 {
406 puts("FAIL (unknown character)");
407 errors ++;
408 }
409 else
410 puts("PASS");
411 }
412
413 #ifndef __linux
414 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
415
416 strlcpy(legsrc, legdest, sizeof(legsrc));
417
418 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
419 if ((size_t)len != strlen((char *)utf8japan))
420 {
421 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
422 print_utf8(" utf8japan", utf8japan);
423 print_utf8(" utf8dest", utf8dest);
424 errors ++;
425 }
426 else if (memcmp(utf8japan, utf8dest, (size_t)len))
427 {
428 puts("FAIL (results do not match)");
429 print_utf8(" utf8japan", utf8japan);
430 print_utf8(" utf8dest", utf8dest);
431 errors ++;
432 }
433 else
434 puts("PASS");
435 #endif /* !__linux */
436
437 /*
438 * Test UTF-8 to/from legacy charset (Windows 950)...
439 */
440
441 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
442
443 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
444 {
445 puts("FAIL");
446 errors ++;
447 }
448 else
449 {
450 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
451
452 if (*legptr)
453 {
454 puts("FAIL (unknown character)");
455 errors ++;
456 }
457 else
458 puts("PASS");
459 }
460
461 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
462
463 strlcpy(legsrc, legdest, sizeof(legsrc));
464
465 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
466 if ((size_t)len != strlen((char *)utf8taiwan))
467 {
468 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
469 print_utf8(" utf8taiwan", utf8taiwan);
470 print_utf8(" utf8dest", utf8dest);
471 errors ++;
472 }
473 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
474 {
475 puts("FAIL (results do not match)");
476 print_utf8(" utf8taiwan", utf8taiwan);
477 print_utf8(" utf8dest", utf8dest);
478 errors ++;
479 }
480 else
481 puts("PASS");
482
483 /*
484 * Test UTF-8 to/from legacy charset (EUC-TW)...
485 */
486
487 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
488
489 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
490 {
491 puts("FAIL");
492 errors ++;
493 }
494 else
495 {
496 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
497
498 if (*legptr)
499 {
500 puts("FAIL (unknown character)");
501 errors ++;
502 }
503 else
504 puts("PASS");
505 }
506
507 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
508
509 strlcpy(legsrc, legdest, sizeof(legsrc));
510
511 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
512 if ((size_t)len != strlen((char *)utf8taiwan))
513 {
514 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
515 print_utf8(" utf8taiwan", utf8taiwan);
516 print_utf8(" utf8dest", utf8dest);
517 errors ++;
518 }
519 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
520 {
521 puts("FAIL (results do not match)");
522 print_utf8(" utf8taiwan", utf8taiwan);
523 print_utf8(" utf8dest", utf8dest);
524 errors ++;
525 }
526 else
527 puts("PASS");
528
529 #if 0
530 /*
531 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
532 */
533 if (verbose)
534 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
535 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
536 if (len < 0)
537 return (1);
538 if (verbose)
539 {
540 print_utf8(" utf8good ", utf8good);
541 print_utf32(" utf32dest", utf32dest);
542 }
543 memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
544 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
545 if (len < 0)
546 return (1);
547 if (len != strlen ((char *) utf8good))
548 return (1);
549 if (memcmp(utf8good, utf8dest, len) != 0)
550 return (1);
551
552 /*
553 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
554 */
555 if (verbose)
556 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
557 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
558 if (len >= 0)
559 return (1);
560 if (verbose)
561 print_utf8(" utf8bad ", utf8bad);
562
563 /*
564 * Test _cupsCharmapFlush()...
565 */
566 if (verbose)
567 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
568 _cupsCharmapFlush();
569 return (0);
570 #endif /* 0 */
571
572 return (errors > 0);
573 }
574
575
576 /*
577 * 'print_utf8()' - Print UTF-8 string with (optional) message.
578 */
579
580 static void
print_utf8(const char * msg,const cups_utf8_t * src)581 print_utf8(const char *msg, /* I - Message String */
582 const cups_utf8_t *src) /* I - UTF-8 Source String */
583 {
584 const char *prefix; /* Prefix string */
585
586
587 if (msg)
588 printf("%s:", msg);
589
590 for (prefix = " "; *src; src ++)
591 {
592 printf("%s%02x", prefix, *src);
593
594 if ((src[0] & 0x80) && (src[1] & 0x80))
595 prefix = "";
596 else
597 prefix = " ";
598 }
599
600 putchar('\n');
601 }
602