1 /*
2 * Internationalization test for CUPS.
3 *
4 * Copyright © 2020-2024 by OpenPrinting.
5 * Copyright 2007-2014 by Apple Inc.
6 * Copyright 1997-2006 by Easy Software Products.
7 *
8 * Licensed under Apache License v2.0. See the file "LICENSE" for more information.
9 */
10
11 /*
12 * Include necessary headers...
13 */
14
15 #include "string-private.h"
16 #include "language-private.h"
17 #include <stdlib.h>
18 #include <time.h>
19 #include <unistd.h>
20
21
22 /*
23 * Local globals...
24 */
25
/*
 * Character set names.  The position of a name in this table is the value
 * that main() casts to cups_encoding_t, so the order and the "unknown"
 * padding entries must not be changed.  The leading comment on each row is
 * the index of the first entry on that row.
 */

static const char * const lang_encodings[] =
{
  /*   0 */ "us-ascii",     "iso-8859-1",   "iso-8859-2",   "iso-8859-3",
  /*   4 */ "iso-8859-4",   "iso-8859-5",   "iso-8859-6",   "iso-8859-7",
  /*   8 */ "iso-8859-8",   "iso-8859-9",   "iso-8859-10",  "utf-8",
  /*  12 */ "iso-8859-13",  "iso-8859-14",  "iso-8859-15",  "windows-874",
  /*  16 */ "windows-1250", "windows-1251", "windows-1252", "windows-1253",
  /*  20 */ "windows-1254", "windows-1255", "windows-1256", "windows-1257",
  /*  24 */ "windows-1258", "koi8-r",       "koi8-u",       "iso-8859-11",
  /*  28 */ "iso-8859-16",  "mac-roman",

  /* Indices 30 through 63 (34 entries) have no name */
  /*  30 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  36 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  42 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  48 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  54 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  60 */ "unknown", "unknown", "unknown", "unknown",

  /*  64 */ "windows-932",  "windows-936",  "windows-949",  "windows-950",
  /*  68 */ "windows-1361",

  /* Indices 69 through 127 (59 entries) have no name */
  /*  69 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  75 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  81 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  87 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  93 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /*  99 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /* 105 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /* 111 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /* 117 */ "unknown", "unknown", "unknown", "unknown", "unknown", "unknown",
  /* 123 */ "unknown", "unknown", "unknown", "unknown", "unknown",

  /* 128 */ "euc-cn",       "euc-jp",       "euc-kr",       "euc-tw",
  /* 132 */ "jis-x0213"
};
96
97
98 /*
99 * Local functions...
100 */
101
102 static void print_utf8(const char *msg, const cups_utf8_t *src);
103
104
105 /*
106 * 'main()' - Main entry for internationalization test module.
107 */
108
109 int /* O - Exit code */
main(int argc,char * argv[])110 main(int argc, /* I - Argument Count */
111 char *argv[]) /* I - Arguments */
112 {
113 FILE *fp; /* File pointer */
114 int count; /* File line counter */
115 int status, /* Status of current test */
116 errors; /* Error count */
117 char line[1024]; /* File line source string */
118 int len; /* Length (count) of string */
119 char legsrc[1024], /* Legacy source string */
120 legdest[1024], /* Legacy destination string */
121 *legptr; /* Pointer into legacy string */
122 cups_utf8_t utf8latin[] = /* UTF-8 Latin-1 source */
123 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
124 /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
125 cups_utf8_t utf8repla[] = /* UTF-8 Latin-1 replacement */
126 { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
127 /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
128 cups_utf8_t utf8greek[] = /* UTF-8 Greek source string */
129 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
130 /* "A != <ALPHA>." - use ISO 8859-7 */
131 cups_utf8_t utf8japan[] = /* UTF-8 Japanese source */
132 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
133 /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
134 cups_utf8_t utf8taiwan[] = /* UTF-8 Chinese source */
135 { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
136 /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
137 cups_utf8_t utf8dest[1024]; /* UTF-8 destination string */
138 cups_utf32_t utf32dest[1024]; /* UTF-32 destination string */
139
140
141 if (argc > 1)
142 {
143 int i; /* Looping var */
144 cups_encoding_t encoding; /* Source encoding */
145
146
147 if (argc != 3)
148 {
149 puts("Usage: ./testi18n [filename charset]");
150 return (1);
151 }
152
153 if ((fp = fopen(argv[1], "rb")) == NULL)
154 {
155 perror(argv[1]);
156 return (1);
157 }
158
159 for (i = 0, encoding = CUPS_AUTO_ENCODING;
160 i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
161 i ++)
162 if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
163 {
164 encoding = (cups_encoding_t)i;
165 break;
166 }
167
168 if (encoding == CUPS_AUTO_ENCODING)
169 {
170 fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
171 fclose(fp);
172 return (1);
173 }
174
175 while (fgets(line, sizeof(line), fp))
176 {
177 if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
178 {
179 fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
180 fclose(fp);
181 return (1);
182 }
183
184 fputs((char *)utf8dest, stdout);
185 }
186
187 fclose(fp);
188 return (0);
189 }
190
191 /*
192 * Start with some conversion tests from a UTF-8 test file.
193 */
194
195 errors = 0;
196
197 if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
198 {
199 perror("utf8demo.txt");
200 return (1);
201 }
202
203 /*
204 * cupsUTF8ToUTF32
205 */
206
207 fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
208
209 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
210 {
211 count ++;
212
213 if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
214 {
215 printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
216 errors ++;
217 status = 1;
218 break;
219 }
220 }
221
222 if (!status)
223 puts("PASS");
224
225 /*
226 * cupsUTF8ToCharset(CUPS_EUC_JP)
227 */
228
229 fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
230
231 rewind(fp);
232
233 for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
234 {
235 count ++;
236
237 len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
238 if (len < 0)
239 {
240 printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
241 errors ++;
242 status = 1;
243 break;
244 }
245 }
246
247 if (!status)
248 puts("PASS");
249
250 fclose(fp);
251
252 /*
253 * Test UTF-8 to legacy charset (ISO 8859-1)...
254 */
255
256 fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
257
258 legdest[0] = 0;
259
260 len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
261 if (len < 0)
262 {
263 printf("FAIL (len=%d)\n", len);
264 errors ++;
265 }
266 else
267 puts("PASS");
268
269 /*
270 * cupsCharsetToUTF8
271 */
272
273 fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
274
275 strlcpy(legsrc, legdest, sizeof(legsrc));
276
277 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
278 if ((size_t)len != strlen((char *)utf8latin))
279 {
280 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
281 print_utf8(" utf8latin", utf8latin);
282 print_utf8(" utf8dest", utf8dest);
283 errors ++;
284 }
285 else if (memcmp(utf8latin, utf8dest, (size_t)len))
286 {
287 puts("FAIL (results do not match)");
288 print_utf8(" utf8latin", utf8latin);
289 print_utf8(" utf8dest", utf8dest);
290 errors ++;
291 }
292 else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
293 {
294 puts("FAIL (replacement characters do not work!)");
295 errors ++;
296 }
297 else
298 puts("PASS");
299
300 /*
301 * Test UTF-8 to/from legacy charset (ISO 8859-7)...
302 */
303
304 fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
305
306 if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
307 {
308 puts("FAIL");
309 errors ++;
310 }
311 else
312 {
313 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
314
315 if (*legptr)
316 {
317 puts("FAIL (unknown character)");
318 errors ++;
319 }
320 else
321 puts("PASS");
322 }
323
324 fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
325
326 strlcpy(legsrc, legdest, sizeof(legsrc));
327
328 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
329 if ((size_t)len != strlen((char *)utf8greek))
330 {
331 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
332 print_utf8(" utf8greek", utf8greek);
333 print_utf8(" utf8dest", utf8dest);
334 errors ++;
335 }
336 else if (memcmp(utf8greek, utf8dest, (size_t)len))
337 {
338 puts("FAIL (results do not match)");
339 print_utf8(" utf8greek", utf8greek);
340 print_utf8(" utf8dest", utf8dest);
341 errors ++;
342 }
343 else
344 puts("PASS");
345
346 /*
347 * Test UTF-8 to/from legacy charset (Windows 932)...
348 */
349
350 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
351
352 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
353 {
354 puts("FAIL");
355 errors ++;
356 }
357 else
358 {
359 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
360
361 if (*legptr)
362 {
363 puts("FAIL (unknown character)");
364 errors ++;
365 }
366 else
367 puts("PASS");
368 }
369
370 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
371
372 strlcpy(legsrc, legdest, sizeof(legsrc));
373
374 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
375 if ((size_t)len != strlen((char *)utf8japan))
376 {
377 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
378 print_utf8(" utf8japan", utf8japan);
379 print_utf8(" utf8dest", utf8dest);
380 errors ++;
381 }
382 else if (memcmp(utf8japan, utf8dest, (size_t)len))
383 {
384 puts("FAIL (results do not match)");
385 print_utf8(" utf8japan", utf8japan);
386 print_utf8(" utf8dest", utf8dest);
387 errors ++;
388 }
389 else
390 puts("PASS");
391
392 /*
393 * Test UTF-8 to/from legacy charset (EUC-JP)...
394 */
395
396 fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
397
398 if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
399 {
400 puts("FAIL");
401 errors ++;
402 }
403 else
404 {
405 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
406
407 if (*legptr)
408 {
409 puts("FAIL (unknown character)");
410 errors ++;
411 }
412 else
413 puts("PASS");
414 }
415
416 #if 0 // Appears to be broken on all OS's
417 fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
418
419 strlcpy(legsrc, legdest, sizeof(legsrc));
420
421 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
422 if ((size_t)len != strlen((char *)utf8japan))
423 {
424 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
425 print_utf8(" utf8japan", utf8japan);
426 print_utf8(" utf8dest", utf8dest);
427 errors ++;
428 }
429 else if (memcmp(utf8japan, utf8dest, (size_t)len))
430 {
431 puts("FAIL (results do not match)");
432 print_utf8(" utf8japan", utf8japan);
433 print_utf8(" utf8dest", utf8dest);
434 errors ++;
435 }
436 else
437 puts("PASS");
438 #endif /* 0 */
439
440 /*
441 * Test UTF-8 to/from legacy charset (Windows 950)...
442 */
443
444 fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
445
446 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
447 {
448 puts("FAIL");
449 errors ++;
450 }
451 else
452 {
453 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
454
455 if (*legptr)
456 {
457 puts("FAIL (unknown character)");
458 errors ++;
459 }
460 else
461 puts("PASS");
462 }
463
464 fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
465
466 strlcpy(legsrc, legdest, sizeof(legsrc));
467
468 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
469 if ((size_t)len != strlen((char *)utf8taiwan))
470 {
471 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
472 print_utf8(" utf8taiwan", utf8taiwan);
473 print_utf8(" utf8dest", utf8dest);
474 errors ++;
475 }
476 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
477 {
478 puts("FAIL (results do not match)");
479 print_utf8(" utf8taiwan", utf8taiwan);
480 print_utf8(" utf8dest", utf8dest);
481 errors ++;
482 }
483 else
484 puts("PASS");
485
486 /*
487 * Test UTF-8 to/from legacy charset (EUC-TW)...
488 */
489
490 fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
491
492 if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
493 {
494 puts("FAIL");
495 errors ++;
496 }
497 else
498 {
499 for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
500
501 if (*legptr)
502 {
503 puts("FAIL (unknown character)");
504 errors ++;
505 }
506 else
507 puts("PASS");
508 }
509
510 fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
511
512 strlcpy(legsrc, legdest, sizeof(legsrc));
513
514 len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
515 if ((size_t)len != strlen((char *)utf8taiwan))
516 {
517 printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
518 print_utf8(" utf8taiwan", utf8taiwan);
519 print_utf8(" utf8dest", utf8dest);
520 errors ++;
521 }
522 else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
523 {
524 puts("FAIL (results do not match)");
525 print_utf8(" utf8taiwan", utf8taiwan);
526 print_utf8(" utf8dest", utf8dest);
527 errors ++;
528 }
529 else
530 puts("PASS");
531
532 #if 0
533 /*
534 * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
535 */
536 if (verbose)
537 printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
538 len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
539 if (len < 0)
540 return (1);
541 if (verbose)
542 {
543 print_utf8(" utf8good ", utf8good);
544 print_utf32(" utf32dest", utf32dest);
545 }
546 memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
547 len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
548 if (len < 0)
549 return (1);
550 if (len != strlen ((char *) utf8good))
551 return (1);
552 if (memcmp(utf8good, utf8dest, len) != 0)
553 return (1);
554
555 /*
556 * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
557 */
558 if (verbose)
559 printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
560 len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
561 if (len >= 0)
562 return (1);
563 if (verbose)
564 print_utf8(" utf8bad ", utf8bad);
565
566 /*
567 * Test _cupsCharmapFlush()...
568 */
569 if (verbose)
570 printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
571 _cupsCharmapFlush();
572 return (0);
573 #endif /* 0 */
574
575 return (errors > 0);
576 }
577
578
579 /*
580 * 'print_utf8()' - Print UTF-8 string with (optional) message.
581 */
582
583 static void
print_utf8(const char * msg,const cups_utf8_t * src)584 print_utf8(const char *msg, /* I - Message String */
585 const cups_utf8_t *src) /* I - UTF-8 Source String */
586 {
587 const char *prefix; /* Prefix string */
588
589
590 if (msg)
591 printf("%s:", msg);
592
593 for (prefix = " "; *src; src ++)
594 {
595 printf("%s%02x", prefix, *src);
596
597 if ((src[0] & 0x80) && (src[1] & 0x80))
598 prefix = "";
599 else
600 prefix = " ";
601 }
602
603 putchar('\n');
604 }
605