/*
 * Internationalization test for CUPS.
 *
 * Copyright © 2020-2024 by OpenPrinting.
 * Copyright 2007-2014 by Apple Inc.
 * Copyright 1997-2006 by Easy Software Products.
 *
 * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
 */

/*
 * Include necessary headers...
 */

#include "string-private.h"
#include "language-private.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>


/*
 * Local globals...
 */

/*
 * Charset names accepted on the command line.
 *
 * main() looks up the requested name in this table and casts the index of the
 * matching entry to cups_encoding_t, so the position of every name is
 * significant.  The "unknown" entries are placeholders whose only job is to
 * keep the named entries at their positions; do not remove or reorder them.
 */

static const char * const lang_encodings[] =
{
  /* Entries 0-29 */
  "us-ascii",       "iso-8859-1",
  "iso-8859-2",     "iso-8859-3",
  "iso-8859-4",     "iso-8859-5",
  "iso-8859-6",     "iso-8859-7",
  "iso-8859-8",     "iso-8859-9",
  "iso-8859-10",    "utf-8",
  "iso-8859-13",    "iso-8859-14",
  "iso-8859-15",    "windows-874",
  "windows-1250",   "windows-1251",
  "windows-1252",   "windows-1253",
  "windows-1254",   "windows-1255",
  "windows-1256",   "windows-1257",
  "windows-1258",   "koi8-r",
  "koi8-u",         "iso-8859-11",
  "iso-8859-16",    "mac-roman",

  /* Entries 30-63: placeholders */
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",

  /* Entries 64-68, followed by placeholders 69-127 */
  "windows-932",    "windows-936",
  "windows-949",    "windows-950",
  "windows-1361",   "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",
  "unknown",        "unknown",

  /* Entries 128-132 */
  "euc-cn",         "euc-jp",
  "euc-kr",         "euc-tw",
  "jis-x0213"
};


98 /*
99  * Local functions...
100  */
101 
102 static void	print_utf8(const char *msg, const cups_utf8_t *src);
103 
104 
105 /*
106  * 'main()' - Main entry for internationalization test module.
107  */
108 
109 int					/* O - Exit code */
main(int argc,char * argv[])110 main(int  argc,				/* I - Argument Count */
111      char *argv[])			/* I - Arguments */
112 {
113   FILE		*fp;			/* File pointer */
114   int		count;			/* File line counter */
115   int		status,			/* Status of current test */
116 		errors;			/* Error count */
117   char		line[1024];		/* File line source string */
118   int		len;			/* Length (count) of string */
119   char		legsrc[1024],		/* Legacy source string */
120 		legdest[1024],		/* Legacy destination string */
121 		*legptr;		/* Pointer into legacy string */
122   cups_utf8_t	utf8latin[] =		/* UTF-8 Latin-1 source */
123     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
124     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
125   cups_utf8_t	utf8repla[] =		/* UTF-8 Latin-1 replacement */
126     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
127     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
128   cups_utf8_t	utf8greek[] =		/* UTF-8 Greek source string */
129     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
130     /* "A != <ALPHA>." - use ISO 8859-7 */
131   cups_utf8_t	utf8japan[] =		/* UTF-8 Japanese source */
132     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
133     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
134   cups_utf8_t	utf8taiwan[] =		/* UTF-8 Chinese source */
135     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
136     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
137   cups_utf8_t	utf8dest[1024];		/* UTF-8 destination string */
138   cups_utf32_t	utf32dest[1024];	/* UTF-32 destination string */
139 
140 
141   if (argc > 1)
142   {
143     int			i;		/* Looping var */
144     cups_encoding_t	encoding;	/* Source encoding */
145 
146 
147     if (argc != 3)
148     {
149       puts("Usage: ./testi18n [filename charset]");
150       return (1);
151     }
152 
153     if ((fp = fopen(argv[1], "rb")) == NULL)
154     {
155       perror(argv[1]);
156       return (1);
157     }
158 
159     for (i = 0, encoding = CUPS_AUTO_ENCODING;
160          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
161 	 i ++)
162       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
163       {
164         encoding = (cups_encoding_t)i;
165 	break;
166       }
167 
168     if (encoding == CUPS_AUTO_ENCODING)
169     {
170       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
171       fclose(fp);
172       return (1);
173     }
174 
175     while (fgets(line, sizeof(line), fp))
176     {
177       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
178       {
179         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
180         fclose(fp);
181 	return (1);
182       }
183 
184       fputs((char *)utf8dest, stdout);
185     }
186 
187     fclose(fp);
188     return (0);
189   }
190 
191  /*
192   * Start with some conversion tests from a UTF-8 test file.
193   */
194 
195   errors = 0;
196 
197   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
198   {
199     perror("utf8demo.txt");
200     return (1);
201   }
202 
203  /*
204   * cupsUTF8ToUTF32
205   */
206 
207   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
208 
209   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
210   {
211     count ++;
212 
213     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
214     {
215       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
216       errors ++;
217       status = 1;
218       break;
219     }
220   }
221 
222   if (!status)
223     puts("PASS");
224 
225  /*
226   * cupsUTF8ToCharset(CUPS_EUC_JP)
227   */
228 
229   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
230 
231   rewind(fp);
232 
233   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
234   {
235     count ++;
236 
237     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
238     if (len < 0)
239     {
240       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
241       errors ++;
242       status = 1;
243       break;
244     }
245   }
246 
247   if (!status)
248     puts("PASS");
249 
250   fclose(fp);
251 
252  /*
253   * Test UTF-8 to legacy charset (ISO 8859-1)...
254   */
255 
256   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
257 
258   legdest[0] = 0;
259 
260   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
261   if (len < 0)
262   {
263     printf("FAIL (len=%d)\n", len);
264     errors ++;
265   }
266   else
267     puts("PASS");
268 
269  /*
270   * cupsCharsetToUTF8
271   */
272 
273   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
274 
275   strlcpy(legsrc, legdest, sizeof(legsrc));
276 
277   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
278   if ((size_t)len != strlen((char *)utf8latin))
279   {
280     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
281     print_utf8("    utf8latin", utf8latin);
282     print_utf8("    utf8dest", utf8dest);
283     errors ++;
284   }
285   else if (memcmp(utf8latin, utf8dest, (size_t)len))
286   {
287     puts("FAIL (results do not match)");
288     print_utf8("    utf8latin", utf8latin);
289     print_utf8("    utf8dest", utf8dest);
290     errors ++;
291   }
292   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
293   {
294     puts("FAIL (replacement characters do not work!)");
295     errors ++;
296   }
297   else
298     puts("PASS");
299 
300  /*
301   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
302   */
303 
304   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
305 
306   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
307   {
308     puts("FAIL");
309     errors ++;
310   }
311   else
312   {
313     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
314 
315     if (*legptr)
316     {
317       puts("FAIL (unknown character)");
318       errors ++;
319     }
320     else
321       puts("PASS");
322   }
323 
324   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
325 
326   strlcpy(legsrc, legdest, sizeof(legsrc));
327 
328   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
329   if ((size_t)len != strlen((char *)utf8greek))
330   {
331     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
332     print_utf8("    utf8greek", utf8greek);
333     print_utf8("    utf8dest", utf8dest);
334     errors ++;
335   }
336   else if (memcmp(utf8greek, utf8dest, (size_t)len))
337   {
338     puts("FAIL (results do not match)");
339     print_utf8("    utf8greek", utf8greek);
340     print_utf8("    utf8dest", utf8dest);
341     errors ++;
342   }
343   else
344     puts("PASS");
345 
346  /*
347   * Test UTF-8 to/from legacy charset (Windows 932)...
348   */
349 
350   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
351 
352   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
353   {
354     puts("FAIL");
355     errors ++;
356   }
357   else
358   {
359     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
360 
361     if (*legptr)
362     {
363       puts("FAIL (unknown character)");
364       errors ++;
365     }
366     else
367       puts("PASS");
368   }
369 
370   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
371 
372   strlcpy(legsrc, legdest, sizeof(legsrc));
373 
374   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
375   if ((size_t)len != strlen((char *)utf8japan))
376   {
377     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
378     print_utf8("    utf8japan", utf8japan);
379     print_utf8("    utf8dest", utf8dest);
380     errors ++;
381   }
382   else if (memcmp(utf8japan, utf8dest, (size_t)len))
383   {
384     puts("FAIL (results do not match)");
385     print_utf8("    utf8japan", utf8japan);
386     print_utf8("    utf8dest", utf8dest);
387     errors ++;
388   }
389   else
390     puts("PASS");
391 
392  /*
393   * Test UTF-8 to/from legacy charset (EUC-JP)...
394   */
395 
396   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
397 
398   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
399   {
400     puts("FAIL");
401     errors ++;
402   }
403   else
404   {
405     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
406 
407     if (*legptr)
408     {
409       puts("FAIL (unknown character)");
410       errors ++;
411     }
412     else
413       puts("PASS");
414   }
415 
416 #if 0 // Appears to be broken on all OS's
417   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
418 
419   strlcpy(legsrc, legdest, sizeof(legsrc));
420 
421   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
422   if ((size_t)len != strlen((char *)utf8japan))
423   {
424     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
425     print_utf8("    utf8japan", utf8japan);
426     print_utf8("    utf8dest", utf8dest);
427     errors ++;
428   }
429   else if (memcmp(utf8japan, utf8dest, (size_t)len))
430   {
431     puts("FAIL (results do not match)");
432     print_utf8("    utf8japan", utf8japan);
433     print_utf8("    utf8dest", utf8dest);
434     errors ++;
435   }
436   else
437     puts("PASS");
438 #endif /* 0 */
439 
440  /*
441   * Test UTF-8 to/from legacy charset (Windows 950)...
442   */
443 
444   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
445 
446   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
447   {
448     puts("FAIL");
449     errors ++;
450   }
451   else
452   {
453     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
454 
455     if (*legptr)
456     {
457       puts("FAIL (unknown character)");
458       errors ++;
459     }
460     else
461       puts("PASS");
462   }
463 
464   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
465 
466   strlcpy(legsrc, legdest, sizeof(legsrc));
467 
468   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
469   if ((size_t)len != strlen((char *)utf8taiwan))
470   {
471     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
472     print_utf8("    utf8taiwan", utf8taiwan);
473     print_utf8("    utf8dest", utf8dest);
474     errors ++;
475   }
476   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
477   {
478     puts("FAIL (results do not match)");
479     print_utf8("    utf8taiwan", utf8taiwan);
480     print_utf8("    utf8dest", utf8dest);
481     errors ++;
482   }
483   else
484     puts("PASS");
485 
486  /*
487   * Test UTF-8 to/from legacy charset (EUC-TW)...
488   */
489 
490   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
491 
492   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
493   {
494     puts("FAIL");
495     errors ++;
496   }
497   else
498   {
499     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
500 
501     if (*legptr)
502     {
503       puts("FAIL (unknown character)");
504       errors ++;
505     }
506     else
507       puts("PASS");
508   }
509 
510   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
511 
512   strlcpy(legsrc, legdest, sizeof(legsrc));
513 
514   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
515   if ((size_t)len != strlen((char *)utf8taiwan))
516   {
517     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
518     print_utf8("    utf8taiwan", utf8taiwan);
519     print_utf8("    utf8dest", utf8dest);
520     errors ++;
521   }
522   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
523   {
524     puts("FAIL (results do not match)");
525     print_utf8("    utf8taiwan", utf8taiwan);
526     print_utf8("    utf8dest", utf8dest);
527     errors ++;
528   }
529   else
530     puts("PASS");
531 
532 #if 0
533  /*
534   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
535   */
536   if (verbose)
537     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
538   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
539   if (len < 0)
540     return (1);
541   if (verbose)
542   {
543     print_utf8(" utf8good ", utf8good);
544     print_utf32(" utf32dest", utf32dest);
545   }
546   memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
547   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
548   if (len < 0)
549     return (1);
550   if (len != strlen ((char *) utf8good))
551     return (1);
552   if (memcmp(utf8good, utf8dest, len) != 0)
553     return (1);
554 
555  /*
556   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
557   */
558   if (verbose)
559     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
560   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
561   if (len >= 0)
562     return (1);
563   if (verbose)
564     print_utf8(" utf8bad  ", utf8bad);
565 
566  /*
567   * Test _cupsCharmapFlush()...
568   */
569   if (verbose)
570     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
571   _cupsCharmapFlush();
572   return (0);
573 #endif /* 0 */
574 
575   return (errors > 0);
576 }
577 
578 
579 /*
580  * 'print_utf8()' - Print UTF-8 string with (optional) message.
581  */
582 
583 static void
print_utf8(const char * msg,const cups_utf8_t * src)584 print_utf8(const char	     *msg,	/* I - Message String */
585 	   const cups_utf8_t *src)	/* I - UTF-8 Source String */
586 {
587   const char	*prefix;		/* Prefix string */
588 
589 
590   if (msg)
591     printf("%s:", msg);
592 
593   for (prefix = " "; *src; src ++)
594   {
595     printf("%s%02x", prefix, *src);
596 
597     if ((src[0] & 0x80) && (src[1] & 0x80))
598       prefix = "";
599     else
600       prefix = " ";
601   }
602 
603   putchar('\n');
604 }
605