• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Internationalization test for CUPS.
3  *
4  * Copyright 2007-2014 by Apple Inc.
5  * Copyright 1997-2006 by Easy Software Products.
6  *
7  * Licensed under Apache License v2.0.  See the file "LICENSE" for more information.
8  */
9 
10 /*
11  * Include necessary headers...
12  */
13 
14 #include "string-private.h"
15 #include "language-private.h"
16 #include <stdlib.h>
17 #include <time.h>
18 #include <unistd.h>
19 
20 
21 /*
22  * Local globals...
23  */
24 
/*
 * Character set names, indexed by encoding number: main() casts the index
 * of a matching entry to cups_encoding_t.  "unknown" marks index values
 * that have no name in this table.
 */

static const char * const lang_encodings[] =
{
  "us-ascii",				/* 0 */
  "iso-8859-1",				/* 1 */
  "iso-8859-2",				/* 2 */
  "iso-8859-3",				/* 3 */
  "iso-8859-4",				/* 4 */
  "iso-8859-5",				/* 5 */
  "iso-8859-6",				/* 6 */
  "iso-8859-7",				/* 7 */
  "iso-8859-8",				/* 8 */
  "iso-8859-9",				/* 9 */
  "iso-8859-10",			/* 10 */
  "utf-8",				/* 11 */
  "iso-8859-13",			/* 12 */
  "iso-8859-14",			/* 13 */
  "iso-8859-15",			/* 14 */
  "windows-874",			/* 15 */
  "windows-1250",			/* 16 */
  "windows-1251",			/* 17 */
  "windows-1252",			/* 18 */
  "windows-1253",			/* 19 */
  "windows-1254",			/* 20 */
  "windows-1255",			/* 21 */
  "windows-1256",			/* 22 */
  "windows-1257",			/* 23 */
  "windows-1258",			/* 24 */
  "koi8-r",				/* 25 */
  "koi8-u",				/* 26 */
  "iso-8859-11",			/* 27 */
  "iso-8859-16",			/* 28 */
  "mac-roman",				/* 29 */

  /* 30 through 63 - no name assigned */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown",

  "windows-932",			/* 64 */
  "windows-936",			/* 65 */
  "windows-949",			/* 66 */
  "windows-950",			/* 67 */
  "windows-1361",			/* 68 */

  /* 69 through 127 - no name assigned */
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown", "unknown",
  "unknown", "unknown", "unknown",

  "euc-cn",				/* 128 */
  "euc-jp",				/* 129 */
  "euc-kr",				/* 130 */
  "euc-tw",				/* 131 */
  "jis-x0213"				/* 132 */
};
95 
96 
97 /*
98  * Local functions...
99  */
100 
101 static void	print_utf8(const char *msg, const cups_utf8_t *src);
102 
103 
104 /*
105  * 'main()' - Main entry for internationalization test module.
106  */
107 
108 int					/* O - Exit code */
main(int argc,char * argv[])109 main(int  argc,				/* I - Argument Count */
110      char *argv[])			/* I - Arguments */
111 {
112   FILE		*fp;			/* File pointer */
113   int		count;			/* File line counter */
114   int		status,			/* Status of current test */
115 		errors;			/* Error count */
116   char		line[1024];		/* File line source string */
117   int		len;			/* Length (count) of string */
118   char		legsrc[1024],		/* Legacy source string */
119 		legdest[1024],		/* Legacy destination string */
120 		*legptr;		/* Pointer into legacy string */
121   cups_utf8_t	utf8latin[] =		/* UTF-8 Latin-1 source */
122     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
123     /* "A != <A WITH DIAERESIS>." - use ISO 8859-1 */
124   cups_utf8_t	utf8repla[] =		/* UTF-8 Latin-1 replacement */
125     { 0x41, 0x20, 0xE2, 0x89, 0xA2, 0x20, 0xC3, 0x84, 0x2E, 0x00 };
126     /* "A <NOT IDENTICAL TO> <A WITH DIAERESIS>." */
127   cups_utf8_t	utf8greek[] =		/* UTF-8 Greek source string */
128     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xCE, 0x91, 0x2E, 0x00 };
129     /* "A != <ALPHA>." - use ISO 8859-7 */
130   cups_utf8_t	utf8japan[] =		/* UTF-8 Japanese source */
131     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xEE, 0x9C, 0x80, 0x2E, 0x00 };
132     /* "A != <PRIVATE U+E700>." - use Windows 932 or EUC-JP */
133   cups_utf8_t	utf8taiwan[] =		/* UTF-8 Chinese source */
134     { 0x41, 0x20, 0x21, 0x3D, 0x20, 0xE4, 0xB9, 0x82, 0x2E, 0x00 };
135     /* "A != <CJK U+4E42>." - use Windows 950 (Big5) or EUC-TW */
136   cups_utf8_t	utf8dest[1024];		/* UTF-8 destination string */
137   cups_utf32_t	utf32dest[1024];	/* UTF-32 destination string */
138 
139 
140   if (argc > 1)
141   {
142     int			i;		/* Looping var */
143     cups_encoding_t	encoding;	/* Source encoding */
144 
145 
146     if (argc != 3)
147     {
148       puts("Usage: ./testi18n [filename charset]");
149       return (1);
150     }
151 
152     if ((fp = fopen(argv[1], "rb")) == NULL)
153     {
154       perror(argv[1]);
155       return (1);
156     }
157 
158     for (i = 0, encoding = CUPS_AUTO_ENCODING;
159          i < (int)(sizeof(lang_encodings) / sizeof(lang_encodings[0]));
160 	 i ++)
161       if (!_cups_strcasecmp(lang_encodings[i], argv[2]))
162       {
163         encoding = (cups_encoding_t)i;
164 	break;
165       }
166 
167     if (encoding == CUPS_AUTO_ENCODING)
168     {
169       fprintf(stderr, "%s: Unknown character set!\n", argv[2]);
170       return (1);
171     }
172 
173     while (fgets(line, sizeof(line), fp))
174     {
175       if (cupsCharsetToUTF8(utf8dest, line, sizeof(utf8dest), encoding) < 0)
176       {
177         fprintf(stderr, "%s: Unable to convert line: %s", argv[1], line);
178 	return (1);
179       }
180 
181       fputs((char *)utf8dest, stdout);
182     }
183 
184     fclose(fp);
185     return (0);
186   }
187 
188  /*
189   * Start with some conversion tests from a UTF-8 test file.
190   */
191 
192   errors = 0;
193 
194   if ((fp = fopen("utf8demo.txt", "rb")) == NULL)
195   {
196     perror("utf8demo.txt");
197     return (1);
198   }
199 
200  /*
201   * cupsUTF8ToUTF32
202   */
203 
204   fputs("cupsUTF8ToUTF32 of utfdemo.txt: ", stdout);
205 
206   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
207   {
208     count ++;
209 
210     if (cupsUTF8ToUTF32(utf32dest, (cups_utf8_t *)line, 1024) < 0)
211     {
212       printf("FAIL (UTF-8 to UTF-32 on line %d)\n", count);
213       errors ++;
214       status = 1;
215       break;
216     }
217   }
218 
219   if (!status)
220     puts("PASS");
221 
222  /*
223   * cupsUTF8ToCharset(CUPS_EUC_JP)
224   */
225 
226   fputs("cupsUTF8ToCharset(CUPS_EUC_JP) of utfdemo.txt: ", stdout);
227 
228   rewind(fp);
229 
230   for (count = 0, status = 0; fgets(line, sizeof(line), fp);)
231   {
232     count ++;
233 
234     len = cupsUTF8ToCharset(legdest, (cups_utf8_t *)line, 1024, CUPS_EUC_JP);
235     if (len < 0)
236     {
237       printf("FAIL (UTF-8 to EUC-JP on line %d)\n", count);
238       errors ++;
239       status = 1;
240       break;
241     }
242   }
243 
244   if (!status)
245     puts("PASS");
246 
247   fclose(fp);
248 
249  /*
250   * Test UTF-8 to legacy charset (ISO 8859-1)...
251   */
252 
253   fputs("cupsUTF8ToCharset(CUPS_ISO8859_1): ", stdout);
254 
255   legdest[0] = 0;
256 
257   len = cupsUTF8ToCharset(legdest, utf8latin, 1024, CUPS_ISO8859_1);
258   if (len < 0)
259   {
260     printf("FAIL (len=%d)\n", len);
261     errors ++;
262   }
263   else
264     puts("PASS");
265 
266  /*
267   * cupsCharsetToUTF8
268   */
269 
270   fputs("cupsCharsetToUTF8(CUPS_ISO8859_1): ", stdout);
271 
272   strlcpy(legsrc, legdest, sizeof(legsrc));
273 
274   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_1);
275   if ((size_t)len != strlen((char *)utf8latin))
276   {
277     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8latin));
278     print_utf8("    utf8latin", utf8latin);
279     print_utf8("    utf8dest", utf8dest);
280     errors ++;
281   }
282   else if (memcmp(utf8latin, utf8dest, (size_t)len))
283   {
284     puts("FAIL (results do not match)");
285     print_utf8("    utf8latin", utf8latin);
286     print_utf8("    utf8dest", utf8dest);
287     errors ++;
288   }
289   else if (cupsUTF8ToCharset(legdest, utf8repla, 1024, CUPS_ISO8859_1) < 0)
290   {
291     puts("FAIL (replacement characters do not work!)");
292     errors ++;
293   }
294   else
295     puts("PASS");
296 
297  /*
298   * Test UTF-8 to/from legacy charset (ISO 8859-7)...
299   */
300 
301   fputs("cupsUTF8ToCharset(CUPS_ISO8859_7): ", stdout);
302 
303   if (cupsUTF8ToCharset(legdest, utf8greek, 1024, CUPS_ISO8859_7) < 0)
304   {
305     puts("FAIL");
306     errors ++;
307   }
308   else
309   {
310     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
311 
312     if (*legptr)
313     {
314       puts("FAIL (unknown character)");
315       errors ++;
316     }
317     else
318       puts("PASS");
319   }
320 
321   fputs("cupsCharsetToUTF8(CUPS_ISO8859_7): ", stdout);
322 
323   strlcpy(legsrc, legdest, sizeof(legsrc));
324 
325   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_ISO8859_7);
326   if ((size_t)len != strlen((char *)utf8greek))
327   {
328     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8greek));
329     print_utf8("    utf8greek", utf8greek);
330     print_utf8("    utf8dest", utf8dest);
331     errors ++;
332   }
333   else if (memcmp(utf8greek, utf8dest, (size_t)len))
334   {
335     puts("FAIL (results do not match)");
336     print_utf8("    utf8greek", utf8greek);
337     print_utf8("    utf8dest", utf8dest);
338     errors ++;
339   }
340   else
341     puts("PASS");
342 
343  /*
344   * Test UTF-8 to/from legacy charset (Windows 932)...
345   */
346 
347   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_932): ", stdout);
348 
349   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_WINDOWS_932) < 0)
350   {
351     puts("FAIL");
352     errors ++;
353   }
354   else
355   {
356     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
357 
358     if (*legptr)
359     {
360       puts("FAIL (unknown character)");
361       errors ++;
362     }
363     else
364       puts("PASS");
365   }
366 
367   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_932): ", stdout);
368 
369   strlcpy(legsrc, legdest, sizeof(legsrc));
370 
371   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_932);
372   if ((size_t)len != strlen((char *)utf8japan))
373   {
374     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
375     print_utf8("    utf8japan", utf8japan);
376     print_utf8("    utf8dest", utf8dest);
377     errors ++;
378   }
379   else if (memcmp(utf8japan, utf8dest, (size_t)len))
380   {
381     puts("FAIL (results do not match)");
382     print_utf8("    utf8japan", utf8japan);
383     print_utf8("    utf8dest", utf8dest);
384     errors ++;
385   }
386   else
387     puts("PASS");
388 
389  /*
390   * Test UTF-8 to/from legacy charset (EUC-JP)...
391   */
392 
393   fputs("cupsUTF8ToCharset(CUPS_EUC_JP): ", stdout);
394 
395   if (cupsUTF8ToCharset(legdest, utf8japan, 1024, CUPS_EUC_JP) < 0)
396   {
397     puts("FAIL");
398     errors ++;
399   }
400   else
401   {
402     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
403 
404     if (*legptr)
405     {
406       puts("FAIL (unknown character)");
407       errors ++;
408     }
409     else
410       puts("PASS");
411   }
412 
413 #ifndef __linux
414   fputs("cupsCharsetToUTF8(CUPS_EUC_JP): ", stdout);
415 
416   strlcpy(legsrc, legdest, sizeof(legsrc));
417 
418   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_JP);
419   if ((size_t)len != strlen((char *)utf8japan))
420   {
421     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8japan));
422     print_utf8("    utf8japan", utf8japan);
423     print_utf8("    utf8dest", utf8dest);
424     errors ++;
425   }
426   else if (memcmp(utf8japan, utf8dest, (size_t)len))
427   {
428     puts("FAIL (results do not match)");
429     print_utf8("    utf8japan", utf8japan);
430     print_utf8("    utf8dest", utf8dest);
431     errors ++;
432   }
433   else
434     puts("PASS");
435 #endif /* !__linux */
436 
437  /*
438   * Test UTF-8 to/from legacy charset (Windows 950)...
439   */
440 
441   fputs("cupsUTF8ToCharset(CUPS_WINDOWS_950): ", stdout);
442 
443   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_WINDOWS_950) < 0)
444   {
445     puts("FAIL");
446     errors ++;
447   }
448   else
449   {
450     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
451 
452     if (*legptr)
453     {
454       puts("FAIL (unknown character)");
455       errors ++;
456     }
457     else
458       puts("PASS");
459   }
460 
461   fputs("cupsCharsetToUTF8(CUPS_WINDOWS_950): ", stdout);
462 
463   strlcpy(legsrc, legdest, sizeof(legsrc));
464 
465   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_WINDOWS_950);
466   if ((size_t)len != strlen((char *)utf8taiwan))
467   {
468     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
469     print_utf8("    utf8taiwan", utf8taiwan);
470     print_utf8("    utf8dest", utf8dest);
471     errors ++;
472   }
473   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
474   {
475     puts("FAIL (results do not match)");
476     print_utf8("    utf8taiwan", utf8taiwan);
477     print_utf8("    utf8dest", utf8dest);
478     errors ++;
479   }
480   else
481     puts("PASS");
482 
483  /*
484   * Test UTF-8 to/from legacy charset (EUC-TW)...
485   */
486 
487   fputs("cupsUTF8ToCharset(CUPS_EUC_TW): ", stdout);
488 
489   if (cupsUTF8ToCharset(legdest, utf8taiwan, 1024, CUPS_EUC_TW) < 0)
490   {
491     puts("FAIL");
492     errors ++;
493   }
494   else
495   {
496     for (legptr = legdest; *legptr && *legptr != '?'; legptr ++);
497 
498     if (*legptr)
499     {
500       puts("FAIL (unknown character)");
501       errors ++;
502     }
503     else
504       puts("PASS");
505   }
506 
507   fputs("cupsCharsetToUTF8(CUPS_EUC_TW): ", stdout);
508 
509   strlcpy(legsrc, legdest, sizeof(legsrc));
510 
511   len = cupsCharsetToUTF8(utf8dest, legsrc, 1024, CUPS_EUC_TW);
512   if ((size_t)len != strlen((char *)utf8taiwan))
513   {
514     printf("FAIL (len=%d, expected %d)\n", len, (int)strlen((char *)utf8taiwan));
515     print_utf8("    utf8taiwan", utf8taiwan);
516     print_utf8("    utf8dest", utf8dest);
517     errors ++;
518   }
519   else if (memcmp(utf8taiwan, utf8dest, (size_t)len))
520   {
521     puts("FAIL (results do not match)");
522     print_utf8("    utf8taiwan", utf8taiwan);
523     print_utf8("    utf8dest", utf8dest);
524     errors ++;
525   }
526   else
527     puts("PASS");
528 
529 #if 0
530  /*
531   * Test UTF-8 (16-bit) to UTF-32 (w/ BOM)...
532   */
533   if (verbose)
534     printf("\ntesti18n: Testing UTF-8 to UTF-32 (w/ BOM)...\n");
535   len = cupsUTF8ToUTF32(utf32dest, utf8good, 1024);
536   if (len < 0)
537     return (1);
538   if (verbose)
539   {
540     print_utf8(" utf8good ", utf8good);
541     print_utf32(" utf32dest", utf32dest);
542   }
543   memcpy(utf32src, utf32dest, (len + 1) * sizeof(cups_utf32_t));
544   len = cupsUTF32ToUTF8(utf8dest, utf32src, 1024);
545   if (len < 0)
546     return (1);
547   if (len != strlen ((char *) utf8good))
548     return (1);
549   if (memcmp(utf8good, utf8dest, len) != 0)
550     return (1);
551 
552  /*
553   * Test invalid UTF-8 (16-bit) to UTF-32 (w/ BOM)...
554   */
555   if (verbose)
556     printf("\ntesti18n: Testing UTF-8 bad 16-bit source string...\n");
557   len = cupsUTF8ToUTF32(utf32dest, utf8bad, 1024);
558   if (len >= 0)
559     return (1);
560   if (verbose)
561     print_utf8(" utf8bad  ", utf8bad);
562 
563  /*
564   * Test _cupsCharmapFlush()...
565   */
566   if (verbose)
567     printf("\ntesti18n: Testing _cupsCharmapFlush()...\n");
568   _cupsCharmapFlush();
569   return (0);
570 #endif /* 0 */
571 
572   return (errors > 0);
573 }
574 
575 
576 /*
577  * 'print_utf8()' - Print UTF-8 string with (optional) message.
578  */
579 
580 static void
print_utf8(const char * msg,const cups_utf8_t * src)581 print_utf8(const char	     *msg,	/* I - Message String */
582 	   const cups_utf8_t *src)	/* I - UTF-8 Source String */
583 {
584   const char	*prefix;		/* Prefix string */
585 
586 
587   if (msg)
588     printf("%s:", msg);
589 
590   for (prefix = " "; *src; src ++)
591   {
592     printf("%s%02x", prefix, *src);
593 
594     if ((src[0] & 0x80) && (src[1] & 0x80))
595       prefix = "";
596     else
597       prefix = " ";
598   }
599 
600   putchar('\n');
601 }
602