• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * xsltlocale.c: locale handling
3  *
4  * Reference:
5  * RFC 3066: Tags for the Identification of Languages
6  * http://www.ietf.org/rfc/rfc3066.txt
7  * ISO 639-1, ISO 3166-1
8  *
9  * Author: Nick Wellnhofer
10  * winapi port: Roumen Petrov
11  */
12 
13 #define IN_LIBXSLT
14 #include "libxslt.h"
15 
16 #include <string.h>
17 #include <libxml/xmlmemory.h>
18 
19 #include "xsltlocale.h"
20 #include "xsltutils.h"
21 
/*
 * On glibc 2.x releases up to and including 2.2, map the standard
 * per-locale function names onto their double-underscore variants and
 * synthesize LC_COLLATE_MASK from LC_COLLATE.
 */
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
#  define LC_COLLATE_MASK (1 << LC_COLLATE)
#  define strxfrm_l __strxfrm_l
#  define freelocale __freelocale
#  define newlocale __newlocale
#endif
28 
/*
 * ASCII-only character helpers that do not consult the C locale.
 *
 * ISALPHA(c) is true exactly for 'A'-'Z' and 'a'-'z': the top two bits
 * must be 01 and the low five bits must be in the range 1..26.
 * TOUPPER(c) clears bit 0x20 and TOLOWER(c) sets it; both are only
 * meaningful when c is an ASCII letter.
 *
 * Every use of the argument is parenthesized so that compound expressions
 * (e.g. "a | b") expand correctly. ISALPHA still evaluates its argument
 * twice, so do not pass expressions with side effects.
 */
#define ISALPHA(c) ((((c) & 0xc0) == 0x40) && ((unsigned)(((c) & 0x1f) - 1) < 26))
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
32 
/* Maximum sub-tag lengths, not counting the terminating null character */
#define XSLTMAX_ISO639LANGLEN   8
#define XSLTMAX_ISO3166CNTRYLEN 8
/* Longest accepted tag has the shape <lang>-<cntry> */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
38 
39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
40 
41 #ifdef XSLT_LOCALE_WINAPI
42 xmlRMutexPtr xsltLocaleMutex = NULL;
43 
44 struct xsltRFC1766Info_s {
45       /*note typedef unsigned char xmlChar !*/
46     xmlChar    tag[XSLTMAX_LANGTAGLEN+1];
47       /*note typedef LCID xsltLocale !*/
48     xsltLocale lcid;
49 };
50 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
51 
52 static int xsltLocaleListSize = 0;
53 static xsltRFC1766Info *xsltLocaleList = NULL;
54 
55 
56 static xsltLocale
xslt_locale_WINAPI(const xmlChar * languageTag)57 xslt_locale_WINAPI(const xmlChar *languageTag) {
58     int k;
59     xsltRFC1766Info *p = xsltLocaleList;
60 
61     for (k=0; k<xsltLocaleListSize; k++, p++)
62 	if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
63     return((xsltLocale)0);
64 }
65 
66 static void xsltEnumSupportedLocales(void);
67 #endif
68 
69 /**
70  * xsltNewLocale:
71  * @languageTag: RFC 3066 language tag
72  *
73  * Creates a new locale of an opaque system dependent type based on the
74  * language tag.
75  *
76  * Returns the locale or NULL on error or if no matching locale was found
77  */
78 xsltLocale
xsltNewLocale(const xmlChar * languageTag)79 xsltNewLocale(const xmlChar *languageTag) {
80 #ifdef XSLT_LOCALE_XLOCALE
81     xsltLocale locale;
82     char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
83     const xmlChar *p = languageTag;
84     const char *region = NULL;
85     char *q = localeName;
86     int i, llen;
87 
88     /* Convert something like "pt-br" to "pt_BR.utf8" */
89 
90     if (languageTag == NULL)
91     	return(NULL);
92 
93     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
94 	*q++ = TOLOWER(*p++);
95 
96     if (i == 0)
97     	return(NULL);
98 
99     llen = i;
100     *q++ = '_';
101 
102     if (*p) {
103     	if (*p++ != '-')
104     	    return(NULL);
105 
106 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
107 	    *q++ = TOUPPER(*p++);
108 
109     	if (i == 0 || *p)
110     	    return(NULL);
111 
112         memcpy(q, ".utf8", 6);
113         locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
114         if (locale != NULL)
115             return(locale);
116 
117         /* Continue without using country code */
118 
119         q = localeName + llen + 1;
120     }
121 
122     /* Try locale without territory, e.g. for Esperanto (eo) */
123 
124     memcpy(q, ".utf8", 6);
125     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
126     if (locale != NULL)
127         return(locale);
128 
129     /* Try to find most common country for language */
130 
131     if (llen != 2)
132         return(NULL);
133 
134     region = (char *)xsltDefaultRegion((xmlChar *)localeName);
135     if (region == NULL)
136         return(NULL);
137 
138     q = localeName + llen + 1;
139     *q++ = region[0];
140     *q++ = region[1];
141     memcpy(q, ".utf8", 6);
142     locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
143 
144     return(locale);
145 #endif
146 
147 #ifdef XSLT_LOCALE_WINAPI
148 {
149     xsltLocale    locale = (xsltLocale)0;
150     xmlChar       localeName[XSLTMAX_LANGTAGLEN+1];
151     xmlChar       *q = localeName;
152     const xmlChar *p = languageTag;
153     int           i, llen;
154     const xmlChar *region = NULL;
155 
156     if (languageTag == NULL) goto end;
157 
158     xsltEnumSupportedLocales();
159 
160     for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
161 	*q++ = TOLOWER(*p++);
162     if (i == 0) goto end;
163 
164     llen = i;
165     *q++ = '-';
166     if (*p) { /*if country tag is given*/
167 	if (*p++ != '-') goto end;
168 
169 	for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
170 	    *q++ = TOUPPER(*p++);
171 	if (i == 0 || *p) goto end;
172 
173 	*q = '\0';
174 	locale = xslt_locale_WINAPI(localeName);
175 	if (locale != (xsltLocale)0) goto end;
176     }
177     /* Try to find most common country for language */
178     region = xsltDefaultRegion(localeName);
179     if (region == NULL) goto end;
180 
181     strcpy(localeName + llen + 1, region);
182     locale = xslt_locale_WINAPI(localeName);
183 end:
184     return(locale);
185 }
186 #endif
187 
188 #ifdef XSLT_LOCALE_NONE
189     return(NULL);
190 #endif
191 }
192 
193 static const xmlChar*
xsltDefaultRegion(const xmlChar * localeName)194 xsltDefaultRegion(const xmlChar *localeName) {
195     xmlChar c;
196     /* region should be xmlChar, but gcc warns on all string assignments */
197     const char *region = NULL;
198 
199     c = localeName[1];
200     /* This is based on the locales from glibc 2.3.3 */
201 
202     switch (localeName[0]) {
203         case 'a':
204             if (c == 'a' || c == 'm') region = "ET";
205             else if (c == 'f') region = "ZA";
206             else if (c == 'n') region = "ES";
207             else if (c == 'r') region = "AE";
208             else if (c == 'z') region = "AZ";
209             break;
210         case 'b':
211             if (c == 'e') region = "BY";
212             else if (c == 'g') region = "BG";
213             else if (c == 'n') region = "BD";
214             else if (c == 'r') region = "FR";
215             else if (c == 's') region = "BA";
216             break;
217         case 'c':
218             if (c == 'a') region = "ES";
219             else if (c == 's') region = "CZ";
220             else if (c == 'y') region = "GB";
221             break;
222         case 'd':
223             if (c == 'a') region = "DK";
224             else if (c == 'e') region = "DE";
225             break;
226         case 'e':
227             if (c == 'l') region = "GR";
228             else if (c == 'n' || c == 'o') region = "US";
229             else if (c == 's' || c == 'u') region = "ES";
230             else if (c == 't') region = "EE";
231             break;
232         case 'f':
233             if (c == 'a') region = "IR";
234             else if (c == 'i') region = "FI";
235             else if (c == 'o') region = "FO";
236             else if (c == 'r') region = "FR";
237             break;
238         case 'g':
239             if (c == 'a') region = "IE";
240             else if (c == 'l') region = "ES";
241             else if (c == 'v') region = "GB";
242             break;
243         case 'h':
244             if (c == 'e') region = "IL";
245             else if (c == 'i') region = "IN";
246             else if (c == 'r') region = "HT";
247             else if (c == 'u') region = "HU";
248             break;
249         case 'i':
250             if (c == 'd') region = "ID";
251             else if (c == 's') region = "IS";
252             else if (c == 't') region = "IT";
253             else if (c == 'w') region = "IL";
254             break;
255         case 'j':
256             if (c == 'a') region = "JP";
257             break;
258         case 'k':
259             if (c == 'l') region = "GL";
260             else if (c == 'o') region = "KR";
261             else if (c == 'w') region = "GB";
262             break;
263         case 'l':
264             if (c == 't') region = "LT";
265             else if (c == 'v') region = "LV";
266             break;
267         case 'm':
268             if (c == 'k') region = "MK";
269             else if (c == 'l' || c == 'r') region = "IN";
270             else if (c == 'n') region = "MN";
271             else if (c == 's') region = "MY";
272             else if (c == 't') region = "MT";
273             break;
274         case 'n':
275             if (c == 'b' || c == 'n' || c == 'o') region = "NO";
276             else if (c == 'e') region = "NP";
277             else if (c == 'l') region = "NL";
278             break;
279         case 'o':
280             if (c == 'm') region = "ET";
281             break;
282         case 'p':
283             if (c == 'a') region = "IN";
284             else if (c == 'l') region = "PL";
285             else if (c == 't') region = "PT";
286             break;
287         case 'r':
288             if (c == 'o') region = "RO";
289             else if (c == 'u') region = "RU";
290             break;
291         case 's':
292             switch (c) {
293                 case 'e': region = "NO"; break;
294                 case 'h': region = "YU"; break;
295                 case 'k': region = "SK"; break;
296                 case 'l': region = "SI"; break;
297                 case 'o': region = "ET"; break;
298                 case 'q': region = "AL"; break;
299                 case 't': region = "ZA"; break;
300                 case 'v': region = "SE"; break;
301             }
302             break;
303         case 't':
304             if (c == 'a' || c == 'e') region = "IN";
305             else if (c == 'h') region = "TH";
306             else if (c == 'i') region = "ER";
307             else if (c == 'r') region = "TR";
308             else if (c == 't') region = "RU";
309             break;
310         case 'u':
311             if (c == 'k') region = "UA";
312             else if (c == 'r') region = "PK";
313             break;
314         case 'v':
315             if (c == 'i') region = "VN";
316             break;
317         case 'w':
318             if (c == 'a') region = "BE";
319             break;
320         case 'x':
321             if (c == 'h') region = "ZA";
322             break;
323         case 'z':
324             if (c == 'h') region = "CN";
325             else if (c == 'u') region = "ZA";
326             break;
327     }
328     return((xmlChar *)region);
329 }
330 
331 /**
332  * xsltFreeLocale:
333  * @locale: the locale to free
334  *
335  * Frees a locale created with xsltNewLocale
336  */
337 void
xsltFreeLocale(xsltLocale locale)338 xsltFreeLocale(xsltLocale locale) {
339 #ifdef XSLT_LOCALE_XLOCALE
340     freelocale(locale);
341 #endif
342 }
343 
344 /**
345  * xsltStrxfrm:
346  * @locale: locale created with xsltNewLocale
347  * @string: UTF-8 string to transform
348  *
349  * Transforms a string according to locale. The transformed string must then be
350  * compared with xsltLocaleStrcmp and freed with xmlFree.
351  *
352  * Returns the transformed string or NULL on error
353  */
354 xsltLocaleChar *
xsltStrxfrm(xsltLocale locale,const xmlChar * string)355 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
356 {
357 #ifdef XSLT_LOCALE_NONE
358     return(NULL);
359 #else
360     size_t xstrlen, r;
361     xsltLocaleChar *xstr;
362 
363 #ifdef XSLT_LOCALE_XLOCALE
364     xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
365     xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
366     if (xstr == NULL) {
367 	xsltTransformError(NULL, NULL, NULL,
368 	    "xsltStrxfrm : out of memory error\n");
369 	return(NULL);
370     }
371 
372     r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
373 #endif
374 
375 #ifdef XSLT_LOCALE_WINAPI
376     xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
377     if (xstrlen == 0) {
378         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
379         return(NULL);
380     }
381     xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
382     if (xstr == NULL) {
383         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
384         return(NULL);
385     }
386     r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
387     if (r == 0) {
388         xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
389         xmlFree(xstr);
390         return(NULL);
391     }
392     return(xstr);
393 #endif /* XSLT_LOCALE_WINAPI */
394 
395     if (r >= xstrlen) {
396 	xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
397         xmlFree(xstr);
398         return(NULL);
399     }
400 
401     return(xstr);
402 #endif /* XSLT_LOCALE_NONE */
403 }
404 
405 /**
406  * xsltLocaleStrcmp:
407  * @locale: a locale identifier
408  * @str1: a string transformed with xsltStrxfrm
409  * @str2: a string transformed with xsltStrxfrm
410  *
411  * Compares two strings transformed with xsltStrxfrm
412  *
413  * Returns a value < 0 if str1 sorts before str2,
414  *         a value > 0 if str1 sorts after str2,
415  *         0 if str1 and str2 are equal wrt sorting
416  */
417 int
xsltLocaleStrcmp(xsltLocale locale,const xsltLocaleChar * str1,const xsltLocaleChar * str2)418 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
419     (void)locale;
420 #ifdef XSLT_LOCALE_WINAPI
421 {
422     int ret;
423     if (str1 == str2) return(0);
424     if (str1 == NULL) return(-1);
425     if (str2 == NULL) return(1);
426     ret = CompareStringW(locale, 0, str1, -1, str2, -1);
427     if (ret == 0) {
428         xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
429         return(0);
430     }
431     return(ret - 2);
432 }
433 #else
434     return(xmlStrcmp(str1, str2));
435 #endif
436 }
437 
438 #ifdef XSLT_LOCALE_WINAPI
439 /**
440  * xsltCountSupportedLocales:
441  * @lcid: not used
442  *
443  * callback used to count locales
444  *
445  * Returns TRUE
446  */
447 BOOL CALLBACK
xsltCountSupportedLocales(LPSTR lcid)448 xsltCountSupportedLocales(LPSTR lcid) {
449     (void) lcid;
450     ++xsltLocaleListSize;
451     return(TRUE);
452 }
453 
454 /**
455  * xsltIterateSupportedLocales:
456  * @lcid: not used
457  *
458  * callback used to track locales
459  *
460  * Returns TRUE if not at the end of the array
461  */
462 BOOL CALLBACK
xsltIterateSupportedLocales(LPSTR lcid)463 xsltIterateSupportedLocales(LPSTR lcid) {
464     static int count = 0;
465     xmlChar    iso639lang [XSLTMAX_ISO639LANGLEN  +1];
466     xmlChar    iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
467     int        k, l;
468     xsltRFC1766Info *p = xsltLocaleList + count;
469 
470     k = sscanf(lcid, "%lx", (long*)&p->lcid);
471     if (k < 1) goto end;
472     /*don't count terminating null character*/
473     k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
474     if (--k < 1) goto end;
475     l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
476     if (--l < 1) goto end;
477 
478     {  /*fill results*/
479 	xmlChar    *q = p->tag;
480 	memcpy(q, iso639lang, k);
481 	q += k;
482 	*q++ = '-';
483 	memcpy(q, iso3136ctry, l);
484 	q += l;
485 	*q = '\0';
486     }
487     ++count;
488 end:
489     return((count < xsltLocaleListSize) ? TRUE : FALSE);
490 }
491 
492 
493 static void
xsltEnumSupportedLocales(void)494 xsltEnumSupportedLocales(void) {
495     xmlRMutexLock(xsltLocaleMutex);
496     if (xsltLocaleListSize <= 0) {
497 	size_t len;
498 
499 	EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
500 
501 	len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
502 	xsltLocaleList = xmlMalloc(len);
503 	memset(xsltLocaleList, 0, len);
504 	EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
505     }
506     xmlRMutexUnlock(xsltLocaleMutex);
507 }
508 
509 #endif /*def XSLT_LOCALE_WINAPI*/
510