1 /*
2 * xsltlocale.c: locale handling
3 *
4 * Reference:
5 * RFC 3066: Tags for the Identification of Languages
6 * http://www.ietf.org/rfc/rfc3066.txt
7 * ISO 639-1, ISO 3166-1
8 *
9 * Author: Nick Wellnhofer
10 * winapi port: Roumen Petrov
11 */
12
13 #define IN_LIBXSLT
14 #include "libxslt.h"
15
16 #include <string.h>
17 #include <libxml/xmlmemory.h>
18
19 #include "xsltlocale.h"
20 #include "xsltutils.h"
21
22 #if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ <= 2
23 #define newlocale __newlocale
24 #define freelocale __freelocale
25 #define strxfrm_l __strxfrm_l
26 #define LC_COLLATE_MASK (1 << LC_COLLATE)
27 #endif
28
/*
 * Locale-independent ASCII helpers.
 *
 * ISALPHA(c): non-zero when c is in 'A'..'Z' or 'a'..'z'. The first test
 *             keeps bytes 0x40..0x7f, the second requires the low five bits
 *             to be 1..26. The argument is evaluated twice, so it must not
 *             have side effects.
 * TOUPPER(c): clears bit 0x20; only meaningful for ASCII letters.
 * TOLOWER(c): sets bit 0x20; only meaningful for ASCII letters.
 *
 * Arguments and expansions are fully parenthesised so the macros can be
 * used inside larger expressions (comparisons, arithmetic) safely.
 */
#define ISALPHA(c) ((((c) & 0xc0) == 0x40) && ((unsigned)(((c) & 0x1f) - 1) < 26))
#define TOUPPER(c) ((c) & ~0x20)
#define TOLOWER(c) ((c) | 0x20)
32
/*
 * Upper bounds on the parts of a language tag. All lengths are counted in
 * characters and exclude the terminating null character.
 */
#define XSLTMAX_ISO639LANGLEN		8	/* language part */
#define XSLTMAX_ISO3166CNTRYLEN		8	/* country part */
/* Full tag: <lang>, one separator character, <cntry> */
#define XSLTMAX_LANGTAGLEN \
    (XSLTMAX_ISO639LANGLEN + 1 + XSLTMAX_ISO3166CNTRYLEN)
38
39 static const xmlChar* xsltDefaultRegion(const xmlChar *localeName);
40
41 #ifdef XSLT_LOCALE_WINAPI
42 xmlRMutexPtr xsltLocaleMutex = NULL;
43
44 struct xsltRFC1766Info_s {
45 /*note typedef unsigned char xmlChar !*/
46 xmlChar tag[XSLTMAX_LANGTAGLEN+1];
47 /*note typedef LCID xsltLocale !*/
48 xsltLocale lcid;
49 };
50 typedef struct xsltRFC1766Info_s xsltRFC1766Info;
51
52 static int xsltLocaleListSize = 0;
53 static xsltRFC1766Info *xsltLocaleList = NULL;
54
55
56 static xsltLocale
xslt_locale_WINAPI(const xmlChar * languageTag)57 xslt_locale_WINAPI(const xmlChar *languageTag) {
58 int k;
59 xsltRFC1766Info *p = xsltLocaleList;
60
61 for (k=0; k<xsltLocaleListSize; k++, p++)
62 if (xmlStrcmp(p->tag, languageTag) == 0) return p->lcid;
63 return((xsltLocale)0);
64 }
65
66 static void xsltEnumSupportedLocales(void);
67 #endif
68
69 /**
70 * xsltNewLocale:
71 * @languageTag: RFC 3066 language tag
72 *
73 * Creates a new locale of an opaque system dependent type based on the
74 * language tag.
75 *
76 * Returns the locale or NULL on error or if no matching locale was found
77 */
78 xsltLocale
xsltNewLocale(const xmlChar * languageTag)79 xsltNewLocale(const xmlChar *languageTag) {
80 #ifdef XSLT_LOCALE_XLOCALE
81 xsltLocale locale;
82 char localeName[XSLTMAX_LANGTAGLEN+6]; /* 6 chars for ".utf8\0" */
83 const xmlChar *p = languageTag;
84 const char *region = NULL;
85 char *q = localeName;
86 int i, llen;
87
88 /* Convert something like "pt-br" to "pt_BR.utf8" */
89
90 if (languageTag == NULL)
91 return(NULL);
92
93 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
94 *q++ = TOLOWER(*p++);
95
96 if (i == 0)
97 return(NULL);
98
99 llen = i;
100 *q++ = '_';
101
102 if (*p) {
103 if (*p++ != '-')
104 return(NULL);
105
106 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
107 *q++ = TOUPPER(*p++);
108
109 if (i == 0 || *p)
110 return(NULL);
111
112 memcpy(q, ".utf8", 6);
113 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
114 if (locale != NULL)
115 return(locale);
116
117 /* Continue without using country code */
118
119 q = localeName + llen + 1;
120 }
121
122 /* Try locale without territory, e.g. for Esperanto (eo) */
123
124 memcpy(q, ".utf8", 6);
125 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
126 if (locale != NULL)
127 return(locale);
128
129 /* Try to find most common country for language */
130
131 if (llen != 2)
132 return(NULL);
133
134 region = (char *)xsltDefaultRegion((xmlChar *)localeName);
135 if (region == NULL)
136 return(NULL);
137
138 q = localeName + llen + 1;
139 *q++ = region[0];
140 *q++ = region[1];
141 memcpy(q, ".utf8", 6);
142 locale = newlocale(LC_COLLATE_MASK, localeName, NULL);
143
144 return(locale);
145 #endif
146
147 #ifdef XSLT_LOCALE_WINAPI
148 {
149 xsltLocale locale = (xsltLocale)0;
150 xmlChar localeName[XSLTMAX_LANGTAGLEN+1];
151 xmlChar *q = localeName;
152 const xmlChar *p = languageTag;
153 int i, llen;
154 const xmlChar *region = NULL;
155
156 if (languageTag == NULL) goto end;
157
158 xsltEnumSupportedLocales();
159
160 for (i=0; i<XSLTMAX_ISO639LANGLEN && ISALPHA(*p); ++i)
161 *q++ = TOLOWER(*p++);
162 if (i == 0) goto end;
163
164 llen = i;
165 *q++ = '-';
166 if (*p) { /*if country tag is given*/
167 if (*p++ != '-') goto end;
168
169 for (i=0; i<XSLTMAX_ISO3166CNTRYLEN && ISALPHA(*p); ++i)
170 *q++ = TOUPPER(*p++);
171 if (i == 0 || *p) goto end;
172
173 *q = '\0';
174 locale = xslt_locale_WINAPI(localeName);
175 if (locale != (xsltLocale)0) goto end;
176 }
177 /* Try to find most common country for language */
178 region = xsltDefaultRegion(localeName);
179 if (region == NULL) goto end;
180
181 strcpy(localeName + llen + 1, region);
182 locale = xslt_locale_WINAPI(localeName);
183 end:
184 return(locale);
185 }
186 #endif
187
188 #ifdef XSLT_LOCALE_NONE
189 return(NULL);
190 #endif
191 }
192
193 static const xmlChar*
xsltDefaultRegion(const xmlChar * localeName)194 xsltDefaultRegion(const xmlChar *localeName) {
195 xmlChar c;
196 /* region should be xmlChar, but gcc warns on all string assignments */
197 const char *region = NULL;
198
199 c = localeName[1];
200 /* This is based on the locales from glibc 2.3.3 */
201
202 switch (localeName[0]) {
203 case 'a':
204 if (c == 'a' || c == 'm') region = "ET";
205 else if (c == 'f') region = "ZA";
206 else if (c == 'n') region = "ES";
207 else if (c == 'r') region = "AE";
208 else if (c == 'z') region = "AZ";
209 break;
210 case 'b':
211 if (c == 'e') region = "BY";
212 else if (c == 'g') region = "BG";
213 else if (c == 'n') region = "BD";
214 else if (c == 'r') region = "FR";
215 else if (c == 's') region = "BA";
216 break;
217 case 'c':
218 if (c == 'a') region = "ES";
219 else if (c == 's') region = "CZ";
220 else if (c == 'y') region = "GB";
221 break;
222 case 'd':
223 if (c == 'a') region = "DK";
224 else if (c == 'e') region = "DE";
225 break;
226 case 'e':
227 if (c == 'l') region = "GR";
228 else if (c == 'n' || c == 'o') region = "US";
229 else if (c == 's' || c == 'u') region = "ES";
230 else if (c == 't') region = "EE";
231 break;
232 case 'f':
233 if (c == 'a') region = "IR";
234 else if (c == 'i') region = "FI";
235 else if (c == 'o') region = "FO";
236 else if (c == 'r') region = "FR";
237 break;
238 case 'g':
239 if (c == 'a') region = "IE";
240 else if (c == 'l') region = "ES";
241 else if (c == 'v') region = "GB";
242 break;
243 case 'h':
244 if (c == 'e') region = "IL";
245 else if (c == 'i') region = "IN";
246 else if (c == 'r') region = "HT";
247 else if (c == 'u') region = "HU";
248 break;
249 case 'i':
250 if (c == 'd') region = "ID";
251 else if (c == 's') region = "IS";
252 else if (c == 't') region = "IT";
253 else if (c == 'w') region = "IL";
254 break;
255 case 'j':
256 if (c == 'a') region = "JP";
257 break;
258 case 'k':
259 if (c == 'l') region = "GL";
260 else if (c == 'o') region = "KR";
261 else if (c == 'w') region = "GB";
262 break;
263 case 'l':
264 if (c == 't') region = "LT";
265 else if (c == 'v') region = "LV";
266 break;
267 case 'm':
268 if (c == 'k') region = "MK";
269 else if (c == 'l' || c == 'r') region = "IN";
270 else if (c == 'n') region = "MN";
271 else if (c == 's') region = "MY";
272 else if (c == 't') region = "MT";
273 break;
274 case 'n':
275 if (c == 'b' || c == 'n' || c == 'o') region = "NO";
276 else if (c == 'e') region = "NP";
277 else if (c == 'l') region = "NL";
278 break;
279 case 'o':
280 if (c == 'm') region = "ET";
281 break;
282 case 'p':
283 if (c == 'a') region = "IN";
284 else if (c == 'l') region = "PL";
285 else if (c == 't') region = "PT";
286 break;
287 case 'r':
288 if (c == 'o') region = "RO";
289 else if (c == 'u') region = "RU";
290 break;
291 case 's':
292 switch (c) {
293 case 'e': region = "NO"; break;
294 case 'h': region = "YU"; break;
295 case 'k': region = "SK"; break;
296 case 'l': region = "SI"; break;
297 case 'o': region = "ET"; break;
298 case 'q': region = "AL"; break;
299 case 't': region = "ZA"; break;
300 case 'v': region = "SE"; break;
301 }
302 break;
303 case 't':
304 if (c == 'a' || c == 'e') region = "IN";
305 else if (c == 'h') region = "TH";
306 else if (c == 'i') region = "ER";
307 else if (c == 'r') region = "TR";
308 else if (c == 't') region = "RU";
309 break;
310 case 'u':
311 if (c == 'k') region = "UA";
312 else if (c == 'r') region = "PK";
313 break;
314 case 'v':
315 if (c == 'i') region = "VN";
316 break;
317 case 'w':
318 if (c == 'a') region = "BE";
319 break;
320 case 'x':
321 if (c == 'h') region = "ZA";
322 break;
323 case 'z':
324 if (c == 'h') region = "CN";
325 else if (c == 'u') region = "ZA";
326 break;
327 }
328 return((xmlChar *)region);
329 }
330
331 /**
332 * xsltFreeLocale:
333 * @locale: the locale to free
334 *
335 * Frees a locale created with xsltNewLocale
336 */
337 void
xsltFreeLocale(xsltLocale locale)338 xsltFreeLocale(xsltLocale locale) {
339 #ifdef XSLT_LOCALE_XLOCALE
340 freelocale(locale);
341 #endif
342 }
343
344 /**
345 * xsltStrxfrm:
346 * @locale: locale created with xsltNewLocale
347 * @string: UTF-8 string to transform
348 *
349 * Transforms a string according to locale. The transformed string must then be
350 * compared with xsltLocaleStrcmp and freed with xmlFree.
351 *
352 * Returns the transformed string or NULL on error
353 */
354 xsltLocaleChar *
xsltStrxfrm(xsltLocale locale,const xmlChar * string)355 xsltStrxfrm(xsltLocale locale, const xmlChar *string)
356 {
357 #ifdef XSLT_LOCALE_NONE
358 return(NULL);
359 #else
360 size_t xstrlen, r;
361 xsltLocaleChar *xstr;
362
363 #ifdef XSLT_LOCALE_XLOCALE
364 xstrlen = strxfrm_l(NULL, (const char *)string, 0, locale) + 1;
365 xstr = (xsltLocaleChar *) xmlMalloc(xstrlen);
366 if (xstr == NULL) {
367 xsltTransformError(NULL, NULL, NULL,
368 "xsltStrxfrm : out of memory error\n");
369 return(NULL);
370 }
371
372 r = strxfrm_l((char *)xstr, (const char *)string, xstrlen, locale);
373 #endif
374
375 #ifdef XSLT_LOCALE_WINAPI
376 xstrlen = MultiByteToWideChar(CP_UTF8, 0, string, -1, NULL, 0);
377 if (xstrlen == 0) {
378 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar check failed\n");
379 return(NULL);
380 }
381 xstr = (xsltLocaleChar*) xmlMalloc(xstrlen * sizeof(xsltLocaleChar));
382 if (xstr == NULL) {
383 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : out of memory\n");
384 return(NULL);
385 }
386 r = MultiByteToWideChar(CP_UTF8, 0, string, -1, xstr, xstrlen);
387 if (r == 0) {
388 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : MultiByteToWideChar failed\n");
389 xmlFree(xstr);
390 return(NULL);
391 }
392 return(xstr);
393 #endif /* XSLT_LOCALE_WINAPI */
394
395 if (r >= xstrlen) {
396 xsltTransformError(NULL, NULL, NULL, "xsltStrxfrm : strxfrm failed\n");
397 xmlFree(xstr);
398 return(NULL);
399 }
400
401 return(xstr);
402 #endif /* XSLT_LOCALE_NONE */
403 }
404
405 /**
406 * xsltLocaleStrcmp:
407 * @locale: a locale identifier
408 * @str1: a string transformed with xsltStrxfrm
409 * @str2: a string transformed with xsltStrxfrm
410 *
411 * Compares two strings transformed with xsltStrxfrm
412 *
413 * Returns a value < 0 if str1 sorts before str2,
414 * a value > 0 if str1 sorts after str2,
415 * 0 if str1 and str2 are equal wrt sorting
416 */
417 int
xsltLocaleStrcmp(xsltLocale locale,const xsltLocaleChar * str1,const xsltLocaleChar * str2)418 xsltLocaleStrcmp(xsltLocale locale, const xsltLocaleChar *str1, const xsltLocaleChar *str2) {
419 (void)locale;
420 #ifdef XSLT_LOCALE_WINAPI
421 {
422 int ret;
423 if (str1 == str2) return(0);
424 if (str1 == NULL) return(-1);
425 if (str2 == NULL) return(1);
426 ret = CompareStringW(locale, 0, str1, -1, str2, -1);
427 if (ret == 0) {
428 xsltTransformError(NULL, NULL, NULL, "xsltLocaleStrcmp : CompareStringW fail\n");
429 return(0);
430 }
431 return(ret - 2);
432 }
433 #else
434 return(xmlStrcmp(str1, str2));
435 #endif
436 }
437
438 #ifdef XSLT_LOCALE_WINAPI
439 /**
440 * xsltCountSupportedLocales:
441 * @lcid: not used
442 *
443 * callback used to count locales
444 *
445 * Returns TRUE
446 */
447 BOOL CALLBACK
xsltCountSupportedLocales(LPSTR lcid)448 xsltCountSupportedLocales(LPSTR lcid) {
449 (void) lcid;
450 ++xsltLocaleListSize;
451 return(TRUE);
452 }
453
454 /**
455 * xsltIterateSupportedLocales:
456 * @lcid: not used
457 *
458 * callback used to track locales
459 *
460 * Returns TRUE if not at the end of the array
461 */
462 BOOL CALLBACK
xsltIterateSupportedLocales(LPSTR lcid)463 xsltIterateSupportedLocales(LPSTR lcid) {
464 static int count = 0;
465 xmlChar iso639lang [XSLTMAX_ISO639LANGLEN +1];
466 xmlChar iso3136ctry[XSLTMAX_ISO3166CNTRYLEN+1];
467 int k, l;
468 xsltRFC1766Info *p = xsltLocaleList + count;
469
470 k = sscanf(lcid, "%lx", (long*)&p->lcid);
471 if (k < 1) goto end;
472 /*don't count terminating null character*/
473 k = GetLocaleInfoA(p->lcid, LOCALE_SISO639LANGNAME , iso639lang , sizeof(iso639lang ));
474 if (--k < 1) goto end;
475 l = GetLocaleInfoA(p->lcid, LOCALE_SISO3166CTRYNAME, iso3136ctry, sizeof(iso3136ctry));
476 if (--l < 1) goto end;
477
478 { /*fill results*/
479 xmlChar *q = p->tag;
480 memcpy(q, iso639lang, k);
481 q += k;
482 *q++ = '-';
483 memcpy(q, iso3136ctry, l);
484 q += l;
485 *q = '\0';
486 }
487 ++count;
488 end:
489 return((count < xsltLocaleListSize) ? TRUE : FALSE);
490 }
491
492
493 static void
xsltEnumSupportedLocales(void)494 xsltEnumSupportedLocales(void) {
495 xmlRMutexLock(xsltLocaleMutex);
496 if (xsltLocaleListSize <= 0) {
497 size_t len;
498
499 EnumSystemLocalesA(xsltCountSupportedLocales, LCID_SUPPORTED);
500
501 len = xsltLocaleListSize * sizeof(xsltRFC1766Info);
502 xsltLocaleList = xmlMalloc(len);
503 memset(xsltLocaleList, 0, len);
504 EnumSystemLocalesA(xsltIterateSupportedLocales, LCID_SUPPORTED);
505 }
506 xmlRMutexUnlock(xsltLocaleMutex);
507 }
508
509 #endif /*def XSLT_LOCALE_WINAPI*/
510