1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1997-2012, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: loclikely.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010feb25
14 * created by: Markus W. Scherer
15 *
16 * Code for likely and minimized locale subtags, separated out from other .cpp files
17 * that then do not depend on resource bundle code and likely-subtags data.
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28
29 /**
30 * This function looks for the localeID in the likelySubtags resource.
31 *
32 * @param localeID The tag to find.
33 * @param buffer A buffer to hold the matching entry
34 * @param bufferLength The length of the output buffer
35 * @return A pointer to "buffer" if found, or a null pointer if not.
36 */
37 static const char* U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39 char* buffer,
40 int32_t bufferLength,
41 UErrorCode* err) {
42 const char* result = NULL;
43
44 if (!U_FAILURE(*err)) {
45 int32_t resLen = 0;
46 const UChar* s = NULL;
47 UErrorCode tmpErr = U_ZERO_ERROR;
48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49 if (U_SUCCESS(tmpErr)) {
50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51
52 if (U_FAILURE(tmpErr)) {
53 /*
54 * If a resource is missing, it's not really an error, it's
55 * just that we don't have any data for that particular locale ID.
56 */
57 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58 *err = tmpErr;
59 }
60 }
61 else if (resLen >= bufferLength) {
62 /* The buffer should never overflow. */
63 *err = U_INTERNAL_PROGRAM_ERROR;
64 }
65 else {
66 u_UCharsToChars(s, buffer, resLen + 1);
67 result = buffer;
68 }
69
70 ures_close(subtags);
71 } else {
72 *err = tmpErr;
73 }
74 }
75
76 return result;
77 }
78
79 /**
80 * Append a tag to a buffer, adding the separator if necessary. The buffer
81 * must be large enough to contain the resulting tag plus any separator
82 * necessary. The tag must not be a zero-length string.
83 *
84 * @param tag The tag to add.
85 * @param tagLength The length of the tag.
86 * @param buffer The output buffer.
87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
88 **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91 const char* tag,
92 int32_t tagLength,
93 char* buffer,
94 int32_t* bufferLength) {
95
96 if (*bufferLength > 0) {
97 buffer[*bufferLength] = '_';
98 ++(*bufferLength);
99 }
100
101 uprv_memmove(
102 &buffer[*bufferLength],
103 tag,
104 tagLength);
105
106 *bufferLength += tagLength;
107 }
108
/**
 * Canonical placeholder subtags. "und" is substituted when no language is
 * present; a parsed script or region equal to "Zzzz" / "ZZ" (compared
 * case-insensitively) is treated as absent.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
115
116 /**
117 * Create a tag string from the supplied parameters. The lang, script and region
118 * parameters may be NULL pointers. If they are, their corresponding length parameters
119 * must be less than or equal to 0.
120 *
121 * If any of the language, script or region parameters are empty, and the alternateTags
122 * parameter is not NULL, it will be parsed for potential language, script and region tags
123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
124 * it contains no language tag, the default tag for the unknown language is used.
125 *
126 * If the length of the new string exceeds the capacity of the output buffer,
127 * the function copies as many bytes to the output buffer as it can, and returns
128 * the error U_BUFFER_OVERFLOW_ERROR.
129 *
130 * If an illegal argument is provided, the function returns the error
131 * U_ILLEGAL_ARGUMENT_ERROR.
132 *
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134 * the tag string fits in the output buffer, but the null terminator doesn't.
135 *
136 * @param lang The language tag to use.
137 * @param langLength The length of the language tag.
138 * @param script The script tag to use.
139 * @param scriptLength The length of the script tag.
140 * @param region The region tag to use.
141 * @param regionLength The length of the region tag.
142 * @param trailing Any trailing data to append to the new tag.
143 * @param trailingLength The length of the trailing data.
144 * @param alternateTags A string containing any alternate tags.
145 * @param tag The output buffer.
146 * @param tagCapacity The capacity of the output buffer.
147 * @param err A pointer to a UErrorCode for error reporting.
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149 **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152 const char* lang,
153 int32_t langLength,
154 const char* script,
155 int32_t scriptLength,
156 const char* region,
157 int32_t regionLength,
158 const char* trailing,
159 int32_t trailingLength,
160 const char* alternateTags,
161 char* tag,
162 int32_t tagCapacity,
163 UErrorCode* err) {
164
165 if (U_FAILURE(*err)) {
166 goto error;
167 }
168 else if (tag == NULL ||
169 tagCapacity <= 0 ||
170 langLength >= ULOC_LANG_CAPACITY ||
171 scriptLength >= ULOC_SCRIPT_CAPACITY ||
172 regionLength >= ULOC_COUNTRY_CAPACITY) {
173 goto error;
174 }
175 else {
176 /**
177 * ULOC_FULLNAME_CAPACITY will provide enough capacity
178 * that we can build a string that contains the language,
179 * script and region code without worrying about overrunning
180 * the user-supplied buffer.
181 **/
182 char tagBuffer[ULOC_FULLNAME_CAPACITY];
183 int32_t tagLength = 0;
184 int32_t capacityRemaining = tagCapacity;
185 UBool regionAppended = FALSE;
186
187 if (langLength > 0) {
188 appendTag(
189 lang,
190 langLength,
191 tagBuffer,
192 &tagLength);
193 }
194 else if (alternateTags == NULL) {
195 /*
196 * Append the value for an unknown language, if
197 * we found no language.
198 */
199 appendTag(
200 unknownLanguage,
201 (int32_t)uprv_strlen(unknownLanguage),
202 tagBuffer,
203 &tagLength);
204 }
205 else {
206 /*
207 * Parse the alternateTags string for the language.
208 */
209 char alternateLang[ULOC_LANG_CAPACITY];
210 int32_t alternateLangLength = sizeof(alternateLang);
211
212 alternateLangLength =
213 uloc_getLanguage(
214 alternateTags,
215 alternateLang,
216 alternateLangLength,
217 err);
218 if(U_FAILURE(*err) ||
219 alternateLangLength >= ULOC_LANG_CAPACITY) {
220 goto error;
221 }
222 else if (alternateLangLength == 0) {
223 /*
224 * Append the value for an unknown language, if
225 * we found no language.
226 */
227 appendTag(
228 unknownLanguage,
229 (int32_t)uprv_strlen(unknownLanguage),
230 tagBuffer,
231 &tagLength);
232 }
233 else {
234 appendTag(
235 alternateLang,
236 alternateLangLength,
237 tagBuffer,
238 &tagLength);
239 }
240 }
241
242 if (scriptLength > 0) {
243 appendTag(
244 script,
245 scriptLength,
246 tagBuffer,
247 &tagLength);
248 }
249 else if (alternateTags != NULL) {
250 /*
251 * Parse the alternateTags string for the script.
252 */
253 char alternateScript[ULOC_SCRIPT_CAPACITY];
254
255 const int32_t alternateScriptLength =
256 uloc_getScript(
257 alternateTags,
258 alternateScript,
259 sizeof(alternateScript),
260 err);
261
262 if (U_FAILURE(*err) ||
263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264 goto error;
265 }
266 else if (alternateScriptLength > 0) {
267 appendTag(
268 alternateScript,
269 alternateScriptLength,
270 tagBuffer,
271 &tagLength);
272 }
273 }
274
275 if (regionLength > 0) {
276 appendTag(
277 region,
278 regionLength,
279 tagBuffer,
280 &tagLength);
281
282 regionAppended = TRUE;
283 }
284 else if (alternateTags != NULL) {
285 /*
286 * Parse the alternateTags string for the region.
287 */
288 char alternateRegion[ULOC_COUNTRY_CAPACITY];
289
290 const int32_t alternateRegionLength =
291 uloc_getCountry(
292 alternateTags,
293 alternateRegion,
294 sizeof(alternateRegion),
295 err);
296 if (U_FAILURE(*err) ||
297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298 goto error;
299 }
300 else if (alternateRegionLength > 0) {
301 appendTag(
302 alternateRegion,
303 alternateRegionLength,
304 tagBuffer,
305 &tagLength);
306
307 regionAppended = TRUE;
308 }
309 }
310
311 {
312 const int32_t toCopy =
313 tagLength >= tagCapacity ? tagCapacity : tagLength;
314
315 /**
316 * Copy the partial tag from our internal buffer to the supplied
317 * target.
318 **/
319 uprv_memcpy(
320 tag,
321 tagBuffer,
322 toCopy);
323
324 capacityRemaining -= toCopy;
325 }
326
327 if (trailingLength > 0) {
328 if (*trailing != '@' && capacityRemaining > 0) {
329 tag[tagLength++] = '_';
330 --capacityRemaining;
331 if (capacityRemaining > 0 && !regionAppended) {
332 /* extra separator is required */
333 tag[tagLength++] = '_';
334 --capacityRemaining;
335 }
336 }
337
338 if (capacityRemaining > 0) {
339 /*
340 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
341 * don't know if the user-supplied buffers overlap.
342 */
343 const int32_t toCopy =
344 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
345
346 uprv_memmove(
347 &tag[tagLength],
348 trailing,
349 toCopy);
350 }
351 }
352
353 tagLength += trailingLength;
354
355 return u_terminateChars(
356 tag,
357 tagCapacity,
358 tagLength,
359 err);
360 }
361
362 error:
363
364 /**
365 * An overflow indicates the locale ID passed in
366 * is ill-formed. If we got here, and there was
367 * no previous error, it's an implicit overflow.
368 **/
369 if (*err == U_BUFFER_OVERFLOW_ERROR ||
370 U_SUCCESS(*err)) {
371 *err = U_ILLEGAL_ARGUMENT_ERROR;
372 }
373
374 return -1;
375 }
376
377 /**
378 * Create a tag string from the supplied parameters. The lang, script and region
379 * parameters may be NULL pointers. If they are, their corresponding length parameters
380 * must be less than or equal to 0. If the lang parameter is an empty string, the
381 * default value for an unknown language is written to the output buffer.
382 *
383 * If the length of the new string exceeds the capacity of the output buffer,
384 * the function copies as many bytes to the output buffer as it can, and returns
385 * the error U_BUFFER_OVERFLOW_ERROR.
386 *
387 * If an illegal argument is provided, the function returns the error
388 * U_ILLEGAL_ARGUMENT_ERROR.
389 *
390 * @param lang The language tag to use.
391 * @param langLength The length of the language tag.
392 * @param script The script tag to use.
393 * @param scriptLength The length of the script tag.
394 * @param region The region tag to use.
395 * @param regionLength The length of the region tag.
396 * @param trailing Any trailing data to append to the new tag.
397 * @param trailingLength The length of the trailing data.
398 * @param tag The output buffer.
399 * @param tagCapacity The capacity of the output buffer.
400 * @param err A pointer to a UErrorCode for error reporting.
401 * @return The length of the tag string, which may be greater than tagCapacity.
402 **/
403 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)404 createTagString(
405 const char* lang,
406 int32_t langLength,
407 const char* script,
408 int32_t scriptLength,
409 const char* region,
410 int32_t regionLength,
411 const char* trailing,
412 int32_t trailingLength,
413 char* tag,
414 int32_t tagCapacity,
415 UErrorCode* err)
416 {
417 return createTagStringWithAlternates(
418 lang,
419 langLength,
420 script,
421 scriptLength,
422 region,
423 regionLength,
424 trailing,
425 trailingLength,
426 NULL,
427 tag,
428 tagCapacity,
429 err);
430 }
431
432 /**
433 * Parse the language, script, and region subtags from a tag string, and copy the
434 * results into the corresponding output parameters. The buffers are null-terminated,
435 * unless overflow occurs.
436 *
437 * The langLength, scriptLength, and regionLength parameters are input/output
438 * parameters, and must contain the capacity of their corresponding buffers on
439 * input. On output, they will contain the actual length of the buffers, not
440 * including the null terminator.
441 *
442 * If the length of any of the output subtags exceeds the capacity of the corresponding
443 * buffer, the function copies as many bytes to the output buffer as it can, and returns
444 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
445 * occurs.
446 *
447 * If an illegal argument is provided, the function returns the error
448 * U_ILLEGAL_ARGUMENT_ERROR.
449 *
450 * @param localeID The locale ID to parse.
451 * @param lang The language tag buffer.
452 * @param langLength The length of the language tag.
453 * @param script The script tag buffer.
454 * @param scriptLength The length of the script tag.
455 * @param region The region tag buffer.
456 * @param regionLength The length of the region tag.
457 * @param err A pointer to a UErrorCode for error reporting.
458 * @return The number of chars of the localeID parameter consumed.
459 **/
460 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)461 parseTagString(
462 const char* localeID,
463 char* lang,
464 int32_t* langLength,
465 char* script,
466 int32_t* scriptLength,
467 char* region,
468 int32_t* regionLength,
469 UErrorCode* err)
470 {
471 const char* position = localeID;
472 int32_t subtagLength = 0;
473
474 if(U_FAILURE(*err) ||
475 localeID == NULL ||
476 lang == NULL ||
477 langLength == NULL ||
478 script == NULL ||
479 scriptLength == NULL ||
480 region == NULL ||
481 regionLength == NULL) {
482 goto error;
483 }
484
485 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
486 u_terminateChars(lang, *langLength, subtagLength, err);
487
488 /*
489 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
490 * to be an error, because it indicates the user-supplied tag is
491 * not well-formed.
492 */
493 if(U_FAILURE(*err)) {
494 goto error;
495 }
496
497 *langLength = subtagLength;
498
499 /*
500 * If no language was present, use the value of unknownLanguage
501 * instead. Otherwise, move past any separator.
502 */
503 if (*langLength == 0) {
504 uprv_strcpy(
505 lang,
506 unknownLanguage);
507 *langLength = (int32_t)uprv_strlen(lang);
508 }
509 else if (_isIDSeparator(*position)) {
510 ++position;
511 }
512
513 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
514 u_terminateChars(script, *scriptLength, subtagLength, err);
515
516 if(U_FAILURE(*err)) {
517 goto error;
518 }
519
520 *scriptLength = subtagLength;
521
522 if (*scriptLength > 0) {
523 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
524 /**
525 * If the script part is the "unknown" script, then don't return it.
526 **/
527 *scriptLength = 0;
528 }
529
530 /*
531 * Move past any separator.
532 */
533 if (_isIDSeparator(*position)) {
534 ++position;
535 }
536 }
537
538 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
539 u_terminateChars(region, *regionLength, subtagLength, err);
540
541 if(U_FAILURE(*err)) {
542 goto error;
543 }
544
545 *regionLength = subtagLength;
546
547 if (*regionLength > 0) {
548 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
549 /**
550 * If the region part is the "unknown" region, then don't return it.
551 **/
552 *regionLength = 0;
553 }
554 } else if (*position != 0 && *position != '@') {
555 /* back up over consumed trailing separator */
556 --position;
557 }
558
559 exit:
560
561 return (int32_t)(position - localeID);
562
563 error:
564
565 /**
566 * If we get here, we have no explicit error, it's the result of an
567 * illegal argument.
568 **/
569 if (!U_FAILURE(*err)) {
570 *err = U_ILLEGAL_ARGUMENT_ERROR;
571 }
572
573 goto exit;
574 }
575
576 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)577 createLikelySubtagsString(
578 const char* lang,
579 int32_t langLength,
580 const char* script,
581 int32_t scriptLength,
582 const char* region,
583 int32_t regionLength,
584 const char* variants,
585 int32_t variantsLength,
586 char* tag,
587 int32_t tagCapacity,
588 UErrorCode* err)
589 {
590 /**
591 * ULOC_FULLNAME_CAPACITY will provide enough capacity
592 * that we can build a string that contains the language,
593 * script and region code without worrying about overrunning
594 * the user-supplied buffer.
595 **/
596 char tagBuffer[ULOC_FULLNAME_CAPACITY];
597 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
598
599 if(U_FAILURE(*err)) {
600 goto error;
601 }
602
603 /**
604 * Try the language with the script and region first.
605 **/
606 if (scriptLength > 0 && regionLength > 0) {
607
608 const char* likelySubtags = NULL;
609
610 createTagString(
611 lang,
612 langLength,
613 script,
614 scriptLength,
615 region,
616 regionLength,
617 NULL,
618 0,
619 tagBuffer,
620 sizeof(tagBuffer),
621 err);
622 if(U_FAILURE(*err)) {
623 goto error;
624 }
625
626 likelySubtags =
627 findLikelySubtags(
628 tagBuffer,
629 likelySubtagsBuffer,
630 sizeof(likelySubtagsBuffer),
631 err);
632 if(U_FAILURE(*err)) {
633 goto error;
634 }
635
636 if (likelySubtags != NULL) {
637 /* Always use the language tag from the
638 maximal string, since it may be more
639 specific than the one provided. */
640 return createTagStringWithAlternates(
641 NULL,
642 0,
643 NULL,
644 0,
645 NULL,
646 0,
647 variants,
648 variantsLength,
649 likelySubtags,
650 tag,
651 tagCapacity,
652 err);
653 }
654 }
655
656 /**
657 * Try the language with just the script.
658 **/
659 if (scriptLength > 0) {
660
661 const char* likelySubtags = NULL;
662
663 createTagString(
664 lang,
665 langLength,
666 script,
667 scriptLength,
668 NULL,
669 0,
670 NULL,
671 0,
672 tagBuffer,
673 sizeof(tagBuffer),
674 err);
675 if(U_FAILURE(*err)) {
676 goto error;
677 }
678
679 likelySubtags =
680 findLikelySubtags(
681 tagBuffer,
682 likelySubtagsBuffer,
683 sizeof(likelySubtagsBuffer),
684 err);
685 if(U_FAILURE(*err)) {
686 goto error;
687 }
688
689 if (likelySubtags != NULL) {
690 /* Always use the language tag from the
691 maximal string, since it may be more
692 specific than the one provided. */
693 return createTagStringWithAlternates(
694 NULL,
695 0,
696 NULL,
697 0,
698 region,
699 regionLength,
700 variants,
701 variantsLength,
702 likelySubtags,
703 tag,
704 tagCapacity,
705 err);
706 }
707 }
708
709 /**
710 * Try the language with just the region.
711 **/
712 if (regionLength > 0) {
713
714 const char* likelySubtags = NULL;
715
716 createTagString(
717 lang,
718 langLength,
719 NULL,
720 0,
721 region,
722 regionLength,
723 NULL,
724 0,
725 tagBuffer,
726 sizeof(tagBuffer),
727 err);
728 if(U_FAILURE(*err)) {
729 goto error;
730 }
731
732 likelySubtags =
733 findLikelySubtags(
734 tagBuffer,
735 likelySubtagsBuffer,
736 sizeof(likelySubtagsBuffer),
737 err);
738 if(U_FAILURE(*err)) {
739 goto error;
740 }
741
742 if (likelySubtags != NULL) {
743 /* Always use the language tag from the
744 maximal string, since it may be more
745 specific than the one provided. */
746 return createTagStringWithAlternates(
747 NULL,
748 0,
749 script,
750 scriptLength,
751 NULL,
752 0,
753 variants,
754 variantsLength,
755 likelySubtags,
756 tag,
757 tagCapacity,
758 err);
759 }
760 }
761
762 /**
763 * Finally, try just the language.
764 **/
765 {
766 const char* likelySubtags = NULL;
767
768 createTagString(
769 lang,
770 langLength,
771 NULL,
772 0,
773 NULL,
774 0,
775 NULL,
776 0,
777 tagBuffer,
778 sizeof(tagBuffer),
779 err);
780 if(U_FAILURE(*err)) {
781 goto error;
782 }
783
784 likelySubtags =
785 findLikelySubtags(
786 tagBuffer,
787 likelySubtagsBuffer,
788 sizeof(likelySubtagsBuffer),
789 err);
790 if(U_FAILURE(*err)) {
791 goto error;
792 }
793
794 if (likelySubtags != NULL) {
795 /* Always use the language tag from the
796 maximal string, since it may be more
797 specific than the one provided. */
798 return createTagStringWithAlternates(
799 NULL,
800 0,
801 script,
802 scriptLength,
803 region,
804 regionLength,
805 variants,
806 variantsLength,
807 likelySubtags,
808 tag,
809 tagCapacity,
810 err);
811 }
812 }
813
814 return u_terminateChars(
815 tag,
816 tagCapacity,
817 0,
818 err);
819
820 error:
821
822 if (!U_FAILURE(*err)) {
823 *err = U_ILLEGAL_ARGUMENT_ERROR;
824 }
825
826 return -1;
827 }
828
/**
 * Scan the trailing (variant) part of a locale ID and jump to the enclosing
 * function's "error" label if any run of characters between separators
 * ('-' or '_') is too long.  Scanning stops at '@', so keywords are not
 * checked.
 *
 * A run is rejected when its 10th character is reached (the counter is
 * tested with "> 8" before being incremented).
 * NOTE(review): this admits 9-character runs, while variant subtags are
 * normally limited to 8 characters; confirm whether the slack is intended.
 *
 * The expansion is a single statement, so the macro is safe in if/else.
 * The enclosing function must define a label named "error".
 **/
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t cvsRunLength = 0; \
        int32_t cvsIndex; \
        for (cvsIndex = 0; cvsIndex < (trailingLength); cvsIndex++) { \
            if ((trailing)[cvsIndex] == '-' || (trailing)[cvsIndex] == '_') { \
                cvsRunLength = 0; \
            } else if ((trailing)[cvsIndex] == '@') { \
                break; \
            } else if (cvsRunLength > 8) { \
                goto error; \
            } else { \
                cvsRunLength++; \
            } \
        } \
    } while (0)
847
848 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)849 _uloc_addLikelySubtags(const char* localeID,
850 char* maximizedLocaleID,
851 int32_t maximizedLocaleIDCapacity,
852 UErrorCode* err)
853 {
854 char lang[ULOC_LANG_CAPACITY];
855 int32_t langLength = sizeof(lang);
856 char script[ULOC_SCRIPT_CAPACITY];
857 int32_t scriptLength = sizeof(script);
858 char region[ULOC_COUNTRY_CAPACITY];
859 int32_t regionLength = sizeof(region);
860 const char* trailing = "";
861 int32_t trailingLength = 0;
862 int32_t trailingIndex = 0;
863 int32_t resultLength = 0;
864
865 if(U_FAILURE(*err)) {
866 goto error;
867 }
868 else if (localeID == NULL ||
869 maximizedLocaleID == NULL ||
870 maximizedLocaleIDCapacity <= 0) {
871 goto error;
872 }
873
874 trailingIndex = parseTagString(
875 localeID,
876 lang,
877 &langLength,
878 script,
879 &scriptLength,
880 region,
881 ®ionLength,
882 err);
883 if(U_FAILURE(*err)) {
884 /* Overflow indicates an illegal argument error */
885 if (*err == U_BUFFER_OVERFLOW_ERROR) {
886 *err = U_ILLEGAL_ARGUMENT_ERROR;
887 }
888
889 goto error;
890 }
891
892 /* Find the length of the trailing portion. */
893 while (_isIDSeparator(localeID[trailingIndex])) {
894 trailingIndex++;
895 }
896 trailing = &localeID[trailingIndex];
897 trailingLength = (int32_t)uprv_strlen(trailing);
898
899 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
900
901 resultLength =
902 createLikelySubtagsString(
903 lang,
904 langLength,
905 script,
906 scriptLength,
907 region,
908 regionLength,
909 trailing,
910 trailingLength,
911 maximizedLocaleID,
912 maximizedLocaleIDCapacity,
913 err);
914
915 if (resultLength == 0) {
916 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
917
918 /*
919 * If we get here, we need to return localeID.
920 */
921 uprv_memcpy(
922 maximizedLocaleID,
923 localeID,
924 localIDLength <= maximizedLocaleIDCapacity ?
925 localIDLength : maximizedLocaleIDCapacity);
926
927 resultLength =
928 u_terminateChars(
929 maximizedLocaleID,
930 maximizedLocaleIDCapacity,
931 localIDLength,
932 err);
933 }
934
935 return resultLength;
936
937 error:
938
939 if (!U_FAILURE(*err)) {
940 *err = U_ILLEGAL_ARGUMENT_ERROR;
941 }
942
943 return -1;
944 }
945
946 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)947 _uloc_minimizeSubtags(const char* localeID,
948 char* minimizedLocaleID,
949 int32_t minimizedLocaleIDCapacity,
950 UErrorCode* err)
951 {
952 /**
953 * ULOC_FULLNAME_CAPACITY will provide enough capacity
954 * that we can build a string that contains the language,
955 * script and region code without worrying about overrunning
956 * the user-supplied buffer.
957 **/
958 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
959 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
960
961 char lang[ULOC_LANG_CAPACITY];
962 int32_t langLength = sizeof(lang);
963 char script[ULOC_SCRIPT_CAPACITY];
964 int32_t scriptLength = sizeof(script);
965 char region[ULOC_COUNTRY_CAPACITY];
966 int32_t regionLength = sizeof(region);
967 const char* trailing = "";
968 int32_t trailingLength = 0;
969 int32_t trailingIndex = 0;
970
971 if(U_FAILURE(*err)) {
972 goto error;
973 }
974 else if (localeID == NULL ||
975 minimizedLocaleID == NULL ||
976 minimizedLocaleIDCapacity <= 0) {
977 goto error;
978 }
979
980 trailingIndex =
981 parseTagString(
982 localeID,
983 lang,
984 &langLength,
985 script,
986 &scriptLength,
987 region,
988 ®ionLength,
989 err);
990 if(U_FAILURE(*err)) {
991
992 /* Overflow indicates an illegal argument error */
993 if (*err == U_BUFFER_OVERFLOW_ERROR) {
994 *err = U_ILLEGAL_ARGUMENT_ERROR;
995 }
996
997 goto error;
998 }
999
1000 /* Find the spot where the variants or the keywords begin, if any. */
1001 while (_isIDSeparator(localeID[trailingIndex])) {
1002 trailingIndex++;
1003 }
1004 trailing = &localeID[trailingIndex];
1005 trailingLength = (int32_t)uprv_strlen(trailing);
1006
1007 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1008
1009 createTagString(
1010 lang,
1011 langLength,
1012 script,
1013 scriptLength,
1014 region,
1015 regionLength,
1016 NULL,
1017 0,
1018 maximizedTagBuffer,
1019 maximizedTagBufferLength,
1020 err);
1021 if(U_FAILURE(*err)) {
1022 goto error;
1023 }
1024
1025 /**
1026 * First, we need to first get the maximization
1027 * from AddLikelySubtags.
1028 **/
1029 maximizedTagBufferLength =
1030 uloc_addLikelySubtags(
1031 maximizedTagBuffer,
1032 maximizedTagBuffer,
1033 maximizedTagBufferLength,
1034 err);
1035
1036 if(U_FAILURE(*err)) {
1037 goto error;
1038 }
1039
1040 /**
1041 * Start first with just the language.
1042 **/
1043 {
1044 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1045
1046 const int32_t tagBufferLength =
1047 createLikelySubtagsString(
1048 lang,
1049 langLength,
1050 NULL,
1051 0,
1052 NULL,
1053 0,
1054 NULL,
1055 0,
1056 tagBuffer,
1057 sizeof(tagBuffer),
1058 err);
1059
1060 if(U_FAILURE(*err)) {
1061 goto error;
1062 }
1063 else if (uprv_strnicmp(
1064 maximizedTagBuffer,
1065 tagBuffer,
1066 tagBufferLength) == 0) {
1067
1068 return createTagString(
1069 lang,
1070 langLength,
1071 NULL,
1072 0,
1073 NULL,
1074 0,
1075 trailing,
1076 trailingLength,
1077 minimizedLocaleID,
1078 minimizedLocaleIDCapacity,
1079 err);
1080 }
1081 }
1082
1083 /**
1084 * Next, try the language and region.
1085 **/
1086 if (regionLength > 0) {
1087
1088 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1089
1090 const int32_t tagBufferLength =
1091 createLikelySubtagsString(
1092 lang,
1093 langLength,
1094 NULL,
1095 0,
1096 region,
1097 regionLength,
1098 NULL,
1099 0,
1100 tagBuffer,
1101 sizeof(tagBuffer),
1102 err);
1103
1104 if(U_FAILURE(*err)) {
1105 goto error;
1106 }
1107 else if (uprv_strnicmp(
1108 maximizedTagBuffer,
1109 tagBuffer,
1110 tagBufferLength) == 0) {
1111
1112 return createTagString(
1113 lang,
1114 langLength,
1115 NULL,
1116 0,
1117 region,
1118 regionLength,
1119 trailing,
1120 trailingLength,
1121 minimizedLocaleID,
1122 minimizedLocaleIDCapacity,
1123 err);
1124 }
1125 }
1126
1127 /**
1128 * Finally, try the language and script. This is our last chance,
1129 * since trying with all three subtags would only yield the
1130 * maximal version that we already have.
1131 **/
1132 if (scriptLength > 0 && regionLength > 0) {
1133 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1134
1135 const int32_t tagBufferLength =
1136 createLikelySubtagsString(
1137 lang,
1138 langLength,
1139 script,
1140 scriptLength,
1141 NULL,
1142 0,
1143 NULL,
1144 0,
1145 tagBuffer,
1146 sizeof(tagBuffer),
1147 err);
1148
1149 if(U_FAILURE(*err)) {
1150 goto error;
1151 }
1152 else if (uprv_strnicmp(
1153 maximizedTagBuffer,
1154 tagBuffer,
1155 tagBufferLength) == 0) {
1156
1157 return createTagString(
1158 lang,
1159 langLength,
1160 script,
1161 scriptLength,
1162 NULL,
1163 0,
1164 trailing,
1165 trailingLength,
1166 minimizedLocaleID,
1167 minimizedLocaleIDCapacity,
1168 err);
1169 }
1170 }
1171
1172 {
1173 /**
1174 * If we got here, return the locale ID parameter.
1175 **/
1176 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1177
1178 uprv_memcpy(
1179 minimizedLocaleID,
1180 localeID,
1181 localeIDLength <= minimizedLocaleIDCapacity ?
1182 localeIDLength : minimizedLocaleIDCapacity);
1183
1184 return u_terminateChars(
1185 minimizedLocaleID,
1186 minimizedLocaleIDCapacity,
1187 localeIDLength,
1188 err);
1189 }
1190
1191 error:
1192
1193 if (!U_FAILURE(*err)) {
1194 *err = U_ILLEGAL_ARGUMENT_ERROR;
1195 }
1196
1197 return -1;
1198
1199
1200 }
1201
1202 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1203 do_canonicalize(const char* localeID,
1204 char* buffer,
1205 int32_t bufferCapacity,
1206 UErrorCode* err)
1207 {
1208 uloc_canonicalize(
1209 localeID,
1210 buffer,
1211 bufferCapacity,
1212 err);
1213
1214 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1215 *err == U_BUFFER_OVERFLOW_ERROR) {
1216 *err = U_ILLEGAL_ARGUMENT_ERROR;
1217
1218 return FALSE;
1219 }
1220 else if (U_FAILURE(*err)) {
1221
1222 return FALSE;
1223 }
1224 else {
1225 return TRUE;
1226 }
1227 }
1228
1229 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1230 uloc_addLikelySubtags(const char* localeID,
1231 char* maximizedLocaleID,
1232 int32_t maximizedLocaleIDCapacity,
1233 UErrorCode* err)
1234 {
1235 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1236
1237 if (!do_canonicalize(
1238 localeID,
1239 localeBuffer,
1240 sizeof(localeBuffer),
1241 err)) {
1242 return -1;
1243 }
1244 else {
1245 return _uloc_addLikelySubtags(
1246 localeBuffer,
1247 maximizedLocaleID,
1248 maximizedLocaleIDCapacity,
1249 err);
1250 }
1251 }
1252
1253 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1254 uloc_minimizeSubtags(const char* localeID,
1255 char* minimizedLocaleID,
1256 int32_t minimizedLocaleIDCapacity,
1257 UErrorCode* err)
1258 {
1259 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1260
1261 if (!do_canonicalize(
1262 localeID,
1263 localeBuffer,
1264 sizeof(localeBuffer),
1265 err)) {
1266 return -1;
1267 }
1268 else {
1269 return _uloc_minimizeSubtags(
1270 localeBuffer,
1271 minimizedLocaleID,
1272 minimizedLocaleIDCapacity,
1273 err);
1274 }
1275 }
1276