1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1997-2011, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: loclikely.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010feb25
14 * created by: Markus W. Scherer
15 *
16 * Code for likely and minimized locale subtags, separated out from other .cpp files
17 * that then do not depend on resource bundle code and likely-subtags data.
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28
29 /**
30 * This function looks for the localeID in the likelySubtags resource.
31 *
32 * @param localeID The tag to find.
33 * @param buffer A buffer to hold the matching entry
34 * @param bufferLength The length of the output buffer
35 * @return A pointer to "buffer" if found, or a null pointer if not.
36 */
37 static const char* U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39 char* buffer,
40 int32_t bufferLength,
41 UErrorCode* err) {
42 const char* result = NULL;
43
44 if (!U_FAILURE(*err)) {
45 int32_t resLen = 0;
46 const UChar* s = NULL;
47 UErrorCode tmpErr = U_ZERO_ERROR;
48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49 if (U_SUCCESS(tmpErr)) {
50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51
52 if (U_FAILURE(tmpErr)) {
53 /*
54 * If a resource is missing, it's not really an error, it's
55 * just that we don't have any data for that particular locale ID.
56 */
57 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58 *err = tmpErr;
59 }
60 }
61 else if (resLen >= bufferLength) {
62 /* The buffer should never overflow. */
63 *err = U_INTERNAL_PROGRAM_ERROR;
64 }
65 else {
66 u_UCharsToChars(s, buffer, resLen + 1);
67 result = buffer;
68 }
69
70 ures_close(subtags);
71 } else {
72 *err = tmpErr;
73 }
74 }
75
76 return result;
77 }
78
79 /**
80 * Append a tag to a buffer, adding the separator if necessary. The buffer
81 * must be large enough to contain the resulting tag plus any separator
82 * necessary. The tag must not be a zero-length string.
83 *
84 * @param tag The tag to add.
85 * @param tagLength The length of the tag.
86 * @param buffer The output buffer.
87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
88 **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91 const char* tag,
92 int32_t tagLength,
93 char* buffer,
94 int32_t* bufferLength) {
95
96 if (*bufferLength > 0) {
97 buffer[*bufferLength] = '_';
98 ++(*bufferLength);
99 }
100
101 uprv_memmove(
102 &buffer[*bufferLength],
103 tag,
104 tagLength);
105
106 *bufferLength += tagLength;
107 }
108
/**
 * Canonical placeholder subtags for an unknown language, script and region.
 * The language placeholder is written out when no language is available;
 * the script and region placeholders are recognized on input and dropped.
 **/
static const char *const unknownLanguage = "und";   /* language */
static const char *const unknownScript   = "Zzzz";  /* script */
static const char *const unknownRegion   = "ZZ";    /* region */
115
116 /**
117 * Create a tag string from the supplied parameters. The lang, script and region
118 * parameters may be NULL pointers. If they are, their corresponding length parameters
119 * must be less than or equal to 0.
120 *
121 * If any of the language, script or region parameters are empty, and the alternateTags
122 * parameter is not NULL, it will be parsed for potential language, script and region tags
123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
124 * it contains no language tag, the default tag for the unknown language is used.
125 *
126 * If the length of the new string exceeds the capacity of the output buffer,
127 * the function copies as many bytes to the output buffer as it can, and returns
128 * the error U_BUFFER_OVERFLOW_ERROR.
129 *
130 * If an illegal argument is provided, the function returns the error
131 * U_ILLEGAL_ARGUMENT_ERROR.
132 *
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134 * the tag string fits in the output buffer, but the null terminator doesn't.
135 *
136 * @param lang The language tag to use.
137 * @param langLength The length of the language tag.
138 * @param script The script tag to use.
139 * @param scriptLength The length of the script tag.
140 * @param region The region tag to use.
141 * @param regionLength The length of the region tag.
142 * @param trailing Any trailing data to append to the new tag.
143 * @param trailingLength The length of the trailing data.
144 * @param alternateTags A string containing any alternate tags.
145 * @param tag The output buffer.
146 * @param tagCapacity The capacity of the output buffer.
147 * @param err A pointer to a UErrorCode for error reporting.
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149 **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152 const char* lang,
153 int32_t langLength,
154 const char* script,
155 int32_t scriptLength,
156 const char* region,
157 int32_t regionLength,
158 const char* trailing,
159 int32_t trailingLength,
160 const char* alternateTags,
161 char* tag,
162 int32_t tagCapacity,
163 UErrorCode* err) {
164
165 if (U_FAILURE(*err)) {
166 goto error;
167 }
168 else if (tag == NULL ||
169 tagCapacity <= 0 ||
170 langLength >= ULOC_LANG_CAPACITY ||
171 scriptLength >= ULOC_SCRIPT_CAPACITY ||
172 regionLength >= ULOC_COUNTRY_CAPACITY) {
173 goto error;
174 }
175 else {
176 /**
177 * ULOC_FULLNAME_CAPACITY will provide enough capacity
178 * that we can build a string that contains the language,
179 * script and region code without worrying about overrunning
180 * the user-supplied buffer.
181 **/
182 char tagBuffer[ULOC_FULLNAME_CAPACITY];
183 int32_t tagLength = 0;
184 int32_t capacityRemaining = tagCapacity;
185 UBool regionAppended = FALSE;
186
187 if (langLength > 0) {
188 appendTag(
189 lang,
190 langLength,
191 tagBuffer,
192 &tagLength);
193 }
194 else if (alternateTags == NULL) {
195 /*
196 * Append the value for an unknown language, if
197 * we found no language.
198 */
199 appendTag(
200 unknownLanguage,
201 (int32_t)uprv_strlen(unknownLanguage),
202 tagBuffer,
203 &tagLength);
204 }
205 else {
206 /*
207 * Parse the alternateTags string for the language.
208 */
209 char alternateLang[ULOC_LANG_CAPACITY];
210 int32_t alternateLangLength = sizeof(alternateLang);
211
212 alternateLangLength =
213 uloc_getLanguage(
214 alternateTags,
215 alternateLang,
216 alternateLangLength,
217 err);
218 if(U_FAILURE(*err) ||
219 alternateLangLength >= ULOC_LANG_CAPACITY) {
220 goto error;
221 }
222 else if (alternateLangLength == 0) {
223 /*
224 * Append the value for an unknown language, if
225 * we found no language.
226 */
227 appendTag(
228 unknownLanguage,
229 (int32_t)uprv_strlen(unknownLanguage),
230 tagBuffer,
231 &tagLength);
232 }
233 else {
234 appendTag(
235 alternateLang,
236 alternateLangLength,
237 tagBuffer,
238 &tagLength);
239 }
240 }
241
242 if (scriptLength > 0) {
243 appendTag(
244 script,
245 scriptLength,
246 tagBuffer,
247 &tagLength);
248 }
249 else if (alternateTags != NULL) {
250 /*
251 * Parse the alternateTags string for the script.
252 */
253 char alternateScript[ULOC_SCRIPT_CAPACITY];
254
255 const int32_t alternateScriptLength =
256 uloc_getScript(
257 alternateTags,
258 alternateScript,
259 sizeof(alternateScript),
260 err);
261
262 if (U_FAILURE(*err) ||
263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264 goto error;
265 }
266 else if (alternateScriptLength > 0) {
267 appendTag(
268 alternateScript,
269 alternateScriptLength,
270 tagBuffer,
271 &tagLength);
272 }
273 }
274
275 if (regionLength > 0) {
276 appendTag(
277 region,
278 regionLength,
279 tagBuffer,
280 &tagLength);
281
282 regionAppended = TRUE;
283 }
284 else if (alternateTags != NULL) {
285 /*
286 * Parse the alternateTags string for the region.
287 */
288 char alternateRegion[ULOC_COUNTRY_CAPACITY];
289
290 const int32_t alternateRegionLength =
291 uloc_getCountry(
292 alternateTags,
293 alternateRegion,
294 sizeof(alternateRegion),
295 err);
296 if (U_FAILURE(*err) ||
297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298 goto error;
299 }
300 else if (alternateRegionLength > 0) {
301 appendTag(
302 alternateRegion,
303 alternateRegionLength,
304 tagBuffer,
305 &tagLength);
306
307 regionAppended = TRUE;
308 }
309 }
310
311 {
312 const int32_t toCopy =
313 tagLength >= tagCapacity ? tagCapacity : tagLength;
314
315 /**
316 * Copy the partial tag from our internal buffer to the supplied
317 * target.
318 **/
319 uprv_memcpy(
320 tag,
321 tagBuffer,
322 toCopy);
323
324 capacityRemaining -= toCopy;
325 }
326
327 if (trailingLength > 0) {
328 if (*trailing != '@' && capacityRemaining > 0) {
329 tag[tagLength++] = '_';
330 --capacityRemaining;
331 if (capacityRemaining > 0 && !regionAppended) {
332 /* extra separator is required */
333 tag[tagLength++] = '_';
334 --capacityRemaining;
335 }
336 }
337
338 if (capacityRemaining > 0) {
339 /*
340 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
341 * don't know if the user-supplied buffers overlap.
342 */
343 const int32_t toCopy =
344 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
345
346 uprv_memmove(
347 &tag[tagLength],
348 trailing,
349 toCopy);
350 }
351 }
352
353 tagLength += trailingLength;
354
355 return u_terminateChars(
356 tag,
357 tagCapacity,
358 tagLength,
359 err);
360 }
361
362 error:
363
364 /**
365 * An overflow indicates the locale ID passed in
366 * is ill-formed. If we got here, and there was
367 * no previous error, it's an implicit overflow.
368 **/
369 if (*err == U_BUFFER_OVERFLOW_ERROR ||
370 U_SUCCESS(*err)) {
371 *err = U_ILLEGAL_ARGUMENT_ERROR;
372 }
373
374 return -1;
375 }
376
377 /**
378 * Create a tag string from the supplied parameters. The lang, script and region
379 * parameters may be NULL pointers. If they are, their corresponding length parameters
380 * must be less than or equal to 0. If the lang parameter is an empty string, the
381 * default value for an unknown language is written to the output buffer.
382 *
383 * If the length of the new string exceeds the capacity of the output buffer,
384 * the function copies as many bytes to the output buffer as it can, and returns
385 * the error U_BUFFER_OVERFLOW_ERROR.
386 *
387 * If an illegal argument is provided, the function returns the error
388 * U_ILLEGAL_ARGUMENT_ERROR.
389 *
390 * @param lang The language tag to use.
391 * @param langLength The length of the language tag.
392 * @param script The script tag to use.
393 * @param scriptLength The length of the script tag.
394 * @param region The region tag to use.
395 * @param regionLength The length of the region tag.
396 * @param trailing Any trailing data to append to the new tag.
397 * @param trailingLength The length of the trailing data.
398 * @param tag The output buffer.
399 * @param tagCapacity The capacity of the output buffer.
400 * @param err A pointer to a UErrorCode for error reporting.
401 * @return The length of the tag string, which may be greater than tagCapacity.
402 **/
403 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)404 createTagString(
405 const char* lang,
406 int32_t langLength,
407 const char* script,
408 int32_t scriptLength,
409 const char* region,
410 int32_t regionLength,
411 const char* trailing,
412 int32_t trailingLength,
413 char* tag,
414 int32_t tagCapacity,
415 UErrorCode* err)
416 {
417 return createTagStringWithAlternates(
418 lang,
419 langLength,
420 script,
421 scriptLength,
422 region,
423 regionLength,
424 trailing,
425 trailingLength,
426 NULL,
427 tag,
428 tagCapacity,
429 err);
430 }
431
432 /**
433 * Parse the language, script, and region subtags from a tag string, and copy the
434 * results into the corresponding output parameters. The buffers are null-terminated,
435 * unless overflow occurs.
436 *
437 * The langLength, scriptLength, and regionLength parameters are input/output
438 * parameters, and must contain the capacity of their corresponding buffers on
439 * input. On output, they will contain the actual length of the buffers, not
440 * including the null terminator.
441 *
442 * If the length of any of the output subtags exceeds the capacity of the corresponding
443 * buffer, the function copies as many bytes to the output buffer as it can, and returns
444 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
445 * occurs.
446 *
447 * If an illegal argument is provided, the function returns the error
448 * U_ILLEGAL_ARGUMENT_ERROR.
449 *
450 * @param localeID The locale ID to parse.
451 * @param lang The language tag buffer.
452 * @param langLength The length of the language tag.
453 * @param script The script tag buffer.
454 * @param scriptLength The length of the script tag.
455 * @param region The region tag buffer.
456 * @param regionLength The length of the region tag.
457 * @param err A pointer to a UErrorCode for error reporting.
458 * @return The number of chars of the localeID parameter consumed.
459 **/
460 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)461 parseTagString(
462 const char* localeID,
463 char* lang,
464 int32_t* langLength,
465 char* script,
466 int32_t* scriptLength,
467 char* region,
468 int32_t* regionLength,
469 UErrorCode* err)
470 {
471 const char* position = localeID;
472 int32_t subtagLength = 0;
473
474 if(U_FAILURE(*err) ||
475 localeID == NULL ||
476 lang == NULL ||
477 langLength == NULL ||
478 script == NULL ||
479 scriptLength == NULL ||
480 region == NULL ||
481 regionLength == NULL) {
482 goto error;
483 }
484
485 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
486 u_terminateChars(lang, *langLength, subtagLength, err);
487
488 /*
489 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
490 * to be an error, because it indicates the user-supplied tag is
491 * not well-formed.
492 */
493 if(U_FAILURE(*err)) {
494 goto error;
495 }
496
497 *langLength = subtagLength;
498
499 /*
500 * If no language was present, use the value of unknownLanguage
501 * instead. Otherwise, move past any separator.
502 */
503 if (*langLength == 0) {
504 uprv_strcpy(
505 lang,
506 unknownLanguage);
507 *langLength = (int32_t)uprv_strlen(lang);
508 }
509 else if (_isIDSeparator(*position)) {
510 ++position;
511 }
512
513 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
514 u_terminateChars(script, *scriptLength, subtagLength, err);
515
516 if(U_FAILURE(*err)) {
517 goto error;
518 }
519
520 *scriptLength = subtagLength;
521
522 if (*scriptLength > 0) {
523 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
524 /**
525 * If the script part is the "unknown" script, then don't return it.
526 **/
527 *scriptLength = 0;
528 }
529
530 /*
531 * Move past any separator.
532 */
533 if (_isIDSeparator(*position)) {
534 ++position;
535 }
536 }
537
538 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
539 u_terminateChars(region, *regionLength, subtagLength, err);
540
541 if(U_FAILURE(*err)) {
542 goto error;
543 }
544
545 *regionLength = subtagLength;
546
547 if (*regionLength > 0) {
548 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
549 /**
550 * If the region part is the "unknown" region, then don't return it.
551 **/
552 *regionLength = 0;
553 }
554 } else if (*position != 0 && *position != '@') {
555 /* back up over consumed trailing separator */
556 --position;
557 }
558
559 exit:
560
561 return (int32_t)(position - localeID);
562
563 error:
564
565 /**
566 * If we get here, we have no explicit error, it's the result of an
567 * illegal argument.
568 **/
569 if (!U_FAILURE(*err)) {
570 *err = U_ILLEGAL_ARGUMENT_ERROR;
571 }
572
573 goto exit;
574 }
575
576 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)577 createLikelySubtagsString(
578 const char* lang,
579 int32_t langLength,
580 const char* script,
581 int32_t scriptLength,
582 const char* region,
583 int32_t regionLength,
584 const char* variants,
585 int32_t variantsLength,
586 char* tag,
587 int32_t tagCapacity,
588 UErrorCode* err)
589 {
590 /**
591 * ULOC_FULLNAME_CAPACITY will provide enough capacity
592 * that we can build a string that contains the language,
593 * script and region code without worrying about overrunning
594 * the user-supplied buffer.
595 **/
596 char tagBuffer[ULOC_FULLNAME_CAPACITY];
597 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
598 int32_t tagBufferLength = 0;
599
600 if(U_FAILURE(*err)) {
601 goto error;
602 }
603
604 /**
605 * Try the language with the script and region first.
606 **/
607 if (scriptLength > 0 && regionLength > 0) {
608
609 const char* likelySubtags = NULL;
610
611 tagBufferLength = createTagString(
612 lang,
613 langLength,
614 script,
615 scriptLength,
616 region,
617 regionLength,
618 NULL,
619 0,
620 tagBuffer,
621 sizeof(tagBuffer),
622 err);
623 if(U_FAILURE(*err)) {
624 goto error;
625 }
626
627 likelySubtags =
628 findLikelySubtags(
629 tagBuffer,
630 likelySubtagsBuffer,
631 sizeof(likelySubtagsBuffer),
632 err);
633 if(U_FAILURE(*err)) {
634 goto error;
635 }
636
637 if (likelySubtags != NULL) {
638 /* Always use the language tag from the
639 maximal string, since it may be more
640 specific than the one provided. */
641 return createTagStringWithAlternates(
642 NULL,
643 0,
644 NULL,
645 0,
646 NULL,
647 0,
648 variants,
649 variantsLength,
650 likelySubtags,
651 tag,
652 tagCapacity,
653 err);
654 }
655 }
656
657 /**
658 * Try the language with just the script.
659 **/
660 if (scriptLength > 0) {
661
662 const char* likelySubtags = NULL;
663
664 tagBufferLength = createTagString(
665 lang,
666 langLength,
667 script,
668 scriptLength,
669 NULL,
670 0,
671 NULL,
672 0,
673 tagBuffer,
674 sizeof(tagBuffer),
675 err);
676 if(U_FAILURE(*err)) {
677 goto error;
678 }
679
680 likelySubtags =
681 findLikelySubtags(
682 tagBuffer,
683 likelySubtagsBuffer,
684 sizeof(likelySubtagsBuffer),
685 err);
686 if(U_FAILURE(*err)) {
687 goto error;
688 }
689
690 if (likelySubtags != NULL) {
691 /* Always use the language tag from the
692 maximal string, since it may be more
693 specific than the one provided. */
694 return createTagStringWithAlternates(
695 NULL,
696 0,
697 NULL,
698 0,
699 region,
700 regionLength,
701 variants,
702 variantsLength,
703 likelySubtags,
704 tag,
705 tagCapacity,
706 err);
707 }
708 }
709
710 /**
711 * Try the language with just the region.
712 **/
713 if (regionLength > 0) {
714
715 const char* likelySubtags = NULL;
716
717 createTagString(
718 lang,
719 langLength,
720 NULL,
721 0,
722 region,
723 regionLength,
724 NULL,
725 0,
726 tagBuffer,
727 sizeof(tagBuffer),
728 err);
729 if(U_FAILURE(*err)) {
730 goto error;
731 }
732
733 likelySubtags =
734 findLikelySubtags(
735 tagBuffer,
736 likelySubtagsBuffer,
737 sizeof(likelySubtagsBuffer),
738 err);
739 if(U_FAILURE(*err)) {
740 goto error;
741 }
742
743 if (likelySubtags != NULL) {
744 /* Always use the language tag from the
745 maximal string, since it may be more
746 specific than the one provided. */
747 return createTagStringWithAlternates(
748 NULL,
749 0,
750 script,
751 scriptLength,
752 NULL,
753 0,
754 variants,
755 variantsLength,
756 likelySubtags,
757 tag,
758 tagCapacity,
759 err);
760 }
761 }
762
763 /**
764 * Finally, try just the language.
765 **/
766 {
767 const char* likelySubtags = NULL;
768
769 createTagString(
770 lang,
771 langLength,
772 NULL,
773 0,
774 NULL,
775 0,
776 NULL,
777 0,
778 tagBuffer,
779 sizeof(tagBuffer),
780 err);
781 if(U_FAILURE(*err)) {
782 goto error;
783 }
784
785 likelySubtags =
786 findLikelySubtags(
787 tagBuffer,
788 likelySubtagsBuffer,
789 sizeof(likelySubtagsBuffer),
790 err);
791 if(U_FAILURE(*err)) {
792 goto error;
793 }
794
795 if (likelySubtags != NULL) {
796 /* Always use the language tag from the
797 maximal string, since it may be more
798 specific than the one provided. */
799 return createTagStringWithAlternates(
800 NULL,
801 0,
802 script,
803 scriptLength,
804 region,
805 regionLength,
806 variants,
807 variantsLength,
808 likelySubtags,
809 tag,
810 tagCapacity,
811 err);
812 }
813 }
814
815 return u_terminateChars(
816 tag,
817 tagCapacity,
818 0,
819 err);
820
821 error:
822
823 if (!U_FAILURE(*err)) {
824 *err = U_ILLEGAL_ARGUMENT_ERROR;
825 }
826
827 return -1;
828 }
829
/**
 * Scans the trailing part of a locale ID up to the first '@' (or the end)
 * and jumps to the enclosing function's "error" label when a run of
 * characters between '-' / '_' separators grows too long. A run of up to
 * nine characters is accepted; the tenth character triggers the jump.
 * NOTE(review): the limit is presumably meant to be 8 characters per
 * variant subtag -- confirm before tightening the comparison.
 *
 * The macro expands to a single statement and requires a label named
 * "error" in the function that uses it.
 *
 * @param trailing The trailing data to check.
 * @param trailingLength The number of chars of trailing to examine.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
848
849 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)850 _uloc_addLikelySubtags(const char* localeID,
851 char* maximizedLocaleID,
852 int32_t maximizedLocaleIDCapacity,
853 UErrorCode* err)
854 {
855 char lang[ULOC_LANG_CAPACITY];
856 int32_t langLength = sizeof(lang);
857 char script[ULOC_SCRIPT_CAPACITY];
858 int32_t scriptLength = sizeof(script);
859 char region[ULOC_COUNTRY_CAPACITY];
860 int32_t regionLength = sizeof(region);
861 const char* trailing = "";
862 int32_t trailingLength = 0;
863 int32_t trailingIndex = 0;
864 int32_t resultLength = 0;
865
866 if(U_FAILURE(*err)) {
867 goto error;
868 }
869 else if (localeID == NULL ||
870 maximizedLocaleID == NULL ||
871 maximizedLocaleIDCapacity <= 0) {
872 goto error;
873 }
874
875 trailingIndex = parseTagString(
876 localeID,
877 lang,
878 &langLength,
879 script,
880 &scriptLength,
881 region,
882 ®ionLength,
883 err);
884 if(U_FAILURE(*err)) {
885 /* Overflow indicates an illegal argument error */
886 if (*err == U_BUFFER_OVERFLOW_ERROR) {
887 *err = U_ILLEGAL_ARGUMENT_ERROR;
888 }
889
890 goto error;
891 }
892
893 /* Find the length of the trailing portion. */
894 while (_isIDSeparator(localeID[trailingIndex])) {
895 trailingIndex++;
896 }
897 trailing = &localeID[trailingIndex];
898 trailingLength = (int32_t)uprv_strlen(trailing);
899
900 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
901
902 resultLength =
903 createLikelySubtagsString(
904 lang,
905 langLength,
906 script,
907 scriptLength,
908 region,
909 regionLength,
910 trailing,
911 trailingLength,
912 maximizedLocaleID,
913 maximizedLocaleIDCapacity,
914 err);
915
916 if (resultLength == 0) {
917 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
918
919 /*
920 * If we get here, we need to return localeID.
921 */
922 uprv_memcpy(
923 maximizedLocaleID,
924 localeID,
925 localIDLength <= maximizedLocaleIDCapacity ?
926 localIDLength : maximizedLocaleIDCapacity);
927
928 resultLength =
929 u_terminateChars(
930 maximizedLocaleID,
931 maximizedLocaleIDCapacity,
932 localIDLength,
933 err);
934 }
935
936 return resultLength;
937
938 error:
939
940 if (!U_FAILURE(*err)) {
941 *err = U_ILLEGAL_ARGUMENT_ERROR;
942 }
943
944 return -1;
945 }
946
947 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)948 _uloc_minimizeSubtags(const char* localeID,
949 char* minimizedLocaleID,
950 int32_t minimizedLocaleIDCapacity,
951 UErrorCode* err)
952 {
953 /**
954 * ULOC_FULLNAME_CAPACITY will provide enough capacity
955 * that we can build a string that contains the language,
956 * script and region code without worrying about overrunning
957 * the user-supplied buffer.
958 **/
959 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
960 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
961
962 char lang[ULOC_LANG_CAPACITY];
963 int32_t langLength = sizeof(lang);
964 char script[ULOC_SCRIPT_CAPACITY];
965 int32_t scriptLength = sizeof(script);
966 char region[ULOC_COUNTRY_CAPACITY];
967 int32_t regionLength = sizeof(region);
968 const char* trailing = "";
969 int32_t trailingLength = 0;
970 int32_t trailingIndex = 0;
971
972 if(U_FAILURE(*err)) {
973 goto error;
974 }
975 else if (localeID == NULL ||
976 minimizedLocaleID == NULL ||
977 minimizedLocaleIDCapacity <= 0) {
978 goto error;
979 }
980
981 trailingIndex =
982 parseTagString(
983 localeID,
984 lang,
985 &langLength,
986 script,
987 &scriptLength,
988 region,
989 ®ionLength,
990 err);
991 if(U_FAILURE(*err)) {
992
993 /* Overflow indicates an illegal argument error */
994 if (*err == U_BUFFER_OVERFLOW_ERROR) {
995 *err = U_ILLEGAL_ARGUMENT_ERROR;
996 }
997
998 goto error;
999 }
1000
1001 /* Find the spot where the variants or the keywords begin, if any. */
1002 while (_isIDSeparator(localeID[trailingIndex])) {
1003 trailingIndex++;
1004 }
1005 trailing = &localeID[trailingIndex];
1006 trailingLength = (int32_t)uprv_strlen(trailing);
1007
1008 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1009
1010 createTagString(
1011 lang,
1012 langLength,
1013 script,
1014 scriptLength,
1015 region,
1016 regionLength,
1017 NULL,
1018 0,
1019 maximizedTagBuffer,
1020 maximizedTagBufferLength,
1021 err);
1022 if(U_FAILURE(*err)) {
1023 goto error;
1024 }
1025
1026 /**
1027 * First, we need to first get the maximization
1028 * from AddLikelySubtags.
1029 **/
1030 maximizedTagBufferLength =
1031 uloc_addLikelySubtags(
1032 maximizedTagBuffer,
1033 maximizedTagBuffer,
1034 maximizedTagBufferLength,
1035 err);
1036
1037 if(U_FAILURE(*err)) {
1038 goto error;
1039 }
1040
1041 /**
1042 * Start first with just the language.
1043 **/
1044 {
1045 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1046
1047 const int32_t tagBufferLength =
1048 createLikelySubtagsString(
1049 lang,
1050 langLength,
1051 NULL,
1052 0,
1053 NULL,
1054 0,
1055 NULL,
1056 0,
1057 tagBuffer,
1058 sizeof(tagBuffer),
1059 err);
1060
1061 if(U_FAILURE(*err)) {
1062 goto error;
1063 }
1064 else if (uprv_strnicmp(
1065 maximizedTagBuffer,
1066 tagBuffer,
1067 tagBufferLength) == 0) {
1068
1069 return createTagString(
1070 lang,
1071 langLength,
1072 NULL,
1073 0,
1074 NULL,
1075 0,
1076 trailing,
1077 trailingLength,
1078 minimizedLocaleID,
1079 minimizedLocaleIDCapacity,
1080 err);
1081 }
1082 }
1083
1084 /**
1085 * Next, try the language and region.
1086 **/
1087 if (regionLength > 0) {
1088
1089 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1090
1091 const int32_t tagBufferLength =
1092 createLikelySubtagsString(
1093 lang,
1094 langLength,
1095 NULL,
1096 0,
1097 region,
1098 regionLength,
1099 NULL,
1100 0,
1101 tagBuffer,
1102 sizeof(tagBuffer),
1103 err);
1104
1105 if(U_FAILURE(*err)) {
1106 goto error;
1107 }
1108 else if (uprv_strnicmp(
1109 maximizedTagBuffer,
1110 tagBuffer,
1111 tagBufferLength) == 0) {
1112
1113 return createTagString(
1114 lang,
1115 langLength,
1116 NULL,
1117 0,
1118 region,
1119 regionLength,
1120 trailing,
1121 trailingLength,
1122 minimizedLocaleID,
1123 minimizedLocaleIDCapacity,
1124 err);
1125 }
1126 }
1127
1128 /**
1129 * Finally, try the language and script. This is our last chance,
1130 * since trying with all three subtags would only yield the
1131 * maximal version that we already have.
1132 **/
1133 if (scriptLength > 0 && regionLength > 0) {
1134 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1135
1136 const int32_t tagBufferLength =
1137 createLikelySubtagsString(
1138 lang,
1139 langLength,
1140 script,
1141 scriptLength,
1142 NULL,
1143 0,
1144 NULL,
1145 0,
1146 tagBuffer,
1147 sizeof(tagBuffer),
1148 err);
1149
1150 if(U_FAILURE(*err)) {
1151 goto error;
1152 }
1153 else if (uprv_strnicmp(
1154 maximizedTagBuffer,
1155 tagBuffer,
1156 tagBufferLength) == 0) {
1157
1158 return createTagString(
1159 lang,
1160 langLength,
1161 script,
1162 scriptLength,
1163 NULL,
1164 0,
1165 trailing,
1166 trailingLength,
1167 minimizedLocaleID,
1168 minimizedLocaleIDCapacity,
1169 err);
1170 }
1171 }
1172
1173 {
1174 /**
1175 * If we got here, return the locale ID parameter.
1176 **/
1177 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1178
1179 uprv_memcpy(
1180 minimizedLocaleID,
1181 localeID,
1182 localeIDLength <= minimizedLocaleIDCapacity ?
1183 localeIDLength : minimizedLocaleIDCapacity);
1184
1185 return u_terminateChars(
1186 minimizedLocaleID,
1187 minimizedLocaleIDCapacity,
1188 localeIDLength,
1189 err);
1190 }
1191
1192 error:
1193
1194 if (!U_FAILURE(*err)) {
1195 *err = U_ILLEGAL_ARGUMENT_ERROR;
1196 }
1197
1198 return -1;
1199
1200
1201 }
1202
1203 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1204 do_canonicalize(const char* localeID,
1205 char* buffer,
1206 int32_t bufferCapacity,
1207 UErrorCode* err)
1208 {
1209 uloc_canonicalize(
1210 localeID,
1211 buffer,
1212 bufferCapacity,
1213 err);
1214
1215 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1216 *err == U_BUFFER_OVERFLOW_ERROR) {
1217 *err = U_ILLEGAL_ARGUMENT_ERROR;
1218
1219 return FALSE;
1220 }
1221 else if (U_FAILURE(*err)) {
1222
1223 return FALSE;
1224 }
1225 else {
1226 return TRUE;
1227 }
1228 }
1229
1230 U_DRAFT int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1231 uloc_addLikelySubtags(const char* localeID,
1232 char* maximizedLocaleID,
1233 int32_t maximizedLocaleIDCapacity,
1234 UErrorCode* err)
1235 {
1236 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1237
1238 if (!do_canonicalize(
1239 localeID,
1240 localeBuffer,
1241 sizeof(localeBuffer),
1242 err)) {
1243 return -1;
1244 }
1245 else {
1246 return _uloc_addLikelySubtags(
1247 localeBuffer,
1248 maximizedLocaleID,
1249 maximizedLocaleIDCapacity,
1250 err);
1251 }
1252 }
1253
1254 U_DRAFT int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1255 uloc_minimizeSubtags(const char* localeID,
1256 char* minimizedLocaleID,
1257 int32_t minimizedLocaleIDCapacity,
1258 UErrorCode* err)
1259 {
1260 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1261
1262 if (!do_canonicalize(
1263 localeID,
1264 localeBuffer,
1265 sizeof(localeBuffer),
1266 err)) {
1267 return -1;
1268 }
1269 else {
1270 return _uloc_minimizeSubtags(
1271 localeBuffer,
1272 minimizedLocaleID,
1273 minimizedLocaleIDCapacity,
1274 err);
1275 }
1276 }
1277