1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 1997-2010, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: loclikely.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2010feb25
14 * created by: Markus W. Scherer
15 *
16 * Code for likely and minimized locale subtags, separated out from other .cpp files
17 * that then do not depend on resource bundle code and likely-subtags data.
18 */
19
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28
29 /**
30 * This function looks for the localeID in the likelySubtags resource.
31 *
32 * @param localeID The tag to find.
33 * @param buffer A buffer to hold the matching entry
34 * @param bufferLength The length of the output buffer
35 * @return A pointer to "buffer" if found, or a null pointer if not.
36 */
37 static const char* U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39 char* buffer,
40 int32_t bufferLength,
41 UErrorCode* err) {
42 const char* result = NULL;
43
44 if (!U_FAILURE(*err)) {
45 int32_t resLen = 0;
46 const UChar* s = NULL;
47 UErrorCode tmpErr = U_ZERO_ERROR;
48 UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49 if (U_SUCCESS(tmpErr)) {
50 s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51
52 if (U_FAILURE(tmpErr)) {
53 /*
54 * If a resource is missing, it's not really an error, it's
55 * just that we don't have any data for that particular locale ID.
56 */
57 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58 *err = tmpErr;
59 }
60 }
61 else if (resLen >= bufferLength) {
62 /* The buffer should never overflow. */
63 *err = U_INTERNAL_PROGRAM_ERROR;
64 }
65 else {
66 u_UCharsToChars(s, buffer, resLen + 1);
67 result = buffer;
68 }
69
70 ures_close(subtags);
71 } else {
72 *err = tmpErr;
73 }
74 }
75
76 return result;
77 }
78
79 /**
80 * Append a tag to a buffer, adding the separator if necessary. The buffer
81 * must be large enough to contain the resulting tag plus any separator
82 * necessary. The tag must not be a zero-length string.
83 *
84 * @param tag The tag to add.
85 * @param tagLength The length of the tag.
86 * @param buffer The output buffer.
87 * @param bufferLength The length of the output buffer. This is an input/ouput parameter.
88 **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91 const char* tag,
92 int32_t tagLength,
93 char* buffer,
94 int32_t* bufferLength) {
95
96 if (*bufferLength > 0) {
97 buffer[*bufferLength] = '_';
98 ++(*bufferLength);
99 }
100
101 uprv_memmove(
102 &buffer[*bufferLength],
103 tag,
104 tagLength);
105
106 *bufferLength += tagLength;
107 }
108
/**
 * Canonical subtags standing for an unknown language, script and region.
 **/

/* Written in place of a language subtag when none is available. */
static const char *const unknownLanguage = "und";

/* A parsed script equal to this value is reported as "no script". */
static const char *const unknownScript = "Zzzz";

/* A parsed region equal to this value is reported as "no region". */
static const char *const unknownRegion = "ZZ";
115
116 /**
117 * Create a tag string from the supplied parameters. The lang, script and region
118 * parameters may be NULL pointers. If they are, their corresponding length parameters
119 * must be less than or equal to 0.
120 *
121 * If any of the language, script or region parameters are empty, and the alternateTags
122 * parameter is not NULL, it will be parsed for potential language, script and region tags
123 * to be used when constructing the new tag. If the alternateTags parameter is NULL, or
124 * it contains no language tag, the default tag for the unknown language is used.
125 *
126 * If the length of the new string exceeds the capacity of the output buffer,
127 * the function copies as many bytes to the output buffer as it can, and returns
128 * the error U_BUFFER_OVERFLOW_ERROR.
129 *
130 * If an illegal argument is provided, the function returns the error
131 * U_ILLEGAL_ARGUMENT_ERROR.
132 *
133 * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134 * the tag string fits in the output buffer, but the null terminator doesn't.
135 *
136 * @param lang The language tag to use.
137 * @param langLength The length of the language tag.
138 * @param script The script tag to use.
139 * @param scriptLength The length of the script tag.
140 * @param region The region tag to use.
141 * @param regionLength The length of the region tag.
142 * @param trailing Any trailing data to append to the new tag.
143 * @param trailingLength The length of the trailing data.
144 * @param alternateTags A string containing any alternate tags.
145 * @param tag The output buffer.
146 * @param tagCapacity The capacity of the output buffer.
147 * @param err A pointer to a UErrorCode for error reporting.
148 * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149 **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152 const char* lang,
153 int32_t langLength,
154 const char* script,
155 int32_t scriptLength,
156 const char* region,
157 int32_t regionLength,
158 const char* trailing,
159 int32_t trailingLength,
160 const char* alternateTags,
161 char* tag,
162 int32_t tagCapacity,
163 UErrorCode* err) {
164
165 if (U_FAILURE(*err)) {
166 goto error;
167 }
168 else if (tag == NULL ||
169 tagCapacity <= 0 ||
170 langLength >= ULOC_LANG_CAPACITY ||
171 scriptLength >= ULOC_SCRIPT_CAPACITY ||
172 regionLength >= ULOC_COUNTRY_CAPACITY) {
173 goto error;
174 }
175 else {
176 /**
177 * ULOC_FULLNAME_CAPACITY will provide enough capacity
178 * that we can build a string that contains the language,
179 * script and region code without worrying about overrunning
180 * the user-supplied buffer.
181 **/
182 char tagBuffer[ULOC_FULLNAME_CAPACITY];
183 int32_t tagLength = 0;
184 int32_t capacityRemaining = tagCapacity;
185 UBool regionAppended = FALSE;
186
187 if (langLength > 0) {
188 appendTag(
189 lang,
190 langLength,
191 tagBuffer,
192 &tagLength);
193 }
194 else if (alternateTags == NULL) {
195 /*
196 * Append the value for an unknown language, if
197 * we found no language.
198 */
199 appendTag(
200 unknownLanguage,
201 (int32_t)uprv_strlen(unknownLanguage),
202 tagBuffer,
203 &tagLength);
204 }
205 else {
206 /*
207 * Parse the alternateTags string for the language.
208 */
209 char alternateLang[ULOC_LANG_CAPACITY];
210 int32_t alternateLangLength = sizeof(alternateLang);
211
212 alternateLangLength =
213 uloc_getLanguage(
214 alternateTags,
215 alternateLang,
216 alternateLangLength,
217 err);
218 if(U_FAILURE(*err) ||
219 alternateLangLength >= ULOC_LANG_CAPACITY) {
220 goto error;
221 }
222 else if (alternateLangLength == 0) {
223 /*
224 * Append the value for an unknown language, if
225 * we found no language.
226 */
227 appendTag(
228 unknownLanguage,
229 (int32_t)uprv_strlen(unknownLanguage),
230 tagBuffer,
231 &tagLength);
232 }
233 else {
234 appendTag(
235 alternateLang,
236 alternateLangLength,
237 tagBuffer,
238 &tagLength);
239 }
240 }
241
242 if (scriptLength > 0) {
243 appendTag(
244 script,
245 scriptLength,
246 tagBuffer,
247 &tagLength);
248 }
249 else if (alternateTags != NULL) {
250 /*
251 * Parse the alternateTags string for the script.
252 */
253 char alternateScript[ULOC_SCRIPT_CAPACITY];
254
255 const int32_t alternateScriptLength =
256 uloc_getScript(
257 alternateTags,
258 alternateScript,
259 sizeof(alternateScript),
260 err);
261
262 if (U_FAILURE(*err) ||
263 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264 goto error;
265 }
266 else if (alternateScriptLength > 0) {
267 appendTag(
268 alternateScript,
269 alternateScriptLength,
270 tagBuffer,
271 &tagLength);
272 }
273 }
274
275 if (regionLength > 0) {
276 appendTag(
277 region,
278 regionLength,
279 tagBuffer,
280 &tagLength);
281
282 regionAppended = TRUE;
283 }
284 else if (alternateTags != NULL) {
285 /*
286 * Parse the alternateTags string for the region.
287 */
288 char alternateRegion[ULOC_COUNTRY_CAPACITY];
289
290 const int32_t alternateRegionLength =
291 uloc_getCountry(
292 alternateTags,
293 alternateRegion,
294 sizeof(alternateRegion),
295 err);
296 if (U_FAILURE(*err) ||
297 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298 goto error;
299 }
300 else if (alternateRegionLength > 0) {
301 appendTag(
302 alternateRegion,
303 alternateRegionLength,
304 tagBuffer,
305 &tagLength);
306
307 regionAppended = TRUE;
308 }
309 }
310
311 {
312 const int32_t toCopy =
313 tagLength >= tagCapacity ? tagCapacity : tagLength;
314
315 /**
316 * Copy the partial tag from our internal buffer to the supplied
317 * target.
318 **/
319 uprv_memcpy(
320 tag,
321 tagBuffer,
322 toCopy);
323
324 capacityRemaining -= toCopy;
325 }
326
327 if (trailingLength > 0) {
328 if (capacityRemaining > 0 && !regionAppended) {
329 tag[tagLength++] = '_';
330 --capacityRemaining;
331 }
332
333 if (capacityRemaining > 0) {
334 /*
335 * Copy the trailing data into the supplied buffer. Use uprv_memmove, since we
336 * don't know if the user-supplied buffers overlap.
337 */
338 const int32_t toCopy =
339 trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
340
341 uprv_memmove(
342 &tag[tagLength],
343 trailing,
344 toCopy);
345 }
346 }
347
348 tagLength += trailingLength;
349
350 return u_terminateChars(
351 tag,
352 tagCapacity,
353 tagLength,
354 err);
355 }
356
357 error:
358
359 /**
360 * An overflow indicates the locale ID passed in
361 * is ill-formed. If we got here, and there was
362 * no previous error, it's an implicit overflow.
363 **/
364 if (*err == U_BUFFER_OVERFLOW_ERROR ||
365 U_SUCCESS(*err)) {
366 *err = U_ILLEGAL_ARGUMENT_ERROR;
367 }
368
369 return -1;
370 }
371
372 /**
373 * Create a tag string from the supplied parameters. The lang, script and region
374 * parameters may be NULL pointers. If they are, their corresponding length parameters
375 * must be less than or equal to 0. If the lang parameter is an empty string, the
376 * default value for an unknown language is written to the output buffer.
377 *
378 * If the length of the new string exceeds the capacity of the output buffer,
379 * the function copies as many bytes to the output buffer as it can, and returns
380 * the error U_BUFFER_OVERFLOW_ERROR.
381 *
382 * If an illegal argument is provided, the function returns the error
383 * U_ILLEGAL_ARGUMENT_ERROR.
384 *
385 * @param lang The language tag to use.
386 * @param langLength The length of the language tag.
387 * @param script The script tag to use.
388 * @param scriptLength The length of the script tag.
389 * @param region The region tag to use.
390 * @param regionLength The length of the region tag.
391 * @param trailing Any trailing data to append to the new tag.
392 * @param trailingLength The length of the trailing data.
393 * @param tag The output buffer.
394 * @param tagCapacity The capacity of the output buffer.
395 * @param err A pointer to a UErrorCode for error reporting.
396 * @return The length of the tag string, which may be greater than tagCapacity.
397 **/
398 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)399 createTagString(
400 const char* lang,
401 int32_t langLength,
402 const char* script,
403 int32_t scriptLength,
404 const char* region,
405 int32_t regionLength,
406 const char* trailing,
407 int32_t trailingLength,
408 char* tag,
409 int32_t tagCapacity,
410 UErrorCode* err)
411 {
412 return createTagStringWithAlternates(
413 lang,
414 langLength,
415 script,
416 scriptLength,
417 region,
418 regionLength,
419 trailing,
420 trailingLength,
421 NULL,
422 tag,
423 tagCapacity,
424 err);
425 }
426
427 /**
428 * Parse the language, script, and region subtags from a tag string, and copy the
429 * results into the corresponding output parameters. The buffers are null-terminated,
430 * unless overflow occurs.
431 *
432 * The langLength, scriptLength, and regionLength parameters are input/output
433 * parameters, and must contain the capacity of their corresponding buffers on
434 * input. On output, they will contain the actual length of the buffers, not
435 * including the null terminator.
436 *
437 * If the length of any of the output subtags exceeds the capacity of the corresponding
438 * buffer, the function copies as many bytes to the output buffer as it can, and returns
439 * the error U_BUFFER_OVERFLOW_ERROR. It will not parse any more subtags once overflow
440 * occurs.
441 *
442 * If an illegal argument is provided, the function returns the error
443 * U_ILLEGAL_ARGUMENT_ERROR.
444 *
445 * @param localeID The locale ID to parse.
446 * @param lang The language tag buffer.
447 * @param langLength The length of the language tag.
448 * @param script The script tag buffer.
449 * @param scriptLength The length of the script tag.
450 * @param region The region tag buffer.
451 * @param regionLength The length of the region tag.
452 * @param err A pointer to a UErrorCode for error reporting.
453 * @return The number of chars of the localeID parameter consumed.
454 **/
455 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)456 parseTagString(
457 const char* localeID,
458 char* lang,
459 int32_t* langLength,
460 char* script,
461 int32_t* scriptLength,
462 char* region,
463 int32_t* regionLength,
464 UErrorCode* err)
465 {
466 const char* position = localeID;
467 int32_t subtagLength = 0;
468
469 if(U_FAILURE(*err) ||
470 localeID == NULL ||
471 lang == NULL ||
472 langLength == NULL ||
473 script == NULL ||
474 scriptLength == NULL ||
475 region == NULL ||
476 regionLength == NULL) {
477 goto error;
478 }
479
480 subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
481 u_terminateChars(lang, *langLength, subtagLength, err);
482
483 /*
484 * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
485 * to be an error, because it indicates the user-supplied tag is
486 * not well-formed.
487 */
488 if(U_FAILURE(*err)) {
489 goto error;
490 }
491
492 *langLength = subtagLength;
493
494 /*
495 * If no language was present, use the value of unknownLanguage
496 * instead. Otherwise, move past any separator.
497 */
498 if (*langLength == 0) {
499 uprv_strcpy(
500 lang,
501 unknownLanguage);
502 *langLength = (int32_t)uprv_strlen(lang);
503 }
504 else if (_isIDSeparator(*position)) {
505 ++position;
506 }
507
508 subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
509 u_terminateChars(script, *scriptLength, subtagLength, err);
510
511 if(U_FAILURE(*err)) {
512 goto error;
513 }
514
515 *scriptLength = subtagLength;
516
517 if (*scriptLength > 0) {
518 if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
519 /**
520 * If the script part is the "unknown" script, then don't return it.
521 **/
522 *scriptLength = 0;
523 }
524
525 /*
526 * Move past any separator.
527 */
528 if (_isIDSeparator(*position)) {
529 ++position;
530 }
531 }
532
533 subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
534 u_terminateChars(region, *regionLength, subtagLength, err);
535
536 if(U_FAILURE(*err)) {
537 goto error;
538 }
539
540 *regionLength = subtagLength;
541
542 if (*regionLength > 0) {
543 if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
544 /**
545 * If the region part is the "unknown" region, then don't return it.
546 **/
547 *regionLength = 0;
548 }
549 }
550
551 exit:
552
553 return (int32_t)(position - localeID);
554
555 error:
556
557 /**
558 * If we get here, we have no explicit error, it's the result of an
559 * illegal argument.
560 **/
561 if (!U_FAILURE(*err)) {
562 *err = U_ILLEGAL_ARGUMENT_ERROR;
563 }
564
565 goto exit;
566 }
567
568 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)569 createLikelySubtagsString(
570 const char* lang,
571 int32_t langLength,
572 const char* script,
573 int32_t scriptLength,
574 const char* region,
575 int32_t regionLength,
576 const char* variants,
577 int32_t variantsLength,
578 char* tag,
579 int32_t tagCapacity,
580 UErrorCode* err)
581 {
582 /**
583 * ULOC_FULLNAME_CAPACITY will provide enough capacity
584 * that we can build a string that contains the language,
585 * script and region code without worrying about overrunning
586 * the user-supplied buffer.
587 **/
588 char tagBuffer[ULOC_FULLNAME_CAPACITY];
589 char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
590 int32_t tagBufferLength = 0;
591
592 if(U_FAILURE(*err)) {
593 goto error;
594 }
595
596 /**
597 * Try the language with the script and region first.
598 **/
599 if (scriptLength > 0 && regionLength > 0) {
600
601 const char* likelySubtags = NULL;
602
603 tagBufferLength = createTagString(
604 lang,
605 langLength,
606 script,
607 scriptLength,
608 region,
609 regionLength,
610 NULL,
611 0,
612 tagBuffer,
613 sizeof(tagBuffer),
614 err);
615 if(U_FAILURE(*err)) {
616 goto error;
617 }
618
619 likelySubtags =
620 findLikelySubtags(
621 tagBuffer,
622 likelySubtagsBuffer,
623 sizeof(likelySubtagsBuffer),
624 err);
625 if(U_FAILURE(*err)) {
626 goto error;
627 }
628
629 if (likelySubtags != NULL) {
630 /* Always use the language tag from the
631 maximal string, since it may be more
632 specific than the one provided. */
633 return createTagStringWithAlternates(
634 NULL,
635 0,
636 NULL,
637 0,
638 NULL,
639 0,
640 variants,
641 variantsLength,
642 likelySubtags,
643 tag,
644 tagCapacity,
645 err);
646 }
647 }
648
649 /**
650 * Try the language with just the script.
651 **/
652 if (scriptLength > 0) {
653
654 const char* likelySubtags = NULL;
655
656 tagBufferLength = createTagString(
657 lang,
658 langLength,
659 script,
660 scriptLength,
661 NULL,
662 0,
663 NULL,
664 0,
665 tagBuffer,
666 sizeof(tagBuffer),
667 err);
668 if(U_FAILURE(*err)) {
669 goto error;
670 }
671
672 likelySubtags =
673 findLikelySubtags(
674 tagBuffer,
675 likelySubtagsBuffer,
676 sizeof(likelySubtagsBuffer),
677 err);
678 if(U_FAILURE(*err)) {
679 goto error;
680 }
681
682 if (likelySubtags != NULL) {
683 /* Always use the language tag from the
684 maximal string, since it may be more
685 specific than the one provided. */
686 return createTagStringWithAlternates(
687 NULL,
688 0,
689 NULL,
690 0,
691 region,
692 regionLength,
693 variants,
694 variantsLength,
695 likelySubtags,
696 tag,
697 tagCapacity,
698 err);
699 }
700 }
701
702 /**
703 * Try the language with just the region.
704 **/
705 if (regionLength > 0) {
706
707 const char* likelySubtags = NULL;
708
709 createTagString(
710 lang,
711 langLength,
712 NULL,
713 0,
714 region,
715 regionLength,
716 NULL,
717 0,
718 tagBuffer,
719 sizeof(tagBuffer),
720 err);
721 if(U_FAILURE(*err)) {
722 goto error;
723 }
724
725 likelySubtags =
726 findLikelySubtags(
727 tagBuffer,
728 likelySubtagsBuffer,
729 sizeof(likelySubtagsBuffer),
730 err);
731 if(U_FAILURE(*err)) {
732 goto error;
733 }
734
735 if (likelySubtags != NULL) {
736 /* Always use the language tag from the
737 maximal string, since it may be more
738 specific than the one provided. */
739 return createTagStringWithAlternates(
740 NULL,
741 0,
742 script,
743 scriptLength,
744 NULL,
745 0,
746 variants,
747 variantsLength,
748 likelySubtags,
749 tag,
750 tagCapacity,
751 err);
752 }
753 }
754
755 /**
756 * Finally, try just the language.
757 **/
758 {
759 const char* likelySubtags = NULL;
760
761 createTagString(
762 lang,
763 langLength,
764 NULL,
765 0,
766 NULL,
767 0,
768 NULL,
769 0,
770 tagBuffer,
771 sizeof(tagBuffer),
772 err);
773 if(U_FAILURE(*err)) {
774 goto error;
775 }
776
777 likelySubtags =
778 findLikelySubtags(
779 tagBuffer,
780 likelySubtagsBuffer,
781 sizeof(likelySubtagsBuffer),
782 err);
783 if(U_FAILURE(*err)) {
784 goto error;
785 }
786
787 if (likelySubtags != NULL) {
788 /* Always use the language tag from the
789 maximal string, since it may be more
790 specific than the one provided. */
791 return createTagStringWithAlternates(
792 NULL,
793 0,
794 script,
795 scriptLength,
796 region,
797 regionLength,
798 variants,
799 variantsLength,
800 likelySubtags,
801 tag,
802 tagCapacity,
803 err);
804 }
805 }
806
807 return u_terminateChars(
808 tag,
809 tagCapacity,
810 0,
811 err);
812
813 error:
814
815 if (!U_FAILURE(*err)) {
816 *err = U_ILLEGAL_ARGUMENT_ERROR;
817 }
818
819 return -1;
820 }
821
/**
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label when it contains an over-long subtag.
 *
 * Characters are counted per subtag: '-' and '_' restart the count, and the
 * scan stops at '@' or after trailingLength characters. The jump is taken as
 * soon as a tenth character is seen in one run, so runs of up to nine
 * characters pass.
 * NOTE(review): variant subtags are limited to 8 characters by BCP 47, while
 * this threshold still admits 9 - confirm whether that is intended.
 *
 * The macro declares its own locals named "count" and "i"; the enclosing
 * function must define a label named "error".
 *
 * @param trailing The trailing data to check.
 * @param trailingLength The number of chars of trailing to examine.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    do { \
        int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    } while (0)
840
841 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)842 _uloc_addLikelySubtags(const char* localeID,
843 char* maximizedLocaleID,
844 int32_t maximizedLocaleIDCapacity,
845 UErrorCode* err)
846 {
847 char lang[ULOC_LANG_CAPACITY];
848 int32_t langLength = sizeof(lang);
849 char script[ULOC_SCRIPT_CAPACITY];
850 int32_t scriptLength = sizeof(script);
851 char region[ULOC_COUNTRY_CAPACITY];
852 int32_t regionLength = sizeof(region);
853 const char* trailing = "";
854 int32_t trailingLength = 0;
855 int32_t trailingIndex = 0;
856 int32_t resultLength = 0;
857
858 if(U_FAILURE(*err)) {
859 goto error;
860 }
861 else if (localeID == NULL ||
862 maximizedLocaleID == NULL ||
863 maximizedLocaleIDCapacity <= 0) {
864 goto error;
865 }
866
867 trailingIndex = parseTagString(
868 localeID,
869 lang,
870 &langLength,
871 script,
872 &scriptLength,
873 region,
874 ®ionLength,
875 err);
876 if(U_FAILURE(*err)) {
877 /* Overflow indicates an illegal argument error */
878 if (*err == U_BUFFER_OVERFLOW_ERROR) {
879 *err = U_ILLEGAL_ARGUMENT_ERROR;
880 }
881
882 goto error;
883 }
884
885 /* Find the length of the trailing portion. */
886 trailing = &localeID[trailingIndex];
887 trailingLength = (int32_t)uprv_strlen(trailing);
888
889 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
890
891 resultLength =
892 createLikelySubtagsString(
893 lang,
894 langLength,
895 script,
896 scriptLength,
897 region,
898 regionLength,
899 trailing,
900 trailingLength,
901 maximizedLocaleID,
902 maximizedLocaleIDCapacity,
903 err);
904
905 if (resultLength == 0) {
906 const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
907
908 /*
909 * If we get here, we need to return localeID.
910 */
911 uprv_memcpy(
912 maximizedLocaleID,
913 localeID,
914 localIDLength <= maximizedLocaleIDCapacity ?
915 localIDLength : maximizedLocaleIDCapacity);
916
917 resultLength =
918 u_terminateChars(
919 maximizedLocaleID,
920 maximizedLocaleIDCapacity,
921 localIDLength,
922 err);
923 }
924
925 return resultLength;
926
927 error:
928
929 if (!U_FAILURE(*err)) {
930 *err = U_ILLEGAL_ARGUMENT_ERROR;
931 }
932
933 return -1;
934 }
935
936 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)937 _uloc_minimizeSubtags(const char* localeID,
938 char* minimizedLocaleID,
939 int32_t minimizedLocaleIDCapacity,
940 UErrorCode* err)
941 {
942 /**
943 * ULOC_FULLNAME_CAPACITY will provide enough capacity
944 * that we can build a string that contains the language,
945 * script and region code without worrying about overrunning
946 * the user-supplied buffer.
947 **/
948 char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
949 int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
950
951 char lang[ULOC_LANG_CAPACITY];
952 int32_t langLength = sizeof(lang);
953 char script[ULOC_SCRIPT_CAPACITY];
954 int32_t scriptLength = sizeof(script);
955 char region[ULOC_COUNTRY_CAPACITY];
956 int32_t regionLength = sizeof(region);
957 const char* trailing = "";
958 int32_t trailingLength = 0;
959 int32_t trailingIndex = 0;
960
961 if(U_FAILURE(*err)) {
962 goto error;
963 }
964 else if (localeID == NULL ||
965 minimizedLocaleID == NULL ||
966 minimizedLocaleIDCapacity <= 0) {
967 goto error;
968 }
969
970 trailingIndex =
971 parseTagString(
972 localeID,
973 lang,
974 &langLength,
975 script,
976 &scriptLength,
977 region,
978 ®ionLength,
979 err);
980 if(U_FAILURE(*err)) {
981
982 /* Overflow indicates an illegal argument error */
983 if (*err == U_BUFFER_OVERFLOW_ERROR) {
984 *err = U_ILLEGAL_ARGUMENT_ERROR;
985 }
986
987 goto error;
988 }
989
990 /* Find the spot where the variants begin, if any. */
991 trailing = &localeID[trailingIndex];
992 trailingLength = (int32_t)uprv_strlen(trailing);
993
994 CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
995
996 createTagString(
997 lang,
998 langLength,
999 script,
1000 scriptLength,
1001 region,
1002 regionLength,
1003 NULL,
1004 0,
1005 maximizedTagBuffer,
1006 maximizedTagBufferLength,
1007 err);
1008 if(U_FAILURE(*err)) {
1009 goto error;
1010 }
1011
1012 /**
1013 * First, we need to first get the maximization
1014 * from AddLikelySubtags.
1015 **/
1016 maximizedTagBufferLength =
1017 uloc_addLikelySubtags(
1018 maximizedTagBuffer,
1019 maximizedTagBuffer,
1020 maximizedTagBufferLength,
1021 err);
1022
1023 if(U_FAILURE(*err)) {
1024 goto error;
1025 }
1026
1027 /**
1028 * Start first with just the language.
1029 **/
1030 {
1031 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1032
1033 const int32_t tagBufferLength =
1034 createLikelySubtagsString(
1035 lang,
1036 langLength,
1037 NULL,
1038 0,
1039 NULL,
1040 0,
1041 NULL,
1042 0,
1043 tagBuffer,
1044 sizeof(tagBuffer),
1045 err);
1046
1047 if(U_FAILURE(*err)) {
1048 goto error;
1049 }
1050 else if (uprv_strnicmp(
1051 maximizedTagBuffer,
1052 tagBuffer,
1053 tagBufferLength) == 0) {
1054
1055 return createTagString(
1056 lang,
1057 langLength,
1058 NULL,
1059 0,
1060 NULL,
1061 0,
1062 trailing,
1063 trailingLength,
1064 minimizedLocaleID,
1065 minimizedLocaleIDCapacity,
1066 err);
1067 }
1068 }
1069
1070 /**
1071 * Next, try the language and region.
1072 **/
1073 if (regionLength > 0) {
1074
1075 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1076
1077 const int32_t tagBufferLength =
1078 createLikelySubtagsString(
1079 lang,
1080 langLength,
1081 NULL,
1082 0,
1083 region,
1084 regionLength,
1085 NULL,
1086 0,
1087 tagBuffer,
1088 sizeof(tagBuffer),
1089 err);
1090
1091 if(U_FAILURE(*err)) {
1092 goto error;
1093 }
1094 else if (uprv_strnicmp(
1095 maximizedTagBuffer,
1096 tagBuffer,
1097 tagBufferLength) == 0) {
1098
1099 return createTagString(
1100 lang,
1101 langLength,
1102 NULL,
1103 0,
1104 region,
1105 regionLength,
1106 trailing,
1107 trailingLength,
1108 minimizedLocaleID,
1109 minimizedLocaleIDCapacity,
1110 err);
1111 }
1112 }
1113
1114 /**
1115 * Finally, try the language and script. This is our last chance,
1116 * since trying with all three subtags would only yield the
1117 * maximal version that we already have.
1118 **/
1119 if (scriptLength > 0 && regionLength > 0) {
1120 char tagBuffer[ULOC_FULLNAME_CAPACITY];
1121
1122 const int32_t tagBufferLength =
1123 createLikelySubtagsString(
1124 lang,
1125 langLength,
1126 script,
1127 scriptLength,
1128 NULL,
1129 0,
1130 NULL,
1131 0,
1132 tagBuffer,
1133 sizeof(tagBuffer),
1134 err);
1135
1136 if(U_FAILURE(*err)) {
1137 goto error;
1138 }
1139 else if (uprv_strnicmp(
1140 maximizedTagBuffer,
1141 tagBuffer,
1142 tagBufferLength) == 0) {
1143
1144 return createTagString(
1145 lang,
1146 langLength,
1147 script,
1148 scriptLength,
1149 NULL,
1150 0,
1151 trailing,
1152 trailingLength,
1153 minimizedLocaleID,
1154 minimizedLocaleIDCapacity,
1155 err);
1156 }
1157 }
1158
1159 {
1160 /**
1161 * If we got here, return the locale ID parameter.
1162 **/
1163 const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1164
1165 uprv_memcpy(
1166 minimizedLocaleID,
1167 localeID,
1168 localeIDLength <= minimizedLocaleIDCapacity ?
1169 localeIDLength : minimizedLocaleIDCapacity);
1170
1171 return u_terminateChars(
1172 minimizedLocaleID,
1173 minimizedLocaleIDCapacity,
1174 localeIDLength,
1175 err);
1176 }
1177
1178 error:
1179
1180 if (!U_FAILURE(*err)) {
1181 *err = U_ILLEGAL_ARGUMENT_ERROR;
1182 }
1183
1184 return -1;
1185
1186
1187 }
1188
1189 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1190 do_canonicalize(const char* localeID,
1191 char* buffer,
1192 int32_t bufferCapacity,
1193 UErrorCode* err)
1194 {
1195 uloc_canonicalize(
1196 localeID,
1197 buffer,
1198 bufferCapacity,
1199 err);
1200
1201 if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1202 *err == U_BUFFER_OVERFLOW_ERROR) {
1203 *err = U_ILLEGAL_ARGUMENT_ERROR;
1204
1205 return FALSE;
1206 }
1207 else if (U_FAILURE(*err)) {
1208
1209 return FALSE;
1210 }
1211 else {
1212 return TRUE;
1213 }
1214 }
1215
1216 U_DRAFT int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1217 uloc_addLikelySubtags(const char* localeID,
1218 char* maximizedLocaleID,
1219 int32_t maximizedLocaleIDCapacity,
1220 UErrorCode* err)
1221 {
1222 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1223
1224 if (!do_canonicalize(
1225 localeID,
1226 localeBuffer,
1227 sizeof(localeBuffer),
1228 err)) {
1229 return -1;
1230 }
1231 else {
1232 return _uloc_addLikelySubtags(
1233 localeBuffer,
1234 maximizedLocaleID,
1235 maximizedLocaleIDCapacity,
1236 err);
1237 }
1238 }
1239
1240 U_DRAFT int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1241 uloc_minimizeSubtags(const char* localeID,
1242 char* minimizedLocaleID,
1243 int32_t minimizedLocaleIDCapacity,
1244 UErrorCode* err)
1245 {
1246 char localeBuffer[ULOC_FULLNAME_CAPACITY];
1247
1248 if (!do_canonicalize(
1249 localeID,
1250 localeBuffer,
1251 sizeof(localeBuffer),
1252 err)) {
1253 return -1;
1254 }
1255 else {
1256 return _uloc_minimizeSubtags(
1257 localeBuffer,
1258 minimizedLocaleID,
1259 minimizedLocaleIDCapacity,
1260 err);
1261 }
1262 }
1263