• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1997-2011, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  loclikely.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010feb25
14 *   created by: Markus W. Scherer
15 *
16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
17 *   that then do not depend on resource bundle code and likely-subtags data.
18 */
19 
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28 
29 /**
30  * This function looks for the localeID in the likelySubtags resource.
31  *
32  * @param localeID The tag to find.
33  * @param buffer A buffer to hold the matching entry
34  * @param bufferLength The length of the output buffer
35  * @return A pointer to "buffer" if found, or a null pointer if not.
36  */
37 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39                   char* buffer,
40                   int32_t bufferLength,
41                   UErrorCode* err) {
42     const char* result = NULL;
43 
44     if (!U_FAILURE(*err)) {
45         int32_t resLen = 0;
46         const UChar* s = NULL;
47         UErrorCode tmpErr = U_ZERO_ERROR;
48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49         if (U_SUCCESS(tmpErr)) {
50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51 
52             if (U_FAILURE(tmpErr)) {
53                 /*
54                  * If a resource is missing, it's not really an error, it's
55                  * just that we don't have any data for that particular locale ID.
56                  */
57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58                     *err = tmpErr;
59                 }
60             }
61             else if (resLen >= bufferLength) {
62                 /* The buffer should never overflow. */
63                 *err = U_INTERNAL_PROGRAM_ERROR;
64             }
65             else {
66                 u_UCharsToChars(s, buffer, resLen + 1);
67                 result = buffer;
68             }
69 
70             ures_close(subtags);
71         } else {
72             *err = tmpErr;
73         }
74     }
75 
76     return result;
77 }
78 
79 /**
80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
81  * must be large enough to contain the resulting tag plus any separator
82  * necessary. The tag must not be a zero-length string.
83  *
84  * @param tag The tag to add.
85  * @param tagLength The length of the tag.
86  * @param buffer The output buffer.
87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
88  **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91     const char* tag,
92     int32_t tagLength,
93     char* buffer,
94     int32_t* bufferLength) {
95 
96     if (*bufferLength > 0) {
97         buffer[*bufferLength] = '_';
98         ++(*bufferLength);
99     }
100 
101     uprv_memmove(
102         &buffer[*bufferLength],
103         tag,
104         tagLength);
105 
106     *bufferLength += tagLength;
107 }
108 
/**
 * Canonical subtags standing for an unknown language, an unknown script and
 * an unknown region, respectively.
 **/
static const char *const unknownLanguage = "und";
static const char *const unknownScript = "Zzzz";
static const char *const unknownRegion = "ZZ";
115 
116 /**
117  * Create a tag string from the supplied parameters.  The lang, script and region
118  * parameters may be NULL pointers. If they are, their corresponding length parameters
119  * must be less than or equal to 0.
120  *
121  * If any of the language, script or region parameters are empty, and the alternateTags
122  * parameter is not NULL, it will be parsed for potential language, script and region tags
123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
124  * it contains no language tag, the default tag for the unknown language is used.
125  *
126  * If the length of the new string exceeds the capacity of the output buffer,
127  * the function copies as many bytes to the output buffer as it can, and returns
128  * the error U_BUFFER_OVERFLOW_ERROR.
129  *
130  * If an illegal argument is provided, the function returns the error
131  * U_ILLEGAL_ARGUMENT_ERROR.
132  *
133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134  * the tag string fits in the output buffer, but the null terminator doesn't.
135  *
136  * @param lang The language tag to use.
137  * @param langLength The length of the language tag.
138  * @param script The script tag to use.
139  * @param scriptLength The length of the script tag.
140  * @param region The region tag to use.
141  * @param regionLength The length of the region tag.
142  * @param trailing Any trailing data to append to the new tag.
143  * @param trailingLength The length of the trailing data.
144  * @param alternateTags A string containing any alternate tags.
145  * @param tag The output buffer.
146  * @param tagCapacity The capacity of the output buffer.
147  * @param err A pointer to a UErrorCode for error reporting.
148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149  **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152     const char* lang,
153     int32_t langLength,
154     const char* script,
155     int32_t scriptLength,
156     const char* region,
157     int32_t regionLength,
158     const char* trailing,
159     int32_t trailingLength,
160     const char* alternateTags,
161     char* tag,
162     int32_t tagCapacity,
163     UErrorCode* err) {
164 
165     if (U_FAILURE(*err)) {
166         goto error;
167     }
168     else if (tag == NULL ||
169              tagCapacity <= 0 ||
170              langLength >= ULOC_LANG_CAPACITY ||
171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
172              regionLength >= ULOC_COUNTRY_CAPACITY) {
173         goto error;
174     }
175     else {
176         /**
177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
178          * that we can build a string that contains the language,
179          * script and region code without worrying about overrunning
180          * the user-supplied buffer.
181          **/
182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
183         int32_t tagLength = 0;
184         int32_t capacityRemaining = tagCapacity;
185         UBool regionAppended = FALSE;
186 
187         if (langLength > 0) {
188             appendTag(
189                 lang,
190                 langLength,
191                 tagBuffer,
192                 &tagLength);
193         }
194         else if (alternateTags == NULL) {
195             /*
196              * Append the value for an unknown language, if
197              * we found no language.
198              */
199             appendTag(
200                 unknownLanguage,
201                 (int32_t)uprv_strlen(unknownLanguage),
202                 tagBuffer,
203                 &tagLength);
204         }
205         else {
206             /*
207              * Parse the alternateTags string for the language.
208              */
209             char alternateLang[ULOC_LANG_CAPACITY];
210             int32_t alternateLangLength = sizeof(alternateLang);
211 
212             alternateLangLength =
213                 uloc_getLanguage(
214                     alternateTags,
215                     alternateLang,
216                     alternateLangLength,
217                     err);
218             if(U_FAILURE(*err) ||
219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
220                 goto error;
221             }
222             else if (alternateLangLength == 0) {
223                 /*
224                  * Append the value for an unknown language, if
225                  * we found no language.
226                  */
227                 appendTag(
228                     unknownLanguage,
229                     (int32_t)uprv_strlen(unknownLanguage),
230                     tagBuffer,
231                     &tagLength);
232             }
233             else {
234                 appendTag(
235                     alternateLang,
236                     alternateLangLength,
237                     tagBuffer,
238                     &tagLength);
239             }
240         }
241 
242         if (scriptLength > 0) {
243             appendTag(
244                 script,
245                 scriptLength,
246                 tagBuffer,
247                 &tagLength);
248         }
249         else if (alternateTags != NULL) {
250             /*
251              * Parse the alternateTags string for the script.
252              */
253             char alternateScript[ULOC_SCRIPT_CAPACITY];
254 
255             const int32_t alternateScriptLength =
256                 uloc_getScript(
257                     alternateTags,
258                     alternateScript,
259                     sizeof(alternateScript),
260                     err);
261 
262             if (U_FAILURE(*err) ||
263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264                 goto error;
265             }
266             else if (alternateScriptLength > 0) {
267                 appendTag(
268                     alternateScript,
269                     alternateScriptLength,
270                     tagBuffer,
271                     &tagLength);
272             }
273         }
274 
275         if (regionLength > 0) {
276             appendTag(
277                 region,
278                 regionLength,
279                 tagBuffer,
280                 &tagLength);
281 
282             regionAppended = TRUE;
283         }
284         else if (alternateTags != NULL) {
285             /*
286              * Parse the alternateTags string for the region.
287              */
288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
289 
290             const int32_t alternateRegionLength =
291                 uloc_getCountry(
292                     alternateTags,
293                     alternateRegion,
294                     sizeof(alternateRegion),
295                     err);
296             if (U_FAILURE(*err) ||
297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298                 goto error;
299             }
300             else if (alternateRegionLength > 0) {
301                 appendTag(
302                     alternateRegion,
303                     alternateRegionLength,
304                     tagBuffer,
305                     &tagLength);
306 
307                 regionAppended = TRUE;
308             }
309         }
310 
311         {
312             const int32_t toCopy =
313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
314 
315             /**
316              * Copy the partial tag from our internal buffer to the supplied
317              * target.
318              **/
319             uprv_memcpy(
320                 tag,
321                 tagBuffer,
322                 toCopy);
323 
324             capacityRemaining -= toCopy;
325         }
326 
327         if (trailingLength > 0) {
328             if (*trailing != '@' && capacityRemaining > 0) {
329                 tag[tagLength++] = '_';
330                 --capacityRemaining;
331                 if (capacityRemaining > 0 && !regionAppended) {
332                     /* extra separator is required */
333                     tag[tagLength++] = '_';
334                     --capacityRemaining;
335                 }
336             }
337 
338             if (capacityRemaining > 0) {
339                 /*
340                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
341                  * don't know if the user-supplied buffers overlap.
342                  */
343                 const int32_t toCopy =
344                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
345 
346                 uprv_memmove(
347                     &tag[tagLength],
348                     trailing,
349                     toCopy);
350             }
351         }
352 
353         tagLength += trailingLength;
354 
355         return u_terminateChars(
356                     tag,
357                     tagCapacity,
358                     tagLength,
359                     err);
360     }
361 
362 error:
363 
364     /**
365      * An overflow indicates the locale ID passed in
366      * is ill-formed.  If we got here, and there was
367      * no previous error, it's an implicit overflow.
368      **/
369     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
370         U_SUCCESS(*err)) {
371         *err = U_ILLEGAL_ARGUMENT_ERROR;
372     }
373 
374     return -1;
375 }
376 
377 /**
378  * Create a tag string from the supplied parameters.  The lang, script and region
379  * parameters may be NULL pointers. If they are, their corresponding length parameters
380  * must be less than or equal to 0.  If the lang parameter is an empty string, the
381  * default value for an unknown language is written to the output buffer.
382  *
383  * If the length of the new string exceeds the capacity of the output buffer,
384  * the function copies as many bytes to the output buffer as it can, and returns
385  * the error U_BUFFER_OVERFLOW_ERROR.
386  *
387  * If an illegal argument is provided, the function returns the error
388  * U_ILLEGAL_ARGUMENT_ERROR.
389  *
390  * @param lang The language tag to use.
391  * @param langLength The length of the language tag.
392  * @param script The script tag to use.
393  * @param scriptLength The length of the script tag.
394  * @param region The region tag to use.
395  * @param regionLength The length of the region tag.
396  * @param trailing Any trailing data to append to the new tag.
397  * @param trailingLength The length of the trailing data.
398  * @param tag The output buffer.
399  * @param tagCapacity The capacity of the output buffer.
400  * @param err A pointer to a UErrorCode for error reporting.
401  * @return The length of the tag string, which may be greater than tagCapacity.
402  **/
403 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)404 createTagString(
405     const char* lang,
406     int32_t langLength,
407     const char* script,
408     int32_t scriptLength,
409     const char* region,
410     int32_t regionLength,
411     const char* trailing,
412     int32_t trailingLength,
413     char* tag,
414     int32_t tagCapacity,
415     UErrorCode* err)
416 {
417     return createTagStringWithAlternates(
418                 lang,
419                 langLength,
420                 script,
421                 scriptLength,
422                 region,
423                 regionLength,
424                 trailing,
425                 trailingLength,
426                 NULL,
427                 tag,
428                 tagCapacity,
429                 err);
430 }
431 
432 /**
433  * Parse the language, script, and region subtags from a tag string, and copy the
434  * results into the corresponding output parameters. The buffers are null-terminated,
435  * unless overflow occurs.
436  *
437  * The langLength, scriptLength, and regionLength parameters are input/output
438  * parameters, and must contain the capacity of their corresponding buffers on
439  * input.  On output, they will contain the actual length of the buffers, not
440  * including the null terminator.
441  *
442  * If the length of any of the output subtags exceeds the capacity of the corresponding
443  * buffer, the function copies as many bytes to the output buffer as it can, and returns
444  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
445  * occurs.
446  *
447  * If an illegal argument is provided, the function returns the error
448  * U_ILLEGAL_ARGUMENT_ERROR.
449  *
450  * @param localeID The locale ID to parse.
451  * @param lang The language tag buffer.
452  * @param langLength The length of the language tag.
453  * @param script The script tag buffer.
454  * @param scriptLength The length of the script tag.
455  * @param region The region tag buffer.
456  * @param regionLength The length of the region tag.
457  * @param err A pointer to a UErrorCode for error reporting.
458  * @return The number of chars of the localeID parameter consumed.
459  **/
460 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)461 parseTagString(
462     const char* localeID,
463     char* lang,
464     int32_t* langLength,
465     char* script,
466     int32_t* scriptLength,
467     char* region,
468     int32_t* regionLength,
469     UErrorCode* err)
470 {
471     const char* position = localeID;
472     int32_t subtagLength = 0;
473 
474     if(U_FAILURE(*err) ||
475        localeID == NULL ||
476        lang == NULL ||
477        langLength == NULL ||
478        script == NULL ||
479        scriptLength == NULL ||
480        region == NULL ||
481        regionLength == NULL) {
482         goto error;
483     }
484 
485     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
486     u_terminateChars(lang, *langLength, subtagLength, err);
487 
488     /*
489      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
490      * to be an error, because it indicates the user-supplied tag is
491      * not well-formed.
492      */
493     if(U_FAILURE(*err)) {
494         goto error;
495     }
496 
497     *langLength = subtagLength;
498 
499     /*
500      * If no language was present, use the value of unknownLanguage
501      * instead.  Otherwise, move past any separator.
502      */
503     if (*langLength == 0) {
504         uprv_strcpy(
505             lang,
506             unknownLanguage);
507         *langLength = (int32_t)uprv_strlen(lang);
508     }
509     else if (_isIDSeparator(*position)) {
510         ++position;
511     }
512 
513     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
514     u_terminateChars(script, *scriptLength, subtagLength, err);
515 
516     if(U_FAILURE(*err)) {
517         goto error;
518     }
519 
520     *scriptLength = subtagLength;
521 
522     if (*scriptLength > 0) {
523         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
524             /**
525              * If the script part is the "unknown" script, then don't return it.
526              **/
527             *scriptLength = 0;
528         }
529 
530         /*
531          * Move past any separator.
532          */
533         if (_isIDSeparator(*position)) {
534             ++position;
535         }
536     }
537 
538     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
539     u_terminateChars(region, *regionLength, subtagLength, err);
540 
541     if(U_FAILURE(*err)) {
542         goto error;
543     }
544 
545     *regionLength = subtagLength;
546 
547     if (*regionLength > 0) {
548         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
549             /**
550              * If the region part is the "unknown" region, then don't return it.
551              **/
552             *regionLength = 0;
553         }
554     } else if (*position != 0 && *position != '@') {
555         /* back up over consumed trailing separator */
556         --position;
557     }
558 
559 exit:
560 
561     return (int32_t)(position - localeID);
562 
563 error:
564 
565     /**
566      * If we get here, we have no explicit error, it's the result of an
567      * illegal argument.
568      **/
569     if (!U_FAILURE(*err)) {
570         *err = U_ILLEGAL_ARGUMENT_ERROR;
571     }
572 
573     goto exit;
574 }
575 
576 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)577 createLikelySubtagsString(
578     const char* lang,
579     int32_t langLength,
580     const char* script,
581     int32_t scriptLength,
582     const char* region,
583     int32_t regionLength,
584     const char* variants,
585     int32_t variantsLength,
586     char* tag,
587     int32_t tagCapacity,
588     UErrorCode* err)
589 {
590     /**
591      * ULOC_FULLNAME_CAPACITY will provide enough capacity
592      * that we can build a string that contains the language,
593      * script and region code without worrying about overrunning
594      * the user-supplied buffer.
595      **/
596     char tagBuffer[ULOC_FULLNAME_CAPACITY];
597     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
598     int32_t tagBufferLength = 0;
599 
600     if(U_FAILURE(*err)) {
601         goto error;
602     }
603 
604     /**
605      * Try the language with the script and region first.
606      **/
607     if (scriptLength > 0 && regionLength > 0) {
608 
609         const char* likelySubtags = NULL;
610 
611         tagBufferLength = createTagString(
612             lang,
613             langLength,
614             script,
615             scriptLength,
616             region,
617             regionLength,
618             NULL,
619             0,
620             tagBuffer,
621             sizeof(tagBuffer),
622             err);
623         if(U_FAILURE(*err)) {
624             goto error;
625         }
626 
627         likelySubtags =
628             findLikelySubtags(
629                 tagBuffer,
630                 likelySubtagsBuffer,
631                 sizeof(likelySubtagsBuffer),
632                 err);
633         if(U_FAILURE(*err)) {
634             goto error;
635         }
636 
637         if (likelySubtags != NULL) {
638             /* Always use the language tag from the
639                maximal string, since it may be more
640                specific than the one provided. */
641             return createTagStringWithAlternates(
642                         NULL,
643                         0,
644                         NULL,
645                         0,
646                         NULL,
647                         0,
648                         variants,
649                         variantsLength,
650                         likelySubtags,
651                         tag,
652                         tagCapacity,
653                         err);
654         }
655     }
656 
657     /**
658      * Try the language with just the script.
659      **/
660     if (scriptLength > 0) {
661 
662         const char* likelySubtags = NULL;
663 
664         tagBufferLength = createTagString(
665             lang,
666             langLength,
667             script,
668             scriptLength,
669             NULL,
670             0,
671             NULL,
672             0,
673             tagBuffer,
674             sizeof(tagBuffer),
675             err);
676         if(U_FAILURE(*err)) {
677             goto error;
678         }
679 
680         likelySubtags =
681             findLikelySubtags(
682                 tagBuffer,
683                 likelySubtagsBuffer,
684                 sizeof(likelySubtagsBuffer),
685                 err);
686         if(U_FAILURE(*err)) {
687             goto error;
688         }
689 
690         if (likelySubtags != NULL) {
691             /* Always use the language tag from the
692                maximal string, since it may be more
693                specific than the one provided. */
694             return createTagStringWithAlternates(
695                         NULL,
696                         0,
697                         NULL,
698                         0,
699                         region,
700                         regionLength,
701                         variants,
702                         variantsLength,
703                         likelySubtags,
704                         tag,
705                         tagCapacity,
706                         err);
707         }
708     }
709 
710     /**
711      * Try the language with just the region.
712      **/
713     if (regionLength > 0) {
714 
715         const char* likelySubtags = NULL;
716 
717         createTagString(
718             lang,
719             langLength,
720             NULL,
721             0,
722             region,
723             regionLength,
724             NULL,
725             0,
726             tagBuffer,
727             sizeof(tagBuffer),
728             err);
729         if(U_FAILURE(*err)) {
730             goto error;
731         }
732 
733         likelySubtags =
734             findLikelySubtags(
735                 tagBuffer,
736                 likelySubtagsBuffer,
737                 sizeof(likelySubtagsBuffer),
738                 err);
739         if(U_FAILURE(*err)) {
740             goto error;
741         }
742 
743         if (likelySubtags != NULL) {
744             /* Always use the language tag from the
745                maximal string, since it may be more
746                specific than the one provided. */
747             return createTagStringWithAlternates(
748                         NULL,
749                         0,
750                         script,
751                         scriptLength,
752                         NULL,
753                         0,
754                         variants,
755                         variantsLength,
756                         likelySubtags,
757                         tag,
758                         tagCapacity,
759                         err);
760         }
761     }
762 
763     /**
764      * Finally, try just the language.
765      **/
766     {
767         const char* likelySubtags = NULL;
768 
769         createTagString(
770             lang,
771             langLength,
772             NULL,
773             0,
774             NULL,
775             0,
776             NULL,
777             0,
778             tagBuffer,
779             sizeof(tagBuffer),
780             err);
781         if(U_FAILURE(*err)) {
782             goto error;
783         }
784 
785         likelySubtags =
786             findLikelySubtags(
787                 tagBuffer,
788                 likelySubtagsBuffer,
789                 sizeof(likelySubtagsBuffer),
790                 err);
791         if(U_FAILURE(*err)) {
792             goto error;
793         }
794 
795         if (likelySubtags != NULL) {
796             /* Always use the language tag from the
797                maximal string, since it may be more
798                specific than the one provided. */
799             return createTagStringWithAlternates(
800                         NULL,
801                         0,
802                         script,
803                         scriptLength,
804                         region,
805                         regionLength,
806                         variants,
807                         variantsLength,
808                         likelySubtags,
809                         tag,
810                         tagCapacity,
811                         err);
812         }
813     }
814 
815     return u_terminateChars(
816                 tag,
817                 tagCapacity,
818                 0,
819                 err);
820 
821 error:
822 
823     if (!U_FAILURE(*err)) {
824         *err = U_ILLEGAL_ARGUMENT_ERROR;
825     }
826 
827     return -1;
828 }
829 
/*
 * Scan the first trailingLength chars of trailing and jump to a label named
 * "error" in the enclosing function if a run of chars between '-' / '_'
 * separators is too long.  Scanning stops at the first '@'.
 *
 * The length test is made before a char is counted, so the jump happens on
 * the tenth consecutive non-separator char; runs of up to nine chars pass.
 * NOTE(review): a limit of eight chars may have been intended - confirm
 * before tightening the test.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    }
848 
849 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)850 _uloc_addLikelySubtags(const char*    localeID,
851          char* maximizedLocaleID,
852          int32_t maximizedLocaleIDCapacity,
853          UErrorCode* err)
854 {
855     char lang[ULOC_LANG_CAPACITY];
856     int32_t langLength = sizeof(lang);
857     char script[ULOC_SCRIPT_CAPACITY];
858     int32_t scriptLength = sizeof(script);
859     char region[ULOC_COUNTRY_CAPACITY];
860     int32_t regionLength = sizeof(region);
861     const char* trailing = "";
862     int32_t trailingLength = 0;
863     int32_t trailingIndex = 0;
864     int32_t resultLength = 0;
865 
866     if(U_FAILURE(*err)) {
867         goto error;
868     }
869     else if (localeID == NULL ||
870              maximizedLocaleID == NULL ||
871              maximizedLocaleIDCapacity <= 0) {
872         goto error;
873     }
874 
875     trailingIndex = parseTagString(
876         localeID,
877         lang,
878         &langLength,
879         script,
880         &scriptLength,
881         region,
882         &regionLength,
883         err);
884     if(U_FAILURE(*err)) {
885         /* Overflow indicates an illegal argument error */
886         if (*err == U_BUFFER_OVERFLOW_ERROR) {
887             *err = U_ILLEGAL_ARGUMENT_ERROR;
888         }
889 
890         goto error;
891     }
892 
893     /* Find the length of the trailing portion. */
894     while (_isIDSeparator(localeID[trailingIndex])) {
895         trailingIndex++;
896     }
897     trailing = &localeID[trailingIndex];
898     trailingLength = (int32_t)uprv_strlen(trailing);
899 
900     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
901 
902     resultLength =
903         createLikelySubtagsString(
904             lang,
905             langLength,
906             script,
907             scriptLength,
908             region,
909             regionLength,
910             trailing,
911             trailingLength,
912             maximizedLocaleID,
913             maximizedLocaleIDCapacity,
914             err);
915 
916     if (resultLength == 0) {
917         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
918 
919         /*
920          * If we get here, we need to return localeID.
921          */
922         uprv_memcpy(
923             maximizedLocaleID,
924             localeID,
925             localIDLength <= maximizedLocaleIDCapacity ?
926                 localIDLength : maximizedLocaleIDCapacity);
927 
928         resultLength =
929             u_terminateChars(
930                 maximizedLocaleID,
931                 maximizedLocaleIDCapacity,
932                 localIDLength,
933                 err);
934     }
935 
936     return resultLength;
937 
938 error:
939 
940     if (!U_FAILURE(*err)) {
941         *err = U_ILLEGAL_ARGUMENT_ERROR;
942     }
943 
944     return -1;
945 }
946 
947 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)948 _uloc_minimizeSubtags(const char*    localeID,
949          char* minimizedLocaleID,
950          int32_t minimizedLocaleIDCapacity,
951          UErrorCode* err)
952 {
953     /**
954      * ULOC_FULLNAME_CAPACITY will provide enough capacity
955      * that we can build a string that contains the language,
956      * script and region code without worrying about overrunning
957      * the user-supplied buffer.
958      **/
959     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
960     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
961 
962     char lang[ULOC_LANG_CAPACITY];
963     int32_t langLength = sizeof(lang);
964     char script[ULOC_SCRIPT_CAPACITY];
965     int32_t scriptLength = sizeof(script);
966     char region[ULOC_COUNTRY_CAPACITY];
967     int32_t regionLength = sizeof(region);
968     const char* trailing = "";
969     int32_t trailingLength = 0;
970     int32_t trailingIndex = 0;
971 
972     if(U_FAILURE(*err)) {
973         goto error;
974     }
975     else if (localeID == NULL ||
976              minimizedLocaleID == NULL ||
977              minimizedLocaleIDCapacity <= 0) {
978         goto error;
979     }
980 
981     trailingIndex =
982         parseTagString(
983             localeID,
984             lang,
985             &langLength,
986             script,
987             &scriptLength,
988             region,
989             &regionLength,
990             err);
991     if(U_FAILURE(*err)) {
992 
993         /* Overflow indicates an illegal argument error */
994         if (*err == U_BUFFER_OVERFLOW_ERROR) {
995             *err = U_ILLEGAL_ARGUMENT_ERROR;
996         }
997 
998         goto error;
999     }
1000 
1001     /* Find the spot where the variants or the keywords begin, if any. */
1002     while (_isIDSeparator(localeID[trailingIndex])) {
1003         trailingIndex++;
1004     }
1005     trailing = &localeID[trailingIndex];
1006     trailingLength = (int32_t)uprv_strlen(trailing);
1007 
1008     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1009 
1010     createTagString(
1011         lang,
1012         langLength,
1013         script,
1014         scriptLength,
1015         region,
1016         regionLength,
1017         NULL,
1018         0,
1019         maximizedTagBuffer,
1020         maximizedTagBufferLength,
1021         err);
1022     if(U_FAILURE(*err)) {
1023         goto error;
1024     }
1025 
1026     /**
1027      * First, we need to first get the maximization
1028      * from AddLikelySubtags.
1029      **/
1030     maximizedTagBufferLength =
1031         uloc_addLikelySubtags(
1032             maximizedTagBuffer,
1033             maximizedTagBuffer,
1034             maximizedTagBufferLength,
1035             err);
1036 
1037     if(U_FAILURE(*err)) {
1038         goto error;
1039     }
1040 
1041     /**
1042      * Start first with just the language.
1043      **/
1044     {
1045         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1046 
1047         const int32_t tagBufferLength =
1048             createLikelySubtagsString(
1049                 lang,
1050                 langLength,
1051                 NULL,
1052                 0,
1053                 NULL,
1054                 0,
1055                 NULL,
1056                 0,
1057                 tagBuffer,
1058                 sizeof(tagBuffer),
1059                 err);
1060 
1061         if(U_FAILURE(*err)) {
1062             goto error;
1063         }
1064         else if (uprv_strnicmp(
1065                     maximizedTagBuffer,
1066                     tagBuffer,
1067                     tagBufferLength) == 0) {
1068 
1069             return createTagString(
1070                         lang,
1071                         langLength,
1072                         NULL,
1073                         0,
1074                         NULL,
1075                         0,
1076                         trailing,
1077                         trailingLength,
1078                         minimizedLocaleID,
1079                         minimizedLocaleIDCapacity,
1080                         err);
1081         }
1082     }
1083 
1084     /**
1085      * Next, try the language and region.
1086      **/
1087     if (regionLength > 0) {
1088 
1089         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1090 
1091         const int32_t tagBufferLength =
1092             createLikelySubtagsString(
1093                 lang,
1094                 langLength,
1095                 NULL,
1096                 0,
1097                 region,
1098                 regionLength,
1099                 NULL,
1100                 0,
1101                 tagBuffer,
1102                 sizeof(tagBuffer),
1103                 err);
1104 
1105         if(U_FAILURE(*err)) {
1106             goto error;
1107         }
1108         else if (uprv_strnicmp(
1109                     maximizedTagBuffer,
1110                     tagBuffer,
1111                     tagBufferLength) == 0) {
1112 
1113             return createTagString(
1114                         lang,
1115                         langLength,
1116                         NULL,
1117                         0,
1118                         region,
1119                         regionLength,
1120                         trailing,
1121                         trailingLength,
1122                         minimizedLocaleID,
1123                         minimizedLocaleIDCapacity,
1124                         err);
1125         }
1126     }
1127 
1128     /**
1129      * Finally, try the language and script.  This is our last chance,
1130      * since trying with all three subtags would only yield the
1131      * maximal version that we already have.
1132      **/
1133     if (scriptLength > 0 && regionLength > 0) {
1134         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1135 
1136         const int32_t tagBufferLength =
1137             createLikelySubtagsString(
1138                 lang,
1139                 langLength,
1140                 script,
1141                 scriptLength,
1142                 NULL,
1143                 0,
1144                 NULL,
1145                 0,
1146                 tagBuffer,
1147                 sizeof(tagBuffer),
1148                 err);
1149 
1150         if(U_FAILURE(*err)) {
1151             goto error;
1152         }
1153         else if (uprv_strnicmp(
1154                     maximizedTagBuffer,
1155                     tagBuffer,
1156                     tagBufferLength) == 0) {
1157 
1158             return createTagString(
1159                         lang,
1160                         langLength,
1161                         script,
1162                         scriptLength,
1163                         NULL,
1164                         0,
1165                         trailing,
1166                         trailingLength,
1167                         minimizedLocaleID,
1168                         minimizedLocaleIDCapacity,
1169                         err);
1170         }
1171     }
1172 
1173     {
1174         /**
1175          * If we got here, return the locale ID parameter.
1176          **/
1177         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1178 
1179         uprv_memcpy(
1180             minimizedLocaleID,
1181             localeID,
1182             localeIDLength <= minimizedLocaleIDCapacity ?
1183                 localeIDLength : minimizedLocaleIDCapacity);
1184 
1185         return u_terminateChars(
1186                     minimizedLocaleID,
1187                     minimizedLocaleIDCapacity,
1188                     localeIDLength,
1189                     err);
1190     }
1191 
1192 error:
1193 
1194     if (!U_FAILURE(*err)) {
1195         *err = U_ILLEGAL_ARGUMENT_ERROR;
1196     }
1197 
1198     return -1;
1199 
1200 
1201 }
1202 
1203 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1204 do_canonicalize(const char*    localeID,
1205          char* buffer,
1206          int32_t bufferCapacity,
1207          UErrorCode* err)
1208 {
1209     uloc_canonicalize(
1210         localeID,
1211         buffer,
1212         bufferCapacity,
1213         err);
1214 
1215     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1216         *err == U_BUFFER_OVERFLOW_ERROR) {
1217         *err = U_ILLEGAL_ARGUMENT_ERROR;
1218 
1219         return FALSE;
1220     }
1221     else if (U_FAILURE(*err)) {
1222 
1223         return FALSE;
1224     }
1225     else {
1226         return TRUE;
1227     }
1228 }
1229 
1230 U_DRAFT int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1231 uloc_addLikelySubtags(const char*    localeID,
1232          char* maximizedLocaleID,
1233          int32_t maximizedLocaleIDCapacity,
1234          UErrorCode* err)
1235 {
1236     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1237 
1238     if (!do_canonicalize(
1239         localeID,
1240         localeBuffer,
1241         sizeof(localeBuffer),
1242         err)) {
1243         return -1;
1244     }
1245     else {
1246         return _uloc_addLikelySubtags(
1247                     localeBuffer,
1248                     maximizedLocaleID,
1249                     maximizedLocaleIDCapacity,
1250                     err);
1251     }
1252 }
1253 
1254 U_DRAFT int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1255 uloc_minimizeSubtags(const char*    localeID,
1256          char* minimizedLocaleID,
1257          int32_t minimizedLocaleIDCapacity,
1258          UErrorCode* err)
1259 {
1260     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1261 
1262     if (!do_canonicalize(
1263         localeID,
1264         localeBuffer,
1265         sizeof(localeBuffer),
1266         err)) {
1267         return -1;
1268     }
1269     else {
1270         return _uloc_minimizeSubtags(
1271                     localeBuffer,
1272                     minimizedLocaleID,
1273                     minimizedLocaleIDCapacity,
1274                     err);
1275     }
1276 }
1277