• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*
*   Copyright (C) 1997-2012, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  loclikely.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010feb25
*   created by: Markus W. Scherer
*
*   Code for likely and minimized locale subtags, separated out from other .cpp files
*   that then do not depend on resource bundle code and likely-subtags data.
*/
19 
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28 
29 /**
30  * This function looks for the localeID in the likelySubtags resource.
31  *
32  * @param localeID The tag to find.
33  * @param buffer A buffer to hold the matching entry
34  * @param bufferLength The length of the output buffer
35  * @return A pointer to "buffer" if found, or a null pointer if not.
36  */
37 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39                   char* buffer,
40                   int32_t bufferLength,
41                   UErrorCode* err) {
42     const char* result = NULL;
43 
44     if (!U_FAILURE(*err)) {
45         int32_t resLen = 0;
46         const UChar* s = NULL;
47         UErrorCode tmpErr = U_ZERO_ERROR;
48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49         if (U_SUCCESS(tmpErr)) {
50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51 
52             if (U_FAILURE(tmpErr)) {
53                 /*
54                  * If a resource is missing, it's not really an error, it's
55                  * just that we don't have any data for that particular locale ID.
56                  */
57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58                     *err = tmpErr;
59                 }
60             }
61             else if (resLen >= bufferLength) {
62                 /* The buffer should never overflow. */
63                 *err = U_INTERNAL_PROGRAM_ERROR;
64             }
65             else {
66                 u_UCharsToChars(s, buffer, resLen + 1);
67                 result = buffer;
68             }
69 
70             ures_close(subtags);
71         } else {
72             *err = tmpErr;
73         }
74     }
75 
76     return result;
77 }
78 
79 /**
80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
81  * must be large enough to contain the resulting tag plus any separator
82  * necessary. The tag must not be a zero-length string.
83  *
84  * @param tag The tag to add.
85  * @param tagLength The length of the tag.
86  * @param buffer The output buffer.
87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
88  **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91     const char* tag,
92     int32_t tagLength,
93     char* buffer,
94     int32_t* bufferLength) {
95 
96     if (*bufferLength > 0) {
97         buffer[*bufferLength] = '_';
98         ++(*bufferLength);
99     }
100 
101     uprv_memmove(
102         &buffer[*bufferLength],
103         tag,
104         tagLength);
105 
106     *bufferLength += tagLength;
107 }
108 
/**
 * Canonical subtags that stand for an unknown language, script and region.
 **/
static const char *const unknownLanguage = "und";
static const char *const unknownScript   = "Zzzz";
static const char *const unknownRegion   = "ZZ";
115 
116 /**
117  * Create a tag string from the supplied parameters.  The lang, script and region
118  * parameters may be NULL pointers. If they are, their corresponding length parameters
119  * must be less than or equal to 0.
120  *
121  * If any of the language, script or region parameters are empty, and the alternateTags
122  * parameter is not NULL, it will be parsed for potential language, script and region tags
123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
124  * it contains no language tag, the default tag for the unknown language is used.
125  *
126  * If the length of the new string exceeds the capacity of the output buffer,
127  * the function copies as many bytes to the output buffer as it can, and returns
128  * the error U_BUFFER_OVERFLOW_ERROR.
129  *
130  * If an illegal argument is provided, the function returns the error
131  * U_ILLEGAL_ARGUMENT_ERROR.
132  *
133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134  * the tag string fits in the output buffer, but the null terminator doesn't.
135  *
136  * @param lang The language tag to use.
137  * @param langLength The length of the language tag.
138  * @param script The script tag to use.
139  * @param scriptLength The length of the script tag.
140  * @param region The region tag to use.
141  * @param regionLength The length of the region tag.
142  * @param trailing Any trailing data to append to the new tag.
143  * @param trailingLength The length of the trailing data.
144  * @param alternateTags A string containing any alternate tags.
145  * @param tag The output buffer.
146  * @param tagCapacity The capacity of the output buffer.
147  * @param err A pointer to a UErrorCode for error reporting.
148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149  **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152     const char* lang,
153     int32_t langLength,
154     const char* script,
155     int32_t scriptLength,
156     const char* region,
157     int32_t regionLength,
158     const char* trailing,
159     int32_t trailingLength,
160     const char* alternateTags,
161     char* tag,
162     int32_t tagCapacity,
163     UErrorCode* err) {
164 
165     if (U_FAILURE(*err)) {
166         goto error;
167     }
168     else if (tag == NULL ||
169              tagCapacity <= 0 ||
170              langLength >= ULOC_LANG_CAPACITY ||
171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
172              regionLength >= ULOC_COUNTRY_CAPACITY) {
173         goto error;
174     }
175     else {
176         /**
177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
178          * that we can build a string that contains the language,
179          * script and region code without worrying about overrunning
180          * the user-supplied buffer.
181          **/
182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
183         int32_t tagLength = 0;
184         int32_t capacityRemaining = tagCapacity;
185         UBool regionAppended = FALSE;
186 
187         if (langLength > 0) {
188             appendTag(
189                 lang,
190                 langLength,
191                 tagBuffer,
192                 &tagLength);
193         }
194         else if (alternateTags == NULL) {
195             /*
196              * Append the value for an unknown language, if
197              * we found no language.
198              */
199             appendTag(
200                 unknownLanguage,
201                 (int32_t)uprv_strlen(unknownLanguage),
202                 tagBuffer,
203                 &tagLength);
204         }
205         else {
206             /*
207              * Parse the alternateTags string for the language.
208              */
209             char alternateLang[ULOC_LANG_CAPACITY];
210             int32_t alternateLangLength = sizeof(alternateLang);
211 
212             alternateLangLength =
213                 uloc_getLanguage(
214                     alternateTags,
215                     alternateLang,
216                     alternateLangLength,
217                     err);
218             if(U_FAILURE(*err) ||
219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
220                 goto error;
221             }
222             else if (alternateLangLength == 0) {
223                 /*
224                  * Append the value for an unknown language, if
225                  * we found no language.
226                  */
227                 appendTag(
228                     unknownLanguage,
229                     (int32_t)uprv_strlen(unknownLanguage),
230                     tagBuffer,
231                     &tagLength);
232             }
233             else {
234                 appendTag(
235                     alternateLang,
236                     alternateLangLength,
237                     tagBuffer,
238                     &tagLength);
239             }
240         }
241 
242         if (scriptLength > 0) {
243             appendTag(
244                 script,
245                 scriptLength,
246                 tagBuffer,
247                 &tagLength);
248         }
249         else if (alternateTags != NULL) {
250             /*
251              * Parse the alternateTags string for the script.
252              */
253             char alternateScript[ULOC_SCRIPT_CAPACITY];
254 
255             const int32_t alternateScriptLength =
256                 uloc_getScript(
257                     alternateTags,
258                     alternateScript,
259                     sizeof(alternateScript),
260                     err);
261 
262             if (U_FAILURE(*err) ||
263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264                 goto error;
265             }
266             else if (alternateScriptLength > 0) {
267                 appendTag(
268                     alternateScript,
269                     alternateScriptLength,
270                     tagBuffer,
271                     &tagLength);
272             }
273         }
274 
275         if (regionLength > 0) {
276             appendTag(
277                 region,
278                 regionLength,
279                 tagBuffer,
280                 &tagLength);
281 
282             regionAppended = TRUE;
283         }
284         else if (alternateTags != NULL) {
285             /*
286              * Parse the alternateTags string for the region.
287              */
288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
289 
290             const int32_t alternateRegionLength =
291                 uloc_getCountry(
292                     alternateTags,
293                     alternateRegion,
294                     sizeof(alternateRegion),
295                     err);
296             if (U_FAILURE(*err) ||
297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298                 goto error;
299             }
300             else if (alternateRegionLength > 0) {
301                 appendTag(
302                     alternateRegion,
303                     alternateRegionLength,
304                     tagBuffer,
305                     &tagLength);
306 
307                 regionAppended = TRUE;
308             }
309         }
310 
311         {
312             const int32_t toCopy =
313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
314 
315             /**
316              * Copy the partial tag from our internal buffer to the supplied
317              * target.
318              **/
319             uprv_memcpy(
320                 tag,
321                 tagBuffer,
322                 toCopy);
323 
324             capacityRemaining -= toCopy;
325         }
326 
327         if (trailingLength > 0) {
328             if (*trailing != '@' && capacityRemaining > 0) {
329                 tag[tagLength++] = '_';
330                 --capacityRemaining;
331                 if (capacityRemaining > 0 && !regionAppended) {
332                     /* extra separator is required */
333                     tag[tagLength++] = '_';
334                     --capacityRemaining;
335                 }
336             }
337 
338             if (capacityRemaining > 0) {
339                 /*
340                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
341                  * don't know if the user-supplied buffers overlap.
342                  */
343                 const int32_t toCopy =
344                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
345 
346                 uprv_memmove(
347                     &tag[tagLength],
348                     trailing,
349                     toCopy);
350             }
351         }
352 
353         tagLength += trailingLength;
354 
355         return u_terminateChars(
356                     tag,
357                     tagCapacity,
358                     tagLength,
359                     err);
360     }
361 
362 error:
363 
364     /**
365      * An overflow indicates the locale ID passed in
366      * is ill-formed.  If we got here, and there was
367      * no previous error, it's an implicit overflow.
368      **/
369     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
370         U_SUCCESS(*err)) {
371         *err = U_ILLEGAL_ARGUMENT_ERROR;
372     }
373 
374     return -1;
375 }
376 
377 /**
378  * Create a tag string from the supplied parameters.  The lang, script and region
379  * parameters may be NULL pointers. If they are, their corresponding length parameters
380  * must be less than or equal to 0.  If the lang parameter is an empty string, the
381  * default value for an unknown language is written to the output buffer.
382  *
383  * If the length of the new string exceeds the capacity of the output buffer,
384  * the function copies as many bytes to the output buffer as it can, and returns
385  * the error U_BUFFER_OVERFLOW_ERROR.
386  *
387  * If an illegal argument is provided, the function returns the error
388  * U_ILLEGAL_ARGUMENT_ERROR.
389  *
390  * @param lang The language tag to use.
391  * @param langLength The length of the language tag.
392  * @param script The script tag to use.
393  * @param scriptLength The length of the script tag.
394  * @param region The region tag to use.
395  * @param regionLength The length of the region tag.
396  * @param trailing Any trailing data to append to the new tag.
397  * @param trailingLength The length of the trailing data.
398  * @param tag The output buffer.
399  * @param tagCapacity The capacity of the output buffer.
400  * @param err A pointer to a UErrorCode for error reporting.
401  * @return The length of the tag string, which may be greater than tagCapacity.
402  **/
403 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)404 createTagString(
405     const char* lang,
406     int32_t langLength,
407     const char* script,
408     int32_t scriptLength,
409     const char* region,
410     int32_t regionLength,
411     const char* trailing,
412     int32_t trailingLength,
413     char* tag,
414     int32_t tagCapacity,
415     UErrorCode* err)
416 {
417     return createTagStringWithAlternates(
418                 lang,
419                 langLength,
420                 script,
421                 scriptLength,
422                 region,
423                 regionLength,
424                 trailing,
425                 trailingLength,
426                 NULL,
427                 tag,
428                 tagCapacity,
429                 err);
430 }
431 
432 /**
433  * Parse the language, script, and region subtags from a tag string, and copy the
434  * results into the corresponding output parameters. The buffers are null-terminated,
435  * unless overflow occurs.
436  *
437  * The langLength, scriptLength, and regionLength parameters are input/output
438  * parameters, and must contain the capacity of their corresponding buffers on
439  * input.  On output, they will contain the actual length of the buffers, not
440  * including the null terminator.
441  *
442  * If the length of any of the output subtags exceeds the capacity of the corresponding
443  * buffer, the function copies as many bytes to the output buffer as it can, and returns
444  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
445  * occurs.
446  *
447  * If an illegal argument is provided, the function returns the error
448  * U_ILLEGAL_ARGUMENT_ERROR.
449  *
450  * @param localeID The locale ID to parse.
451  * @param lang The language tag buffer.
452  * @param langLength The length of the language tag.
453  * @param script The script tag buffer.
454  * @param scriptLength The length of the script tag.
455  * @param region The region tag buffer.
456  * @param regionLength The length of the region tag.
457  * @param err A pointer to a UErrorCode for error reporting.
458  * @return The number of chars of the localeID parameter consumed.
459  **/
460 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)461 parseTagString(
462     const char* localeID,
463     char* lang,
464     int32_t* langLength,
465     char* script,
466     int32_t* scriptLength,
467     char* region,
468     int32_t* regionLength,
469     UErrorCode* err)
470 {
471     const char* position = localeID;
472     int32_t subtagLength = 0;
473 
474     if(U_FAILURE(*err) ||
475        localeID == NULL ||
476        lang == NULL ||
477        langLength == NULL ||
478        script == NULL ||
479        scriptLength == NULL ||
480        region == NULL ||
481        regionLength == NULL) {
482         goto error;
483     }
484 
485     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
486     u_terminateChars(lang, *langLength, subtagLength, err);
487 
488     /*
489      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
490      * to be an error, because it indicates the user-supplied tag is
491      * not well-formed.
492      */
493     if(U_FAILURE(*err)) {
494         goto error;
495     }
496 
497     *langLength = subtagLength;
498 
499     /*
500      * If no language was present, use the value of unknownLanguage
501      * instead.  Otherwise, move past any separator.
502      */
503     if (*langLength == 0) {
504         uprv_strcpy(
505             lang,
506             unknownLanguage);
507         *langLength = (int32_t)uprv_strlen(lang);
508     }
509     else if (_isIDSeparator(*position)) {
510         ++position;
511     }
512 
513     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
514     u_terminateChars(script, *scriptLength, subtagLength, err);
515 
516     if(U_FAILURE(*err)) {
517         goto error;
518     }
519 
520     *scriptLength = subtagLength;
521 
522     if (*scriptLength > 0) {
523         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
524             /**
525              * If the script part is the "unknown" script, then don't return it.
526              **/
527             *scriptLength = 0;
528         }
529 
530         /*
531          * Move past any separator.
532          */
533         if (_isIDSeparator(*position)) {
534             ++position;
535         }
536     }
537 
538     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
539     u_terminateChars(region, *regionLength, subtagLength, err);
540 
541     if(U_FAILURE(*err)) {
542         goto error;
543     }
544 
545     *regionLength = subtagLength;
546 
547     if (*regionLength > 0) {
548         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
549             /**
550              * If the region part is the "unknown" region, then don't return it.
551              **/
552             *regionLength = 0;
553         }
554     } else if (*position != 0 && *position != '@') {
555         /* back up over consumed trailing separator */
556         --position;
557     }
558 
559 exit:
560 
561     return (int32_t)(position - localeID);
562 
563 error:
564 
565     /**
566      * If we get here, we have no explicit error, it's the result of an
567      * illegal argument.
568      **/
569     if (!U_FAILURE(*err)) {
570         *err = U_ILLEGAL_ARGUMENT_ERROR;
571     }
572 
573     goto exit;
574 }
575 
576 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)577 createLikelySubtagsString(
578     const char* lang,
579     int32_t langLength,
580     const char* script,
581     int32_t scriptLength,
582     const char* region,
583     int32_t regionLength,
584     const char* variants,
585     int32_t variantsLength,
586     char* tag,
587     int32_t tagCapacity,
588     UErrorCode* err)
589 {
590     /**
591      * ULOC_FULLNAME_CAPACITY will provide enough capacity
592      * that we can build a string that contains the language,
593      * script and region code without worrying about overrunning
594      * the user-supplied buffer.
595      **/
596     char tagBuffer[ULOC_FULLNAME_CAPACITY];
597     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
598 
599     if(U_FAILURE(*err)) {
600         goto error;
601     }
602 
603     /**
604      * Try the language with the script and region first.
605      **/
606     if (scriptLength > 0 && regionLength > 0) {
607 
608         const char* likelySubtags = NULL;
609 
610         createTagString(
611             lang,
612             langLength,
613             script,
614             scriptLength,
615             region,
616             regionLength,
617             NULL,
618             0,
619             tagBuffer,
620             sizeof(tagBuffer),
621             err);
622         if(U_FAILURE(*err)) {
623             goto error;
624         }
625 
626         likelySubtags =
627             findLikelySubtags(
628                 tagBuffer,
629                 likelySubtagsBuffer,
630                 sizeof(likelySubtagsBuffer),
631                 err);
632         if(U_FAILURE(*err)) {
633             goto error;
634         }
635 
636         if (likelySubtags != NULL) {
637             /* Always use the language tag from the
638                maximal string, since it may be more
639                specific than the one provided. */
640             return createTagStringWithAlternates(
641                         NULL,
642                         0,
643                         NULL,
644                         0,
645                         NULL,
646                         0,
647                         variants,
648                         variantsLength,
649                         likelySubtags,
650                         tag,
651                         tagCapacity,
652                         err);
653         }
654     }
655 
656     /**
657      * Try the language with just the script.
658      **/
659     if (scriptLength > 0) {
660 
661         const char* likelySubtags = NULL;
662 
663         createTagString(
664             lang,
665             langLength,
666             script,
667             scriptLength,
668             NULL,
669             0,
670             NULL,
671             0,
672             tagBuffer,
673             sizeof(tagBuffer),
674             err);
675         if(U_FAILURE(*err)) {
676             goto error;
677         }
678 
679         likelySubtags =
680             findLikelySubtags(
681                 tagBuffer,
682                 likelySubtagsBuffer,
683                 sizeof(likelySubtagsBuffer),
684                 err);
685         if(U_FAILURE(*err)) {
686             goto error;
687         }
688 
689         if (likelySubtags != NULL) {
690             /* Always use the language tag from the
691                maximal string, since it may be more
692                specific than the one provided. */
693             return createTagStringWithAlternates(
694                         NULL,
695                         0,
696                         NULL,
697                         0,
698                         region,
699                         regionLength,
700                         variants,
701                         variantsLength,
702                         likelySubtags,
703                         tag,
704                         tagCapacity,
705                         err);
706         }
707     }
708 
709     /**
710      * Try the language with just the region.
711      **/
712     if (regionLength > 0) {
713 
714         const char* likelySubtags = NULL;
715 
716         createTagString(
717             lang,
718             langLength,
719             NULL,
720             0,
721             region,
722             regionLength,
723             NULL,
724             0,
725             tagBuffer,
726             sizeof(tagBuffer),
727             err);
728         if(U_FAILURE(*err)) {
729             goto error;
730         }
731 
732         likelySubtags =
733             findLikelySubtags(
734                 tagBuffer,
735                 likelySubtagsBuffer,
736                 sizeof(likelySubtagsBuffer),
737                 err);
738         if(U_FAILURE(*err)) {
739             goto error;
740         }
741 
742         if (likelySubtags != NULL) {
743             /* Always use the language tag from the
744                maximal string, since it may be more
745                specific than the one provided. */
746             return createTagStringWithAlternates(
747                         NULL,
748                         0,
749                         script,
750                         scriptLength,
751                         NULL,
752                         0,
753                         variants,
754                         variantsLength,
755                         likelySubtags,
756                         tag,
757                         tagCapacity,
758                         err);
759         }
760     }
761 
762     /**
763      * Finally, try just the language.
764      **/
765     {
766         const char* likelySubtags = NULL;
767 
768         createTagString(
769             lang,
770             langLength,
771             NULL,
772             0,
773             NULL,
774             0,
775             NULL,
776             0,
777             tagBuffer,
778             sizeof(tagBuffer),
779             err);
780         if(U_FAILURE(*err)) {
781             goto error;
782         }
783 
784         likelySubtags =
785             findLikelySubtags(
786                 tagBuffer,
787                 likelySubtagsBuffer,
788                 sizeof(likelySubtagsBuffer),
789                 err);
790         if(U_FAILURE(*err)) {
791             goto error;
792         }
793 
794         if (likelySubtags != NULL) {
795             /* Always use the language tag from the
796                maximal string, since it may be more
797                specific than the one provided. */
798             return createTagStringWithAlternates(
799                         NULL,
800                         0,
801                         script,
802                         scriptLength,
803                         region,
804                         regionLength,
805                         variants,
806                         variantsLength,
807                         likelySubtags,
808                         tag,
809                         tagCapacity,
810                         err);
811         }
812     }
813 
814     return u_terminateChars(
815                 tag,
816                 tagCapacity,
817                 0,
818                 err);
819 
820 error:
821 
822     if (!U_FAILURE(*err)) {
823         *err = U_ILLEGAL_ARGUMENT_ERROR;
824     }
825 
826     return -1;
827 }
828 
/**
 * Scan the first trailingLength chars of trailing and jump to the enclosing
 * function's "error" label if a segment between '-' / '_' separators is too
 * long.  Scanning stops at the first '@'.
 *
 * The length test runs before a char is counted, so a segment is rejected
 * when its tenth char is seen; segments of up to nine chars pass.
 * NOTE(review): the literal 8 suggests an eight-char limit was intended -
 * confirm before tightening, since that would reject input accepted today.
 *
 * Both arguments are evaluated more than once.  The enclosing function must
 * define a label named "error".
 **/
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t variantCharCount = 0; \
        int32_t variantIndex; \
        for (variantIndex = 0; variantIndex < (trailingLength); variantIndex++) { \
            if ((trailing)[variantIndex] == '-' || (trailing)[variantIndex] == '_') { \
                variantCharCount = 0; \
            } else if ((trailing)[variantIndex] == '@') { \
                break; \
            } else if (variantCharCount > 8) { \
                goto error; \
            } else { \
                variantCharCount++; \
            } \
        } \
    }
847 
848 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)849 _uloc_addLikelySubtags(const char*    localeID,
850          char* maximizedLocaleID,
851          int32_t maximizedLocaleIDCapacity,
852          UErrorCode* err)
853 {
854     char lang[ULOC_LANG_CAPACITY];
855     int32_t langLength = sizeof(lang);
856     char script[ULOC_SCRIPT_CAPACITY];
857     int32_t scriptLength = sizeof(script);
858     char region[ULOC_COUNTRY_CAPACITY];
859     int32_t regionLength = sizeof(region);
860     const char* trailing = "";
861     int32_t trailingLength = 0;
862     int32_t trailingIndex = 0;
863     int32_t resultLength = 0;
864 
865     if(U_FAILURE(*err)) {
866         goto error;
867     }
868     else if (localeID == NULL ||
869              maximizedLocaleID == NULL ||
870              maximizedLocaleIDCapacity <= 0) {
871         goto error;
872     }
873 
874     trailingIndex = parseTagString(
875         localeID,
876         lang,
877         &langLength,
878         script,
879         &scriptLength,
880         region,
881         &regionLength,
882         err);
883     if(U_FAILURE(*err)) {
884         /* Overflow indicates an illegal argument error */
885         if (*err == U_BUFFER_OVERFLOW_ERROR) {
886             *err = U_ILLEGAL_ARGUMENT_ERROR;
887         }
888 
889         goto error;
890     }
891 
892     /* Find the length of the trailing portion. */
893     while (_isIDSeparator(localeID[trailingIndex])) {
894         trailingIndex++;
895     }
896     trailing = &localeID[trailingIndex];
897     trailingLength = (int32_t)uprv_strlen(trailing);
898 
899     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
900 
901     resultLength =
902         createLikelySubtagsString(
903             lang,
904             langLength,
905             script,
906             scriptLength,
907             region,
908             regionLength,
909             trailing,
910             trailingLength,
911             maximizedLocaleID,
912             maximizedLocaleIDCapacity,
913             err);
914 
915     if (resultLength == 0) {
916         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
917 
918         /*
919          * If we get here, we need to return localeID.
920          */
921         uprv_memcpy(
922             maximizedLocaleID,
923             localeID,
924             localIDLength <= maximizedLocaleIDCapacity ?
925                 localIDLength : maximizedLocaleIDCapacity);
926 
927         resultLength =
928             u_terminateChars(
929                 maximizedLocaleID,
930                 maximizedLocaleIDCapacity,
931                 localIDLength,
932                 err);
933     }
934 
935     return resultLength;
936 
937 error:
938 
939     if (!U_FAILURE(*err)) {
940         *err = U_ILLEGAL_ARGUMENT_ERROR;
941     }
942 
943     return -1;
944 }
945 
946 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)947 _uloc_minimizeSubtags(const char*    localeID,
948          char* minimizedLocaleID,
949          int32_t minimizedLocaleIDCapacity,
950          UErrorCode* err)
951 {
952     /**
953      * ULOC_FULLNAME_CAPACITY will provide enough capacity
954      * that we can build a string that contains the language,
955      * script and region code without worrying about overrunning
956      * the user-supplied buffer.
957      **/
958     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
959     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
960 
961     char lang[ULOC_LANG_CAPACITY];
962     int32_t langLength = sizeof(lang);
963     char script[ULOC_SCRIPT_CAPACITY];
964     int32_t scriptLength = sizeof(script);
965     char region[ULOC_COUNTRY_CAPACITY];
966     int32_t regionLength = sizeof(region);
967     const char* trailing = "";
968     int32_t trailingLength = 0;
969     int32_t trailingIndex = 0;
970 
971     if(U_FAILURE(*err)) {
972         goto error;
973     }
974     else if (localeID == NULL ||
975              minimizedLocaleID == NULL ||
976              minimizedLocaleIDCapacity <= 0) {
977         goto error;
978     }
979 
980     trailingIndex =
981         parseTagString(
982             localeID,
983             lang,
984             &langLength,
985             script,
986             &scriptLength,
987             region,
988             &regionLength,
989             err);
990     if(U_FAILURE(*err)) {
991 
992         /* Overflow indicates an illegal argument error */
993         if (*err == U_BUFFER_OVERFLOW_ERROR) {
994             *err = U_ILLEGAL_ARGUMENT_ERROR;
995         }
996 
997         goto error;
998     }
999 
1000     /* Find the spot where the variants or the keywords begin, if any. */
1001     while (_isIDSeparator(localeID[trailingIndex])) {
1002         trailingIndex++;
1003     }
1004     trailing = &localeID[trailingIndex];
1005     trailingLength = (int32_t)uprv_strlen(trailing);
1006 
1007     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1008 
1009     createTagString(
1010         lang,
1011         langLength,
1012         script,
1013         scriptLength,
1014         region,
1015         regionLength,
1016         NULL,
1017         0,
1018         maximizedTagBuffer,
1019         maximizedTagBufferLength,
1020         err);
1021     if(U_FAILURE(*err)) {
1022         goto error;
1023     }
1024 
1025     /**
1026      * First, we need to first get the maximization
1027      * from AddLikelySubtags.
1028      **/
1029     maximizedTagBufferLength =
1030         uloc_addLikelySubtags(
1031             maximizedTagBuffer,
1032             maximizedTagBuffer,
1033             maximizedTagBufferLength,
1034             err);
1035 
1036     if(U_FAILURE(*err)) {
1037         goto error;
1038     }
1039 
1040     /**
1041      * Start first with just the language.
1042      **/
1043     {
1044         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1045 
1046         const int32_t tagBufferLength =
1047             createLikelySubtagsString(
1048                 lang,
1049                 langLength,
1050                 NULL,
1051                 0,
1052                 NULL,
1053                 0,
1054                 NULL,
1055                 0,
1056                 tagBuffer,
1057                 sizeof(tagBuffer),
1058                 err);
1059 
1060         if(U_FAILURE(*err)) {
1061             goto error;
1062         }
1063         else if (uprv_strnicmp(
1064                     maximizedTagBuffer,
1065                     tagBuffer,
1066                     tagBufferLength) == 0) {
1067 
1068             return createTagString(
1069                         lang,
1070                         langLength,
1071                         NULL,
1072                         0,
1073                         NULL,
1074                         0,
1075                         trailing,
1076                         trailingLength,
1077                         minimizedLocaleID,
1078                         minimizedLocaleIDCapacity,
1079                         err);
1080         }
1081     }
1082 
1083     /**
1084      * Next, try the language and region.
1085      **/
1086     if (regionLength > 0) {
1087 
1088         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1089 
1090         const int32_t tagBufferLength =
1091             createLikelySubtagsString(
1092                 lang,
1093                 langLength,
1094                 NULL,
1095                 0,
1096                 region,
1097                 regionLength,
1098                 NULL,
1099                 0,
1100                 tagBuffer,
1101                 sizeof(tagBuffer),
1102                 err);
1103 
1104         if(U_FAILURE(*err)) {
1105             goto error;
1106         }
1107         else if (uprv_strnicmp(
1108                     maximizedTagBuffer,
1109                     tagBuffer,
1110                     tagBufferLength) == 0) {
1111 
1112             return createTagString(
1113                         lang,
1114                         langLength,
1115                         NULL,
1116                         0,
1117                         region,
1118                         regionLength,
1119                         trailing,
1120                         trailingLength,
1121                         minimizedLocaleID,
1122                         minimizedLocaleIDCapacity,
1123                         err);
1124         }
1125     }
1126 
1127     /**
1128      * Finally, try the language and script.  This is our last chance,
1129      * since trying with all three subtags would only yield the
1130      * maximal version that we already have.
1131      **/
1132     if (scriptLength > 0 && regionLength > 0) {
1133         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1134 
1135         const int32_t tagBufferLength =
1136             createLikelySubtagsString(
1137                 lang,
1138                 langLength,
1139                 script,
1140                 scriptLength,
1141                 NULL,
1142                 0,
1143                 NULL,
1144                 0,
1145                 tagBuffer,
1146                 sizeof(tagBuffer),
1147                 err);
1148 
1149         if(U_FAILURE(*err)) {
1150             goto error;
1151         }
1152         else if (uprv_strnicmp(
1153                     maximizedTagBuffer,
1154                     tagBuffer,
1155                     tagBufferLength) == 0) {
1156 
1157             return createTagString(
1158                         lang,
1159                         langLength,
1160                         script,
1161                         scriptLength,
1162                         NULL,
1163                         0,
1164                         trailing,
1165                         trailingLength,
1166                         minimizedLocaleID,
1167                         minimizedLocaleIDCapacity,
1168                         err);
1169         }
1170     }
1171 
1172     {
1173         /**
1174          * If we got here, return the locale ID parameter.
1175          **/
1176         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1177 
1178         uprv_memcpy(
1179             minimizedLocaleID,
1180             localeID,
1181             localeIDLength <= minimizedLocaleIDCapacity ?
1182                 localeIDLength : minimizedLocaleIDCapacity);
1183 
1184         return u_terminateChars(
1185                     minimizedLocaleID,
1186                     minimizedLocaleIDCapacity,
1187                     localeIDLength,
1188                     err);
1189     }
1190 
1191 error:
1192 
1193     if (!U_FAILURE(*err)) {
1194         *err = U_ILLEGAL_ARGUMENT_ERROR;
1195     }
1196 
1197     return -1;
1198 
1199 
1200 }
1201 
1202 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1203 do_canonicalize(const char*    localeID,
1204          char* buffer,
1205          int32_t bufferCapacity,
1206          UErrorCode* err)
1207 {
1208     uloc_canonicalize(
1209         localeID,
1210         buffer,
1211         bufferCapacity,
1212         err);
1213 
1214     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1215         *err == U_BUFFER_OVERFLOW_ERROR) {
1216         *err = U_ILLEGAL_ARGUMENT_ERROR;
1217 
1218         return FALSE;
1219     }
1220     else if (U_FAILURE(*err)) {
1221 
1222         return FALSE;
1223     }
1224     else {
1225         return TRUE;
1226     }
1227 }
1228 
1229 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1230 uloc_addLikelySubtags(const char*    localeID,
1231          char* maximizedLocaleID,
1232          int32_t maximizedLocaleIDCapacity,
1233          UErrorCode* err)
1234 {
1235     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1236 
1237     if (!do_canonicalize(
1238         localeID,
1239         localeBuffer,
1240         sizeof(localeBuffer),
1241         err)) {
1242         return -1;
1243     }
1244     else {
1245         return _uloc_addLikelySubtags(
1246                     localeBuffer,
1247                     maximizedLocaleID,
1248                     maximizedLocaleIDCapacity,
1249                     err);
1250     }
1251 }
1252 
1253 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1254 uloc_minimizeSubtags(const char*    localeID,
1255          char* minimizedLocaleID,
1256          int32_t minimizedLocaleIDCapacity,
1257          UErrorCode* err)
1258 {
1259     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1260 
1261     if (!do_canonicalize(
1262         localeID,
1263         localeBuffer,
1264         sizeof(localeBuffer),
1265         err)) {
1266         return -1;
1267     }
1268     else {
1269         return _uloc_minimizeSubtags(
1270                     localeBuffer,
1271                     minimizedLocaleID,
1272                     minimizedLocaleIDCapacity,
1273                     err);
1274     }
1275 }
1276