• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 *******************************************************************************
3 *
4 *   Copyright (C) 1997-2010, International Business Machines
5 *   Corporation and others.  All Rights Reserved.
6 *
7 *******************************************************************************
8 *   file name:  loclikely.cpp
9 *   encoding:   US-ASCII
10 *   tab size:   8 (not used)
11 *   indentation:4
12 *
13 *   created on: 2010feb25
14 *   created by: Markus W. Scherer
15 *
16 *   Code for likely and minimized locale subtags, separated out from other .cpp files
17 *   that then do not depend on resource bundle code and likely-subtags data.
18 */
19 
20 #include "unicode/utypes.h"
21 #include "unicode/putil.h"
22 #include "unicode/uloc.h"
23 #include "unicode/ures.h"
24 #include "cmemory.h"
25 #include "cstring.h"
26 #include "ulocimp.h"
27 #include "ustr_imp.h"
28 
29 /**
30  * This function looks for the localeID in the likelySubtags resource.
31  *
32  * @param localeID The tag to find.
33  * @param buffer A buffer to hold the matching entry
34  * @param bufferLength The length of the output buffer
35  * @return A pointer to "buffer" if found, or a null pointer if not.
36  */
37 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)38 findLikelySubtags(const char* localeID,
39                   char* buffer,
40                   int32_t bufferLength,
41                   UErrorCode* err) {
42     const char* result = NULL;
43 
44     if (!U_FAILURE(*err)) {
45         int32_t resLen = 0;
46         const UChar* s = NULL;
47         UErrorCode tmpErr = U_ZERO_ERROR;
48         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
49         if (U_SUCCESS(tmpErr)) {
50             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
51 
52             if (U_FAILURE(tmpErr)) {
53                 /*
54                  * If a resource is missing, it's not really an error, it's
55                  * just that we don't have any data for that particular locale ID.
56                  */
57                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
58                     *err = tmpErr;
59                 }
60             }
61             else if (resLen >= bufferLength) {
62                 /* The buffer should never overflow. */
63                 *err = U_INTERNAL_PROGRAM_ERROR;
64             }
65             else {
66                 u_UCharsToChars(s, buffer, resLen + 1);
67                 result = buffer;
68             }
69 
70             ures_close(subtags);
71         } else {
72             *err = tmpErr;
73         }
74     }
75 
76     return result;
77 }
78 
79 /**
80  * Append a tag to a buffer, adding the separator if necessary.  The buffer
81  * must be large enough to contain the resulting tag plus any separator
82  * necessary. The tag must not be a zero-length string.
83  *
84  * @param tag The tag to add.
85  * @param tagLength The length of the tag.
86  * @param buffer The output buffer.
87  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
88  **/
89 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)90 appendTag(
91     const char* tag,
92     int32_t tagLength,
93     char* buffer,
94     int32_t* bufferLength) {
95 
96     if (*bufferLength > 0) {
97         buffer[*bufferLength] = '_';
98         ++(*bufferLength);
99     }
100 
101     uprv_memmove(
102         &buffer[*bufferLength],
103         tag,
104         tagLength);
105 
106     *bufferLength += tagLength;
107 }
108 
/**
 * Canonical placeholder subtags: the values that stand for an unknown
 * language, an unknown script and an unknown region, respectively.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
115 
116 /**
117  * Create a tag string from the supplied parameters.  The lang, script and region
118  * parameters may be NULL pointers. If they are, their corresponding length parameters
119  * must be less than or equal to 0.
120  *
121  * If any of the language, script or region parameters are empty, and the alternateTags
122  * parameter is not NULL, it will be parsed for potential language, script and region tags
123  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
124  * it contains no language tag, the default tag for the unknown language is used.
125  *
126  * If the length of the new string exceeds the capacity of the output buffer,
127  * the function copies as many bytes to the output buffer as it can, and returns
128  * the error U_BUFFER_OVERFLOW_ERROR.
129  *
130  * If an illegal argument is provided, the function returns the error
131  * U_ILLEGAL_ARGUMENT_ERROR.
132  *
133  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
134  * the tag string fits in the output buffer, but the null terminator doesn't.
135  *
136  * @param lang The language tag to use.
137  * @param langLength The length of the language tag.
138  * @param script The script tag to use.
139  * @param scriptLength The length of the script tag.
140  * @param region The region tag to use.
141  * @param regionLength The length of the region tag.
142  * @param trailing Any trailing data to append to the new tag.
143  * @param trailingLength The length of the trailing data.
144  * @param alternateTags A string containing any alternate tags.
145  * @param tag The output buffer.
146  * @param tagCapacity The capacity of the output buffer.
147  * @param err A pointer to a UErrorCode for error reporting.
148  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
149  **/
150 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)151 createTagStringWithAlternates(
152     const char* lang,
153     int32_t langLength,
154     const char* script,
155     int32_t scriptLength,
156     const char* region,
157     int32_t regionLength,
158     const char* trailing,
159     int32_t trailingLength,
160     const char* alternateTags,
161     char* tag,
162     int32_t tagCapacity,
163     UErrorCode* err) {
164 
165     if (U_FAILURE(*err)) {
166         goto error;
167     }
168     else if (tag == NULL ||
169              tagCapacity <= 0 ||
170              langLength >= ULOC_LANG_CAPACITY ||
171              scriptLength >= ULOC_SCRIPT_CAPACITY ||
172              regionLength >= ULOC_COUNTRY_CAPACITY) {
173         goto error;
174     }
175     else {
176         /**
177          * ULOC_FULLNAME_CAPACITY will provide enough capacity
178          * that we can build a string that contains the language,
179          * script and region code without worrying about overrunning
180          * the user-supplied buffer.
181          **/
182         char tagBuffer[ULOC_FULLNAME_CAPACITY];
183         int32_t tagLength = 0;
184         int32_t capacityRemaining = tagCapacity;
185         UBool regionAppended = FALSE;
186 
187         if (langLength > 0) {
188             appendTag(
189                 lang,
190                 langLength,
191                 tagBuffer,
192                 &tagLength);
193         }
194         else if (alternateTags == NULL) {
195             /*
196              * Append the value for an unknown language, if
197              * we found no language.
198              */
199             appendTag(
200                 unknownLanguage,
201                 (int32_t)uprv_strlen(unknownLanguage),
202                 tagBuffer,
203                 &tagLength);
204         }
205         else {
206             /*
207              * Parse the alternateTags string for the language.
208              */
209             char alternateLang[ULOC_LANG_CAPACITY];
210             int32_t alternateLangLength = sizeof(alternateLang);
211 
212             alternateLangLength =
213                 uloc_getLanguage(
214                     alternateTags,
215                     alternateLang,
216                     alternateLangLength,
217                     err);
218             if(U_FAILURE(*err) ||
219                 alternateLangLength >= ULOC_LANG_CAPACITY) {
220                 goto error;
221             }
222             else if (alternateLangLength == 0) {
223                 /*
224                  * Append the value for an unknown language, if
225                  * we found no language.
226                  */
227                 appendTag(
228                     unknownLanguage,
229                     (int32_t)uprv_strlen(unknownLanguage),
230                     tagBuffer,
231                     &tagLength);
232             }
233             else {
234                 appendTag(
235                     alternateLang,
236                     alternateLangLength,
237                     tagBuffer,
238                     &tagLength);
239             }
240         }
241 
242         if (scriptLength > 0) {
243             appendTag(
244                 script,
245                 scriptLength,
246                 tagBuffer,
247                 &tagLength);
248         }
249         else if (alternateTags != NULL) {
250             /*
251              * Parse the alternateTags string for the script.
252              */
253             char alternateScript[ULOC_SCRIPT_CAPACITY];
254 
255             const int32_t alternateScriptLength =
256                 uloc_getScript(
257                     alternateTags,
258                     alternateScript,
259                     sizeof(alternateScript),
260                     err);
261 
262             if (U_FAILURE(*err) ||
263                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
264                 goto error;
265             }
266             else if (alternateScriptLength > 0) {
267                 appendTag(
268                     alternateScript,
269                     alternateScriptLength,
270                     tagBuffer,
271                     &tagLength);
272             }
273         }
274 
275         if (regionLength > 0) {
276             appendTag(
277                 region,
278                 regionLength,
279                 tagBuffer,
280                 &tagLength);
281 
282             regionAppended = TRUE;
283         }
284         else if (alternateTags != NULL) {
285             /*
286              * Parse the alternateTags string for the region.
287              */
288             char alternateRegion[ULOC_COUNTRY_CAPACITY];
289 
290             const int32_t alternateRegionLength =
291                 uloc_getCountry(
292                     alternateTags,
293                     alternateRegion,
294                     sizeof(alternateRegion),
295                     err);
296             if (U_FAILURE(*err) ||
297                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
298                 goto error;
299             }
300             else if (alternateRegionLength > 0) {
301                 appendTag(
302                     alternateRegion,
303                     alternateRegionLength,
304                     tagBuffer,
305                     &tagLength);
306 
307                 regionAppended = TRUE;
308             }
309         }
310 
311         {
312             const int32_t toCopy =
313                 tagLength >= tagCapacity ? tagCapacity : tagLength;
314 
315             /**
316              * Copy the partial tag from our internal buffer to the supplied
317              * target.
318              **/
319             uprv_memcpy(
320                 tag,
321                 tagBuffer,
322                 toCopy);
323 
324             capacityRemaining -= toCopy;
325         }
326 
327         if (trailingLength > 0) {
328             if (capacityRemaining > 0 && !regionAppended) {
329                 tag[tagLength++] = '_';
330                 --capacityRemaining;
331             }
332 
333             if (capacityRemaining > 0) {
334                 /*
335                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
336                  * don't know if the user-supplied buffers overlap.
337                  */
338                 const int32_t toCopy =
339                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
340 
341                 uprv_memmove(
342                     &tag[tagLength],
343                     trailing,
344                     toCopy);
345             }
346         }
347 
348         tagLength += trailingLength;
349 
350         return u_terminateChars(
351                     tag,
352                     tagCapacity,
353                     tagLength,
354                     err);
355     }
356 
357 error:
358 
359     /**
360      * An overflow indicates the locale ID passed in
361      * is ill-formed.  If we got here, and there was
362      * no previous error, it's an implicit overflow.
363      **/
364     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
365         U_SUCCESS(*err)) {
366         *err = U_ILLEGAL_ARGUMENT_ERROR;
367     }
368 
369     return -1;
370 }
371 
372 /**
373  * Create a tag string from the supplied parameters.  The lang, script and region
374  * parameters may be NULL pointers. If they are, their corresponding length parameters
375  * must be less than or equal to 0.  If the lang parameter is an empty string, the
376  * default value for an unknown language is written to the output buffer.
377  *
378  * If the length of the new string exceeds the capacity of the output buffer,
379  * the function copies as many bytes to the output buffer as it can, and returns
380  * the error U_BUFFER_OVERFLOW_ERROR.
381  *
382  * If an illegal argument is provided, the function returns the error
383  * U_ILLEGAL_ARGUMENT_ERROR.
384  *
385  * @param lang The language tag to use.
386  * @param langLength The length of the language tag.
387  * @param script The script tag to use.
388  * @param scriptLength The length of the script tag.
389  * @param region The region tag to use.
390  * @param regionLength The length of the region tag.
391  * @param trailing Any trailing data to append to the new tag.
392  * @param trailingLength The length of the trailing data.
393  * @param tag The output buffer.
394  * @param tagCapacity The capacity of the output buffer.
395  * @param err A pointer to a UErrorCode for error reporting.
396  * @return The length of the tag string, which may be greater than tagCapacity.
397  **/
398 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)399 createTagString(
400     const char* lang,
401     int32_t langLength,
402     const char* script,
403     int32_t scriptLength,
404     const char* region,
405     int32_t regionLength,
406     const char* trailing,
407     int32_t trailingLength,
408     char* tag,
409     int32_t tagCapacity,
410     UErrorCode* err)
411 {
412     return createTagStringWithAlternates(
413                 lang,
414                 langLength,
415                 script,
416                 scriptLength,
417                 region,
418                 regionLength,
419                 trailing,
420                 trailingLength,
421                 NULL,
422                 tag,
423                 tagCapacity,
424                 err);
425 }
426 
427 /**
428  * Parse the language, script, and region subtags from a tag string, and copy the
429  * results into the corresponding output parameters. The buffers are null-terminated,
430  * unless overflow occurs.
431  *
432  * The langLength, scriptLength, and regionLength parameters are input/output
433  * parameters, and must contain the capacity of their corresponding buffers on
434  * input.  On output, they will contain the actual length of the buffers, not
435  * including the null terminator.
436  *
437  * If the length of any of the output subtags exceeds the capacity of the corresponding
438  * buffer, the function copies as many bytes to the output buffer as it can, and returns
439  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
440  * occurs.
441  *
442  * If an illegal argument is provided, the function returns the error
443  * U_ILLEGAL_ARGUMENT_ERROR.
444  *
445  * @param localeID The locale ID to parse.
446  * @param lang The language tag buffer.
447  * @param langLength The length of the language tag.
448  * @param script The script tag buffer.
449  * @param scriptLength The length of the script tag.
450  * @param region The region tag buffer.
451  * @param regionLength The length of the region tag.
452  * @param err A pointer to a UErrorCode for error reporting.
453  * @return The number of chars of the localeID parameter consumed.
454  **/
455 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)456 parseTagString(
457     const char* localeID,
458     char* lang,
459     int32_t* langLength,
460     char* script,
461     int32_t* scriptLength,
462     char* region,
463     int32_t* regionLength,
464     UErrorCode* err)
465 {
466     const char* position = localeID;
467     int32_t subtagLength = 0;
468 
469     if(U_FAILURE(*err) ||
470        localeID == NULL ||
471        lang == NULL ||
472        langLength == NULL ||
473        script == NULL ||
474        scriptLength == NULL ||
475        region == NULL ||
476        regionLength == NULL) {
477         goto error;
478     }
479 
480     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
481     u_terminateChars(lang, *langLength, subtagLength, err);
482 
483     /*
484      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
485      * to be an error, because it indicates the user-supplied tag is
486      * not well-formed.
487      */
488     if(U_FAILURE(*err)) {
489         goto error;
490     }
491 
492     *langLength = subtagLength;
493 
494     /*
495      * If no language was present, use the value of unknownLanguage
496      * instead.  Otherwise, move past any separator.
497      */
498     if (*langLength == 0) {
499         uprv_strcpy(
500             lang,
501             unknownLanguage);
502         *langLength = (int32_t)uprv_strlen(lang);
503     }
504     else if (_isIDSeparator(*position)) {
505         ++position;
506     }
507 
508     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
509     u_terminateChars(script, *scriptLength, subtagLength, err);
510 
511     if(U_FAILURE(*err)) {
512         goto error;
513     }
514 
515     *scriptLength = subtagLength;
516 
517     if (*scriptLength > 0) {
518         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
519             /**
520              * If the script part is the "unknown" script, then don't return it.
521              **/
522             *scriptLength = 0;
523         }
524 
525         /*
526          * Move past any separator.
527          */
528         if (_isIDSeparator(*position)) {
529             ++position;
530         }
531     }
532 
533     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
534     u_terminateChars(region, *regionLength, subtagLength, err);
535 
536     if(U_FAILURE(*err)) {
537         goto error;
538     }
539 
540     *regionLength = subtagLength;
541 
542     if (*regionLength > 0) {
543         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
544             /**
545              * If the region part is the "unknown" region, then don't return it.
546              **/
547             *regionLength = 0;
548         }
549     }
550 
551 exit:
552 
553     return (int32_t)(position - localeID);
554 
555 error:
556 
557     /**
558      * If we get here, we have no explicit error, it's the result of an
559      * illegal argument.
560      **/
561     if (!U_FAILURE(*err)) {
562         *err = U_ILLEGAL_ARGUMENT_ERROR;
563     }
564 
565     goto exit;
566 }
567 
568 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)569 createLikelySubtagsString(
570     const char* lang,
571     int32_t langLength,
572     const char* script,
573     int32_t scriptLength,
574     const char* region,
575     int32_t regionLength,
576     const char* variants,
577     int32_t variantsLength,
578     char* tag,
579     int32_t tagCapacity,
580     UErrorCode* err)
581 {
582     /**
583      * ULOC_FULLNAME_CAPACITY will provide enough capacity
584      * that we can build a string that contains the language,
585      * script and region code without worrying about overrunning
586      * the user-supplied buffer.
587      **/
588     char tagBuffer[ULOC_FULLNAME_CAPACITY];
589     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
590     int32_t tagBufferLength = 0;
591 
592     if(U_FAILURE(*err)) {
593         goto error;
594     }
595 
596     /**
597      * Try the language with the script and region first.
598      **/
599     if (scriptLength > 0 && regionLength > 0) {
600 
601         const char* likelySubtags = NULL;
602 
603         tagBufferLength = createTagString(
604             lang,
605             langLength,
606             script,
607             scriptLength,
608             region,
609             regionLength,
610             NULL,
611             0,
612             tagBuffer,
613             sizeof(tagBuffer),
614             err);
615         if(U_FAILURE(*err)) {
616             goto error;
617         }
618 
619         likelySubtags =
620             findLikelySubtags(
621                 tagBuffer,
622                 likelySubtagsBuffer,
623                 sizeof(likelySubtagsBuffer),
624                 err);
625         if(U_FAILURE(*err)) {
626             goto error;
627         }
628 
629         if (likelySubtags != NULL) {
630             /* Always use the language tag from the
631                maximal string, since it may be more
632                specific than the one provided. */
633             return createTagStringWithAlternates(
634                         NULL,
635                         0,
636                         NULL,
637                         0,
638                         NULL,
639                         0,
640                         variants,
641                         variantsLength,
642                         likelySubtags,
643                         tag,
644                         tagCapacity,
645                         err);
646         }
647     }
648 
649     /**
650      * Try the language with just the script.
651      **/
652     if (scriptLength > 0) {
653 
654         const char* likelySubtags = NULL;
655 
656         tagBufferLength = createTagString(
657             lang,
658             langLength,
659             script,
660             scriptLength,
661             NULL,
662             0,
663             NULL,
664             0,
665             tagBuffer,
666             sizeof(tagBuffer),
667             err);
668         if(U_FAILURE(*err)) {
669             goto error;
670         }
671 
672         likelySubtags =
673             findLikelySubtags(
674                 tagBuffer,
675                 likelySubtagsBuffer,
676                 sizeof(likelySubtagsBuffer),
677                 err);
678         if(U_FAILURE(*err)) {
679             goto error;
680         }
681 
682         if (likelySubtags != NULL) {
683             /* Always use the language tag from the
684                maximal string, since it may be more
685                specific than the one provided. */
686             return createTagStringWithAlternates(
687                         NULL,
688                         0,
689                         NULL,
690                         0,
691                         region,
692                         regionLength,
693                         variants,
694                         variantsLength,
695                         likelySubtags,
696                         tag,
697                         tagCapacity,
698                         err);
699         }
700     }
701 
702     /**
703      * Try the language with just the region.
704      **/
705     if (regionLength > 0) {
706 
707         const char* likelySubtags = NULL;
708 
709         createTagString(
710             lang,
711             langLength,
712             NULL,
713             0,
714             region,
715             regionLength,
716             NULL,
717             0,
718             tagBuffer,
719             sizeof(tagBuffer),
720             err);
721         if(U_FAILURE(*err)) {
722             goto error;
723         }
724 
725         likelySubtags =
726             findLikelySubtags(
727                 tagBuffer,
728                 likelySubtagsBuffer,
729                 sizeof(likelySubtagsBuffer),
730                 err);
731         if(U_FAILURE(*err)) {
732             goto error;
733         }
734 
735         if (likelySubtags != NULL) {
736             /* Always use the language tag from the
737                maximal string, since it may be more
738                specific than the one provided. */
739             return createTagStringWithAlternates(
740                         NULL,
741                         0,
742                         script,
743                         scriptLength,
744                         NULL,
745                         0,
746                         variants,
747                         variantsLength,
748                         likelySubtags,
749                         tag,
750                         tagCapacity,
751                         err);
752         }
753     }
754 
755     /**
756      * Finally, try just the language.
757      **/
758     {
759         const char* likelySubtags = NULL;
760 
761         createTagString(
762             lang,
763             langLength,
764             NULL,
765             0,
766             NULL,
767             0,
768             NULL,
769             0,
770             tagBuffer,
771             sizeof(tagBuffer),
772             err);
773         if(U_FAILURE(*err)) {
774             goto error;
775         }
776 
777         likelySubtags =
778             findLikelySubtags(
779                 tagBuffer,
780                 likelySubtagsBuffer,
781                 sizeof(likelySubtagsBuffer),
782                 err);
783         if(U_FAILURE(*err)) {
784             goto error;
785         }
786 
787         if (likelySubtags != NULL) {
788             /* Always use the language tag from the
789                maximal string, since it may be more
790                specific than the one provided. */
791             return createTagStringWithAlternates(
792                         NULL,
793                         0,
794                         script,
795                         scriptLength,
796                         region,
797                         regionLength,
798                         variants,
799                         variantsLength,
800                         likelySubtags,
801                         tag,
802                         tagCapacity,
803                         err);
804         }
805     }
806 
807     return u_terminateChars(
808                 tag,
809                 tagCapacity,
810                 0,
811                 err);
812 
813 error:
814 
815     if (!U_FAILURE(*err)) {
816         *err = U_ILLEGAL_ARGUMENT_ERROR;
817     }
818 
819     return -1;
820 }
821 
/**
 * Sanity-check the subtags in the trailing (variant) part of a locale ID.
 *
 * The trailing data is scanned up to its end or up to the first '@',
 * whichever comes first.  '-' and '_' start a new subtag.  If any subtag
 * grows beyond nine characters, control jumps to the label "error", which
 * must exist in the enclosing function.
 *
 * The macro expands to a brace-enclosed block, so it may be used with or
 * without a trailing semicolon.  It declares its own locals named "count"
 * and "i"; do not pass expressions that use those names.
 *
 * @param trailing The trailing data (const char*).
 * @param trailingLength The number of chars in the trailing data.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    }
840 
841 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)842 _uloc_addLikelySubtags(const char*    localeID,
843          char* maximizedLocaleID,
844          int32_t maximizedLocaleIDCapacity,
845          UErrorCode* err)
846 {
847     char lang[ULOC_LANG_CAPACITY];
848     int32_t langLength = sizeof(lang);
849     char script[ULOC_SCRIPT_CAPACITY];
850     int32_t scriptLength = sizeof(script);
851     char region[ULOC_COUNTRY_CAPACITY];
852     int32_t regionLength = sizeof(region);
853     const char* trailing = "";
854     int32_t trailingLength = 0;
855     int32_t trailingIndex = 0;
856     int32_t resultLength = 0;
857 
858     if(U_FAILURE(*err)) {
859         goto error;
860     }
861     else if (localeID == NULL ||
862              maximizedLocaleID == NULL ||
863              maximizedLocaleIDCapacity <= 0) {
864         goto error;
865     }
866 
867     trailingIndex = parseTagString(
868         localeID,
869         lang,
870         &langLength,
871         script,
872         &scriptLength,
873         region,
874         &regionLength,
875         err);
876     if(U_FAILURE(*err)) {
877         /* Overflow indicates an illegal argument error */
878         if (*err == U_BUFFER_OVERFLOW_ERROR) {
879             *err = U_ILLEGAL_ARGUMENT_ERROR;
880         }
881 
882         goto error;
883     }
884 
885     /* Find the length of the trailing portion. */
886     trailing = &localeID[trailingIndex];
887     trailingLength = (int32_t)uprv_strlen(trailing);
888 
889     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
890 
891     resultLength =
892         createLikelySubtagsString(
893             lang,
894             langLength,
895             script,
896             scriptLength,
897             region,
898             regionLength,
899             trailing,
900             trailingLength,
901             maximizedLocaleID,
902             maximizedLocaleIDCapacity,
903             err);
904 
905     if (resultLength == 0) {
906         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
907 
908         /*
909          * If we get here, we need to return localeID.
910          */
911         uprv_memcpy(
912             maximizedLocaleID,
913             localeID,
914             localIDLength <= maximizedLocaleIDCapacity ?
915                 localIDLength : maximizedLocaleIDCapacity);
916 
917         resultLength =
918             u_terminateChars(
919                 maximizedLocaleID,
920                 maximizedLocaleIDCapacity,
921                 localIDLength,
922                 err);
923     }
924 
925     return resultLength;
926 
927 error:
928 
929     if (!U_FAILURE(*err)) {
930         *err = U_ILLEGAL_ARGUMENT_ERROR;
931     }
932 
933     return -1;
934 }
935 
936 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)937 _uloc_minimizeSubtags(const char*    localeID,
938          char* minimizedLocaleID,
939          int32_t minimizedLocaleIDCapacity,
940          UErrorCode* err)
941 {
942     /**
943      * ULOC_FULLNAME_CAPACITY will provide enough capacity
944      * that we can build a string that contains the language,
945      * script and region code without worrying about overrunning
946      * the user-supplied buffer.
947      **/
948     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
949     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
950 
951     char lang[ULOC_LANG_CAPACITY];
952     int32_t langLength = sizeof(lang);
953     char script[ULOC_SCRIPT_CAPACITY];
954     int32_t scriptLength = sizeof(script);
955     char region[ULOC_COUNTRY_CAPACITY];
956     int32_t regionLength = sizeof(region);
957     const char* trailing = "";
958     int32_t trailingLength = 0;
959     int32_t trailingIndex = 0;
960 
961     if(U_FAILURE(*err)) {
962         goto error;
963     }
964     else if (localeID == NULL ||
965              minimizedLocaleID == NULL ||
966              minimizedLocaleIDCapacity <= 0) {
967         goto error;
968     }
969 
970     trailingIndex =
971         parseTagString(
972             localeID,
973             lang,
974             &langLength,
975             script,
976             &scriptLength,
977             region,
978             &regionLength,
979             err);
980     if(U_FAILURE(*err)) {
981 
982         /* Overflow indicates an illegal argument error */
983         if (*err == U_BUFFER_OVERFLOW_ERROR) {
984             *err = U_ILLEGAL_ARGUMENT_ERROR;
985         }
986 
987         goto error;
988     }
989 
990     /* Find the spot where the variants begin, if any. */
991     trailing = &localeID[trailingIndex];
992     trailingLength = (int32_t)uprv_strlen(trailing);
993 
994     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
995 
996     createTagString(
997         lang,
998         langLength,
999         script,
1000         scriptLength,
1001         region,
1002         regionLength,
1003         NULL,
1004         0,
1005         maximizedTagBuffer,
1006         maximizedTagBufferLength,
1007         err);
1008     if(U_FAILURE(*err)) {
1009         goto error;
1010     }
1011 
1012     /**
1013      * First, we need to first get the maximization
1014      * from AddLikelySubtags.
1015      **/
1016     maximizedTagBufferLength =
1017         uloc_addLikelySubtags(
1018             maximizedTagBuffer,
1019             maximizedTagBuffer,
1020             maximizedTagBufferLength,
1021             err);
1022 
1023     if(U_FAILURE(*err)) {
1024         goto error;
1025     }
1026 
1027     /**
1028      * Start first with just the language.
1029      **/
1030     {
1031         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1032 
1033         const int32_t tagBufferLength =
1034             createLikelySubtagsString(
1035                 lang,
1036                 langLength,
1037                 NULL,
1038                 0,
1039                 NULL,
1040                 0,
1041                 NULL,
1042                 0,
1043                 tagBuffer,
1044                 sizeof(tagBuffer),
1045                 err);
1046 
1047         if(U_FAILURE(*err)) {
1048             goto error;
1049         }
1050         else if (uprv_strnicmp(
1051                     maximizedTagBuffer,
1052                     tagBuffer,
1053                     tagBufferLength) == 0) {
1054 
1055             return createTagString(
1056                         lang,
1057                         langLength,
1058                         NULL,
1059                         0,
1060                         NULL,
1061                         0,
1062                         trailing,
1063                         trailingLength,
1064                         minimizedLocaleID,
1065                         minimizedLocaleIDCapacity,
1066                         err);
1067         }
1068     }
1069 
1070     /**
1071      * Next, try the language and region.
1072      **/
1073     if (regionLength > 0) {
1074 
1075         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1076 
1077         const int32_t tagBufferLength =
1078             createLikelySubtagsString(
1079                 lang,
1080                 langLength,
1081                 NULL,
1082                 0,
1083                 region,
1084                 regionLength,
1085                 NULL,
1086                 0,
1087                 tagBuffer,
1088                 sizeof(tagBuffer),
1089                 err);
1090 
1091         if(U_FAILURE(*err)) {
1092             goto error;
1093         }
1094         else if (uprv_strnicmp(
1095                     maximizedTagBuffer,
1096                     tagBuffer,
1097                     tagBufferLength) == 0) {
1098 
1099             return createTagString(
1100                         lang,
1101                         langLength,
1102                         NULL,
1103                         0,
1104                         region,
1105                         regionLength,
1106                         trailing,
1107                         trailingLength,
1108                         minimizedLocaleID,
1109                         minimizedLocaleIDCapacity,
1110                         err);
1111         }
1112     }
1113 
1114     /**
1115      * Finally, try the language and script.  This is our last chance,
1116      * since trying with all three subtags would only yield the
1117      * maximal version that we already have.
1118      **/
1119     if (scriptLength > 0 && regionLength > 0) {
1120         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1121 
1122         const int32_t tagBufferLength =
1123             createLikelySubtagsString(
1124                 lang,
1125                 langLength,
1126                 script,
1127                 scriptLength,
1128                 NULL,
1129                 0,
1130                 NULL,
1131                 0,
1132                 tagBuffer,
1133                 sizeof(tagBuffer),
1134                 err);
1135 
1136         if(U_FAILURE(*err)) {
1137             goto error;
1138         }
1139         else if (uprv_strnicmp(
1140                     maximizedTagBuffer,
1141                     tagBuffer,
1142                     tagBufferLength) == 0) {
1143 
1144             return createTagString(
1145                         lang,
1146                         langLength,
1147                         script,
1148                         scriptLength,
1149                         NULL,
1150                         0,
1151                         trailing,
1152                         trailingLength,
1153                         minimizedLocaleID,
1154                         minimizedLocaleIDCapacity,
1155                         err);
1156         }
1157     }
1158 
1159     {
1160         /**
1161          * If we got here, return the locale ID parameter.
1162          **/
1163         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1164 
1165         uprv_memcpy(
1166             minimizedLocaleID,
1167             localeID,
1168             localeIDLength <= minimizedLocaleIDCapacity ?
1169                 localeIDLength : minimizedLocaleIDCapacity);
1170 
1171         return u_terminateChars(
1172                     minimizedLocaleID,
1173                     minimizedLocaleIDCapacity,
1174                     localeIDLength,
1175                     err);
1176     }
1177 
1178 error:
1179 
1180     if (!U_FAILURE(*err)) {
1181         *err = U_ILLEGAL_ARGUMENT_ERROR;
1182     }
1183 
1184     return -1;
1185 
1186 
1187 }
1188 
1189 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1190 do_canonicalize(const char*    localeID,
1191          char* buffer,
1192          int32_t bufferCapacity,
1193          UErrorCode* err)
1194 {
1195     uloc_canonicalize(
1196         localeID,
1197         buffer,
1198         bufferCapacity,
1199         err);
1200 
1201     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1202         *err == U_BUFFER_OVERFLOW_ERROR) {
1203         *err = U_ILLEGAL_ARGUMENT_ERROR;
1204 
1205         return FALSE;
1206     }
1207     else if (U_FAILURE(*err)) {
1208 
1209         return FALSE;
1210     }
1211     else {
1212         return TRUE;
1213     }
1214 }
1215 
1216 U_DRAFT int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1217 uloc_addLikelySubtags(const char*    localeID,
1218          char* maximizedLocaleID,
1219          int32_t maximizedLocaleIDCapacity,
1220          UErrorCode* err)
1221 {
1222     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1223 
1224     if (!do_canonicalize(
1225         localeID,
1226         localeBuffer,
1227         sizeof(localeBuffer),
1228         err)) {
1229         return -1;
1230     }
1231     else {
1232         return _uloc_addLikelySubtags(
1233                     localeBuffer,
1234                     maximizedLocaleID,
1235                     maximizedLocaleIDCapacity,
1236                     err);
1237     }
1238 }
1239 
1240 U_DRAFT int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1241 uloc_minimizeSubtags(const char*    localeID,
1242          char* minimizedLocaleID,
1243          int32_t minimizedLocaleIDCapacity,
1244          UErrorCode* err)
1245 {
1246     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1247 
1248     if (!do_canonicalize(
1249         localeID,
1250         localeBuffer,
1251         sizeof(localeBuffer),
1252         err)) {
1253         return -1;
1254     }
1255     else {
1256         return _uloc_minimizeSubtags(
1257                     localeBuffer,
1258                     minimizedLocaleID,
1259                     minimizedLocaleIDCapacity,
1260                     err);
1261     }
1262 }
1263