• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  loclikely.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2010feb25
16 *   created by: Markus W. Scherer
17 *
18 *   Code for likely and minimized locale subtags, separated out from other .cpp files
19 *   that then do not depend on resource bundle code and likely-subtags data.
20 */
21 
22 #include "unicode/utypes.h"
23 #include "unicode/locid.h"
24 #include "unicode/putil.h"
25 #include "unicode/uchar.h"
26 #include "unicode/uloc.h"
27 #include "unicode/ures.h"
28 #include "unicode/uscript.h"
29 #include "cmemory.h"
30 #include "cstring.h"
31 #include "ulocimp.h"
32 #include "ustr_imp.h"
33 
34 /**
35  * This function looks for the localeID in the likelySubtags resource.
36  *
37  * @param localeID The tag to find.
38  * @param buffer A buffer to hold the matching entry
39  * @param bufferLength The length of the output buffer
40  * @return A pointer to "buffer" if found, or a null pointer if not.
41  */
42 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)43 findLikelySubtags(const char* localeID,
44                   char* buffer,
45                   int32_t bufferLength,
46                   UErrorCode* err) {
47     const char* result = NULL;
48 
49     if (!U_FAILURE(*err)) {
50         int32_t resLen = 0;
51         const UChar* s = NULL;
52         UErrorCode tmpErr = U_ZERO_ERROR;
53         UResourceBundle* subtags = ures_openDirect(NULL, "likelySubtags", &tmpErr);
54         if (U_SUCCESS(tmpErr)) {
55             s = ures_getStringByKey(subtags, localeID, &resLen, &tmpErr);
56 
57             if (U_FAILURE(tmpErr)) {
58                 /*
59                  * If a resource is missing, it's not really an error, it's
60                  * just that we don't have any data for that particular locale ID.
61                  */
62                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
63                     *err = tmpErr;
64                 }
65             }
66             else if (resLen >= bufferLength) {
67                 /* The buffer should never overflow. */
68                 *err = U_INTERNAL_PROGRAM_ERROR;
69             }
70             else {
71                 u_UCharsToChars(s, buffer, resLen + 1);
72                 result = buffer;
73             }
74 
75             ures_close(subtags);
76         } else {
77             *err = tmpErr;
78         }
79     }
80 
81     return result;
82 }
83 
84 /**
85  * Append a tag to a buffer, adding the separator if necessary.  The buffer
86  * must be large enough to contain the resulting tag plus any separator
87  * necessary. The tag must not be a zero-length string.
88  *
89  * @param tag The tag to add.
90  * @param tagLength The length of the tag.
91  * @param buffer The output buffer.
92  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
93  **/
94 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength)95 appendTag(
96     const char* tag,
97     int32_t tagLength,
98     char* buffer,
99     int32_t* bufferLength) {
100 
101     if (*bufferLength > 0) {
102         buffer[*bufferLength] = '_';
103         ++(*bufferLength);
104     }
105 
106     uprv_memmove(
107         &buffer[*bufferLength],
108         tag,
109         tagLength);
110 
111     *bufferLength += tagLength;
112 }
113 
/**
 * Canonical subtag strings that stand for an unknown language, an unknown
 * script and an unknown region, respectively.
 **/
static const char* const unknownLanguage = "und";
static const char* const unknownScript = "Zzzz";
static const char* const unknownRegion = "ZZ";
120 
121 /**
122  * Create a tag string from the supplied parameters.  The lang, script and region
123  * parameters may be NULL pointers. If they are, their corresponding length parameters
124  * must be less than or equal to 0.
125  *
126  * If any of the language, script or region parameters are empty, and the alternateTags
127  * parameter is not NULL, it will be parsed for potential language, script and region tags
128  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
129  * it contains no language tag, the default tag for the unknown language is used.
130  *
131  * If the length of the new string exceeds the capacity of the output buffer,
132  * the function copies as many bytes to the output buffer as it can, and returns
133  * the error U_BUFFER_OVERFLOW_ERROR.
134  *
135  * If an illegal argument is provided, the function returns the error
136  * U_ILLEGAL_ARGUMENT_ERROR.
137  *
138  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
139  * the tag string fits in the output buffer, but the null terminator doesn't.
140  *
141  * @param lang The language tag to use.
142  * @param langLength The length of the language tag.
143  * @param script The script tag to use.
144  * @param scriptLength The length of the script tag.
145  * @param region The region tag to use.
146  * @param regionLength The length of the region tag.
147  * @param trailing Any trailing data to append to the new tag.
148  * @param trailingLength The length of the trailing data.
149  * @param alternateTags A string containing any alternate tags.
150  * @param tag The output buffer.
151  * @param tagCapacity The capacity of the output buffer.
152  * @param err A pointer to a UErrorCode for error reporting.
153  * @return The length of the tag string, which may be greater than tagCapacity, or -1 on error.
154  **/
155 static int32_t U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,char * tag,int32_t tagCapacity,UErrorCode * err)156 createTagStringWithAlternates(
157     const char* lang,
158     int32_t langLength,
159     const char* script,
160     int32_t scriptLength,
161     const char* region,
162     int32_t regionLength,
163     const char* trailing,
164     int32_t trailingLength,
165     const char* alternateTags,
166     char* tag,
167     int32_t tagCapacity,
168     UErrorCode* err) {
169 
170     if (U_FAILURE(*err)) {
171         goto error;
172     }
173     else if (tag == NULL ||
174              tagCapacity <= 0 ||
175              langLength >= ULOC_LANG_CAPACITY ||
176              scriptLength >= ULOC_SCRIPT_CAPACITY ||
177              regionLength >= ULOC_COUNTRY_CAPACITY) {
178         goto error;
179     }
180     else {
181         /**
182          * ULOC_FULLNAME_CAPACITY will provide enough capacity
183          * that we can build a string that contains the language,
184          * script and region code without worrying about overrunning
185          * the user-supplied buffer.
186          **/
187         char tagBuffer[ULOC_FULLNAME_CAPACITY];
188         int32_t tagLength = 0;
189         int32_t capacityRemaining = tagCapacity;
190         UBool regionAppended = FALSE;
191 
192         if (langLength > 0) {
193             appendTag(
194                 lang,
195                 langLength,
196                 tagBuffer,
197                 &tagLength);
198         }
199         else if (alternateTags == NULL) {
200             /*
201              * Append the value for an unknown language, if
202              * we found no language.
203              */
204             appendTag(
205                 unknownLanguage,
206                 (int32_t)uprv_strlen(unknownLanguage),
207                 tagBuffer,
208                 &tagLength);
209         }
210         else {
211             /*
212              * Parse the alternateTags string for the language.
213              */
214             char alternateLang[ULOC_LANG_CAPACITY];
215             int32_t alternateLangLength = sizeof(alternateLang);
216 
217             alternateLangLength =
218                 uloc_getLanguage(
219                     alternateTags,
220                     alternateLang,
221                     alternateLangLength,
222                     err);
223             if(U_FAILURE(*err) ||
224                 alternateLangLength >= ULOC_LANG_CAPACITY) {
225                 goto error;
226             }
227             else if (alternateLangLength == 0) {
228                 /*
229                  * Append the value for an unknown language, if
230                  * we found no language.
231                  */
232                 appendTag(
233                     unknownLanguage,
234                     (int32_t)uprv_strlen(unknownLanguage),
235                     tagBuffer,
236                     &tagLength);
237             }
238             else {
239                 appendTag(
240                     alternateLang,
241                     alternateLangLength,
242                     tagBuffer,
243                     &tagLength);
244             }
245         }
246 
247         if (scriptLength > 0) {
248             appendTag(
249                 script,
250                 scriptLength,
251                 tagBuffer,
252                 &tagLength);
253         }
254         else if (alternateTags != NULL) {
255             /*
256              * Parse the alternateTags string for the script.
257              */
258             char alternateScript[ULOC_SCRIPT_CAPACITY];
259 
260             const int32_t alternateScriptLength =
261                 uloc_getScript(
262                     alternateTags,
263                     alternateScript,
264                     sizeof(alternateScript),
265                     err);
266 
267             if (U_FAILURE(*err) ||
268                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
269                 goto error;
270             }
271             else if (alternateScriptLength > 0) {
272                 appendTag(
273                     alternateScript,
274                     alternateScriptLength,
275                     tagBuffer,
276                     &tagLength);
277             }
278         }
279 
280         if (regionLength > 0) {
281             appendTag(
282                 region,
283                 regionLength,
284                 tagBuffer,
285                 &tagLength);
286 
287             regionAppended = TRUE;
288         }
289         else if (alternateTags != NULL) {
290             /*
291              * Parse the alternateTags string for the region.
292              */
293             char alternateRegion[ULOC_COUNTRY_CAPACITY];
294 
295             const int32_t alternateRegionLength =
296                 uloc_getCountry(
297                     alternateTags,
298                     alternateRegion,
299                     sizeof(alternateRegion),
300                     err);
301             if (U_FAILURE(*err) ||
302                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
303                 goto error;
304             }
305             else if (alternateRegionLength > 0) {
306                 appendTag(
307                     alternateRegion,
308                     alternateRegionLength,
309                     tagBuffer,
310                     &tagLength);
311 
312                 regionAppended = TRUE;
313             }
314         }
315 
316         {
317             const int32_t toCopy =
318                 tagLength >= tagCapacity ? tagCapacity : tagLength;
319 
320             /**
321              * Copy the partial tag from our internal buffer to the supplied
322              * target.
323              **/
324             uprv_memcpy(
325                 tag,
326                 tagBuffer,
327                 toCopy);
328 
329             capacityRemaining -= toCopy;
330         }
331 
332         if (trailingLength > 0) {
333             if (*trailing != '@' && capacityRemaining > 0) {
334                 tag[tagLength++] = '_';
335                 --capacityRemaining;
336                 if (capacityRemaining > 0 && !regionAppended) {
337                     /* extra separator is required */
338                     tag[tagLength++] = '_';
339                     --capacityRemaining;
340                 }
341             }
342 
343             if (capacityRemaining > 0) {
344                 /*
345                  * Copy the trailing data into the supplied buffer.  Use uprv_memmove, since we
346                  * don't know if the user-supplied buffers overlap.
347                  */
348                 const int32_t toCopy =
349                     trailingLength >= capacityRemaining ? capacityRemaining : trailingLength;
350 
351                 uprv_memmove(
352                     &tag[tagLength],
353                     trailing,
354                     toCopy);
355             }
356         }
357 
358         tagLength += trailingLength;
359 
360         return u_terminateChars(
361                     tag,
362                     tagCapacity,
363                     tagLength,
364                     err);
365     }
366 
367 error:
368 
369     /**
370      * An overflow indicates the locale ID passed in
371      * is ill-formed.  If we got here, and there was
372      * no previous error, it's an implicit overflow.
373      **/
374     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
375         U_SUCCESS(*err)) {
376         *err = U_ILLEGAL_ARGUMENT_ERROR;
377     }
378 
379     return -1;
380 }
381 
382 /**
383  * Create a tag string from the supplied parameters.  The lang, script and region
384  * parameters may be NULL pointers. If they are, their corresponding length parameters
385  * must be less than or equal to 0.  If the lang parameter is an empty string, the
386  * default value for an unknown language is written to the output buffer.
387  *
388  * If the length of the new string exceeds the capacity of the output buffer,
389  * the function copies as many bytes to the output buffer as it can, and returns
390  * the error U_BUFFER_OVERFLOW_ERROR.
391  *
392  * If an illegal argument is provided, the function returns the error
393  * U_ILLEGAL_ARGUMENT_ERROR.
394  *
395  * @param lang The language tag to use.
396  * @param langLength The length of the language tag.
397  * @param script The script tag to use.
398  * @param scriptLength The length of the script tag.
399  * @param region The region tag to use.
400  * @param regionLength The length of the region tag.
401  * @param trailing Any trailing data to append to the new tag.
402  * @param trailingLength The length of the trailing data.
403  * @param tag The output buffer.
404  * @param tagCapacity The capacity of the output buffer.
405  * @param err A pointer to a UErrorCode for error reporting.
406  * @return The length of the tag string, which may be greater than tagCapacity.
407  **/
408 static int32_t U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,char * tag,int32_t tagCapacity,UErrorCode * err)409 createTagString(
410     const char* lang,
411     int32_t langLength,
412     const char* script,
413     int32_t scriptLength,
414     const char* region,
415     int32_t regionLength,
416     const char* trailing,
417     int32_t trailingLength,
418     char* tag,
419     int32_t tagCapacity,
420     UErrorCode* err)
421 {
422     return createTagStringWithAlternates(
423                 lang,
424                 langLength,
425                 script,
426                 scriptLength,
427                 region,
428                 regionLength,
429                 trailing,
430                 trailingLength,
431                 NULL,
432                 tag,
433                 tagCapacity,
434                 err);
435 }
436 
437 /**
438  * Parse the language, script, and region subtags from a tag string, and copy the
439  * results into the corresponding output parameters. The buffers are null-terminated,
440  * unless overflow occurs.
441  *
442  * The langLength, scriptLength, and regionLength parameters are input/output
443  * parameters, and must contain the capacity of their corresponding buffers on
444  * input.  On output, they will contain the actual length of the buffers, not
445  * including the null terminator.
446  *
447  * If the length of any of the output subtags exceeds the capacity of the corresponding
448  * buffer, the function copies as many bytes to the output buffer as it can, and returns
449  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
450  * occurs.
451  *
452  * If an illegal argument is provided, the function returns the error
453  * U_ILLEGAL_ARGUMENT_ERROR.
454  *
455  * @param localeID The locale ID to parse.
456  * @param lang The language tag buffer.
457  * @param langLength The length of the language tag.
458  * @param script The script tag buffer.
459  * @param scriptLength The length of the script tag.
460  * @param region The region tag buffer.
461  * @param regionLength The length of the region tag.
462  * @param err A pointer to a UErrorCode for error reporting.
463  * @return The number of chars of the localeID parameter consumed.
464  **/
465 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)466 parseTagString(
467     const char* localeID,
468     char* lang,
469     int32_t* langLength,
470     char* script,
471     int32_t* scriptLength,
472     char* region,
473     int32_t* regionLength,
474     UErrorCode* err)
475 {
476     const char* position = localeID;
477     int32_t subtagLength = 0;
478 
479     if(U_FAILURE(*err) ||
480        localeID == NULL ||
481        lang == NULL ||
482        langLength == NULL ||
483        script == NULL ||
484        scriptLength == NULL ||
485        region == NULL ||
486        regionLength == NULL) {
487         goto error;
488     }
489 
490     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
491     u_terminateChars(lang, *langLength, subtagLength, err);
492 
493     /*
494      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
495      * to be an error, because it indicates the user-supplied tag is
496      * not well-formed.
497      */
498     if(U_FAILURE(*err)) {
499         goto error;
500     }
501 
502     *langLength = subtagLength;
503 
504     /*
505      * If no language was present, use the value of unknownLanguage
506      * instead.  Otherwise, move past any separator.
507      */
508     if (*langLength == 0) {
509         uprv_strcpy(
510             lang,
511             unknownLanguage);
512         *langLength = (int32_t)uprv_strlen(lang);
513     }
514     if (_isIDSeparator(*position)) {
515         ++position;
516     }
517 
518     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
519     u_terminateChars(script, *scriptLength, subtagLength, err);
520 
521     if(U_FAILURE(*err)) {
522         goto error;
523     }
524 
525     *scriptLength = subtagLength;
526 
527     if (*scriptLength > 0) {
528         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
529             /**
530              * If the script part is the "unknown" script, then don't return it.
531              **/
532             *scriptLength = 0;
533         }
534 
535         /*
536          * Move past any separator.
537          */
538         if (_isIDSeparator(*position)) {
539             ++position;
540         }
541     }
542 
543     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
544     u_terminateChars(region, *regionLength, subtagLength, err);
545 
546     if(U_FAILURE(*err)) {
547         goto error;
548     }
549 
550     *regionLength = subtagLength;
551 
552     if (*regionLength > 0) {
553         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
554             /**
555              * If the region part is the "unknown" region, then don't return it.
556              **/
557             *regionLength = 0;
558         }
559     } else if (*position != 0 && *position != '@') {
560         /* back up over consumed trailing separator */
561         --position;
562     }
563 
564 exit:
565 
566     return (int32_t)(position - localeID);
567 
568 error:
569 
570     /**
571      * If we get here, we have no explicit error, it's the result of an
572      * illegal argument.
573      **/
574     if (!U_FAILURE(*err)) {
575         *err = U_ILLEGAL_ARGUMENT_ERROR;
576     }
577 
578     goto exit;
579 }
580 
581 static int32_t U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,char * tag,int32_t tagCapacity,UErrorCode * err)582 createLikelySubtagsString(
583     const char* lang,
584     int32_t langLength,
585     const char* script,
586     int32_t scriptLength,
587     const char* region,
588     int32_t regionLength,
589     const char* variants,
590     int32_t variantsLength,
591     char* tag,
592     int32_t tagCapacity,
593     UErrorCode* err)
594 {
595     /**
596      * ULOC_FULLNAME_CAPACITY will provide enough capacity
597      * that we can build a string that contains the language,
598      * script and region code without worrying about overrunning
599      * the user-supplied buffer.
600      **/
601     char tagBuffer[ULOC_FULLNAME_CAPACITY];
602     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
603 
604     if(U_FAILURE(*err)) {
605         goto error;
606     }
607 
608     /**
609      * Try the language with the script and region first.
610      **/
611     if (scriptLength > 0 && regionLength > 0) {
612 
613         const char* likelySubtags = NULL;
614 
615         createTagString(
616             lang,
617             langLength,
618             script,
619             scriptLength,
620             region,
621             regionLength,
622             NULL,
623             0,
624             tagBuffer,
625             sizeof(tagBuffer),
626             err);
627         if(U_FAILURE(*err)) {
628             goto error;
629         }
630 
631         likelySubtags =
632             findLikelySubtags(
633                 tagBuffer,
634                 likelySubtagsBuffer,
635                 sizeof(likelySubtagsBuffer),
636                 err);
637         if(U_FAILURE(*err)) {
638             goto error;
639         }
640 
641         if (likelySubtags != NULL) {
642             /* Always use the language tag from the
643                maximal string, since it may be more
644                specific than the one provided. */
645             return createTagStringWithAlternates(
646                         NULL,
647                         0,
648                         NULL,
649                         0,
650                         NULL,
651                         0,
652                         variants,
653                         variantsLength,
654                         likelySubtags,
655                         tag,
656                         tagCapacity,
657                         err);
658         }
659     }
660 
661     /**
662      * Try the language with just the script.
663      **/
664     if (scriptLength > 0) {
665 
666         const char* likelySubtags = NULL;
667 
668         createTagString(
669             lang,
670             langLength,
671             script,
672             scriptLength,
673             NULL,
674             0,
675             NULL,
676             0,
677             tagBuffer,
678             sizeof(tagBuffer),
679             err);
680         if(U_FAILURE(*err)) {
681             goto error;
682         }
683 
684         likelySubtags =
685             findLikelySubtags(
686                 tagBuffer,
687                 likelySubtagsBuffer,
688                 sizeof(likelySubtagsBuffer),
689                 err);
690         if(U_FAILURE(*err)) {
691             goto error;
692         }
693 
694         if (likelySubtags != NULL) {
695             /* Always use the language tag from the
696                maximal string, since it may be more
697                specific than the one provided. */
698             return createTagStringWithAlternates(
699                         NULL,
700                         0,
701                         NULL,
702                         0,
703                         region,
704                         regionLength,
705                         variants,
706                         variantsLength,
707                         likelySubtags,
708                         tag,
709                         tagCapacity,
710                         err);
711         }
712     }
713 
714     /**
715      * Try the language with just the region.
716      **/
717     if (regionLength > 0) {
718 
719         const char* likelySubtags = NULL;
720 
721         createTagString(
722             lang,
723             langLength,
724             NULL,
725             0,
726             region,
727             regionLength,
728             NULL,
729             0,
730             tagBuffer,
731             sizeof(tagBuffer),
732             err);
733         if(U_FAILURE(*err)) {
734             goto error;
735         }
736 
737         likelySubtags =
738             findLikelySubtags(
739                 tagBuffer,
740                 likelySubtagsBuffer,
741                 sizeof(likelySubtagsBuffer),
742                 err);
743         if(U_FAILURE(*err)) {
744             goto error;
745         }
746 
747         if (likelySubtags != NULL) {
748             /* Always use the language tag from the
749                maximal string, since it may be more
750                specific than the one provided. */
751             return createTagStringWithAlternates(
752                         NULL,
753                         0,
754                         script,
755                         scriptLength,
756                         NULL,
757                         0,
758                         variants,
759                         variantsLength,
760                         likelySubtags,
761                         tag,
762                         tagCapacity,
763                         err);
764         }
765     }
766 
767     /**
768      * Finally, try just the language.
769      **/
770     {
771         const char* likelySubtags = NULL;
772 
773         createTagString(
774             lang,
775             langLength,
776             NULL,
777             0,
778             NULL,
779             0,
780             NULL,
781             0,
782             tagBuffer,
783             sizeof(tagBuffer),
784             err);
785         if(U_FAILURE(*err)) {
786             goto error;
787         }
788 
789         likelySubtags =
790             findLikelySubtags(
791                 tagBuffer,
792                 likelySubtagsBuffer,
793                 sizeof(likelySubtagsBuffer),
794                 err);
795         if(U_FAILURE(*err)) {
796             goto error;
797         }
798 
799         if (likelySubtags != NULL) {
800             /* Always use the language tag from the
801                maximal string, since it may be more
802                specific than the one provided. */
803             return createTagStringWithAlternates(
804                         NULL,
805                         0,
806                         script,
807                         scriptLength,
808                         region,
809                         regionLength,
810                         variants,
811                         variantsLength,
812                         likelySubtags,
813                         tag,
814                         tagCapacity,
815                         err);
816         }
817     }
818 
819     return u_terminateChars(
820                 tag,
821                 tagCapacity,
822                 0,
823                 err);
824 
825 error:
826 
827     if (!U_FAILURE(*err)) {
828         *err = U_ILLEGAL_ARGUMENT_ERROR;
829     }
830 
831     return -1;
832 }
833 
/**
 * Scan the trailing part of a locale ID and jump to the enclosing function's
 * "error" label if any run of characters between '-' or '_' separators is
 * too long.  Scanning stops at the first '@'.
 *
 * The check fires when a further character arrives after a run has already
 * reached nine characters, so runs of up to nine characters are accepted.
 * NOTE(review): confirm whether nine rather than eight is the intended limit.
 *
 * The expansion is a plain brace block, so it is valid with or without a
 * trailing semicolon at the call site.
 *
 * @param trailing The characters to scan.
 * @param trailingLength The number of characters to scan.
 **/
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) \
    {   int32_t count = 0; \
        int32_t i; \
        for (i = 0; i < (trailingLength); i++) { \
            if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
                count = 0; \
            } else if ((trailing)[i] == '@') { \
                break; \
            } else if (count > 8) { \
                goto error; \
            } else { \
                count++; \
            } \
        } \
    }
852 
853 static int32_t
_uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)854 _uloc_addLikelySubtags(const char*    localeID,
855          char* maximizedLocaleID,
856          int32_t maximizedLocaleIDCapacity,
857          UErrorCode* err)
858 {
859     char lang[ULOC_LANG_CAPACITY];
860     int32_t langLength = sizeof(lang);
861     char script[ULOC_SCRIPT_CAPACITY];
862     int32_t scriptLength = sizeof(script);
863     char region[ULOC_COUNTRY_CAPACITY];
864     int32_t regionLength = sizeof(region);
865     const char* trailing = "";
866     int32_t trailingLength = 0;
867     int32_t trailingIndex = 0;
868     int32_t resultLength = 0;
869 
870     if(U_FAILURE(*err)) {
871         goto error;
872     }
873     else if (localeID == NULL ||
874              maximizedLocaleID == NULL ||
875              maximizedLocaleIDCapacity <= 0) {
876         goto error;
877     }
878 
879     trailingIndex = parseTagString(
880         localeID,
881         lang,
882         &langLength,
883         script,
884         &scriptLength,
885         region,
886         &regionLength,
887         err);
888     if(U_FAILURE(*err)) {
889         /* Overflow indicates an illegal argument error */
890         if (*err == U_BUFFER_OVERFLOW_ERROR) {
891             *err = U_ILLEGAL_ARGUMENT_ERROR;
892         }
893 
894         goto error;
895     }
896 
897     /* Find the length of the trailing portion. */
898     while (_isIDSeparator(localeID[trailingIndex])) {
899         trailingIndex++;
900     }
901     trailing = &localeID[trailingIndex];
902     trailingLength = (int32_t)uprv_strlen(trailing);
903 
904     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
905 
906     resultLength =
907         createLikelySubtagsString(
908             lang,
909             langLength,
910             script,
911             scriptLength,
912             region,
913             regionLength,
914             trailing,
915             trailingLength,
916             maximizedLocaleID,
917             maximizedLocaleIDCapacity,
918             err);
919 
920     if (resultLength == 0) {
921         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
922 
923         /*
924          * If we get here, we need to return localeID.
925          */
926         uprv_memcpy(
927             maximizedLocaleID,
928             localeID,
929             localIDLength <= maximizedLocaleIDCapacity ?
930                 localIDLength : maximizedLocaleIDCapacity);
931 
932         resultLength =
933             u_terminateChars(
934                 maximizedLocaleID,
935                 maximizedLocaleIDCapacity,
936                 localIDLength,
937                 err);
938     }
939 
940     return resultLength;
941 
942 error:
943 
944     if (!U_FAILURE(*err)) {
945         *err = U_ILLEGAL_ARGUMENT_ERROR;
946     }
947 
948     return -1;
949 }
950 
951 static int32_t
_uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)952 _uloc_minimizeSubtags(const char*    localeID,
953          char* minimizedLocaleID,
954          int32_t minimizedLocaleIDCapacity,
955          UErrorCode* err)
956 {
957     /**
958      * ULOC_FULLNAME_CAPACITY will provide enough capacity
959      * that we can build a string that contains the language,
960      * script and region code without worrying about overrunning
961      * the user-supplied buffer.
962      **/
963     char maximizedTagBuffer[ULOC_FULLNAME_CAPACITY];
964     int32_t maximizedTagBufferLength = sizeof(maximizedTagBuffer);
965 
966     char lang[ULOC_LANG_CAPACITY];
967     int32_t langLength = sizeof(lang);
968     char script[ULOC_SCRIPT_CAPACITY];
969     int32_t scriptLength = sizeof(script);
970     char region[ULOC_COUNTRY_CAPACITY];
971     int32_t regionLength = sizeof(region);
972     const char* trailing = "";
973     int32_t trailingLength = 0;
974     int32_t trailingIndex = 0;
975 
976     if(U_FAILURE(*err)) {
977         goto error;
978     }
979     else if (localeID == NULL ||
980              minimizedLocaleID == NULL ||
981              minimizedLocaleIDCapacity <= 0) {
982         goto error;
983     }
984 
985     trailingIndex =
986         parseTagString(
987             localeID,
988             lang,
989             &langLength,
990             script,
991             &scriptLength,
992             region,
993             &regionLength,
994             err);
995     if(U_FAILURE(*err)) {
996 
997         /* Overflow indicates an illegal argument error */
998         if (*err == U_BUFFER_OVERFLOW_ERROR) {
999             *err = U_ILLEGAL_ARGUMENT_ERROR;
1000         }
1001 
1002         goto error;
1003     }
1004 
1005     /* Find the spot where the variants or the keywords begin, if any. */
1006     while (_isIDSeparator(localeID[trailingIndex])) {
1007         trailingIndex++;
1008     }
1009     trailing = &localeID[trailingIndex];
1010     trailingLength = (int32_t)uprv_strlen(trailing);
1011 
1012     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
1013 
1014     createTagString(
1015         lang,
1016         langLength,
1017         script,
1018         scriptLength,
1019         region,
1020         regionLength,
1021         NULL,
1022         0,
1023         maximizedTagBuffer,
1024         maximizedTagBufferLength,
1025         err);
1026     if(U_FAILURE(*err)) {
1027         goto error;
1028     }
1029 
1030     /**
1031      * First, we need to first get the maximization
1032      * from AddLikelySubtags.
1033      **/
1034     maximizedTagBufferLength =
1035         uloc_addLikelySubtags(
1036             maximizedTagBuffer,
1037             maximizedTagBuffer,
1038             maximizedTagBufferLength,
1039             err);
1040 
1041     if(U_FAILURE(*err)) {
1042         goto error;
1043     }
1044 
1045     /**
1046      * Start first with just the language.
1047      **/
1048     {
1049         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1050 
1051         const int32_t tagBufferLength =
1052             createLikelySubtagsString(
1053                 lang,
1054                 langLength,
1055                 NULL,
1056                 0,
1057                 NULL,
1058                 0,
1059                 NULL,
1060                 0,
1061                 tagBuffer,
1062                 sizeof(tagBuffer),
1063                 err);
1064 
1065         if(U_FAILURE(*err)) {
1066             goto error;
1067         }
1068         else if (uprv_strnicmp(
1069                     maximizedTagBuffer,
1070                     tagBuffer,
1071                     tagBufferLength) == 0) {
1072 
1073             return createTagString(
1074                         lang,
1075                         langLength,
1076                         NULL,
1077                         0,
1078                         NULL,
1079                         0,
1080                         trailing,
1081                         trailingLength,
1082                         minimizedLocaleID,
1083                         minimizedLocaleIDCapacity,
1084                         err);
1085         }
1086     }
1087 
1088     /**
1089      * Next, try the language and region.
1090      **/
1091     if (regionLength > 0) {
1092 
1093         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1094 
1095         const int32_t tagBufferLength =
1096             createLikelySubtagsString(
1097                 lang,
1098                 langLength,
1099                 NULL,
1100                 0,
1101                 region,
1102                 regionLength,
1103                 NULL,
1104                 0,
1105                 tagBuffer,
1106                 sizeof(tagBuffer),
1107                 err);
1108 
1109         if(U_FAILURE(*err)) {
1110             goto error;
1111         }
1112         else if (uprv_strnicmp(
1113                     maximizedTagBuffer,
1114                     tagBuffer,
1115                     tagBufferLength) == 0) {
1116 
1117             return createTagString(
1118                         lang,
1119                         langLength,
1120                         NULL,
1121                         0,
1122                         region,
1123                         regionLength,
1124                         trailing,
1125                         trailingLength,
1126                         minimizedLocaleID,
1127                         minimizedLocaleIDCapacity,
1128                         err);
1129         }
1130     }
1131 
1132     /**
1133      * Finally, try the language and script.  This is our last chance,
1134      * since trying with all three subtags would only yield the
1135      * maximal version that we already have.
1136      **/
1137     if (scriptLength > 0 && regionLength > 0) {
1138         char tagBuffer[ULOC_FULLNAME_CAPACITY];
1139 
1140         const int32_t tagBufferLength =
1141             createLikelySubtagsString(
1142                 lang,
1143                 langLength,
1144                 script,
1145                 scriptLength,
1146                 NULL,
1147                 0,
1148                 NULL,
1149                 0,
1150                 tagBuffer,
1151                 sizeof(tagBuffer),
1152                 err);
1153 
1154         if(U_FAILURE(*err)) {
1155             goto error;
1156         }
1157         else if (uprv_strnicmp(
1158                     maximizedTagBuffer,
1159                     tagBuffer,
1160                     tagBufferLength) == 0) {
1161 
1162             return createTagString(
1163                         lang,
1164                         langLength,
1165                         script,
1166                         scriptLength,
1167                         NULL,
1168                         0,
1169                         trailing,
1170                         trailingLength,
1171                         minimizedLocaleID,
1172                         minimizedLocaleIDCapacity,
1173                         err);
1174         }
1175     }
1176 
1177     {
1178         /**
1179          * If we got here, return the locale ID parameter.
1180          **/
1181         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1182 
1183         uprv_memcpy(
1184             minimizedLocaleID,
1185             localeID,
1186             localeIDLength <= minimizedLocaleIDCapacity ?
1187                 localeIDLength : minimizedLocaleIDCapacity);
1188 
1189         return u_terminateChars(
1190                     minimizedLocaleID,
1191                     minimizedLocaleIDCapacity,
1192                     localeIDLength,
1193                     err);
1194     }
1195 
1196 error:
1197 
1198     if (!U_FAILURE(*err)) {
1199         *err = U_ILLEGAL_ARGUMENT_ERROR;
1200     }
1201 
1202     return -1;
1203 
1204 
1205 }
1206 
1207 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1208 do_canonicalize(const char*    localeID,
1209          char* buffer,
1210          int32_t bufferCapacity,
1211          UErrorCode* err)
1212 {
1213     uloc_canonicalize(
1214         localeID,
1215         buffer,
1216         bufferCapacity,
1217         err);
1218 
1219     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1220         *err == U_BUFFER_OVERFLOW_ERROR) {
1221         *err = U_ILLEGAL_ARGUMENT_ERROR;
1222 
1223         return FALSE;
1224     }
1225     else if (U_FAILURE(*err)) {
1226 
1227         return FALSE;
1228     }
1229     else {
1230         return TRUE;
1231     }
1232 }
1233 
1234 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * err)1235 uloc_addLikelySubtags(const char*    localeID,
1236          char* maximizedLocaleID,
1237          int32_t maximizedLocaleIDCapacity,
1238          UErrorCode* err)
1239 {
1240     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241 
1242     if (!do_canonicalize(
1243         localeID,
1244         localeBuffer,
1245         sizeof(localeBuffer),
1246         err)) {
1247         return -1;
1248     }
1249     else {
1250         return _uloc_addLikelySubtags(
1251                     localeBuffer,
1252                     maximizedLocaleID,
1253                     maximizedLocaleIDCapacity,
1254                     err);
1255     }
1256 }
1257 
1258 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * err)1259 uloc_minimizeSubtags(const char*    localeID,
1260          char* minimizedLocaleID,
1261          int32_t minimizedLocaleIDCapacity,
1262          UErrorCode* err)
1263 {
1264     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1265 
1266     if (!do_canonicalize(
1267         localeID,
1268         localeBuffer,
1269         sizeof(localeBuffer),
1270         err)) {
1271         return -1;
1272     }
1273     else {
1274         return _uloc_minimizeSubtags(
1275                     localeBuffer,
1276                     minimizedLocaleID,
1277                     minimizedLocaleIDCapacity,
1278                     err);
1279     }
1280 }
1281 
// Fast-path table for common languages: every language subtag is directly
// followed by '-' when the language is LTR or by '+' when it is RTL.
// The adjacent literals concatenate into a single string.
static const char LANG_DIR_STRING[] =
        "root-en-es-pt-zh-ja-ko-de-fr-it-"
        "ar+he+fa+"
        "ru-nl-pl-th-tr-";
1286 
1287 // Implemented here because this calls uloc_addLikelySubtags().
1288 U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char * locale)1289 uloc_isRightToLeft(const char *locale) {
1290     UErrorCode errorCode = U_ZERO_ERROR;
1291     char script[8];
1292     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1293     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1294             scriptLength == 0) {
1295         // Fastpath: We know the likely scripts and their writing direction
1296         // for some common languages.
1297         errorCode = U_ZERO_ERROR;
1298         char lang[8];
1299         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1300         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1301                 langLength == 0) {
1302             return FALSE;
1303         }
1304         const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1305         if (langPtr != NULL) {
1306             switch (langPtr[langLength]) {
1307             case '-': return FALSE;
1308             case '+': return TRUE;
1309             default: break;  // partial match of a longer code
1310             }
1311         }
1312         // Otherwise, find the likely script.
1313         errorCode = U_ZERO_ERROR;
1314         char likely[ULOC_FULLNAME_CAPACITY];
1315         (void)uloc_addLikelySubtags(locale, likely, UPRV_LENGTHOF(likely), &errorCode);
1316         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1317             return FALSE;
1318         }
1319         scriptLength = uloc_getScript(likely, script, UPRV_LENGTHOF(script), &errorCode);
1320         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1321                 scriptLength == 0) {
1322             return FALSE;
1323         }
1324     }
1325     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1326     return uscript_isRightToLeft(scriptCode);
1327 }
1328 
1329 U_NAMESPACE_BEGIN
1330 
1331 UBool
isRightToLeft() const1332 Locale::isRightToLeft() const {
1333     return uloc_isRightToLeft(getBaseName());
1334 }
1335 
1336 U_NAMESPACE_END
1337 
1338 // The following must at least allow for rg key value (6) plus terminator (1).
1339 #define ULOC_RG_BUFLEN 8
1340 
1341 U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char * localeID,UBool inferRegion,char * region,int32_t regionCapacity,UErrorCode * status)1342 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1343                                      char *region, int32_t regionCapacity, UErrorCode* status) {
1344     if (U_FAILURE(*status)) {
1345         return 0;
1346     }
1347     char rgBuf[ULOC_RG_BUFLEN];
1348     UErrorCode rgStatus = U_ZERO_ERROR;
1349 
1350     // First check for rg keyword value
1351     int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1352     if (U_FAILURE(rgStatus) || rgLen != 6) {
1353         rgLen = 0;
1354     } else {
1355         // rgBuf guaranteed to be zero terminated here, with text len 6
1356         char *rgPtr = rgBuf;
1357         for (; *rgPtr!= 0; rgPtr++) {
1358             *rgPtr = uprv_toupper(*rgPtr);
1359         }
1360         rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1361     }
1362 
1363     if (rgLen == 0) {
1364         // No valid rg keyword value, try for unicode_region_subtag
1365         rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1366         if (U_FAILURE(*status)) {
1367             rgLen = 0;
1368         } else if (rgLen == 0 && inferRegion) {
1369             // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1370             char locBuf[ULOC_FULLNAME_CAPACITY];
1371             rgStatus = U_ZERO_ERROR;
1372             (void)uloc_addLikelySubtags(localeID, locBuf, ULOC_FULLNAME_CAPACITY, &rgStatus);
1373             if (U_SUCCESS(rgStatus)) {
1374                 rgLen = uloc_getCountry(locBuf, rgBuf, ULOC_RG_BUFLEN, status);
1375                 if (U_FAILURE(*status)) {
1376                     rgLen = 0;
1377                 }
1378             }
1379         }
1380     }
1381 
1382     rgBuf[rgLen] = 0;
1383     uprv_strncpy(region, rgBuf, regionCapacity);
1384     return u_terminateChars(region, regionCapacity, rgLen, status);
1385 }
1386 
1387