• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 *   Copyright (C) 1997-2016, International Business Machines
7 *   Corporation and others.  All Rights Reserved.
8 *
9 *******************************************************************************
10 *   file name:  loclikely.cpp
11 *   encoding:   UTF-8
12 *   tab size:   8 (not used)
13 *   indentation:4
14 *
15 *   created on: 2010feb25
16 *   created by: Markus W. Scherer
17 *
18 *   Code for likely and minimized locale subtags, separated out from other .cpp files
19 *   that then do not depend on resource bundle code and likely-subtags data.
20 */
21 
22 #include "unicode/bytestream.h"
23 #include "unicode/utypes.h"
24 #include "unicode/locid.h"
25 #include "unicode/putil.h"
26 #include "unicode/uchar.h"
27 #include "unicode/uloc.h"
28 #include "unicode/ures.h"
29 #include "unicode/uscript.h"
30 #include "bytesinkutil.h"
31 #include "charstr.h"
32 #include "cmemory.h"
33 #include "cstring.h"
34 #include "ulocimp.h"
35 #include "ustr_imp.h"
36 
/**
 * Canonical subtag strings that stand for an unknown language, an unknown
 * script and an unknown region, respectively.
 */
static const char* const unknownLanguage = "und";
static const char* const unknownScript   = "Zzzz";
static const char* const unknownRegion   = "ZZ";
43 
44 /**
45  * This function looks for the localeID in the likelySubtags resource.
46  *
47  * @param localeID The tag to find.
48  * @param buffer A buffer to hold the matching entry
49  * @param bufferLength The length of the output buffer
50  * @return A pointer to "buffer" if found, or a null pointer if not.
51  */
52 static const char*  U_CALLCONV
findLikelySubtags(const char * localeID,char * buffer,int32_t bufferLength,UErrorCode * err)53 findLikelySubtags(const char* localeID,
54                   char* buffer,
55                   int32_t bufferLength,
56                   UErrorCode* err) {
57     const char* result = NULL;
58 
59     if (!U_FAILURE(*err)) {
60         int32_t resLen = 0;
61         const UChar* s = NULL;
62         UErrorCode tmpErr = U_ZERO_ERROR;
63         icu::LocalUResourceBundlePointer subtags(ures_openDirect(NULL, "likelySubtags", &tmpErr));
64         if (U_SUCCESS(tmpErr)) {
65             icu::CharString und;
66             if (localeID != NULL) {
67                 if (*localeID == '\0') {
68                     localeID = unknownLanguage;
69                 } else if (*localeID == '_') {
70                     und.append(unknownLanguage, *err);
71                     und.append(localeID, *err);
72                     if (U_FAILURE(*err)) {
73                         return NULL;
74                     }
75                     localeID = und.data();
76                 }
77             }
78             s = ures_getStringByKey(subtags.getAlias(), localeID, &resLen, &tmpErr);
79 
80             if (U_FAILURE(tmpErr)) {
81                 /*
82                  * If a resource is missing, it's not really an error, it's
83                  * just that we don't have any data for that particular locale ID.
84                  */
85                 if (tmpErr != U_MISSING_RESOURCE_ERROR) {
86                     *err = tmpErr;
87                 }
88             }
89             else if (resLen >= bufferLength) {
90                 /* The buffer should never overflow. */
91                 *err = U_INTERNAL_PROGRAM_ERROR;
92             }
93             else {
94                 u_UCharsToChars(s, buffer, resLen + 1);
95                 if (resLen >= 3 &&
96                     uprv_strnicmp(buffer, unknownLanguage, 3) == 0 &&
97                     (resLen == 3 || buffer[3] == '_')) {
98                     uprv_memmove(buffer, buffer + 3, resLen - 3 + 1);
99                 }
100                 result = buffer;
101             }
102         } else {
103             *err = tmpErr;
104         }
105     }
106 
107     return result;
108 }
109 
110 /**
111  * Append a tag to a buffer, adding the separator if necessary.  The buffer
112  * must be large enough to contain the resulting tag plus any separator
113  * necessary. The tag must not be a zero-length string.
114  *
115  * @param tag The tag to add.
116  * @param tagLength The length of the tag.
117  * @param buffer The output buffer.
118  * @param bufferLength The length of the output buffer.  This is an input/ouput parameter.
119  **/
120 static void U_CALLCONV
appendTag(const char * tag,int32_t tagLength,char * buffer,int32_t * bufferLength,UBool withSeparator)121 appendTag(
122     const char* tag,
123     int32_t tagLength,
124     char* buffer,
125     int32_t* bufferLength,
126     UBool withSeparator) {
127 
128     if (withSeparator) {
129         buffer[*bufferLength] = '_';
130         ++(*bufferLength);
131     }
132 
133     uprv_memmove(
134         &buffer[*bufferLength],
135         tag,
136         tagLength);
137 
138     *bufferLength += tagLength;
139 }
140 
141 /**
142  * Create a tag string from the supplied parameters.  The lang, script and region
143  * parameters may be NULL pointers. If they are, their corresponding length parameters
144  * must be less than or equal to 0.
145  *
146  * If any of the language, script or region parameters are empty, and the alternateTags
147  * parameter is not NULL, it will be parsed for potential language, script and region tags
148  * to be used when constructing the new tag.  If the alternateTags parameter is NULL, or
149  * it contains no language tag, the default tag for the unknown language is used.
150  *
151  * If the length of the new string exceeds the capacity of the output buffer,
152  * the function copies as many bytes to the output buffer as it can, and returns
153  * the error U_BUFFER_OVERFLOW_ERROR.
154  *
155  * If an illegal argument is provided, the function returns the error
156  * U_ILLEGAL_ARGUMENT_ERROR.
157  *
158  * Note that this function can return the warning U_STRING_NOT_TERMINATED_WARNING if
159  * the tag string fits in the output buffer, but the null terminator doesn't.
160  *
161  * @param lang The language tag to use.
162  * @param langLength The length of the language tag.
163  * @param script The script tag to use.
164  * @param scriptLength The length of the script tag.
165  * @param region The region tag to use.
166  * @param regionLength The length of the region tag.
167  * @param trailing Any trailing data to append to the new tag.
168  * @param trailingLength The length of the trailing data.
169  * @param alternateTags A string containing any alternate tags.
170  * @param sink The output sink receiving the tag string.
171  * @param err A pointer to a UErrorCode for error reporting.
172  **/
173 static void U_CALLCONV
createTagStringWithAlternates(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,const char * alternateTags,icu::ByteSink & sink,UErrorCode * err)174 createTagStringWithAlternates(
175     const char* lang,
176     int32_t langLength,
177     const char* script,
178     int32_t scriptLength,
179     const char* region,
180     int32_t regionLength,
181     const char* trailing,
182     int32_t trailingLength,
183     const char* alternateTags,
184     icu::ByteSink& sink,
185     UErrorCode* err) {
186 
187     if (U_FAILURE(*err)) {
188         goto error;
189     }
190     else if (langLength >= ULOC_LANG_CAPACITY ||
191              scriptLength >= ULOC_SCRIPT_CAPACITY ||
192              regionLength >= ULOC_COUNTRY_CAPACITY) {
193         goto error;
194     }
195     else {
196         /**
197          * ULOC_FULLNAME_CAPACITY will provide enough capacity
198          * that we can build a string that contains the language,
199          * script and region code without worrying about overrunning
200          * the user-supplied buffer.
201          **/
202         char tagBuffer[ULOC_FULLNAME_CAPACITY];
203         int32_t tagLength = 0;
204         UBool regionAppended = FALSE;
205 
206         if (langLength > 0) {
207             appendTag(
208                 lang,
209                 langLength,
210                 tagBuffer,
211                 &tagLength,
212                 /*withSeparator=*/FALSE);
213         }
214         else if (alternateTags == NULL) {
215             /*
216              * Use the empty string for an unknown language, if
217              * we found no language.
218              */
219         }
220         else {
221             /*
222              * Parse the alternateTags string for the language.
223              */
224             char alternateLang[ULOC_LANG_CAPACITY];
225             int32_t alternateLangLength = sizeof(alternateLang);
226 
227             alternateLangLength =
228                 uloc_getLanguage(
229                     alternateTags,
230                     alternateLang,
231                     alternateLangLength,
232                     err);
233             if(U_FAILURE(*err) ||
234                 alternateLangLength >= ULOC_LANG_CAPACITY) {
235                 goto error;
236             }
237             else if (alternateLangLength == 0) {
238                 /*
239                  * Use the empty string for an unknown language, if
240                  * we found no language.
241                  */
242             }
243             else {
244                 appendTag(
245                     alternateLang,
246                     alternateLangLength,
247                     tagBuffer,
248                     &tagLength,
249                     /*withSeparator=*/FALSE);
250             }
251         }
252 
253         if (scriptLength > 0) {
254             appendTag(
255                 script,
256                 scriptLength,
257                 tagBuffer,
258                 &tagLength,
259                 /*withSeparator=*/TRUE);
260         }
261         else if (alternateTags != NULL) {
262             /*
263              * Parse the alternateTags string for the script.
264              */
265             char alternateScript[ULOC_SCRIPT_CAPACITY];
266 
267             const int32_t alternateScriptLength =
268                 uloc_getScript(
269                     alternateTags,
270                     alternateScript,
271                     sizeof(alternateScript),
272                     err);
273 
274             if (U_FAILURE(*err) ||
275                 alternateScriptLength >= ULOC_SCRIPT_CAPACITY) {
276                 goto error;
277             }
278             else if (alternateScriptLength > 0) {
279                 appendTag(
280                     alternateScript,
281                     alternateScriptLength,
282                     tagBuffer,
283                     &tagLength,
284                     /*withSeparator=*/TRUE);
285             }
286         }
287 
288         if (regionLength > 0) {
289             appendTag(
290                 region,
291                 regionLength,
292                 tagBuffer,
293                 &tagLength,
294                 /*withSeparator=*/TRUE);
295 
296             regionAppended = TRUE;
297         }
298         else if (alternateTags != NULL) {
299             /*
300              * Parse the alternateTags string for the region.
301              */
302             char alternateRegion[ULOC_COUNTRY_CAPACITY];
303 
304             const int32_t alternateRegionLength =
305                 uloc_getCountry(
306                     alternateTags,
307                     alternateRegion,
308                     sizeof(alternateRegion),
309                     err);
310             if (U_FAILURE(*err) ||
311                 alternateRegionLength >= ULOC_COUNTRY_CAPACITY) {
312                 goto error;
313             }
314             else if (alternateRegionLength > 0) {
315                 appendTag(
316                     alternateRegion,
317                     alternateRegionLength,
318                     tagBuffer,
319                     &tagLength,
320                     /*withSeparator=*/TRUE);
321 
322                 regionAppended = TRUE;
323             }
324         }
325 
326         /**
327          * Copy the partial tag from our internal buffer to the supplied
328          * target.
329          **/
330         sink.Append(tagBuffer, tagLength);
331 
332         if (trailingLength > 0) {
333             if (*trailing != '@') {
334                 sink.Append("_", 1);
335                 if (!regionAppended) {
336                     /* extra separator is required */
337                     sink.Append("_", 1);
338                 }
339             }
340 
341             /*
342              * Copy the trailing data into the supplied buffer.
343              */
344             sink.Append(trailing, trailingLength);
345         }
346 
347         return;
348     }
349 
350 error:
351 
352     /**
353      * An overflow indicates the locale ID passed in
354      * is ill-formed.  If we got here, and there was
355      * no previous error, it's an implicit overflow.
356      **/
357     if (*err ==  U_BUFFER_OVERFLOW_ERROR ||
358         U_SUCCESS(*err)) {
359         *err = U_ILLEGAL_ARGUMENT_ERROR;
360     }
361 }
362 
363 /**
364  * Create a tag string from the supplied parameters.  The lang, script and region
365  * parameters may be NULL pointers. If they are, their corresponding length parameters
366  * must be less than or equal to 0.  If the lang parameter is an empty string, the
367  * default value for an unknown language is written to the output buffer.
368  *
369  * If the length of the new string exceeds the capacity of the output buffer,
370  * the function copies as many bytes to the output buffer as it can, and returns
371  * the error U_BUFFER_OVERFLOW_ERROR.
372  *
373  * If an illegal argument is provided, the function returns the error
374  * U_ILLEGAL_ARGUMENT_ERROR.
375  *
376  * @param lang The language tag to use.
377  * @param langLength The length of the language tag.
378  * @param script The script tag to use.
379  * @param scriptLength The length of the script tag.
380  * @param region The region tag to use.
381  * @param regionLength The length of the region tag.
382  * @param trailing Any trailing data to append to the new tag.
383  * @param trailingLength The length of the trailing data.
384  * @param sink The output sink receiving the tag string.
385  * @param err A pointer to a UErrorCode for error reporting.
386  **/
387 static void U_CALLCONV
createTagString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * trailing,int32_t trailingLength,icu::ByteSink & sink,UErrorCode * err)388 createTagString(
389     const char* lang,
390     int32_t langLength,
391     const char* script,
392     int32_t scriptLength,
393     const char* region,
394     int32_t regionLength,
395     const char* trailing,
396     int32_t trailingLength,
397     icu::ByteSink& sink,
398     UErrorCode* err)
399 {
400     createTagStringWithAlternates(
401                 lang,
402                 langLength,
403                 script,
404                 scriptLength,
405                 region,
406                 regionLength,
407                 trailing,
408                 trailingLength,
409                 NULL,
410                 sink,
411                 err);
412 }
413 
414 /**
415  * Parse the language, script, and region subtags from a tag string, and copy the
416  * results into the corresponding output parameters. The buffers are null-terminated,
417  * unless overflow occurs.
418  *
419  * The langLength, scriptLength, and regionLength parameters are input/output
420  * parameters, and must contain the capacity of their corresponding buffers on
421  * input.  On output, they will contain the actual length of the buffers, not
422  * including the null terminator.
423  *
424  * If the length of any of the output subtags exceeds the capacity of the corresponding
425  * buffer, the function copies as many bytes to the output buffer as it can, and returns
426  * the error U_BUFFER_OVERFLOW_ERROR.  It will not parse any more subtags once overflow
427  * occurs.
428  *
429  * If an illegal argument is provided, the function returns the error
430  * U_ILLEGAL_ARGUMENT_ERROR.
431  *
432  * @param localeID The locale ID to parse.
433  * @param lang The language tag buffer.
434  * @param langLength The length of the language tag.
435  * @param script The script tag buffer.
436  * @param scriptLength The length of the script tag.
437  * @param region The region tag buffer.
438  * @param regionLength The length of the region tag.
439  * @param err A pointer to a UErrorCode for error reporting.
440  * @return The number of chars of the localeID parameter consumed.
441  **/
442 static int32_t U_CALLCONV
parseTagString(const char * localeID,char * lang,int32_t * langLength,char * script,int32_t * scriptLength,char * region,int32_t * regionLength,UErrorCode * err)443 parseTagString(
444     const char* localeID,
445     char* lang,
446     int32_t* langLength,
447     char* script,
448     int32_t* scriptLength,
449     char* region,
450     int32_t* regionLength,
451     UErrorCode* err)
452 {
453     const char* position = localeID;
454     int32_t subtagLength = 0;
455 
456     if(U_FAILURE(*err) ||
457        localeID == NULL ||
458        lang == NULL ||
459        langLength == NULL ||
460        script == NULL ||
461        scriptLength == NULL ||
462        region == NULL ||
463        regionLength == NULL) {
464         goto error;
465     }
466 
467     subtagLength = ulocimp_getLanguage(position, lang, *langLength, &position);
468     u_terminateChars(lang, *langLength, subtagLength, err);
469 
470     /*
471      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
472      * to be an error, because it indicates the user-supplied tag is
473      * not well-formed.
474      */
475     if(U_FAILURE(*err)) {
476         goto error;
477     }
478 
479     *langLength = subtagLength;
480 
481     /*
482      * If no language was present, use the empty string instead.
483      * Otherwise, move past any separator.
484      */
485     if (_isIDSeparator(*position)) {
486         ++position;
487     }
488 
489     subtagLength = ulocimp_getScript(position, script, *scriptLength, &position);
490     u_terminateChars(script, *scriptLength, subtagLength, err);
491 
492     if(U_FAILURE(*err)) {
493         goto error;
494     }
495 
496     *scriptLength = subtagLength;
497 
498     if (*scriptLength > 0) {
499         if (uprv_strnicmp(script, unknownScript, *scriptLength) == 0) {
500             /**
501              * If the script part is the "unknown" script, then don't return it.
502              **/
503             *scriptLength = 0;
504         }
505 
506         /*
507          * Move past any separator.
508          */
509         if (_isIDSeparator(*position)) {
510             ++position;
511         }
512     }
513 
514     subtagLength = ulocimp_getCountry(position, region, *regionLength, &position);
515     u_terminateChars(region, *regionLength, subtagLength, err);
516 
517     if(U_FAILURE(*err)) {
518         goto error;
519     }
520 
521     *regionLength = subtagLength;
522 
523     if (*regionLength > 0) {
524         if (uprv_strnicmp(region, unknownRegion, *regionLength) == 0) {
525             /**
526              * If the region part is the "unknown" region, then don't return it.
527              **/
528             *regionLength = 0;
529         }
530     } else if (*position != 0 && *position != '@') {
531         /* back up over consumed trailing separator */
532         --position;
533     }
534 
535 exit:
536 
537     return (int32_t)(position - localeID);
538 
539 error:
540 
541     /**
542      * If we get here, we have no explicit error, it's the result of an
543      * illegal argument.
544      **/
545     if (!U_FAILURE(*err)) {
546         *err = U_ILLEGAL_ARGUMENT_ERROR;
547     }
548 
549     goto exit;
550 }
551 
552 static UBool U_CALLCONV
createLikelySubtagsString(const char * lang,int32_t langLength,const char * script,int32_t scriptLength,const char * region,int32_t regionLength,const char * variants,int32_t variantsLength,icu::ByteSink & sink,UErrorCode * err)553 createLikelySubtagsString(
554     const char* lang,
555     int32_t langLength,
556     const char* script,
557     int32_t scriptLength,
558     const char* region,
559     int32_t regionLength,
560     const char* variants,
561     int32_t variantsLength,
562     icu::ByteSink& sink,
563     UErrorCode* err) {
564     /**
565      * ULOC_FULLNAME_CAPACITY will provide enough capacity
566      * that we can build a string that contains the language,
567      * script and region code without worrying about overrunning
568      * the user-supplied buffer.
569      **/
570     char likelySubtagsBuffer[ULOC_FULLNAME_CAPACITY];
571 
572     if(U_FAILURE(*err)) {
573         goto error;
574     }
575 
576     /**
577      * Try the language with the script and region first.
578      **/
579     if (scriptLength > 0 && regionLength > 0) {
580 
581         const char* likelySubtags = NULL;
582 
583         icu::CharString tagBuffer;
584         {
585             icu::CharStringByteSink sink(&tagBuffer);
586             createTagString(
587                 lang,
588                 langLength,
589                 script,
590                 scriptLength,
591                 region,
592                 regionLength,
593                 NULL,
594                 0,
595                 sink,
596                 err);
597         }
598         if(U_FAILURE(*err)) {
599             goto error;
600         }
601 
602         likelySubtags =
603             findLikelySubtags(
604                 tagBuffer.data(),
605                 likelySubtagsBuffer,
606                 sizeof(likelySubtagsBuffer),
607                 err);
608         if(U_FAILURE(*err)) {
609             goto error;
610         }
611 
612         if (likelySubtags != NULL) {
613             /* Always use the language tag from the
614                maximal string, since it may be more
615                specific than the one provided. */
616             createTagStringWithAlternates(
617                         NULL,
618                         0,
619                         NULL,
620                         0,
621                         NULL,
622                         0,
623                         variants,
624                         variantsLength,
625                         likelySubtags,
626                         sink,
627                         err);
628             return TRUE;
629         }
630     }
631 
632     /**
633      * Try the language with just the script.
634      **/
635     if (scriptLength > 0) {
636 
637         const char* likelySubtags = NULL;
638 
639         icu::CharString tagBuffer;
640         {
641             icu::CharStringByteSink sink(&tagBuffer);
642             createTagString(
643                 lang,
644                 langLength,
645                 script,
646                 scriptLength,
647                 NULL,
648                 0,
649                 NULL,
650                 0,
651                 sink,
652                 err);
653         }
654         if(U_FAILURE(*err)) {
655             goto error;
656         }
657 
658         likelySubtags =
659             findLikelySubtags(
660                 tagBuffer.data(),
661                 likelySubtagsBuffer,
662                 sizeof(likelySubtagsBuffer),
663                 err);
664         if(U_FAILURE(*err)) {
665             goto error;
666         }
667 
668         if (likelySubtags != NULL) {
669             /* Always use the language tag from the
670                maximal string, since it may be more
671                specific than the one provided. */
672             createTagStringWithAlternates(
673                         NULL,
674                         0,
675                         NULL,
676                         0,
677                         region,
678                         regionLength,
679                         variants,
680                         variantsLength,
681                         likelySubtags,
682                         sink,
683                         err);
684             return TRUE;
685         }
686     }
687 
688     /**
689      * Try the language with just the region.
690      **/
691     if (regionLength > 0) {
692 
693         const char* likelySubtags = NULL;
694 
695         icu::CharString tagBuffer;
696         {
697             icu::CharStringByteSink sink(&tagBuffer);
698             createTagString(
699                 lang,
700                 langLength,
701                 NULL,
702                 0,
703                 region,
704                 regionLength,
705                 NULL,
706                 0,
707                 sink,
708                 err);
709         }
710         if(U_FAILURE(*err)) {
711             goto error;
712         }
713 
714         likelySubtags =
715             findLikelySubtags(
716                 tagBuffer.data(),
717                 likelySubtagsBuffer,
718                 sizeof(likelySubtagsBuffer),
719                 err);
720         if(U_FAILURE(*err)) {
721             goto error;
722         }
723 
724         if (likelySubtags != NULL) {
725             /* Always use the language tag from the
726                maximal string, since it may be more
727                specific than the one provided. */
728             createTagStringWithAlternates(
729                         NULL,
730                         0,
731                         script,
732                         scriptLength,
733                         NULL,
734                         0,
735                         variants,
736                         variantsLength,
737                         likelySubtags,
738                         sink,
739                         err);
740             return TRUE;
741         }
742     }
743 
744     /**
745      * Finally, try just the language.
746      **/
747     {
748         const char* likelySubtags = NULL;
749 
750         icu::CharString tagBuffer;
751         {
752             icu::CharStringByteSink sink(&tagBuffer);
753             createTagString(
754                 lang,
755                 langLength,
756                 NULL,
757                 0,
758                 NULL,
759                 0,
760                 NULL,
761                 0,
762                 sink,
763                 err);
764         }
765         if(U_FAILURE(*err)) {
766             goto error;
767         }
768 
769         likelySubtags =
770             findLikelySubtags(
771                 tagBuffer.data(),
772                 likelySubtagsBuffer,
773                 sizeof(likelySubtagsBuffer),
774                 err);
775         if(U_FAILURE(*err)) {
776             goto error;
777         }
778 
779         if (likelySubtags != NULL) {
780             /* Always use the language tag from the
781                maximal string, since it may be more
782                specific than the one provided. */
783             createTagStringWithAlternates(
784                         NULL,
785                         0,
786                         script,
787                         scriptLength,
788                         region,
789                         regionLength,
790                         variants,
791                         variantsLength,
792                         likelySubtags,
793                         sink,
794                         err);
795             return TRUE;
796         }
797     }
798 
799     return FALSE;
800 
801 error:
802 
803     if (!U_FAILURE(*err)) {
804         *err = U_ILLEGAL_ARGUMENT_ERROR;
805     }
806 
807     return FALSE;
808 }
809 
/**
 * Check the subtags in the trailing part of a locale ID: the text is split at
 * '-' and '_', and scanning stops at the first '@' or after trailingLength
 * characters. If any subtag is longer than 8 characters, control jumps to the
 * `error` label, which the enclosing function must provide.
 *
 * The limit is enforced exactly: a subtag of 8 characters passes, one of 9
 * characters is rejected.
 *
 * @param trailing The text to scan; it is indexed, so it must be a pointer or
 *                 an array of char.
 * @param trailingLength The number of characters available in trailing.
 */
#define CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t count = 0; \
    int32_t i; \
    for (i = 0; i < (trailingLength); i++) { \
        if ((trailing)[i] == '-' || (trailing)[i] == '_') { \
            /* A separator starts a new subtag. */ \
            count = 0; \
        } else if ((trailing)[i] == '@') { \
            break; \
        } else if (count >= 8) { \
            /* This would be the 9th character of the subtag. */ \
            goto error; \
        } else { \
            count++; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
828 
829 static void
_uloc_addLikelySubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * err)830 _uloc_addLikelySubtags(const char* localeID,
831                        icu::ByteSink& sink,
832                        UErrorCode* err) {
833     char lang[ULOC_LANG_CAPACITY];
834     int32_t langLength = sizeof(lang);
835     char script[ULOC_SCRIPT_CAPACITY];
836     int32_t scriptLength = sizeof(script);
837     char region[ULOC_COUNTRY_CAPACITY];
838     int32_t regionLength = sizeof(region);
839     const char* trailing = "";
840     int32_t trailingLength = 0;
841     int32_t trailingIndex = 0;
842     UBool success = FALSE;
843 
844     if(U_FAILURE(*err)) {
845         goto error;
846     }
847     if (localeID == NULL) {
848         goto error;
849     }
850 
851     trailingIndex = parseTagString(
852         localeID,
853         lang,
854         &langLength,
855         script,
856         &scriptLength,
857         region,
858         &regionLength,
859         err);
860     if(U_FAILURE(*err)) {
861         /* Overflow indicates an illegal argument error */
862         if (*err == U_BUFFER_OVERFLOW_ERROR) {
863             *err = U_ILLEGAL_ARGUMENT_ERROR;
864         }
865 
866         goto error;
867     }
868 
869     /* Find the length of the trailing portion. */
870     while (_isIDSeparator(localeID[trailingIndex])) {
871         trailingIndex++;
872     }
873     trailing = &localeID[trailingIndex];
874     trailingLength = (int32_t)uprv_strlen(trailing);
875 
876     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
877 
878     success =
879         createLikelySubtagsString(
880             lang,
881             langLength,
882             script,
883             scriptLength,
884             region,
885             regionLength,
886             trailing,
887             trailingLength,
888             sink,
889             err);
890 
891     if (!success) {
892         const int32_t localIDLength = (int32_t)uprv_strlen(localeID);
893 
894         /*
895          * If we get here, we need to return localeID.
896          */
897         sink.Append(localeID, localIDLength);
898     }
899 
900     return;
901 
902 error:
903 
904     if (!U_FAILURE(*err)) {
905         *err = U_ILLEGAL_ARGUMENT_ERROR;
906     }
907 }
908 
909 static void
_uloc_minimizeSubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * err)910 _uloc_minimizeSubtags(const char* localeID,
911                       icu::ByteSink& sink,
912                       UErrorCode* err) {
913     icu::CharString maximizedTagBuffer;
914 
915     char lang[ULOC_LANG_CAPACITY];
916     int32_t langLength = sizeof(lang);
917     char script[ULOC_SCRIPT_CAPACITY];
918     int32_t scriptLength = sizeof(script);
919     char region[ULOC_COUNTRY_CAPACITY];
920     int32_t regionLength = sizeof(region);
921     const char* trailing = "";
922     int32_t trailingLength = 0;
923     int32_t trailingIndex = 0;
924 
925     if(U_FAILURE(*err)) {
926         goto error;
927     }
928     else if (localeID == NULL) {
929         goto error;
930     }
931 
932     trailingIndex =
933         parseTagString(
934             localeID,
935             lang,
936             &langLength,
937             script,
938             &scriptLength,
939             region,
940             &regionLength,
941             err);
942     if(U_FAILURE(*err)) {
943 
944         /* Overflow indicates an illegal argument error */
945         if (*err == U_BUFFER_OVERFLOW_ERROR) {
946             *err = U_ILLEGAL_ARGUMENT_ERROR;
947         }
948 
949         goto error;
950     }
951 
952     /* Find the spot where the variants or the keywords begin, if any. */
953     while (_isIDSeparator(localeID[trailingIndex])) {
954         trailingIndex++;
955     }
956     trailing = &localeID[trailingIndex];
957     trailingLength = (int32_t)uprv_strlen(trailing);
958 
959     CHECK_TRAILING_VARIANT_SIZE(trailing, trailingLength);
960 
961     {
962         icu::CharString base;
963         {
964             icu::CharStringByteSink sink(&base);
965             createTagString(
966                 lang,
967                 langLength,
968                 script,
969                 scriptLength,
970                 region,
971                 regionLength,
972                 NULL,
973                 0,
974                 sink,
975                 err);
976         }
977 
978         /**
979          * First, we need to first get the maximization
980          * from AddLikelySubtags.
981          **/
982         {
983             icu::CharStringByteSink sink(&maximizedTagBuffer);
984             ulocimp_addLikelySubtags(base.data(), sink, err);
985         }
986     }
987 
988     if(U_FAILURE(*err)) {
989         goto error;
990     }
991 
992     /**
993      * Start first with just the language.
994      **/
995     {
996         icu::CharString tagBuffer;
997         {
998             icu::CharStringByteSink sink(&tagBuffer);
999             createLikelySubtagsString(
1000                 lang,
1001                 langLength,
1002                 NULL,
1003                 0,
1004                 NULL,
1005                 0,
1006                 NULL,
1007                 0,
1008                 sink,
1009                 err);
1010         }
1011 
1012         if(U_FAILURE(*err)) {
1013             goto error;
1014         }
1015         else if (!tagBuffer.isEmpty() && uprv_strnicmp(
1016                     maximizedTagBuffer.data(),
1017                     tagBuffer.data(),
1018                     tagBuffer.length()) == 0) {
1019 
1020             createTagString(
1021                         lang,
1022                         langLength,
1023                         NULL,
1024                         0,
1025                         NULL,
1026                         0,
1027                         trailing,
1028                         trailingLength,
1029                         sink,
1030                         err);
1031             return;
1032         }
1033     }
1034 
1035     /**
1036      * Next, try the language and region.
1037      **/
1038     if (regionLength > 0) {
1039 
1040         icu::CharString tagBuffer;
1041         {
1042             icu::CharStringByteSink sink(&tagBuffer);
1043             createLikelySubtagsString(
1044                 lang,
1045                 langLength,
1046                 NULL,
1047                 0,
1048                 region,
1049                 regionLength,
1050                 NULL,
1051                 0,
1052                 sink,
1053                 err);
1054         }
1055 
1056         if(U_FAILURE(*err)) {
1057             goto error;
1058         }
1059         else if (uprv_strnicmp(
1060                     maximizedTagBuffer.data(),
1061                     tagBuffer.data(),
1062                     tagBuffer.length()) == 0) {
1063 
1064             createTagString(
1065                         lang,
1066                         langLength,
1067                         NULL,
1068                         0,
1069                         region,
1070                         regionLength,
1071                         trailing,
1072                         trailingLength,
1073                         sink,
1074                         err);
1075             return;
1076         }
1077     }
1078 
1079     /**
1080      * Finally, try the language and script.  This is our last chance,
1081      * since trying with all three subtags would only yield the
1082      * maximal version that we already have.
1083      **/
1084     if (scriptLength > 0 && regionLength > 0) {
1085         icu::CharString tagBuffer;
1086         {
1087             icu::CharStringByteSink sink(&tagBuffer);
1088             createLikelySubtagsString(
1089                 lang,
1090                 langLength,
1091                 script,
1092                 scriptLength,
1093                 NULL,
1094                 0,
1095                 NULL,
1096                 0,
1097                 sink,
1098                 err);
1099         }
1100 
1101         if(U_FAILURE(*err)) {
1102             goto error;
1103         }
1104         else if (uprv_strnicmp(
1105                     maximizedTagBuffer.data(),
1106                     tagBuffer.data(),
1107                     tagBuffer.length()) == 0) {
1108 
1109             createTagString(
1110                         lang,
1111                         langLength,
1112                         script,
1113                         scriptLength,
1114                         NULL,
1115                         0,
1116                         trailing,
1117                         trailingLength,
1118                         sink,
1119                         err);
1120             return;
1121         }
1122     }
1123 
1124     {
1125         /**
1126          * If we got here, return the locale ID parameter.
1127          **/
1128         const int32_t localeIDLength = (int32_t)uprv_strlen(localeID);
1129         sink.Append(localeID, localeIDLength);
1130         return;
1131     }
1132 
1133 error:
1134 
1135     if (!U_FAILURE(*err)) {
1136         *err = U_ILLEGAL_ARGUMENT_ERROR;
1137     }
1138 }
1139 
1140 static UBool
do_canonicalize(const char * localeID,char * buffer,int32_t bufferCapacity,UErrorCode * err)1141 do_canonicalize(const char*    localeID,
1142          char* buffer,
1143          int32_t bufferCapacity,
1144          UErrorCode* err)
1145 {
1146     uloc_canonicalize(
1147         localeID,
1148         buffer,
1149         bufferCapacity,
1150         err);
1151 
1152     if (*err == U_STRING_NOT_TERMINATED_WARNING ||
1153         *err == U_BUFFER_OVERFLOW_ERROR) {
1154         *err = U_ILLEGAL_ARGUMENT_ERROR;
1155 
1156         return FALSE;
1157     }
1158     else if (U_FAILURE(*err)) {
1159 
1160         return FALSE;
1161     }
1162     else {
1163         return TRUE;
1164     }
1165 }
1166 
1167 U_CAPI int32_t U_EXPORT2
uloc_addLikelySubtags(const char * localeID,char * maximizedLocaleID,int32_t maximizedLocaleIDCapacity,UErrorCode * status)1168 uloc_addLikelySubtags(const char* localeID,
1169                       char* maximizedLocaleID,
1170                       int32_t maximizedLocaleIDCapacity,
1171                       UErrorCode* status) {
1172     if (U_FAILURE(*status)) {
1173         return 0;
1174     }
1175 
1176     icu::CheckedArrayByteSink sink(
1177             maximizedLocaleID, maximizedLocaleIDCapacity);
1178 
1179     ulocimp_addLikelySubtags(localeID, sink, status);
1180     int32_t reslen = sink.NumberOfBytesAppended();
1181 
1182     if (U_FAILURE(*status)) {
1183         return sink.Overflowed() ? reslen : -1;
1184     }
1185 
1186     if (sink.Overflowed()) {
1187         *status = U_BUFFER_OVERFLOW_ERROR;
1188     } else {
1189         u_terminateChars(
1190                 maximizedLocaleID, maximizedLocaleIDCapacity, reslen, status);
1191     }
1192 
1193     return reslen;
1194 }
1195 
1196 U_CAPI void U_EXPORT2
ulocimp_addLikelySubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * status)1197 ulocimp_addLikelySubtags(const char* localeID,
1198                          icu::ByteSink& sink,
1199                          UErrorCode* status) {
1200     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1201 
1202     if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1203         _uloc_addLikelySubtags(localeBuffer, sink, status);
1204     }
1205 }
1206 
1207 U_CAPI int32_t U_EXPORT2
uloc_minimizeSubtags(const char * localeID,char * minimizedLocaleID,int32_t minimizedLocaleIDCapacity,UErrorCode * status)1208 uloc_minimizeSubtags(const char* localeID,
1209                      char* minimizedLocaleID,
1210                      int32_t minimizedLocaleIDCapacity,
1211                      UErrorCode* status) {
1212     if (U_FAILURE(*status)) {
1213         return 0;
1214     }
1215 
1216     icu::CheckedArrayByteSink sink(
1217             minimizedLocaleID, minimizedLocaleIDCapacity);
1218 
1219     ulocimp_minimizeSubtags(localeID, sink, status);
1220     int32_t reslen = sink.NumberOfBytesAppended();
1221 
1222     if (U_FAILURE(*status)) {
1223         return sink.Overflowed() ? reslen : -1;
1224     }
1225 
1226     if (sink.Overflowed()) {
1227         *status = U_BUFFER_OVERFLOW_ERROR;
1228     } else {
1229         u_terminateChars(
1230                 minimizedLocaleID, minimizedLocaleIDCapacity, reslen, status);
1231     }
1232 
1233     return reslen;
1234 }
1235 
1236 U_CAPI void U_EXPORT2
ulocimp_minimizeSubtags(const char * localeID,icu::ByteSink & sink,UErrorCode * status)1237 ulocimp_minimizeSubtags(const char* localeID,
1238                         icu::ByteSink& sink,
1239                         UErrorCode* status) {
1240     char localeBuffer[ULOC_FULLNAME_CAPACITY];
1241 
1242     if (do_canonicalize(localeID, localeBuffer, sizeof localeBuffer, status)) {
1243         _uloc_minimizeSubtags(localeBuffer, sink, status);
1244     }
1245 }
1246 
// Fast-path table for common languages: each language subtag is immediately
// followed by '-' when the language is written left-to-right, or by '+' when
// it is written right-to-left.
static const char LANG_DIR_STRING[] = "root-en-es-pt-zh-ja-ko-de-fr-it-ar+he+fa+ru-nl-pl-th-tr-";
1251 
1252 // Implemented here because this calls ulocimp_addLikelySubtags().
1253 U_CAPI UBool U_EXPORT2
uloc_isRightToLeft(const char * locale)1254 uloc_isRightToLeft(const char *locale) {
1255     UErrorCode errorCode = U_ZERO_ERROR;
1256     char script[8];
1257     int32_t scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &errorCode);
1258     if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1259             scriptLength == 0) {
1260         // Fastpath: We know the likely scripts and their writing direction
1261         // for some common languages.
1262         errorCode = U_ZERO_ERROR;
1263         char lang[8];
1264         int32_t langLength = uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &errorCode);
1265         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1266             return FALSE;
1267         }
1268         if (langLength > 0) {
1269             const char* langPtr = uprv_strstr(LANG_DIR_STRING, lang);
1270             if (langPtr != NULL) {
1271                 switch (langPtr[langLength]) {
1272                 case '-': return FALSE;
1273                 case '+': return TRUE;
1274                 default: break;  // partial match of a longer code
1275                 }
1276             }
1277         }
1278         // Otherwise, find the likely script.
1279         errorCode = U_ZERO_ERROR;
1280         icu::CharString likely;
1281         {
1282             icu::CharStringByteSink sink(&likely);
1283             ulocimp_addLikelySubtags(locale, sink, &errorCode);
1284         }
1285         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING) {
1286             return FALSE;
1287         }
1288         scriptLength = uloc_getScript(likely.data(), script, UPRV_LENGTHOF(script), &errorCode);
1289         if (U_FAILURE(errorCode) || errorCode == U_STRING_NOT_TERMINATED_WARNING ||
1290                 scriptLength == 0) {
1291             return FALSE;
1292         }
1293     }
1294     UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
1295     return uscript_isRightToLeft(scriptCode);
1296 }
1297 
1298 U_NAMESPACE_BEGIN
1299 
1300 UBool
isRightToLeft() const1301 Locale::isRightToLeft() const {
1302     return uloc_isRightToLeft(getBaseName());
1303 }
1304 
1305 U_NAMESPACE_END
1306 
// Size of the scratch buffer for the "rg" keyword value. It has to hold at
// least the 6-character rg value plus its NUL terminator (7 bytes).
#define ULOC_RG_BUFLEN 8
1309 
1310 U_CAPI int32_t U_EXPORT2
ulocimp_getRegionForSupplementalData(const char * localeID,UBool inferRegion,char * region,int32_t regionCapacity,UErrorCode * status)1311 ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
1312                                      char *region, int32_t regionCapacity, UErrorCode* status) {
1313     if (U_FAILURE(*status)) {
1314         return 0;
1315     }
1316     char rgBuf[ULOC_RG_BUFLEN];
1317     UErrorCode rgStatus = U_ZERO_ERROR;
1318 
1319     // First check for rg keyword value
1320     int32_t rgLen = uloc_getKeywordValue(localeID, "rg", rgBuf, ULOC_RG_BUFLEN, &rgStatus);
1321     if (U_FAILURE(rgStatus) || rgLen != 6) {
1322         rgLen = 0;
1323     } else {
1324         // rgBuf guaranteed to be zero terminated here, with text len 6
1325         char *rgPtr = rgBuf;
1326         for (; *rgPtr!= 0; rgPtr++) {
1327             *rgPtr = uprv_toupper(*rgPtr);
1328         }
1329         rgLen = (uprv_strcmp(rgBuf+2, "ZZZZ") == 0)? 2: 0;
1330     }
1331 
1332     if (rgLen == 0) {
1333         // No valid rg keyword value, try for unicode_region_subtag
1334         rgLen = uloc_getCountry(localeID, rgBuf, ULOC_RG_BUFLEN, status);
1335         if (U_FAILURE(*status)) {
1336             rgLen = 0;
1337         } else if (rgLen == 0 && inferRegion) {
1338             // no unicode_region_subtag but inferRegion TRUE, try likely subtags
1339             rgStatus = U_ZERO_ERROR;
1340             icu::CharString locBuf;
1341             {
1342                 icu::CharStringByteSink sink(&locBuf);
1343                 ulocimp_addLikelySubtags(localeID, sink, &rgStatus);
1344             }
1345             if (U_SUCCESS(rgStatus)) {
1346                 rgLen = uloc_getCountry(locBuf.data(), rgBuf, ULOC_RG_BUFLEN, status);
1347                 if (U_FAILURE(*status)) {
1348                     rgLen = 0;
1349                 }
1350             }
1351         }
1352     }
1353 
1354     rgBuf[rgLen] = 0;
1355     uprv_strncpy(region, rgBuf, regionCapacity);
1356     return u_terminateChars(region, regionCapacity, rgLen, status);
1357 }
1358 
1359