• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1997-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File USCRIPT.C
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   07/06/2001    Ram         Creation.
15 ******************************************************************************
16 */
17 
18 #include "unicode/uchar.h"
19 #include "unicode/uscript.h"
20 #include "unicode/uloc.h"
21 #include "bytesinkutil.h"
22 #include "charstr.h"
23 #include "cmemory.h"
24 #include "cstring.h"
25 #include "ulocimp.h"
26 
27 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
28 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
29 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
30 
31 static int32_t
setCodes(const UScriptCode * src,int32_t length,UScriptCode * dest,int32_t capacity,UErrorCode * err)32 setCodes(const UScriptCode *src, int32_t length,
33          UScriptCode *dest, int32_t capacity, UErrorCode *err) {
34     int32_t i;
35     if(U_FAILURE(*err)) { return 0; }
36     if(length > capacity) {
37         *err = U_BUFFER_OVERFLOW_ERROR;
38         return length;
39     }
40     for(i = 0; i < length; ++i) {
41         dest[i] = src[i];
42     }
43     return length;
44 }
45 
46 static int32_t
setOneCode(UScriptCode script,UScriptCode * scripts,int32_t capacity,UErrorCode * err)47 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
48     if(U_FAILURE(*err)) { return 0; }
49     if(1 > capacity) {
50         *err = U_BUFFER_OVERFLOW_ERROR;
51         return 1;
52     }
53     scripts[0] = script;
54     return 1;
55 }
56 
57 static int32_t
getCodesFromLocale(const char * locale,UScriptCode * scripts,int32_t capacity,UErrorCode * err)58 getCodesFromLocale(const char *locale,
59                    UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
60     UErrorCode internalErrorCode = U_ZERO_ERROR;
61     char lang[8] = {0};
62     char script[8] = {0};
63     int32_t scriptLength;
64     if(U_FAILURE(*err)) { return 0; }
65     // Multi-script languages, equivalent to the LocaleScript data
66     // that we used to load from locale resource bundles.
67     /*length = */ uloc_getLanguage(locale, lang, UPRV_LENGTHOF(lang), &internalErrorCode);
68     if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
69         return 0;
70     }
71     if(0 == uprv_strcmp(lang, "ja")) {
72         return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
73     }
74     if(0 == uprv_strcmp(lang, "ko")) {
75         return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
76     }
77     scriptLength = uloc_getScript(locale, script, UPRV_LENGTHOF(script), &internalErrorCode);
78     if(U_FAILURE(internalErrorCode) || internalErrorCode == U_STRING_NOT_TERMINATED_WARNING) {
79         return 0;
80     }
81     if(0 == uprv_strcmp(lang, "zh") && 0 == uprv_strcmp(script, "Hant")) {
82         return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
83     }
84     // Explicit script code.
85     if(scriptLength != 0) {
86         UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script);
87         if(scriptCode != USCRIPT_INVALID_CODE) {
88             if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
89                 scriptCode = USCRIPT_HAN;
90             }
91             return setOneCode(scriptCode, scripts, capacity, err);
92         }
93     }
94     return 0;
95 }
96 
97 /* TODO: this is a bad API and should be deprecated, ticket #11141 */
98 U_CAPI int32_t  U_EXPORT2
uscript_getCode(const char * nameOrAbbrOrLocale,UScriptCode * fillIn,int32_t capacity,UErrorCode * err)99 uscript_getCode(const char* nameOrAbbrOrLocale,
100                 UScriptCode* fillIn,
101                 int32_t capacity,
102                 UErrorCode* err){
103     UBool triedCode;
104     UErrorCode internalErrorCode;
105     int32_t length;
106 
107     if(U_FAILURE(*err)) {
108         return 0;
109     }
110     if(nameOrAbbrOrLocale==nullptr ||
111             (fillIn == nullptr ? capacity != 0 : capacity < 0)) {
112         *err = U_ILLEGAL_ARGUMENT_ERROR;
113         return 0;
114     }
115 
116     triedCode = false;
117     const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-');
118     if (lastSepPtr==nullptr) {
119         lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_');
120     }
121     // Favor interpretation of nameOrAbbrOrLocale as a script alias if either
122     // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc.
123     // 2. The last instance of -/_ is at offset 3, and the portion after that is
124     //    longer than 4 characters (i.e. not a script or region code). This handles
125     //    Old_Hungarian, Old_Italic, etc. ("old" is a valid language code)
126     // 3. The last instance of -/_ is at offset 7, and the portion after that is
127     //    3 characters. This handles New_Tai_Lue ("new" is a valid language code).
128     if (lastSepPtr==nullptr
129             || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8)
130             || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) {
131         /* try long and abbreviated script names first */
132         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
133         if(code!=USCRIPT_INVALID_CODE) {
134             return setOneCode(code, fillIn, capacity, err);
135         }
136         triedCode = true;
137     }
138     internalErrorCode = U_ZERO_ERROR;
139     length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
140     if(U_FAILURE(*err) || length != 0) {
141         return length;
142     }
143     icu::CharString likely;
144     {
145         icu::CharStringByteSink sink(&likely);
146         ulocimp_addLikelySubtags(nameOrAbbrOrLocale, sink, &internalErrorCode);
147     }
148     if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
149         length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
150         if(U_FAILURE(*err) || length != 0) {
151             return length;
152         }
153     }
154     if(!triedCode) {
155         /* still not found .. try long and abbreviated script names again */
156         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
157         if(code!=USCRIPT_INVALID_CODE) {
158             return setOneCode(code, fillIn, capacity, err);
159         }
160     }
161     return 0;
162 }
163