• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1997-2014, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8 *
9 * File USCRIPT.C
10 *
11 * Modification History:
12 *
13 *   Date        Name        Description
14 *   07/06/2001    Ram         Creation.
15 ******************************************************************************
16 */
17 
18 #include "unicode/uchar.h"
19 #include "unicode/uscript.h"
20 #include "unicode/uloc.h"
21 #include "charstr.h"
22 #include "cmemory.h"
23 #include "cstring.h"
24 #include "ulocimp.h"
25 
26 static const UScriptCode JAPANESE[3] = { USCRIPT_KATAKANA, USCRIPT_HIRAGANA, USCRIPT_HAN };
27 static const UScriptCode KOREAN[2] = { USCRIPT_HANGUL, USCRIPT_HAN };
28 static const UScriptCode HAN_BOPO[2] = { USCRIPT_HAN, USCRIPT_BOPOMOFO };
29 
30 static int32_t
setCodes(const UScriptCode * src,int32_t length,UScriptCode * dest,int32_t capacity,UErrorCode * err)31 setCodes(const UScriptCode *src, int32_t length,
32          UScriptCode *dest, int32_t capacity, UErrorCode *err) {
33     int32_t i;
34     if(U_FAILURE(*err)) { return 0; }
35     if(length > capacity) {
36         *err = U_BUFFER_OVERFLOW_ERROR;
37         return length;
38     }
39     for(i = 0; i < length; ++i) {
40         dest[i] = src[i];
41     }
42     return length;
43 }
44 
45 static int32_t
setOneCode(UScriptCode script,UScriptCode * scripts,int32_t capacity,UErrorCode * err)46 setOneCode(UScriptCode script, UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
47     if(U_FAILURE(*err)) { return 0; }
48     if(1 > capacity) {
49         *err = U_BUFFER_OVERFLOW_ERROR;
50         return 1;
51     }
52     scripts[0] = script;
53     return 1;
54 }
55 
56 static int32_t
getCodesFromLocale(const char * locale,UScriptCode * scripts,int32_t capacity,UErrorCode * err)57 getCodesFromLocale(const char *locale,
58                    UScriptCode *scripts, int32_t capacity, UErrorCode *err) {
59     if (U_FAILURE(*err)) { return 0; }
60     icu::CharString lang;
61     icu::CharString script;
62     ulocimp_getSubtags(locale, &lang, &script, nullptr, nullptr, nullptr, *err);
63     if (U_FAILURE(*err)) { return 0; }
64     // Multi-script languages, equivalent to the LocaleScript data
65     // that we used to load from locale resource bundles.
66     if (lang == "ja") {
67         return setCodes(JAPANESE, UPRV_LENGTHOF(JAPANESE), scripts, capacity, err);
68     }
69     if (lang == "ko") {
70         return setCodes(KOREAN, UPRV_LENGTHOF(KOREAN), scripts, capacity, err);
71     }
72     if (lang == "zh" && script == "Hant") {
73         return setCodes(HAN_BOPO, UPRV_LENGTHOF(HAN_BOPO), scripts, capacity, err);
74     }
75     // Explicit script code.
76     if (!script.isEmpty()) {
77         UScriptCode scriptCode = (UScriptCode)u_getPropertyValueEnum(UCHAR_SCRIPT, script.data());
78         if(scriptCode != USCRIPT_INVALID_CODE) {
79             if(scriptCode == USCRIPT_SIMPLIFIED_HAN || scriptCode == USCRIPT_TRADITIONAL_HAN) {
80                 scriptCode = USCRIPT_HAN;
81             }
82             return setOneCode(scriptCode, scripts, capacity, err);
83         }
84     }
85     return 0;
86 }
87 
88 /* TODO: this is a bad API and should be deprecated, ticket #11141 */
89 U_CAPI int32_t  U_EXPORT2
uscript_getCode(const char * nameOrAbbrOrLocale,UScriptCode * fillIn,int32_t capacity,UErrorCode * err)90 uscript_getCode(const char* nameOrAbbrOrLocale,
91                 UScriptCode* fillIn,
92                 int32_t capacity,
93                 UErrorCode* err){
94     UBool triedCode;
95     UErrorCode internalErrorCode;
96     int32_t length;
97 
98     if(U_FAILURE(*err)) {
99         return 0;
100     }
101     if(nameOrAbbrOrLocale==nullptr ||
102             (fillIn == nullptr ? capacity != 0 : capacity < 0)) {
103         *err = U_ILLEGAL_ARGUMENT_ERROR;
104         return 0;
105     }
106 
107     triedCode = false;
108     const char* lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '-');
109     if (lastSepPtr==nullptr) {
110         lastSepPtr = uprv_strrchr(nameOrAbbrOrLocale, '_');
111     }
112     // Favor interpretation of nameOrAbbrOrLocale as a script alias if either
113     // 1. nameOrAbbrOrLocale does not contain -/_. Handles Han, Mro, Nko, etc.
114     // 2. The last instance of -/_ is at offset 3, and the portion after that is
115     //    longer than 4 characters (i.e. not a script or region code). This handles
116     //    Old_Hungarian, Old_Italic, etc. ("old" is a valid language code)
117     // 3. The last instance of -/_ is at offset 7, and the portion after that is
118     //    3 characters. This handles New_Tai_Lue ("new" is a valid language code).
119     if (lastSepPtr==nullptr
120             || (lastSepPtr-nameOrAbbrOrLocale == 3 && uprv_strlen(nameOrAbbrOrLocale) > 8)
121             || (lastSepPtr-nameOrAbbrOrLocale == 7 && uprv_strlen(nameOrAbbrOrLocale) == 11) ) {
122         /* try long and abbreviated script names first */
123         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
124         if(code!=USCRIPT_INVALID_CODE) {
125             return setOneCode(code, fillIn, capacity, err);
126         }
127         triedCode = true;
128     }
129     internalErrorCode = U_ZERO_ERROR;
130     length = getCodesFromLocale(nameOrAbbrOrLocale, fillIn, capacity, err);
131     if(U_FAILURE(*err) || length != 0) {
132         return length;
133     }
134     icu::CharString likely = ulocimp_addLikelySubtags(nameOrAbbrOrLocale, internalErrorCode);
135     if(U_SUCCESS(internalErrorCode) && internalErrorCode != U_STRING_NOT_TERMINATED_WARNING) {
136         length = getCodesFromLocale(likely.data(), fillIn, capacity, err);
137         if(U_FAILURE(*err) || length != 0) {
138             return length;
139         }
140     }
141     if(!triedCode) {
142         /* still not found .. try long and abbreviated script names again */
143         UScriptCode code = (UScriptCode) u_getPropertyValueEnum(UCHAR_SCRIPT, nameOrAbbrOrLocale);
144         if(code!=USCRIPT_INVALID_CODE) {
145             return setOneCode(code, fillIn, capacity, err);
146         }
147     }
148     return 0;
149 }
150