• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
/*
*******************************************************************************
*   Copyright (C) 2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  uscript_props.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2013feb16
*   created by: Markus W. Scherer
*/
14 
15 #include "unicode/utypes.h"
16 #include "unicode/unistr.h"
17 #include "unicode/uscript.h"
18 #include "unicode/utf16.h"
19 #include "ustr_imp.h"
20 
// Element count of a true array (not a pointer), as a signed 32-bit value.
// The entire expansion is parenthesized so that an operator next to the macro
// at the use site can never bind to the cast alone.
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
22 
23 namespace {
24 
// Script metadata (script properties).
// See http://unicode.org/cldr/trac/browser/trunk/common/properties/scriptMetadata.txt
//
// Each script is described by one packed int32_t:
//   0           = NOT_ENCODED, no sample character, default false script properties.
//   Bits 20.. 0 : sample character
//   Bits 23..21 : usage
//   Bits 31..24 : single-bit flags

// Bits 23..21: usage
const int32_t UNKNOWN = 1 << 21;
const int32_t EXCLUSION = 2 << 21;
const int32_t LIMITED_USE = 3 << 21;
const int32_t ASPIRATIONAL = 4 << 21;
const int32_t RECOMMENDED = 5 << 21;

// Bits 31..24: Single-bit flags
const int32_t RTL = 1 << 24;
const int32_t LB_LETTERS = 1 << 25;
const int32_t CASED = 1 << 26;
42 
43 const int32_t SCRIPT_PROPS[] = {
44     // Begin copy-paste output from
45     // tools/trunk/unicode/py/parsescriptmetadata.py
46     0x0040 | UNKNOWN,  // Zyyy
47     0x0308 | UNKNOWN,  // Zinh
48     0x0628 | RECOMMENDED | RTL,  // Arab
49     0x0531 | RECOMMENDED | CASED,  // Armn
50     0x0995 | RECOMMENDED,  // Beng
51     0x3105 | RECOMMENDED | LB_LETTERS,  // Bopo
52     0x13C4 | LIMITED_USE,  // Cher
53     0x03E2 | EXCLUSION | CASED,  // Copt
54     0x042F | RECOMMENDED | CASED,  // Cyrl
55     0x10414 | EXCLUSION | CASED,  // Dsrt
56     0x0905 | RECOMMENDED,  // Deva
57     0x12A0 | RECOMMENDED,  // Ethi
58     0x10D3 | RECOMMENDED,  // Geor
59     0x10330 | EXCLUSION,  // Goth
60     0x03A9 | RECOMMENDED | CASED,  // Grek
61     0x0A95 | RECOMMENDED,  // Gujr
62     0x0A15 | RECOMMENDED,  // Guru
63     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hani
64     0xAC00 | RECOMMENDED,  // Hang
65     0x05D0 | RECOMMENDED | RTL,  // Hebr
66     0x304B | RECOMMENDED | LB_LETTERS,  // Hira
67     0x0C95 | RECOMMENDED,  // Knda
68     0x30AB | RECOMMENDED | LB_LETTERS,  // Kana
69     0x1780 | RECOMMENDED | LB_LETTERS,  // Khmr
70     0x0EA5 | RECOMMENDED | LB_LETTERS,  // Laoo
71     0x004C | RECOMMENDED | CASED,  // Latn
72     0x0D15 | RECOMMENDED,  // Mlym
73     0x1826 | ASPIRATIONAL,  // Mong
74     0x1000 | RECOMMENDED | LB_LETTERS,  // Mymr
75     0x168F | EXCLUSION,  // Ogam
76     0x10300 | EXCLUSION,  // Ital
77     0x0B15 | RECOMMENDED,  // Orya
78     0x16A0 | EXCLUSION,  // Runr
79     0x0D85 | RECOMMENDED,  // Sinh
80     0x0710 | LIMITED_USE | RTL,  // Syrc
81     0x0B95 | RECOMMENDED,  // Taml
82     0x0C15 | RECOMMENDED,  // Telu
83     0x078C | RECOMMENDED | RTL,  // Thaa
84     0x0E17 | RECOMMENDED | LB_LETTERS,  // Thai
85     0x0F40 | RECOMMENDED,  // Tibt
86     0x14C0 | ASPIRATIONAL,  // Cans
87     0xA288 | ASPIRATIONAL | LB_LETTERS,  // Yiii
88     0x1703 | EXCLUSION,  // Tglg
89     0x1723 | EXCLUSION,  // Hano
90     0x1743 | EXCLUSION,  // Buhd
91     0x1763 | EXCLUSION,  // Tagb
92     0x2800 | UNKNOWN,  // Brai
93     0x10800 | EXCLUSION | RTL,  // Cprt
94     0x1900 | LIMITED_USE,  // Limb
95     0x10000 | EXCLUSION,  // Linb
96     0x10480 | EXCLUSION,  // Osma
97     0x10450 | EXCLUSION,  // Shaw
98     0x1950 | LIMITED_USE | LB_LETTERS,  // Tale
99     0x10380 | EXCLUSION,  // Ugar
100     0,
101     0x1A00 | EXCLUSION,  // Bugi
102     0x2C00 | EXCLUSION | CASED,  // Glag
103     0x10A00 | EXCLUSION | RTL,  // Khar
104     0xA800 | LIMITED_USE,  // Sylo
105     0x1980 | LIMITED_USE | LB_LETTERS,  // Talu
106     0x2D30 | ASPIRATIONAL,  // Tfng
107     0x103A0 | EXCLUSION,  // Xpeo
108     0x1B05 | LIMITED_USE | LB_LETTERS,  // Bali
109     0x1BC0 | LIMITED_USE,  // Batk
110     0,
111     0x11005 | EXCLUSION,  // Brah
112     0xAA00 | LIMITED_USE,  // Cham
113     0,
114     0,
115     0,
116     0,
117     0x13153 | EXCLUSION,  // Egyp
118     0,
119     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hans
120     0x5B57 | RECOMMENDED | LB_LETTERS,  // Hant
121     0,
122     0,
123     0,
124     0xA984 | LIMITED_USE | LB_LETTERS,  // Java
125     0xA90A | LIMITED_USE,  // Kali
126     0,
127     0,
128     0x1C00 | LIMITED_USE,  // Lepc
129     0,
130     0x0840 | LIMITED_USE | RTL,  // Mand
131     0,
132     0x10980 | EXCLUSION | RTL,  // Mero
133     0x07CA | LIMITED_USE | RTL,  // Nkoo
134     0x10C00 | EXCLUSION | RTL,  // Orkh
135     0,
136     0xA840 | EXCLUSION,  // Phag
137     0x10900 | EXCLUSION | RTL,  // Phnx
138     0x16F00 | ASPIRATIONAL,  // Plrd
139     0,
140     0,
141     0,
142     0,
143     0,
144     0,
145     0xA549 | LIMITED_USE,  // Vaii
146     0,
147     0x12000 | EXCLUSION,  // Xsux
148     0,
149     0xFDD0 | UNKNOWN,  // Zzzz
150     0x102A0 | EXCLUSION,  // Cari
151     0x304B | RECOMMENDED | LB_LETTERS,  // Jpan
152     0x1A20 | LIMITED_USE | LB_LETTERS,  // Lana
153     0x10280 | EXCLUSION,  // Lyci
154     0x10920 | EXCLUSION | RTL,  // Lydi
155     0x1C5A | LIMITED_USE,  // Olck
156     0xA930 | EXCLUSION,  // Rjng
157     0xA882 | LIMITED_USE,  // Saur
158     0,
159     0x1B83 | LIMITED_USE,  // Sund
160     0,
161     0xABC0 | LIMITED_USE,  // Mtei
162     0x10840 | EXCLUSION | RTL,  // Armi
163     0x10B00 | EXCLUSION | RTL,  // Avst
164     0x11103 | LIMITED_USE,  // Cakm
165     0xAC00 | RECOMMENDED,  // Kore
166     0x11083 | EXCLUSION,  // Kthi
167     0,
168     0x10B60 | EXCLUSION | RTL,  // Phli
169     0,
170     0,
171     0x10B40 | EXCLUSION | RTL,  // Prti
172     0x0800 | EXCLUSION | RTL,  // Samr
173     0xAA80 | LIMITED_USE | LB_LETTERS,  // Tavt
174     0,
175     0,
176     0xA6A0 | LIMITED_USE,  // Bamu
177     0xA4D0 | LIMITED_USE,  // Lisu
178     0,
179     0x10A60 | EXCLUSION | RTL,  // Sarb
180     0,
181     0,
182     0,
183     0,
184     0,
185     0,
186     0,
187     0x109A0 | EXCLUSION | RTL,  // Merc
188     0,
189     0,
190     0,
191     0,
192     0,
193     0,
194     0,
195     0,
196     0,
197     0x11183 | EXCLUSION,  // Shrd
198     0x110D0 | EXCLUSION,  // Sora
199     0x11680 | EXCLUSION,  // Takr
200     0,
201     0,
202     0,
203     0,
204     0,
205     // End copy-paste from parsescriptmetadata.py
206 };
207 
getScriptProps(UScriptCode script)208 int32_t getScriptProps(UScriptCode script) {
209     if (0 <= script && script < LENGTHOF(SCRIPT_PROPS)) {
210         return SCRIPT_PROPS[script];
211     } else {
212         return 0;
213     }
214 }
215 
216 }  // namespace
217 
218 U_CAPI int32_t U_EXPORT2
uscript_getSampleString(UScriptCode script,UChar * dest,int32_t capacity,UErrorCode * pErrorCode)219 uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode) {
220     if(U_FAILURE(*pErrorCode)) { return 0; }
221     if(capacity < 0 || (capacity > 0 && dest == NULL)) {
222         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
223         return 0;
224     }
225     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
226     int32_t length;
227     if(sampleChar == 0) {
228         length = 0;
229     } else {
230         length = U16_LENGTH(sampleChar);
231         if(length <= capacity) {
232             int32_t i = 0;
233             U16_APPEND_UNSAFE(dest, i, sampleChar);
234         }
235     }
236     return u_terminateUChars(dest, capacity, length, pErrorCode);
237 }
238 
239 U_COMMON_API icu::UnicodeString U_EXPORT2
uscript_getSampleUnicodeString(UScriptCode script)240 uscript_getSampleUnicodeString(UScriptCode script) {
241     icu::UnicodeString sample;
242     int32_t sampleChar = getScriptProps(script) & 0x1fffff;
243     if(sampleChar != 0) {
244         sample.append(sampleChar);
245     }
246     return sample;
247 }
248 
249 U_CAPI UScriptUsage U_EXPORT2
uscript_getUsage(UScriptCode script)250 uscript_getUsage(UScriptCode script) {
251     return (UScriptUsage)((getScriptProps(script) >> 21) & 7);
252 }
253 
254 U_CAPI UBool U_EXPORT2
uscript_isRightToLeft(UScriptCode script)255 uscript_isRightToLeft(UScriptCode script) {
256     return (getScriptProps(script) & RTL) != 0;
257 }
258 
259 U_CAPI UBool U_EXPORT2
uscript_breaksBetweenLetters(UScriptCode script)260 uscript_breaksBetweenLetters(UScriptCode script) {
261     return (getScriptProps(script) & LB_LETTERS) != 0;
262 }
263 
264 U_CAPI UBool U_EXPORT2
uscript_isCased(UScriptCode script)265 uscript_isCased(UScriptCode script) {
266     return (getScriptProps(script) & CASED) != 0;
267 }
268