• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3  *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef WTF_UNICODE_ICU_H
24 #define WTF_UNICODE_ICU_H
25 
26 #include <unicode/brkiter.h>
27 #include <unicode/rbbi.h>
28 #include <unicode/uchar.h>
29 #include <unicode/uscript.h>
30 #include <unicode/ustring.h>
31 #include <unicode/utf16.h>
32 
33 namespace WTF {
34 
35 namespace Unicode {
36 
37 enum Direction {
38     LeftToRight = U_LEFT_TO_RIGHT,
39     RightToLeft = U_RIGHT_TO_LEFT,
40     EuropeanNumber = U_EUROPEAN_NUMBER,
41     EuropeanNumberSeparator = U_EUROPEAN_NUMBER_SEPARATOR,
42     EuropeanNumberTerminator = U_EUROPEAN_NUMBER_TERMINATOR,
43     ArabicNumber = U_ARABIC_NUMBER,
44     CommonNumberSeparator = U_COMMON_NUMBER_SEPARATOR,
45     BlockSeparator = U_BLOCK_SEPARATOR,
46     SegmentSeparator = U_SEGMENT_SEPARATOR,
47     WhiteSpaceNeutral = U_WHITE_SPACE_NEUTRAL,
48     OtherNeutral = U_OTHER_NEUTRAL,
49     LeftToRightEmbedding = U_LEFT_TO_RIGHT_EMBEDDING,
50     LeftToRightOverride = U_LEFT_TO_RIGHT_OVERRIDE,
51     RightToLeftArabic = U_RIGHT_TO_LEFT_ARABIC,
52     RightToLeftEmbedding = U_RIGHT_TO_LEFT_EMBEDDING,
53     RightToLeftOverride = U_RIGHT_TO_LEFT_OVERRIDE,
54     PopDirectionalFormat = U_POP_DIRECTIONAL_FORMAT,
55     NonSpacingMark = U_DIR_NON_SPACING_MARK,
56     BoundaryNeutral = U_BOUNDARY_NEUTRAL
57 };
58 
59 enum DecompositionType {
60     DecompositionNone = U_DT_NONE,
61     DecompositionCanonical = U_DT_CANONICAL,
62     DecompositionCompat = U_DT_COMPAT,
63     DecompositionCircle = U_DT_CIRCLE,
64     DecompositionFinal = U_DT_FINAL,
65     DecompositionFont = U_DT_FONT,
66     DecompositionFraction = U_DT_FRACTION,
67     DecompositionInitial = U_DT_INITIAL,
68     DecompositionIsolated = U_DT_ISOLATED,
69     DecompositionMedial = U_DT_MEDIAL,
70     DecompositionNarrow = U_DT_NARROW,
71     DecompositionNoBreak = U_DT_NOBREAK,
72     DecompositionSmall = U_DT_SMALL,
73     DecompositionSquare = U_DT_SQUARE,
74     DecompositionSub = U_DT_SUB,
75     DecompositionSuper = U_DT_SUPER,
76     DecompositionVertical = U_DT_VERTICAL,
77     DecompositionWide = U_DT_WIDE,
78 };
79 
80 enum CharCategory {
81     NoCategory =  0,
82     Other_NotAssigned = U_MASK(U_GENERAL_OTHER_TYPES),
83     Letter_Uppercase = U_MASK(U_UPPERCASE_LETTER),
84     Letter_Lowercase = U_MASK(U_LOWERCASE_LETTER),
85     Letter_Titlecase = U_MASK(U_TITLECASE_LETTER),
86     Letter_Modifier = U_MASK(U_MODIFIER_LETTER),
87     Letter_Other = U_MASK(U_OTHER_LETTER),
88 
89     Mark_NonSpacing = U_MASK(U_NON_SPACING_MARK),
90     Mark_Enclosing = U_MASK(U_ENCLOSING_MARK),
91     Mark_SpacingCombining = U_MASK(U_COMBINING_SPACING_MARK),
92 
93     Number_DecimalDigit = U_MASK(U_DECIMAL_DIGIT_NUMBER),
94     Number_Letter = U_MASK(U_LETTER_NUMBER),
95     Number_Other = U_MASK(U_OTHER_NUMBER),
96 
97     Separator_Space = U_MASK(U_SPACE_SEPARATOR),
98     Separator_Line = U_MASK(U_LINE_SEPARATOR),
99     Separator_Paragraph = U_MASK(U_PARAGRAPH_SEPARATOR),
100 
101     Other_Control = U_MASK(U_CONTROL_CHAR),
102     Other_Format = U_MASK(U_FORMAT_CHAR),
103     Other_PrivateUse = U_MASK(U_PRIVATE_USE_CHAR),
104     Other_Surrogate = U_MASK(U_SURROGATE),
105 
106     Punctuation_Dash = U_MASK(U_DASH_PUNCTUATION),
107     Punctuation_Open = U_MASK(U_START_PUNCTUATION),
108     Punctuation_Close = U_MASK(U_END_PUNCTUATION),
109     Punctuation_Connector = U_MASK(U_CONNECTOR_PUNCTUATION),
110     Punctuation_Other = U_MASK(U_OTHER_PUNCTUATION),
111 
112     Symbol_Math = U_MASK(U_MATH_SYMBOL),
113     Symbol_Currency = U_MASK(U_CURRENCY_SYMBOL),
114     Symbol_Modifier = U_MASK(U_MODIFIER_SYMBOL),
115     Symbol_Other = U_MASK(U_OTHER_SYMBOL),
116 
117     Punctuation_InitialQuote = U_MASK(U_INITIAL_PUNCTUATION),
118     Punctuation_FinalQuote = U_MASK(U_FINAL_PUNCTUATION)
119 };
120 
foldCase(UChar32 c)121 inline UChar32 foldCase(UChar32 c)
122 {
123     return u_foldCase(c, U_FOLD_CASE_DEFAULT);
124 }
125 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)126 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
127 {
128     UErrorCode status = U_ZERO_ERROR;
129     int realLength = u_strFoldCase(result, resultLength, src, srcLength, U_FOLD_CASE_DEFAULT, &status);
130     *error = !U_SUCCESS(status);
131     return realLength;
132 }
133 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)134 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
135 {
136     UErrorCode status = U_ZERO_ERROR;
137     int realLength = u_strToLower(result, resultLength, src, srcLength, "", &status);
138     *error = !!U_FAILURE(status);
139     return realLength;
140 }
141 
toLower(UChar32 c)142 inline UChar32 toLower(UChar32 c)
143 {
144     return u_tolower(c);
145 }
146 
toUpper(UChar32 c)147 inline UChar32 toUpper(UChar32 c)
148 {
149     return u_toupper(c);
150 }
151 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)152 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
153 {
154     UErrorCode status = U_ZERO_ERROR;
155     int realLength = u_strToUpper(result, resultLength, src, srcLength, "", &status);
156     *error = !!U_FAILURE(status);
157     return realLength;
158 }
159 
toTitleCase(UChar32 c)160 inline UChar32 toTitleCase(UChar32 c)
161 {
162     return u_totitle(c);
163 }
164 
isArabicChar(UChar32 c)165 inline bool isArabicChar(UChar32 c)
166 {
167       return ublock_getCode(c) == UBLOCK_ARABIC;
168 }
169 
isAlphanumeric(UChar32 c)170 inline bool isAlphanumeric(UChar32 c)
171 {
172     return u_isalnum(c);
173 }
174 
isSeparatorSpace(UChar32 c)175 inline bool isSeparatorSpace(UChar32 c)
176 {
177     return u_charType(c) == U_SPACE_SEPARATOR;
178 }
179 
isPrintableChar(UChar32 c)180 inline bool isPrintableChar(UChar32 c)
181 {
182     return !!u_isprint(c);
183 }
184 
isPunct(UChar32 c)185 inline bool isPunct(UChar32 c)
186 {
187     return !!u_ispunct(c);
188 }
189 
hasLineBreakingPropertyComplexContext(UChar32 c)190 inline bool hasLineBreakingPropertyComplexContext(UChar32 c)
191 {
192     return u_getIntPropertyValue(c, UCHAR_LINE_BREAK) == U_LB_COMPLEX_CONTEXT;
193 }
194 
mirroredChar(UChar32 c)195 inline UChar32 mirroredChar(UChar32 c)
196 {
197     return u_charMirror(c);
198 }
199 
category(UChar32 c)200 inline CharCategory category(UChar32 c)
201 {
202     return static_cast<CharCategory>(U_GET_GC_MASK(c));
203 }
204 
direction(UChar32 c)205 inline Direction direction(UChar32 c)
206 {
207     return static_cast<Direction>(u_charDirection(c));
208 }
209 
isLower(UChar32 c)210 inline bool isLower(UChar32 c)
211 {
212     return !!u_islower(c);
213 }
214 
combiningClass(UChar32 c)215 inline uint8_t combiningClass(UChar32 c)
216 {
217     return u_getCombiningClass(c);
218 }
219 
decompositionType(UChar32 c)220 inline DecompositionType decompositionType(UChar32 c)
221 {
222     return static_cast<DecompositionType>(u_getIntPropertyValue(c, UCHAR_DECOMPOSITION_TYPE));
223 }
224 
umemcasecmp(const UChar * a,const UChar * b,int len)225 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
226 {
227     return u_memcasecmp(a, b, len, U_FOLD_CASE_DEFAULT);
228 }
229 
230 } // namespace Unicode
231 
232 } // namespace WTF
233 
234 #endif // WTF_UNICODE_ICU_H
235