1 /*
2 * Copyright (C) 2006 George Staikos <staikos@kde.org>
3 * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4 * Copyright (C) 2007 Apple Computer, Inc. All rights reserved.
5 * Copyright (C) 2007-2009 Torch Mobile, Inc.
6 * Copyright (C) 2010 Company 100, Inc.
7 *
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
12 *
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
17 *
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
22 *
23 */
24
25 #ifndef UnicodeBrew_h
26 #define UnicodeBrew_h
27
28 #include "UnicodeFromICU.h"
29 #include "UnicodeMacrosFromICU.h"
30
31 namespace WTF {
32 namespace Unicode {
33
34 enum Direction {
35 LeftToRight = ICU::U_LEFT_TO_RIGHT,
36 RightToLeft = ICU::U_RIGHT_TO_LEFT,
37 EuropeanNumber = ICU::U_EUROPEAN_NUMBER,
38 EuropeanNumberSeparator = ICU::U_EUROPEAN_NUMBER_SEPARATOR,
39 EuropeanNumberTerminator = ICU::U_EUROPEAN_NUMBER_TERMINATOR,
40 ArabicNumber = ICU::U_ARABIC_NUMBER,
41 CommonNumberSeparator = ICU::U_COMMON_NUMBER_SEPARATOR,
42 BlockSeparator = ICU::U_BLOCK_SEPARATOR,
43 SegmentSeparator = ICU::U_SEGMENT_SEPARATOR,
44 WhiteSpaceNeutral = ICU::U_WHITE_SPACE_NEUTRAL,
45 OtherNeutral = ICU::U_OTHER_NEUTRAL,
46 LeftToRightEmbedding = ICU::U_LEFT_TO_RIGHT_EMBEDDING,
47 LeftToRightOverride = ICU::U_LEFT_TO_RIGHT_OVERRIDE,
48 RightToLeftArabic = ICU::U_RIGHT_TO_LEFT_ARABIC,
49 RightToLeftEmbedding = ICU::U_RIGHT_TO_LEFT_EMBEDDING,
50 RightToLeftOverride = ICU::U_RIGHT_TO_LEFT_OVERRIDE,
51 PopDirectionalFormat = ICU::U_POP_DIRECTIONAL_FORMAT,
52 NonSpacingMark = ICU::U_DIR_NON_SPACING_MARK,
53 BoundaryNeutral = ICU::U_BOUNDARY_NEUTRAL
54 };
55
56 enum DecompositionType {
57 DecompositionNone = ICU::U_DT_NONE,
58 DecompositionCanonical = ICU::U_DT_CANONICAL,
59 DecompositionCompat = ICU::U_DT_COMPAT,
60 DecompositionCircle = ICU::U_DT_CIRCLE,
61 DecompositionFinal = ICU::U_DT_FINAL,
62 DecompositionFont = ICU::U_DT_FONT,
63 DecompositionFraction = ICU::U_DT_FRACTION,
64 DecompositionInitial = ICU::U_DT_INITIAL,
65 DecompositionIsolated = ICU::U_DT_ISOLATED,
66 DecompositionMedial = ICU::U_DT_MEDIAL,
67 DecompositionNarrow = ICU::U_DT_NARROW,
68 DecompositionNoBreak = ICU::U_DT_NOBREAK,
69 DecompositionSmall = ICU::U_DT_SMALL,
70 DecompositionSquare = ICU::U_DT_SQUARE,
71 DecompositionSub = ICU::U_DT_SUB,
72 DecompositionSuper = ICU::U_DT_SUPER,
73 DecompositionVertical = ICU::U_DT_VERTICAL,
74 DecompositionWide = ICU::U_DT_WIDE,
75 };
76
77 enum CharCategory {
78 NoCategory = 0,
79 Other_NotAssigned = TO_MASK(ICU::U_GENERAL_OTHER_TYPES),
80 Letter_Uppercase = TO_MASK(ICU::U_UPPERCASE_LETTER),
81 Letter_Lowercase = TO_MASK(ICU::U_LOWERCASE_LETTER),
82 Letter_Titlecase = TO_MASK(ICU::U_TITLECASE_LETTER),
83 Letter_Modifier = TO_MASK(ICU::U_MODIFIER_LETTER),
84 Letter_Other = TO_MASK(ICU::U_OTHER_LETTER),
85
86 Mark_NonSpacing = TO_MASK(ICU::U_NON_SPACING_MARK),
87 Mark_Enclosing = TO_MASK(ICU::U_ENCLOSING_MARK),
88 Mark_SpacingCombining = TO_MASK(ICU::U_COMBINING_SPACING_MARK),
89
90 Number_DecimalDigit = TO_MASK(ICU::U_DECIMAL_DIGIT_NUMBER),
91 Number_Letter = TO_MASK(ICU::U_LETTER_NUMBER),
92 Number_Other = TO_MASK(ICU::U_OTHER_NUMBER),
93
94 Separator_Space = TO_MASK(ICU::U_SPACE_SEPARATOR),
95 Separator_Line = TO_MASK(ICU::U_LINE_SEPARATOR),
96 Separator_Paragraph = TO_MASK(ICU::U_PARAGRAPH_SEPARATOR),
97
98 Other_Control = TO_MASK(ICU::U_CONTROL_CHAR),
99 Other_Format = TO_MASK(ICU::U_FORMAT_CHAR),
100 Other_PrivateUse = TO_MASK(ICU::U_PRIVATE_USE_CHAR),
101 Other_Surrogate = TO_MASK(ICU::U_SURROGATE),
102
103 Punctuation_Dash = TO_MASK(ICU::U_DASH_PUNCTUATION),
104 Punctuation_Open = TO_MASK(ICU::U_START_PUNCTUATION),
105 Punctuation_Close = TO_MASK(ICU::U_END_PUNCTUATION),
106 Punctuation_Connector = TO_MASK(ICU::U_CONNECTOR_PUNCTUATION),
107 Punctuation_Other = TO_MASK(ICU::U_OTHER_PUNCTUATION),
108
109 Symbol_Math = TO_MASK(ICU::U_MATH_SYMBOL),
110 Symbol_Currency = TO_MASK(ICU::U_CURRENCY_SYMBOL),
111 Symbol_Modifier = TO_MASK(ICU::U_MODIFIER_SYMBOL),
112 Symbol_Other = TO_MASK(ICU::U_OTHER_SYMBOL),
113
114 Punctuation_InitialQuote = TO_MASK(ICU::U_INITIAL_PUNCTUATION),
115 Punctuation_FinalQuote = TO_MASK(ICU::U_FINAL_PUNCTUATION)
116 };
117
118 UChar foldCase(UChar);
119
120 int foldCase(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
121
122 int toLower(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
123
124 UChar toUpper(UChar);
125 UChar toLower(UChar);
126
127 bool isUpper(UChar);
128
129 int toUpper(UChar* result, int resultLength, const UChar* source, int sourceLength, bool* isError);
130
131 UChar toTitleCase(UChar);
132
isArabicChar(UChar32 c)133 inline bool isArabicChar(UChar32 c)
134 {
135 return c >= 0x0600 && c <= 0x06FF;
136 }
137
138 bool isAlphanumeric(UChar);
139
140 CharCategory category(unsigned int);
141
isSeparatorSpace(UChar c)142 inline bool isSeparatorSpace(UChar c)
143 {
144 return category(c) == Separator_Space;
145 }
146
147 bool isPrintableChar(UChar);
148
149 bool isDigit(UChar);
150
151 bool isPunct(UChar);
152
hasLineBreakingPropertyComplexContext(UChar32)153 inline bool hasLineBreakingPropertyComplexContext(UChar32)
154 {
155 // FIXME: implement!
156 return false;
157 }
158
hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)159 inline bool hasLineBreakingPropertyComplexContextOrIdeographic(UChar32 c)
160 {
161 // FIXME
162 return false;
163 }
164
165 UChar mirroredChar(UChar32);
166
167 Direction direction(UChar32);
168
169 bool isLower(UChar);
170
171 int digitValue(UChar);
172
173 unsigned char combiningClass(UChar32);
174
175 DecompositionType decompositionType(UChar32);
176
umemcasecmp(const UChar * a,const UChar * b,int len)177 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
178 {
179 for (int i = 0; i < len; ++i) {
180 UChar c1 = foldCase(a[i]);
181 UChar c2 = foldCase(b[i]);
182 if (c1 != c2)
183 return c1 - c2;
184 }
185 return 0;
186 }
187
188 bool isSpace(UChar);
189 bool isLetter(UChar);
190
191 } // namespace Unicode
192 } // namespace WTF
193
194 #endif
195