• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3  *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef WTF_UNICODE_QT4_H
24 #define WTF_UNICODE_QT4_H
25 
26 #include <QChar>
27 #include <QString>
28 
29 #include <config.h>
30 
31 #include <stdint.h>
32 
33 QT_BEGIN_NAMESPACE
// Local declaration of the per-character property record and the lookup
// functions exported from QtCore (Q_CORE_EXPORT), so that the string
// case-mapping helpers further down can read the case "diff" and
// "special casing" fields directly.
// NOTE(review): presumably this bit-field layout has to match the definition
// used inside the Qt library being linked against -- confirm when the Qt
// version changes.
namespace QUnicodeTables {
    struct Properties {
        ushort category : 8;
        ushort line_break_class : 8;
        ushort direction : 8;
        ushort combiningClass :8;
        ushort joining : 2;
        signed short digitValue : 6; /* 5 needed */
        ushort unicodeVersion : 4;
        // One-bit flags; the string toLower()/toUpper() helpers in this file
        // test lowerCaseSpecial/upperCaseSpecial to decide whether to go
        // through QString instead of adding the plain case diff.
        ushort lowerCaseSpecial : 1;
        ushort upperCaseSpecial : 1;
        ushort titleCaseSpecial : 1;
        ushort caseFoldSpecial : 1; /* currently unused */
        // Signed offsets; lowerCaseDiff/upperCaseDiff are added to a code
        // unit by the string toLower()/toUpper() helpers in this file.
        signed short mirrorDiff : 16;
        signed short lowerCaseDiff : 16;
        signed short upperCaseDiff : 16;
        signed short titleCaseDiff : 16;
        signed short caseFoldDiff : 16;
    };
    // Property lookup for a UCS-4 code point and for a single UCS-2 code unit.
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
    Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
}
56 QT_END_NAMESPACE
57 
// Ugly hack to make UChar compatible with JSChar in API/JSStringRef.h:
// UChar is wchar_t on the toolchains selected below and uint16_t everywhere
// else.
#if defined(Q_OS_WIN) || COMPILER(WINSCW) || COMPILER(RVCT)
typedef wchar_t UChar;
#else
typedef uint16_t UChar;
#endif
// 32-bit type used by the functions in this header for whole code points.
typedef uint32_t UChar32;
65 
// Some defines from ICU.
// All of these are macros: arguments may be evaluated more than once, so do
// not pass expressions with side effects.

// True when c lies in 0xD800..0xDBFF (lead surrogate).
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
// True when c lies in 0xDC00..0xDFFF (trail surrogate).
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
// Constant subtracted by U16_GET_SUPPLEMENTARY after shifting and adding the
// two surrogate values.
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
// Combines a lead and a trail surrogate into one supplementary code point.
// Does not check that the arguments really are surrogates.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)

// Lead / trail surrogate code unit for a supplementary code point.
#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)

// True when c lies in 0xD800..0xDFFF (any surrogate).
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
// True when c is not a surrogate.
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only tests bit 0x400, so the result is meaningful only when c is already
// known to be a surrogate.
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

// Reads the code point starting at s[i] into c and post-increments i past it.
// A lead surrogate is combined with the following unit only if that unit is
// inside [i, length) and is a trail surrogate; otherwise c is left holding the
// single code unit. Expands to a braced block, not an expression.
#define U16_NEXT(s, i, length, c) { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)<(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
}

// Pre-decrements i and reads the code point ending just before the old i
// into c. A trail surrogate is combined with the preceding unit only if
// i > start and that unit is a lead surrogate. Expands to a braced block.
#define U16_PREV(s, start, i, c) { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
}

// 32-bit mask with only bit number x set.
#define U_MASK(x) ((uint32_t)1<<(x))
105 
106 namespace WTF {
107 namespace Unicode {
108 
// Directional classes. Every enumerator has the numeric value of the
// corresponding QChar::Dir* constant, which is what allows direction() in
// this header to cast QChar::direction()'s result straight to this type.
enum Direction {
    LeftToRight = QChar::DirL,
    RightToLeft = QChar::DirR,
    EuropeanNumber = QChar::DirEN,
    EuropeanNumberSeparator = QChar::DirES,
    EuropeanNumberTerminator = QChar::DirET,
    ArabicNumber = QChar::DirAN,
    CommonNumberSeparator = QChar::DirCS,
    BlockSeparator = QChar::DirB,
    SegmentSeparator = QChar::DirS,
    WhiteSpaceNeutral = QChar::DirWS,
    OtherNeutral = QChar::DirON,
    LeftToRightEmbedding = QChar::DirLRE,
    LeftToRightOverride = QChar::DirLRO,
    RightToLeftArabic = QChar::DirAL,
    RightToLeftEmbedding = QChar::DirRLE,
    RightToLeftOverride = QChar::DirRLO,
    PopDirectionalFormat = QChar::DirPDF,
    NonSpacingMark = QChar::DirNSM,
    BoundaryNeutral = QChar::DirBN
};
130 
// Decomposition tags. Every enumerator has the numeric value of the
// corresponding QChar decomposition constant, which is what allows
// decompositionType() in this header to cast QChar::decompositionTag()'s
// result straight to this type.
enum DecompositionType {
    DecompositionNone = QChar::NoDecomposition,
    DecompositionCanonical = QChar::Canonical,
    DecompositionCompat = QChar::Compat,
    DecompositionCircle = QChar::Circle,
    DecompositionFinal = QChar::Final,
    DecompositionFont = QChar::Font,
    DecompositionFraction = QChar::Fraction,
    DecompositionInitial = QChar::Initial,
    DecompositionIsolated = QChar::Isolated,
    DecompositionMedial = QChar::Medial,
    DecompositionNarrow = QChar::Narrow,
    DecompositionNoBreak = QChar::NoBreak,
    DecompositionSmall = QChar::Small,
    DecompositionSquare = QChar::Square,
    DecompositionSub = QChar::Sub,
    DecompositionSuper = QChar::Super,
    DecompositionVertical = QChar::Vertical,
    DecompositionWide = QChar::Wide
};
151 
// General categories expressed as single-bit masks: each enumerator is
// U_MASK() of the matching QChar category value, i.e. 1 shifted left by that
// value, so several categories can be OR-ed together and tested at once.
// NoCategory is the empty mask.
enum CharCategory {
    NoCategory = 0,
    Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
    Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
    Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
    Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
    Number_Letter = U_MASK(QChar::Number_Letter),
    Number_Other = U_MASK(QChar::Number_Other),
    Separator_Space = U_MASK(QChar::Separator_Space),
    Separator_Line = U_MASK(QChar::Separator_Line),
    Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
    Other_Control = U_MASK(QChar::Other_Control),
    Other_Format = U_MASK(QChar::Other_Format),
    Other_Surrogate = U_MASK(QChar::Other_Surrogate),
    Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
    Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
    Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
    Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
    Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
    Letter_Modifier = U_MASK(QChar::Letter_Modifier),
    Letter_Other = U_MASK(QChar::Letter_Other),
    Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
    Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
    Punctuation_Open = U_MASK(QChar::Punctuation_Open),
    Punctuation_Close = U_MASK(QChar::Punctuation_Close),
    Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
    Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
    Punctuation_Other = U_MASK(QChar::Punctuation_Other),
    Symbol_Math = U_MASK(QChar::Symbol_Math),
    Symbol_Currency = U_MASK(QChar::Symbol_Currency),
    Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
    Symbol_Other = U_MASK(QChar::Symbol_Other)
};
185 
186 
187 // FIXME: handle surrogates correctly in all methods
188 
toLower(UChar32 ch)189 inline UChar32 toLower(UChar32 ch)
190 {
191     return QChar::toLower(ch);
192 }
193 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)194 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
195 {
196     const UChar *e = src + srcLength;
197     const UChar *s = src;
198     UChar *r = result;
199     uint rindex = 0;
200 
201     // this avoids one out of bounds check in the loop
202     if (s < e && QChar(*s).isLowSurrogate()) {
203         if (r)
204             r[rindex] = *s++;
205         ++rindex;
206     }
207 
208     int needed = 0;
209     while (s < e && (rindex < uint(resultLength) || !r)) {
210         uint c = *s;
211         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
212             c = QChar::surrogateToUcs4(*(s - 1), c);
213         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
214         if (prop->lowerCaseSpecial) {
215             QString qstring;
216             if (c < 0x10000) {
217                 qstring += QChar(c);
218             } else {
219                 qstring += QChar(*(s-1));
220                 qstring += QChar(*s);
221             }
222             qstring = qstring.toLower();
223             for (int i = 0; i < qstring.length(); ++i) {
224                 if (rindex >= uint(resultLength)) {
225                     needed += qstring.length() - i;
226                     break;
227                 }
228                 if (r)
229                     r[rindex] = qstring.at(i).unicode();
230                 ++rindex;
231             }
232         } else {
233             if (r)
234                 r[rindex] = *s + prop->lowerCaseDiff;
235             ++rindex;
236         }
237         ++s;
238     }
239     if (s < e)
240         needed += e - s;
241     *error = (needed != 0);
242     if (rindex < uint(resultLength))
243         r[rindex] = 0;
244     return rindex + needed;
245 }
246 
toUpper(UChar32 ch)247 inline UChar32 toUpper(UChar32 ch)
248 {
249     return QChar::toUpper(ch);
250 }
251 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)252 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
253 {
254     const UChar *e = src + srcLength;
255     const UChar *s = src;
256     UChar *r = result;
257     int rindex = 0;
258 
259     // this avoids one out of bounds check in the loop
260     if (s < e && QChar(*s).isLowSurrogate()) {
261         if (r)
262             r[rindex] = *s++;
263         ++rindex;
264     }
265 
266     int needed = 0;
267     while (s < e && (rindex < resultLength || !r)) {
268         uint c = *s;
269         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
270             c = QChar::surrogateToUcs4(*(s - 1), c);
271         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
272         if (prop->upperCaseSpecial) {
273             QString qstring;
274             if (c < 0x10000) {
275                 qstring += QChar(c);
276             } else {
277                 qstring += QChar(*(s-1));
278                 qstring += QChar(*s);
279             }
280             qstring = qstring.toUpper();
281             for (int i = 0; i < qstring.length(); ++i) {
282                 if (rindex >= resultLength) {
283                     needed += qstring.length() - i;
284                     break;
285                 }
286                 if (r)
287                     r[rindex] = qstring.at(i).unicode();
288                 ++rindex;
289             }
290         } else {
291             if (r)
292                 r[rindex] = *s + prop->upperCaseDiff;
293             ++rindex;
294         }
295         ++s;
296     }
297     if (s < e)
298         needed += e - s;
299     *error = (needed != 0);
300     if (rindex < resultLength)
301         r[rindex] = 0;
302     return rindex + needed;
303 }
304 
toTitleCase(UChar32 c)305 inline int toTitleCase(UChar32 c)
306 {
307     return QChar::toTitleCase(c);
308 }
309 
foldCase(UChar32 c)310 inline UChar32 foldCase(UChar32 c)
311 {
312     return QChar::toCaseFolded(c);
313 }
314 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)315 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
316 {
317     // FIXME: handle special casing. Easiest with some low level API in Qt
318     *error = false;
319     if (resultLength < srcLength) {
320         *error = true;
321         return srcLength;
322     }
323     for (int i = 0; i < srcLength; ++i)
324         result[i] = QChar::toCaseFolded(ushort(src[i]));
325     return srcLength;
326 }
327 
isArabicChar(UChar32 c)328 inline bool isArabicChar(UChar32 c)
329 {
330     return c >= 0x0600 && c <= 0x06FF;
331 }
332 
isPrintableChar(UChar32 c)333 inline bool isPrintableChar(UChar32 c)
334 {
335     const uint test = U_MASK(QChar::Other_Control) |
336                       U_MASK(QChar::Other_NotAssigned);
337     return !(U_MASK(QChar::category(c)) & test);
338 }
339 
isSeparatorSpace(UChar32 c)340 inline bool isSeparatorSpace(UChar32 c)
341 {
342     return QChar::category(c) == QChar::Separator_Space;
343 }
344 
isPunct(UChar32 c)345 inline bool isPunct(UChar32 c)
346 {
347     const uint test = U_MASK(QChar::Punctuation_Connector) |
348                       U_MASK(QChar::Punctuation_Dash) |
349                       U_MASK(QChar::Punctuation_Open) |
350                       U_MASK(QChar::Punctuation_Close) |
351                       U_MASK(QChar::Punctuation_InitialQuote) |
352                       U_MASK(QChar::Punctuation_FinalQuote) |
353                       U_MASK(QChar::Punctuation_Other);
354     return U_MASK(QChar::category(c)) & test;
355 }
356 
isLower(UChar32 c)357 inline bool isLower(UChar32 c)
358 {
359     return QChar::category(c) == QChar::Letter_Lowercase;
360 }
361 
hasLineBreakingPropertyComplexContext(UChar32)362 inline bool hasLineBreakingPropertyComplexContext(UChar32)
363 {
364     // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
365     return false;
366 }
367 
mirroredChar(UChar32 c)368 inline UChar32 mirroredChar(UChar32 c)
369 {
370     return QChar::mirroredChar(c);
371 }
372 
combiningClass(UChar32 c)373 inline uint8_t combiningClass(UChar32 c)
374 {
375     return QChar::combiningClass(c);
376 }
377 
decompositionType(UChar32 c)378 inline DecompositionType decompositionType(UChar32 c)
379 {
380     return (DecompositionType)QChar::decompositionTag(c);
381 }
382 
umemcasecmp(const UChar * a,const UChar * b,int len)383 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
384 {
385     // handle surrogates correctly
386     for (int i = 0; i < len; ++i) {
387         uint c1 = QChar::toCaseFolded(ushort(a[i]));
388         uint c2 = QChar::toCaseFolded(ushort(b[i]));
389         if (c1 != c2)
390             return c1 - c2;
391     }
392     return 0;
393 }
394 
direction(UChar32 c)395 inline Direction direction(UChar32 c)
396 {
397     return (Direction)QChar::direction(c);
398 }
399 
category(UChar32 c)400 inline CharCategory category(UChar32 c)
401 {
402     return (CharCategory) U_MASK(QChar::category(c));
403 }
404 
405 } }
406 
407 #endif // WTF_UNICODE_QT4_H
408