• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3  *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef WTF_UNICODE_QT4_H
24 #define WTF_UNICODE_QT4_H
25 
26 #include <QChar>
27 #include <QString>
28 
29 #include <config.h>
30 
31 #include <stdint.h>
32 
33 #if QT_VERSION >= 0x040300
34 QT_BEGIN_NAMESPACE
// Local declaration of the QUnicodeTables property record and its lookup
// functions, so the buffer-based toLower()/toUpper() helpers further down in
// this header can query per-character case data directly.
// NOTE(review): presumably this has to stay field-for-field identical to the
// Properties struct of the QtCore build being linked against -- confirm before
// touching any field order or width.
35 namespace QUnicodeTables {
36     struct Properties {
37         ushort category : 8;
38         ushort line_break_class : 8;
39         ushort direction : 8;
40         ushort combiningClass :8;
41         ushort joining : 2;
42         signed short digitValue : 6; /* 5 needed */
43         ushort unicodeVersion : 4;
        // When lowerCaseSpecial / upperCaseSpecial is set, the buffer-based
        // toLower() / toUpper() in this header map the character through
        // QString instead of applying the simple *CaseDiff offset.
44         ushort lowerCaseSpecial : 1;
45         ushort upperCaseSpecial : 1;
46         ushort titleCaseSpecial : 1;
47         ushort caseFoldSpecial : 1; /* currently unused */
48         signed short mirrorDiff : 16;
        // lowerCaseDiff / upperCaseDiff are added to a UTF-16 code unit by the
        // buffer-based toLower() / toUpper() to obtain the converted unit.
49         signed short lowerCaseDiff : 16;
50         signed short upperCaseDiff : 16;
51         signed short titleCaseDiff : 16;
52         signed short caseFoldDiff : 16;
53     };
    // Property lookup, overloaded for a UCS-4 code point and a UCS-2 code unit.
    // Only declared here; Q_CORE_EXPORT marks them as coming from elsewhere.
54     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
55     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
56 }
57 QT_END_NAMESPACE
58 #endif
59 
// ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
#if defined(Q_OS_WIN)
typedef wchar_t UChar;
#else
typedef uint16_t UChar;
#endif
typedef uint32_t UChar32;

// Some defines from ICU.
//
// All of these are macros: arguments may be evaluated more than once, so do
// not pass expressions with side effects.

// True if c is a UTF-16 lead (high) surrogate, 0xD800..0xDBFF.
#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
// True if c is a UTF-16 trail (low) surrogate, 0xDC00..0xDFFF.
#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
// Constant folded into U16_GET_SUPPLEMENTARY so a pair is combined with one
// shift, one add and one subtract.
#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
// Combines a lead/trail surrogate pair into the supplementary code point.
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)

// Lead / trail surrogate for a supplementary code point (>= 0x10000).
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

// True if c is any surrogate code unit, 0xD800..0xDFFF.
#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
// Only meaningful when c is already known to be a surrogate.
#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)

// Reads the code point starting at s[i] into c and advances i past it.
// A lead surrogate is combined with the following unit only when that unit
// lies before 'length' and is a trail surrogate; otherwise c is the lone unit.
// Wrapped in do/while(0) so the macro behaves as a single statement (e.g. in
// an unbraced if/else); it must be followed by a semicolon.
#define U16_NEXT(s, i, length, c) do { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t wtfU16Trail_; \
        if((i)<(length) && U16_IS_TRAIL(wtfU16Trail_=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), wtfU16Trail_); \
        } \
    } \
} while (0)

// Moves i back over the code point ending just before s[i] and stores it in c.
// A trail surrogate is combined with the preceding unit only when that unit
// is not before 'start' and is a lead surrogate. Same statement-like usage as
// U16_NEXT.
#define U16_PREV(s, start, i, c) do { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t wtfU16Lead_; \
        if((i)>(start) && U16_IS_LEAD(wtfU16Lead_=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(wtfU16Lead_, (c)); \
        } \
    } \
} while (0)

// Single-bit mask with bit number x set.
#define U_MASK(x) ((uint32_t)1<<(x))
107 
108 namespace WTF {
109 namespace Unicode {
110 
111 enum Direction {
112     LeftToRight = QChar::DirL,
113     RightToLeft = QChar::DirR,
114     EuropeanNumber = QChar::DirEN,
115     EuropeanNumberSeparator = QChar::DirES,
116     EuropeanNumberTerminator = QChar::DirET,
117     ArabicNumber = QChar::DirAN,
118     CommonNumberSeparator = QChar::DirCS,
119     BlockSeparator = QChar::DirB,
120     SegmentSeparator = QChar::DirS,
121     WhiteSpaceNeutral = QChar::DirWS,
122     OtherNeutral = QChar::DirON,
123     LeftToRightEmbedding = QChar::DirLRE,
124     LeftToRightOverride = QChar::DirLRO,
125     RightToLeftArabic = QChar::DirAL,
126     RightToLeftEmbedding = QChar::DirRLE,
127     RightToLeftOverride = QChar::DirRLO,
128     PopDirectionalFormat = QChar::DirPDF,
129     NonSpacingMark = QChar::DirNSM,
130     BoundaryNeutral = QChar::DirBN
131 };
132 
133 enum DecompositionType {
134     DecompositionNone = QChar::NoDecomposition,
135     DecompositionCanonical = QChar::Canonical,
136     DecompositionCompat = QChar::Compat,
137     DecompositionCircle = QChar::Circle,
138     DecompositionFinal = QChar::Final,
139     DecompositionFont = QChar::Font,
140     DecompositionFraction = QChar::Fraction,
141     DecompositionInitial = QChar::Initial,
142     DecompositionIsolated = QChar::Isolated,
143     DecompositionMedial = QChar::Medial,
144     DecompositionNarrow = QChar::Narrow,
145     DecompositionNoBreak = QChar::NoBreak,
146     DecompositionSmall = QChar::Small,
147     DecompositionSquare = QChar::Square,
148     DecompositionSub = QChar::Sub,
149     DecompositionSuper = QChar::Super,
150     DecompositionVertical = QChar::Vertical,
151     DecompositionWide = QChar::Wide
152 };
153 
154 enum CharCategory {
155     NoCategory = 0,
156     Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
157     Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
158     Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
159     Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
160     Number_Letter = U_MASK(QChar::Number_Letter),
161     Number_Other = U_MASK(QChar::Number_Other),
162     Separator_Space = U_MASK(QChar::Separator_Space),
163     Separator_Line = U_MASK(QChar::Separator_Line),
164     Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
165     Other_Control = U_MASK(QChar::Other_Control),
166     Other_Format = U_MASK(QChar::Other_Format),
167     Other_Surrogate = U_MASK(QChar::Other_Surrogate),
168     Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
169     Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
170     Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
171     Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
172     Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
173     Letter_Modifier = U_MASK(QChar::Letter_Modifier),
174     Letter_Other = U_MASK(QChar::Letter_Other),
175     Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
176     Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
177     Punctuation_Open = U_MASK(QChar::Punctuation_Open),
178     Punctuation_Close = U_MASK(QChar::Punctuation_Close),
179     Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
180     Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
181     Punctuation_Other = U_MASK(QChar::Punctuation_Other),
182     Symbol_Math = U_MASK(QChar::Symbol_Math),
183     Symbol_Currency = U_MASK(QChar::Symbol_Currency),
184     Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
185     Symbol_Other = U_MASK(QChar::Symbol_Other)
186 };
187 
188 
189 #if QT_VERSION >= 0x040300
190 
191 // FIXME: handle surrogates correctly in all methods
192 
toLower(UChar32 ch)193 inline UChar32 toLower(UChar32 ch)
194 {
195     return QChar::toLower(ch);
196 }
197 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)198 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
199 {
200     const UChar *e = src + srcLength;
201     const UChar *s = src;
202     UChar *r = result;
203     uint rindex = 0;
204 
205     // this avoids one out of bounds check in the loop
206     if (s < e && QChar(*s).isLowSurrogate()) {
207         if (r)
208             r[rindex] = *s++;
209         ++rindex;
210     }
211 
212     int needed = 0;
213     while (s < e && (rindex < uint(resultLength) || !r)) {
214         uint c = *s;
215         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
216             c = QChar::surrogateToUcs4(*(s - 1), c);
217         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
218         if (prop->lowerCaseSpecial) {
219             QString qstring;
220             if (c < 0x10000) {
221                 qstring += QChar(c);
222             } else {
223                 qstring += QChar(*(s-1));
224                 qstring += QChar(*s);
225             }
226             qstring = qstring.toLower();
227             for (int i = 0; i < qstring.length(); ++i) {
228                 if (rindex >= uint(resultLength)) {
229                     needed += qstring.length() - i;
230                     break;
231                 }
232                 if (r)
233                     r[rindex] = qstring.at(i).unicode();
234                 ++rindex;
235             }
236         } else {
237             if (r)
238                 r[rindex] = *s + prop->lowerCaseDiff;
239             ++rindex;
240         }
241         ++s;
242     }
243     if (s < e)
244         needed += e - s;
245     *error = (needed != 0);
246     if (rindex < uint(resultLength))
247         r[rindex] = 0;
248     return rindex + needed;
249 }
250 
toUpper(UChar32 ch)251 inline UChar32 toUpper(UChar32 ch)
252 {
253     return QChar::toUpper(ch);
254 }
255 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)256 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
257 {
258     const UChar *e = src + srcLength;
259     const UChar *s = src;
260     UChar *r = result;
261     int rindex = 0;
262 
263     // this avoids one out of bounds check in the loop
264     if (s < e && QChar(*s).isLowSurrogate()) {
265         if (r)
266             r[rindex] = *s++;
267         ++rindex;
268     }
269 
270     int needed = 0;
271     while (s < e && (rindex < resultLength || !r)) {
272         uint c = *s;
273         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
274             c = QChar::surrogateToUcs4(*(s - 1), c);
275         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
276         if (prop->upperCaseSpecial) {
277             QString qstring;
278             if (c < 0x10000) {
279                 qstring += QChar(c);
280             } else {
281                 qstring += QChar(*(s-1));
282                 qstring += QChar(*s);
283             }
284             qstring = qstring.toUpper();
285             for (int i = 0; i < qstring.length(); ++i) {
286                 if (rindex >= resultLength) {
287                     needed += qstring.length() - i;
288                     break;
289                 }
290                 if (r)
291                     r[rindex] = qstring.at(i).unicode();
292                 ++rindex;
293             }
294         } else {
295             if (r)
296                 r[rindex] = *s + prop->upperCaseDiff;
297             ++rindex;
298         }
299         ++s;
300     }
301     if (s < e)
302         needed += e - s;
303     *error = (needed != 0);
304     if (rindex < resultLength)
305         r[rindex] = 0;
306     return rindex + needed;
307 }
308 
toTitleCase(UChar32 c)309 inline int toTitleCase(UChar32 c)
310 {
311     return QChar::toTitleCase(c);
312 }
313 
foldCase(UChar32 c)314 inline UChar32 foldCase(UChar32 c)
315 {
316     return QChar::toCaseFolded(c);
317 }
318 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)319 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
320 {
321     // FIXME: handle special casing. Easiest with some low level API in Qt
322     *error = false;
323     if (resultLength < srcLength) {
324         *error = true;
325         return srcLength;
326     }
327     for (int i = 0; i < srcLength; ++i)
328         result[i] = QChar::toCaseFolded(ushort(src[i]));
329     return srcLength;
330 }
331 
isArabicChar(UChar32 c)332 inline bool isArabicChar(UChar32 c)
333 {
334     return c >= 0x0600 && c <= 0x06FF;
335 }
336 
isPrintableChar(UChar32 c)337 inline bool isPrintableChar(UChar32 c)
338 {
339     const uint test = U_MASK(QChar::Other_Control) |
340                       U_MASK(QChar::Other_NotAssigned);
341     return !(U_MASK(QChar::category(c)) & test);
342 }
343 
isSeparatorSpace(UChar32 c)344 inline bool isSeparatorSpace(UChar32 c)
345 {
346     return QChar::category(c) == QChar::Separator_Space;
347 }
348 
isPunct(UChar32 c)349 inline bool isPunct(UChar32 c)
350 {
351     const uint test = U_MASK(QChar::Punctuation_Connector) |
352                       U_MASK(QChar::Punctuation_Dash) |
353                       U_MASK(QChar::Punctuation_Open) |
354                       U_MASK(QChar::Punctuation_Close) |
355                       U_MASK(QChar::Punctuation_InitialQuote) |
356                       U_MASK(QChar::Punctuation_FinalQuote) |
357                       U_MASK(QChar::Punctuation_Other);
358     return U_MASK(QChar::category(c)) & test;
359 }
360 
isLower(UChar32 c)361 inline bool isLower(UChar32 c)
362 {
363     return QChar::category(c) == QChar::Letter_Lowercase;
364 }
365 
hasLineBreakingPropertyComplexContext(UChar32)366 inline bool hasLineBreakingPropertyComplexContext(UChar32)
367 {
368     // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
369     return false;
370 }
371 
mirroredChar(UChar32 c)372 inline UChar32 mirroredChar(UChar32 c)
373 {
374     return QChar::mirroredChar(c);
375 }
376 
combiningClass(UChar32 c)377 inline uint8_t combiningClass(UChar32 c)
378 {
379     return QChar::combiningClass(c);
380 }
381 
decompositionType(UChar32 c)382 inline DecompositionType decompositionType(UChar32 c)
383 {
384     return (DecompositionType)QChar::decompositionTag(c);
385 }
386 
umemcasecmp(const UChar * a,const UChar * b,int len)387 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
388 {
389     // handle surrogates correctly
390     for (int i = 0; i < len; ++i) {
391         uint c1 = QChar::toCaseFolded(ushort(a[i]));
392         uint c2 = QChar::toCaseFolded(ushort(b[i]));
393         if (c1 != c2)
394             return c1 - c2;
395     }
396     return 0;
397 }
398 
direction(UChar32 c)399 inline Direction direction(UChar32 c)
400 {
401     return (Direction)QChar::direction(c);
402 }
403 
category(UChar32 c)404 inline CharCategory category(UChar32 c)
405 {
406     return (CharCategory) U_MASK(QChar::category(c));
407 }
408 
409 #else
410 
toLower(UChar32 ch)411 inline UChar32 toLower(UChar32 ch)
412 {
413     if (ch > 0xffff)
414         return ch;
415     return QChar((unsigned short)ch).toLower().unicode();
416 }
417 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)418 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
419 {
420   *error = false;
421   if (resultLength < srcLength) {
422     *error = true;
423     return srcLength;
424   }
425   for (int i = 0; i < srcLength; ++i)
426     result[i] = QChar(src[i]).toLower().unicode();
427   return srcLength;
428 }
429 
toUpper(UChar32 ch)430 inline UChar32 toUpper(UChar32 ch)
431 {
432     if (ch > 0xffff)
433         return ch;
434     return QChar((unsigned short)ch).toUpper().unicode();
435 }
436 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)437 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
438 {
439     *error = false;
440     if (resultLength < srcLength) {
441         *error = true;
442         return srcLength;
443     }
444     for (int i = 0; i < srcLength; ++i)
445         result[i] = QChar(src[i]).toUpper().unicode();
446     return srcLength;
447 }
448 
toTitleCase(UChar32 c)449 inline int toTitleCase(UChar32 c)
450 {
451     if (c > 0xffff)
452         return c;
453     return QChar((unsigned short)c).toUpper().unicode();
454 }
455 
foldCase(UChar32 c)456 inline UChar32 foldCase(UChar32 c)
457 {
458     if (c > 0xffff)
459         return c;
460     return QChar((unsigned short)c).toLower().unicode();
461 }
462 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)463 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
464 {
465     return toLower(result, resultLength, src, srcLength, error);
466 }
467 
isPrintableChar(UChar32 c)468 inline bool isPrintableChar(UChar32 c)
469 {
470     return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPrint();
471 }
472 
isArabicChar(UChar32 c)473 inline bool isArabicChar(UChar32 c)
474 {
475     return c >= 0x0600 && c <= 0x06FF;
476 }
477 
isSeparatorSpace(UChar32 c)478 inline bool isSeparatorSpace(UChar32 c)
479 {
480     return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Separator_Space;
481 }
482 
isPunct(UChar32 c)483 inline bool isPunct(UChar32 c)
484 {
485     return (c & 0xffff0000) == 0 && QChar((unsigned short)c).isPunct();
486 }
487 
isLower(UChar32 c)488 inline bool isLower(UChar32 c)
489 {
490     return (c & 0xffff0000) == 0 && QChar((unsigned short)c).category() == QChar::Letter_Lowercase;
491 }
492 
mirroredChar(UChar32 c)493 inline UChar32 mirroredChar(UChar32 c)
494 {
495     if (c > 0xffff)
496         return c;
497     return QChar(c).mirroredChar().unicode();
498 }
499 
combiningClass(UChar32 c)500 inline uint8_t combiningClass(UChar32 c)
501 {
502     if (c > 0xffff)
503         return 0;
504     return QChar((unsigned short)c).combiningClass();
505 }
506 
decompositionType(UChar32 c)507 inline DecompositionType decompositionType(UChar32 c)
508 {
509     if (c > 0xffff)
510         return DecompositionNone;
511     return (DecompositionType)QChar(c).decompositionTag();
512 }
513 
umemcasecmp(const UChar * a,const UChar * b,int len)514 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
515 {
516     for (int i = 0; i < len; ++i) {
517         QChar c1 = QChar(a[i]).toLower();
518         QChar c2 = QChar(b[i]).toLower();
519         if (c1 != c2)
520         return c1.unicode() - c2.unicode();
521     }
522     return 0;
523 }
524 
direction(UChar32 c)525 inline Direction direction(UChar32 c)
526 {
527     if (c > 0xffff)
528         return LeftToRight;
529     return (Direction)QChar(c).direction();
530 }
531 
category(UChar32 c)532 inline CharCategory category(UChar32 c)
533 {
534     if (c > 0xffff)
535         return NoCategory;
536     return (CharCategory) U_MASK(QChar(c).category());
537 }
538 
539 #endif
540 
541 } }
542 
543 #endif // WTF_UNICODE_QT4_H
544