• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 2006 George Staikos <staikos@kde.org>
3  *  Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
4  *  Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #ifndef WTF_UNICODE_QT4_H
24 #define WTF_UNICODE_QT4_H
25 
26 #include "UnicodeMacrosFromICU.h"
27 
28 #include <QChar>
29 #include <QString>
30 
31 #include <config.h>
32 
33 #include <stdint.h>
34 #if USE(QT_ICU_TEXT_BREAKING)
35 #include <unicode/ubrk.h>
36 #endif
37 
38 QT_BEGIN_NAMESPACE
39 namespace QUnicodeTables {
40     struct Properties {
41         ushort category : 8;
42         ushort line_break_class : 8;
43         ushort direction : 8;
44         ushort combiningClass :8;
45         ushort joining : 2;
46         signed short digitValue : 6; /* 5 needed */
47         ushort unicodeVersion : 4;
48         ushort lowerCaseSpecial : 1;
49         ushort upperCaseSpecial : 1;
50         ushort titleCaseSpecial : 1;
51         ushort caseFoldSpecial : 1; /* currently unused */
52         signed short mirrorDiff : 16;
53         signed short lowerCaseDiff : 16;
54         signed short upperCaseDiff : 16;
55         signed short titleCaseDiff : 16;
56         signed short caseFoldDiff : 16;
57     };
58     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4);
59     Q_CORE_EXPORT const Properties * QT_FASTCALL properties(ushort ucs2);
60 }
61 QT_END_NAMESPACE
62 
63 // ugly hack to make UChar compatible with JSChar in API/JSStringRef.h
64 #if defined(Q_OS_WIN) || COMPILER(WINSCW) || (COMPILER(RVCT) && !OS(LINUX))
65 typedef wchar_t UChar;
66 #else
67 typedef uint16_t UChar;
68 #endif
69 
70 #if !USE(QT_ICU_TEXT_BREAKING)
71 typedef uint32_t UChar32;
72 #endif
73 
74 namespace WTF {
75 namespace Unicode {
76 
77 enum Direction {
78     LeftToRight = QChar::DirL,
79     RightToLeft = QChar::DirR,
80     EuropeanNumber = QChar::DirEN,
81     EuropeanNumberSeparator = QChar::DirES,
82     EuropeanNumberTerminator = QChar::DirET,
83     ArabicNumber = QChar::DirAN,
84     CommonNumberSeparator = QChar::DirCS,
85     BlockSeparator = QChar::DirB,
86     SegmentSeparator = QChar::DirS,
87     WhiteSpaceNeutral = QChar::DirWS,
88     OtherNeutral = QChar::DirON,
89     LeftToRightEmbedding = QChar::DirLRE,
90     LeftToRightOverride = QChar::DirLRO,
91     RightToLeftArabic = QChar::DirAL,
92     RightToLeftEmbedding = QChar::DirRLE,
93     RightToLeftOverride = QChar::DirRLO,
94     PopDirectionalFormat = QChar::DirPDF,
95     NonSpacingMark = QChar::DirNSM,
96     BoundaryNeutral = QChar::DirBN
97 };
98 
99 enum DecompositionType {
100     DecompositionNone = QChar::NoDecomposition,
101     DecompositionCanonical = QChar::Canonical,
102     DecompositionCompat = QChar::Compat,
103     DecompositionCircle = QChar::Circle,
104     DecompositionFinal = QChar::Final,
105     DecompositionFont = QChar::Font,
106     DecompositionFraction = QChar::Fraction,
107     DecompositionInitial = QChar::Initial,
108     DecompositionIsolated = QChar::Isolated,
109     DecompositionMedial = QChar::Medial,
110     DecompositionNarrow = QChar::Narrow,
111     DecompositionNoBreak = QChar::NoBreak,
112     DecompositionSmall = QChar::Small,
113     DecompositionSquare = QChar::Square,
114     DecompositionSub = QChar::Sub,
115     DecompositionSuper = QChar::Super,
116     DecompositionVertical = QChar::Vertical,
117     DecompositionWide = QChar::Wide
118 };
119 
120 enum CharCategory {
121     NoCategory = 0,
122     Mark_NonSpacing = U_MASK(QChar::Mark_NonSpacing),
123     Mark_SpacingCombining = U_MASK(QChar::Mark_SpacingCombining),
124     Mark_Enclosing = U_MASK(QChar::Mark_Enclosing),
125     Number_DecimalDigit = U_MASK(QChar::Number_DecimalDigit),
126     Number_Letter = U_MASK(QChar::Number_Letter),
127     Number_Other = U_MASK(QChar::Number_Other),
128     Separator_Space = U_MASK(QChar::Separator_Space),
129     Separator_Line = U_MASK(QChar::Separator_Line),
130     Separator_Paragraph = U_MASK(QChar::Separator_Paragraph),
131     Other_Control = U_MASK(QChar::Other_Control),
132     Other_Format = U_MASK(QChar::Other_Format),
133     Other_Surrogate = U_MASK(QChar::Other_Surrogate),
134     Other_PrivateUse = U_MASK(QChar::Other_PrivateUse),
135     Other_NotAssigned = U_MASK(QChar::Other_NotAssigned),
136     Letter_Uppercase = U_MASK(QChar::Letter_Uppercase),
137     Letter_Lowercase = U_MASK(QChar::Letter_Lowercase),
138     Letter_Titlecase = U_MASK(QChar::Letter_Titlecase),
139     Letter_Modifier = U_MASK(QChar::Letter_Modifier),
140     Letter_Other = U_MASK(QChar::Letter_Other),
141     Punctuation_Connector = U_MASK(QChar::Punctuation_Connector),
142     Punctuation_Dash = U_MASK(QChar::Punctuation_Dash),
143     Punctuation_Open = U_MASK(QChar::Punctuation_Open),
144     Punctuation_Close = U_MASK(QChar::Punctuation_Close),
145     Punctuation_InitialQuote = U_MASK(QChar::Punctuation_InitialQuote),
146     Punctuation_FinalQuote = U_MASK(QChar::Punctuation_FinalQuote),
147     Punctuation_Other = U_MASK(QChar::Punctuation_Other),
148     Symbol_Math = U_MASK(QChar::Symbol_Math),
149     Symbol_Currency = U_MASK(QChar::Symbol_Currency),
150     Symbol_Modifier = U_MASK(QChar::Symbol_Modifier),
151     Symbol_Other = U_MASK(QChar::Symbol_Other)
152 };
153 
154 
155 // FIXME: handle surrogates correctly in all methods
156 
toLower(UChar32 ch)157 inline UChar32 toLower(UChar32 ch)
158 {
159     return QChar::toLower(uint32_t(ch));
160 }
161 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)162 inline int toLower(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
163 {
164     const UChar *e = src + srcLength;
165     const UChar *s = src;
166     UChar *r = result;
167     uint rindex = 0;
168 
169     // this avoids one out of bounds check in the loop
170     if (s < e && QChar(*s).isLowSurrogate()) {
171         if (r)
172             r[rindex] = *s++;
173         ++rindex;
174     }
175 
176     int needed = 0;
177     while (s < e && (rindex < uint(resultLength) || !r)) {
178         uint c = *s;
179         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
180             c = QChar::surrogateToUcs4(*(s - 1), c);
181         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
182         if (prop->lowerCaseSpecial) {
183             QString qstring;
184             if (c < 0x10000) {
185                 qstring += QChar(c);
186             } else {
187                 qstring += QChar(*(s-1));
188                 qstring += QChar(*s);
189             }
190             qstring = qstring.toLower();
191             for (int i = 0; i < qstring.length(); ++i) {
192                 if (rindex >= uint(resultLength)) {
193                     needed += qstring.length() - i;
194                     break;
195                 }
196                 if (r)
197                     r[rindex] = qstring.at(i).unicode();
198                 ++rindex;
199             }
200         } else {
201             if (r)
202                 r[rindex] = *s + prop->lowerCaseDiff;
203             ++rindex;
204         }
205         ++s;
206     }
207     if (s < e)
208         needed += e - s;
209     *error = (needed != 0);
210     if (rindex < uint(resultLength))
211         r[rindex] = 0;
212     return rindex + needed;
213 }
214 
toUpper(UChar32 c)215 inline UChar32 toUpper(UChar32 c)
216 {
217     return QChar::toUpper(uint32_t(c));
218 }
219 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)220 inline int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
221 {
222     const UChar *e = src + srcLength;
223     const UChar *s = src;
224     UChar *r = result;
225     int rindex = 0;
226 
227     // this avoids one out of bounds check in the loop
228     if (s < e && QChar(*s).isLowSurrogate()) {
229         if (r)
230             r[rindex] = *s++;
231         ++rindex;
232     }
233 
234     int needed = 0;
235     while (s < e && (rindex < resultLength || !r)) {
236         uint c = *s;
237         if (QChar(c).isLowSurrogate() && QChar(*(s - 1)).isHighSurrogate())
238             c = QChar::surrogateToUcs4(*(s - 1), c);
239         const QUnicodeTables::Properties *prop = QUnicodeTables::properties(c);
240         if (prop->upperCaseSpecial) {
241             QString qstring;
242             if (c < 0x10000) {
243                 qstring += QChar(c);
244             } else {
245                 qstring += QChar(*(s-1));
246                 qstring += QChar(*s);
247             }
248             qstring = qstring.toUpper();
249             for (int i = 0; i < qstring.length(); ++i) {
250                 if (rindex >= resultLength) {
251                     needed += qstring.length() - i;
252                     break;
253                 }
254                 if (r)
255                     r[rindex] = qstring.at(i).unicode();
256                 ++rindex;
257             }
258         } else {
259             if (r)
260                 r[rindex] = *s + prop->upperCaseDiff;
261             ++rindex;
262         }
263         ++s;
264     }
265     if (s < e)
266         needed += e - s;
267     *error = (needed != 0);
268     if (rindex < resultLength)
269         r[rindex] = 0;
270     return rindex + needed;
271 }
272 
toTitleCase(UChar32 c)273 inline int toTitleCase(UChar32 c)
274 {
275     return QChar::toTitleCase(uint32_t(c));
276 }
277 
foldCase(UChar32 c)278 inline UChar32 foldCase(UChar32 c)
279 {
280     return QChar::toCaseFolded(uint32_t(c));
281 }
282 
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)283 inline int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength,  bool* error)
284 {
285     // FIXME: handle special casing. Easiest with some low level API in Qt
286     *error = false;
287     if (resultLength < srcLength) {
288         *error = true;
289         return srcLength;
290     }
291     for (int i = 0; i < srcLength; ++i)
292         result[i] = QChar::toCaseFolded(ushort(src[i]));
293     return srcLength;
294 }
295 
isArabicChar(UChar32 c)296 inline bool isArabicChar(UChar32 c)
297 {
298     return c >= 0x0600 && c <= 0x06FF;
299 }
300 
isPrintableChar(UChar32 c)301 inline bool isPrintableChar(UChar32 c)
302 {
303     const uint test = U_MASK(QChar::Other_Control) |
304                       U_MASK(QChar::Other_NotAssigned);
305     return !(U_MASK(QChar::category(uint32_t(c))) & test);
306 }
307 
isSeparatorSpace(UChar32 c)308 inline bool isSeparatorSpace(UChar32 c)
309 {
310     return QChar::category(uint32_t(c)) == QChar::Separator_Space;
311 }
312 
isPunct(UChar32 c)313 inline bool isPunct(UChar32 c)
314 {
315     const uint test = U_MASK(QChar::Punctuation_Connector) |
316                       U_MASK(QChar::Punctuation_Dash) |
317                       U_MASK(QChar::Punctuation_Open) |
318                       U_MASK(QChar::Punctuation_Close) |
319                       U_MASK(QChar::Punctuation_InitialQuote) |
320                       U_MASK(QChar::Punctuation_FinalQuote) |
321                       U_MASK(QChar::Punctuation_Other);
322     return U_MASK(QChar::category(uint32_t(c))) & test;
323 }
324 
isLower(UChar32 c)325 inline bool isLower(UChar32 c)
326 {
327     return QChar::category(uint32_t(c)) == QChar::Letter_Lowercase;
328 }
329 
hasLineBreakingPropertyComplexContext(UChar32)330 inline bool hasLineBreakingPropertyComplexContext(UChar32)
331 {
332     // FIXME: Implement this to return whether the character has line breaking property SA (Complex Context).
333     return false;
334 }
335 
mirroredChar(UChar32 c)336 inline UChar32 mirroredChar(UChar32 c)
337 {
338     return QChar::mirroredChar(uint32_t(c));
339 }
340 
combiningClass(UChar32 c)341 inline uint8_t combiningClass(UChar32 c)
342 {
343     return QChar::combiningClass(uint32_t(c));
344 }
345 
decompositionType(UChar32 c)346 inline DecompositionType decompositionType(UChar32 c)
347 {
348     return (DecompositionType)QChar::decompositionTag(c);
349 }
350 
umemcasecmp(const UChar * a,const UChar * b,int len)351 inline int umemcasecmp(const UChar* a, const UChar* b, int len)
352 {
353     // handle surrogates correctly
354     for (int i = 0; i < len; ++i) {
355         uint c1 = QChar::toCaseFolded(ushort(a[i]));
356         uint c2 = QChar::toCaseFolded(ushort(b[i]));
357         if (c1 != c2)
358             return c1 - c2;
359     }
360     return 0;
361 }
362 
direction(UChar32 c)363 inline Direction direction(UChar32 c)
364 {
365     return (Direction)QChar::direction(uint32_t(c));
366 }
367 
category(UChar32 c)368 inline CharCategory category(UChar32 c)
369 {
370     return (CharCategory) U_MASK(QChar::category(uint32_t(c)));
371 }
372 
373 } }
374 
375 #endif // WTF_UNICODE_QT4_H
376