• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  *  Copyright (C) 2008 Jürg Billeter <j@bitron.ch>
3  *  Copyright (C) 2008 Dominik Röttsches <dominik.roettsches@access-company.com>
4  *  Copyright (C) 2010 Igalia S.L.
5  *
6  *  This library is free software; you can redistribute it and/or
7  *  modify it under the terms of the GNU Library General Public
8  *  License as published by the Free Software Foundation; either
9  *  version 2 of the License, or (at your option) any later version.
10  *
11  *  This library is distributed in the hope that it will be useful,
12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  *  Library General Public License for more details.
15  *
16  *  You should have received a copy of the GNU Library General Public License
17  *  along with this library; see the file COPYING.LIB.  If not, write to
18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  *  Boston, MA 02110-1301, USA.
20  *
21  */
22 
23 #include "config.h"
24 #include "UnicodeGLib.h"
25 
26 #include <wtf/Vector.h>
27 #include <wtf/unicode/UTF8.h>
28 
// True when |character| lies in the range 0x10000..0x10FFFF, i.e. a code point that takes
// two UTF-16 code units (a surrogate pair). The argument is parenthesized so that
// expression arguments are evaluated as a whole.
#define UTF8_IS_SURROGATE(character) (((character) >= 0x10000) && ((character) <= 0x10FFFF))
30 
31 namespace WTF {
32 namespace Unicode {
33 
foldCase(UChar32 ch)34 UChar32 foldCase(UChar32 ch)
35 {
36     GOwnPtr<GError> gerror;
37 
38     GOwnPtr<char> utf8char;
39     utf8char.set(g_ucs4_to_utf8(reinterpret_cast<gunichar*>(&ch), 1, 0, 0, &gerror.outPtr()));
40     if (gerror)
41         return ch;
42 
43     GOwnPtr<char> utf8caseFolded;
44     utf8caseFolded.set(g_utf8_casefold(utf8char.get(), -1));
45 
46     GOwnPtr<gunichar> ucs4Result;
47     ucs4Result.set(g_utf8_to_ucs4_fast(utf8caseFolded.get(), -1, 0));
48 
49     return *ucs4Result;
50 }
51 
getUTF16LengthFromUTF8(const gchar * utf8String,int length)52 static int getUTF16LengthFromUTF8(const gchar* utf8String, int length)
53 {
54     int utf16Length = 0;
55     const gchar* inputString = utf8String;
56 
57     while ((utf8String + length - inputString > 0) && *inputString) {
58         gunichar character = g_utf8_get_char(inputString);
59 
60         utf16Length += UTF8_IS_SURROGATE(character) ? 2 : 1;
61         inputString = g_utf8_next_char(inputString);
62     }
63 
64     return utf16Length;
65 }
66 
67 typedef gchar* (*UTF8CaseFunction)(const gchar*, gssize length);
68 
convertCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error,UTF8CaseFunction caseFunction)69 static int convertCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error, UTF8CaseFunction caseFunction)
70 {
71     *error = false;
72 
73     // Allocate a buffer big enough to hold all the characters.
74     Vector<char> buffer(srcLength * 3);
75     char* utf8Target = buffer.data();
76     const UChar* utf16Source = src;
77     ConversionResult conversionResult = convertUTF16ToUTF8(&utf16Source, utf16Source + srcLength, &utf8Target, utf8Target + buffer.size(), true);
78     if (conversionResult != conversionOK) {
79         *error = true;
80         return -1;
81     }
82     buffer.shrink(utf8Target - buffer.data());
83 
84     GOwnPtr<char> utf8Result(caseFunction(buffer.data(), buffer.size()));
85     long utf8ResultLength = strlen(utf8Result.get());
86 
87     // Calculate the destination buffer size.
88     int realLength = getUTF16LengthFromUTF8(utf8Result.get(), utf8ResultLength);
89     if (realLength > resultLength) {
90         *error = true;
91         return realLength;
92     }
93 
94     // Convert the result to UTF-16.
95     UChar* utf16Target = result;
96     const char* utf8Source = utf8Result.get();
97     conversionResult = convertUTF8ToUTF16(&utf8Source, utf8Source + utf8ResultLength, &utf16Target, utf16Target + resultLength, true);
98     long utf16ResultLength = utf16Target - result;
99     if (conversionResult != conversionOK)
100         *error = true;
101 
102     return utf16ResultLength <= 0 ? -1 : utf16ResultLength;
103 }
foldCase(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)104 int foldCase(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
105 {
106     return convertCase(result, resultLength, src, srcLength, error, g_utf8_casefold);
107 }
108 
toLower(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)109 int toLower(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
110 {
111     return convertCase(result, resultLength, src, srcLength, error, g_utf8_strdown);
112 }
113 
toUpper(UChar * result,int resultLength,const UChar * src,int srcLength,bool * error)114 int toUpper(UChar* result, int resultLength, const UChar* src, int srcLength, bool* error)
115 {
116     return convertCase(result, resultLength, src, srcLength, error, g_utf8_strup);
117 }
118 
direction(UChar32 c)119 Direction direction(UChar32 c)
120 {
121     PangoBidiType type = pango_bidi_type_for_unichar(c);
122     switch (type) {
123     case PANGO_BIDI_TYPE_L:
124         return LeftToRight;
125     case PANGO_BIDI_TYPE_R:
126         return RightToLeft;
127     case PANGO_BIDI_TYPE_AL:
128         return RightToLeftArabic;
129     case PANGO_BIDI_TYPE_LRE:
130         return LeftToRightEmbedding;
131     case PANGO_BIDI_TYPE_RLE:
132         return RightToLeftEmbedding;
133     case PANGO_BIDI_TYPE_LRO:
134         return LeftToRightOverride;
135     case PANGO_BIDI_TYPE_RLO:
136         return RightToLeftOverride;
137     case PANGO_BIDI_TYPE_PDF:
138         return PopDirectionalFormat;
139     case PANGO_BIDI_TYPE_EN:
140         return EuropeanNumber;
141     case PANGO_BIDI_TYPE_AN:
142         return ArabicNumber;
143     case PANGO_BIDI_TYPE_ES:
144         return EuropeanNumberSeparator;
145     case PANGO_BIDI_TYPE_ET:
146         return EuropeanNumberTerminator;
147     case PANGO_BIDI_TYPE_CS:
148         return CommonNumberSeparator;
149     case PANGO_BIDI_TYPE_NSM:
150         return NonSpacingMark;
151     case PANGO_BIDI_TYPE_BN:
152         return BoundaryNeutral;
153     case PANGO_BIDI_TYPE_B:
154         return BlockSeparator;
155     case PANGO_BIDI_TYPE_S:
156         return SegmentSeparator;
157     case PANGO_BIDI_TYPE_WS:
158         return WhiteSpaceNeutral;
159     default:
160         return OtherNeutral;
161     }
162 }
163 
umemcasecmp(const UChar * a,const UChar * b,int len)164 int umemcasecmp(const UChar* a, const UChar* b, int len)
165 {
166     GOwnPtr<char> utf8a;
167     GOwnPtr<char> utf8b;
168 
169     utf8a.set(g_utf16_to_utf8(a, len, 0, 0, 0));
170     utf8b.set(g_utf16_to_utf8(b, len, 0, 0, 0));
171 
172     GOwnPtr<char> foldedA;
173     GOwnPtr<char> foldedB;
174 
175     foldedA.set(g_utf8_casefold(utf8a.get(), -1));
176     foldedB.set(g_utf8_casefold(utf8b.get(), -1));
177 
178     // FIXME: umemcasecmp needs to mimic u_memcasecmp of icu
179     // from the ICU docs:
180     // "Compare two strings case-insensitively using full case folding.
181     // his is equivalent to u_strcmp(u_strFoldCase(s1, n, options), u_strFoldCase(s2, n, options))."
182     //
183     // So it looks like we don't need the full g_utf8_collate here,
184     // but really a bitwise comparison of casefolded unicode chars (not utf-8 bytes).
185     // As there is no direct equivalent to this icu function in GLib, for now
186     // we'll use g_utf8_collate():
187 
188     return g_utf8_collate(foldedA.get(), foldedB.get());
189 }
190 
191 }
192 }
193