• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* decomp.c - Character decomposition.
2  *
3  *  Copyright (C) 1999, 2000 Tom Tromey
4  *  Copyright 2000 Red Hat, Inc.
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public License
17  * along with this library; if not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 /**
21  * SECTION:unicode
22  * @Title: Unicode Manipulation
23  * @Short_description: functions operating on Unicode characters and
24  *     UTF-8 strings
25  * @See_also: g_locale_to_utf8(), g_locale_from_utf8()
26  *
27  * This section describes a number of functions for dealing with
28  * Unicode characters and strings. There are analogues of the
29  * traditional `ctype.h` character classification and case conversion
30  * functions, UTF-8 analogues of some string utility functions,
31  * functions to perform normalization, case conversion and collation
32  * on UTF-8 strings and finally functions to convert between the UTF-8,
33  * UTF-16 and UCS-4 encodings of Unicode.
34  *
35  * The implementations of the Unicode functions in GLib are based
36  * on the Unicode Character Data tables, which are available from
37  * [www.unicode.org](http://www.unicode.org/).
38  * GLib 2.8 supports Unicode 4.0, GLib 2.10 supports Unicode 4.1,
39  * GLib 2.12 supports Unicode 5.0, GLib 2.16.3 supports Unicode 5.1,
40  * GLib 2.30 supports Unicode 6.0.
41  */
42 
43 #include "config.h"
44 
45 #include <stdlib.h>
46 
47 #include "gunicode.h"
48 #include "gunidecomp.h"
49 #include "gmem.h"
50 #include "gunicomp.h"
51 #include "gunicodeprivate.h"
52 
53 
/* Shared lookup used by CC_PART1 and CC_PART2.  A page entry that is at
 * least G_UNICODE_MAX_TABLE_INDEX carries the result itself (biased by
 * that constant); any smaller entry picks the row of cclass_data that
 * is then indexed by Char. */
#define CC_LOOKUP(Table, Page, Char) \
  (((Table)[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? ((Table)[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (cclass_data[(Table)[Page]][Char]))

/* Lookup through combining_class_table_part1. */
#define CC_PART1(Page, Char) \
  CC_LOOKUP (combining_class_table_part1, Page, Char)

/* Lookup through combining_class_table_part2. */
#define CC_PART2(Page, Char) \
  CC_LOOKUP (combining_class_table_part2, Page, Char)

/* Combining class of Char.  Characters up to G_UNICODE_LAST_CHAR_PART1
 * go through the part-1 table, characters from 0xe0000 up to
 * G_UNICODE_LAST_CHAR go through the part-2 table (with the page counted
 * from 0xe0000), and everything else evaluates to 0.  Char is evaluated
 * more than once, so it must be free of side effects. */
#define COMBINING_CLASS(Char) \
  (((Char) > G_UNICODE_LAST_CHAR_PART1) \
   ? (((Char) < 0xe0000 || (Char) > G_UNICODE_LAST_CHAR) \
      ? 0 \
      : CC_PART2 (((Char) - 0xe0000) >> 8, (Char) & 0xff)) \
   : CC_PART1 ((Char) >> 8, (Char) & 0xff))
70 
71 /**
72  * g_unichar_combining_class:
73  * @uc: a Unicode character
74  *
75  * Determines the canonical combining class of a Unicode character.
76  *
77  * Returns: the combining class of the character
78  *
79  * Since: 2.14
80  **/
81 gint
g_unichar_combining_class(gunichar uc)82 g_unichar_combining_class (gunichar uc)
83 {
84   return COMBINING_CLASS (uc);
85 }
86 
/* Constants for Hangul syllable [de]composition.  NCount and SCount are
 * derived from the three per-position counts. */
enum
{
  SBase  = 0xAC00,
  LBase  = 0x1100,
  VBase  = 0x1161,
  TBase  = 0x11A7,
  LCount = 19,
  VCount = 21,
  TCount = 28,
  NCount = VCount * TCount,
  SCount = LCount * NCount
};
97 
98 /**
99  * g_unicode_canonical_ordering:
100  * @string: a UCS-4 encoded string.
101  * @len: the maximum length of @string to use.
102  *
103  * Computes the canonical ordering of a string in-place.
104  * This rearranges decomposed characters in the string
105  * according to their combining classes.  See the Unicode
106  * manual for more information.
107  **/
108 void
g_unicode_canonical_ordering(gunichar * string,gsize len)109 g_unicode_canonical_ordering (gunichar *string,
110 			      gsize     len)
111 {
112   gsize i;
113   int swap = 1;
114 
115   while (swap)
116     {
117       int last;
118       swap = 0;
119       last = COMBINING_CLASS (string[0]);
120       for (i = 0; i < len - 1; ++i)
121 	{
122 	  int next = COMBINING_CLASS (string[i + 1]);
123 	  if (next != 0 && last > next)
124 	    {
125 	      gsize j;
126 	      /* Percolate item leftward through string.  */
127 	      for (j = i + 1; j > 0; --j)
128 		{
129 		  gunichar t;
130 		  if (COMBINING_CLASS (string[j - 1]) <= next)
131 		    break;
132 		  t = string[j];
133 		  string[j] = string[j - 1];
134 		  string[j - 1] = t;
135 		  swap = 1;
136 		}
137 	      /* We're re-entering the loop looking at the old
138 		 character again.  */
139 	      next = last;
140 	    }
141 	  last = next;
142 	}
143     }
144 }
145 
146 /* http://www.unicode.org/unicode/reports/tr15/#Hangul
147  * r should be null or have sufficient space. Calling with r == NULL will
148  * only calculate the result_len; however, a buffer with space for three
149  * characters will always be big enough. */
150 static void
decompose_hangul(gunichar s,gunichar * r,gsize * result_len)151 decompose_hangul (gunichar s,
152                   gunichar *r,
153                   gsize *result_len)
154 {
155   gint SIndex = s - SBase;
156   gint TIndex = SIndex % TCount;
157 
158   if (r)
159     {
160       r[0] = LBase + SIndex / NCount;
161       r[1] = VBase + (SIndex % NCount) / TCount;
162     }
163 
164   if (TIndex)
165     {
166       if (r)
167 	r[2] = TBase + TIndex;
168       *result_len = 3;
169     }
170   else
171     *result_len = 2;
172 }
173 
174 /* returns a pointer to a null-terminated UTF-8 string */
175 static const gchar *
find_decomposition(gunichar ch,gboolean compat)176 find_decomposition (gunichar ch,
177 		    gboolean compat)
178 {
179   int start = 0;
180   int end = G_N_ELEMENTS (decomp_table);
181 
182   if (ch >= decomp_table[start].ch &&
183       ch <= decomp_table[end - 1].ch)
184     {
185       while (TRUE)
186 	{
187 	  int half = (start + end) / 2;
188 	  if (ch == decomp_table[half].ch)
189 	    {
190 	      int offset;
191 
192 	      if (compat)
193 		{
194 		  offset = decomp_table[half].compat_offset;
195 		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
196 		    offset = decomp_table[half].canon_offset;
197 		}
198 	      else
199 		{
200 		  offset = decomp_table[half].canon_offset;
201 		  if (offset == G_UNICODE_NOT_PRESENT_OFFSET)
202 		    return NULL;
203 		}
204 
205 	      return &(decomp_expansion_string[offset]);
206 	    }
207 	  else if (half == start)
208 	    break;
209 	  else if (ch > decomp_table[half].ch)
210 	    start = half;
211 	  else
212 	    end = half;
213 	}
214     }
215 
216   return NULL;
217 }
218 
219 /**
220  * g_unicode_canonical_decomposition:
221  * @ch: a Unicode character.
222  * @result_len: location to store the length of the return value.
223  *
224  * Computes the canonical decomposition of a Unicode character.
225  *
226  * Returns: a newly allocated string of Unicode characters.
227  *   @result_len is set to the resulting length of the string.
228  *
229  * Deprecated: 2.30: Use the more flexible g_unichar_fully_decompose()
230  *   instead.
231  **/
232 gunichar *
g_unicode_canonical_decomposition(gunichar ch,gsize * result_len)233 g_unicode_canonical_decomposition (gunichar ch,
234 				   gsize   *result_len)
235 {
236   const gchar *decomp;
237   const gchar *p;
238   gunichar *r;
239 
240   /* Hangul syllable */
241   if (ch >= SBase && ch < SBase + SCount)
242     {
243       decompose_hangul (ch, NULL, result_len);
244       r = g_malloc (*result_len * sizeof (gunichar));
245       decompose_hangul (ch, r, result_len);
246     }
247   else if ((decomp = find_decomposition (ch, FALSE)) != NULL)
248     {
249       /* Found it.  */
250       int i;
251 
252       *result_len = g_utf8_strlen (decomp, -1);
253       r = g_malloc (*result_len * sizeof (gunichar));
254 
255       for (p = decomp, i = 0; *p != '\0'; p = g_utf8_next_char (p), i++)
256         r[i] = g_utf8_get_char (p);
257     }
258   else
259     {
260       /* Not in our table.  */
261       r = g_malloc (sizeof (gunichar));
262       *r = ch;
263       *result_len = 1;
264     }
265 
266   return r;
267 }
268 
269 /* L,V => LV and LV,T => LVT  */
270 static gboolean
combine_hangul(gunichar a,gunichar b,gunichar * result)271 combine_hangul (gunichar a,
272                 gunichar b,
273                 gunichar *result)
274 {
275   gint LIndex = a - LBase;
276   gint SIndex = a - SBase;
277 
278   gint VIndex = b - VBase;
279   gint TIndex = b - TBase;
280 
281   if (0 <= LIndex && LIndex < LCount
282       && 0 <= VIndex && VIndex < VCount)
283     {
284       *result = SBase + (LIndex * VCount + VIndex) * TCount;
285       return TRUE;
286     }
287   else if (0 <= SIndex && SIndex < SCount && (SIndex % TCount) == 0
288            && 0 < TIndex && TIndex < TCount)
289     {
290       *result = a + TIndex;
291       return TRUE;
292     }
293 
294   return FALSE;
295 }
296 
/* Composition index stored for Char within Page of compose_table.  A
 * page entry at or above G_UNICODE_MAX_TABLE_INDEX carries the index
 * itself (biased by that constant); any smaller entry selects the row
 * of compose_data that is then indexed by Char. */
#define CI(Page, Char) \
  ((compose_table[Page] >= G_UNICODE_MAX_TABLE_INDEX) \
   ? (compose_table[Page] - G_UNICODE_MAX_TABLE_INDEX) \
   : (compose_data[compose_table[Page]][Char]))

/* Composition index of Char, or 0 when its page lies beyond
 * COMPOSE_TABLE_LAST.  Every use of Char is parenthesised so that an
 * expression argument (e.g. `base + offset`) is shifted as a whole;
 * Char is evaluated more than once and must be free of side effects. */
#define COMPOSE_INDEX(Char) \
     ((((Char) >> 8) > (COMPOSE_TABLE_LAST)) ? 0 : CI((Char) >> 8, (Char) & 0xff))
304 
305 static gboolean
combine(gunichar a,gunichar b,gunichar * result)306 combine (gunichar  a,
307 	 gunichar  b,
308 	 gunichar *result)
309 {
310   gushort index_a, index_b;
311 
312   if (combine_hangul (a, b, result))
313     return TRUE;
314 
315   index_a = COMPOSE_INDEX(a);
316 
317   if (index_a >= COMPOSE_FIRST_SINGLE_START && index_a < COMPOSE_SECOND_START)
318     {
319       if (b == compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][0])
320 	{
321 	  *result = compose_first_single[index_a - COMPOSE_FIRST_SINGLE_START][1];
322 	  return TRUE;
323 	}
324       else
325         return FALSE;
326     }
327 
328   index_b = COMPOSE_INDEX(b);
329 
330   if (index_b >= COMPOSE_SECOND_SINGLE_START)
331     {
332       if (a == compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][0])
333 	{
334 	  *result = compose_second_single[index_b - COMPOSE_SECOND_SINGLE_START][1];
335 	  return TRUE;
336 	}
337       else
338         return FALSE;
339     }
340 
341   if (index_a >= COMPOSE_FIRST_START && index_a < COMPOSE_FIRST_SINGLE_START &&
342       index_b >= COMPOSE_SECOND_START && index_b < COMPOSE_SECOND_SINGLE_START)
343     {
344       gunichar res = compose_array[index_a - COMPOSE_FIRST_START][index_b - COMPOSE_SECOND_START];
345 
346       if (res)
347 	{
348 	  *result = res;
349 	  return TRUE;
350 	}
351     }
352 
353   return FALSE;
354 }
355 
356 gunichar *
_g_utf8_normalize_wc(const gchar * str,gssize max_len,GNormalizeMode mode)357 _g_utf8_normalize_wc (const gchar    *str,
358 		      gssize          max_len,
359 		      GNormalizeMode  mode)
360 {
361   gsize n_wc;
362   gunichar *wc_buffer;
363   const char *p;
364   gsize last_start;
365   gboolean do_compat = (mode == G_NORMALIZE_NFKC ||
366 			mode == G_NORMALIZE_NFKD);
367   gboolean do_compose = (mode == G_NORMALIZE_NFC ||
368 			 mode == G_NORMALIZE_NFKC);
369 
370   n_wc = 0;
371   p = str;
372   while ((max_len < 0 || p < str + max_len) && *p)
373     {
374       const gchar *decomp;
375       gunichar wc = g_utf8_get_char (p);
376 
377       if (wc >= SBase && wc < SBase + SCount)
378         {
379           gsize result_len;
380           decompose_hangul (wc, NULL, &result_len);
381           n_wc += result_len;
382         }
383       else
384         {
385           decomp = find_decomposition (wc, do_compat);
386 
387           if (decomp)
388             n_wc += g_utf8_strlen (decomp, -1);
389           else
390             n_wc++;
391         }
392 
393       p = g_utf8_next_char (p);
394     }
395 
396   wc_buffer = g_new (gunichar, n_wc + 1);
397 
398   last_start = 0;
399   n_wc = 0;
400   p = str;
401   while ((max_len < 0 || p < str + max_len) && *p)
402     {
403       gunichar wc = g_utf8_get_char (p);
404       const gchar *decomp;
405       int cc;
406       gsize old_n_wc = n_wc;
407 
408       if (wc >= SBase && wc < SBase + SCount)
409         {
410           gsize result_len;
411           decompose_hangul (wc, wc_buffer + n_wc, &result_len);
412           n_wc += result_len;
413         }
414       else
415         {
416           decomp = find_decomposition (wc, do_compat);
417 
418           if (decomp)
419             {
420               const char *pd;
421               for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
422                 wc_buffer[n_wc++] = g_utf8_get_char (pd);
423             }
424           else
425             wc_buffer[n_wc++] = wc;
426         }
427 
428       if (n_wc > 0)
429 	{
430 	  cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
431 
432 	  if (cc == 0)
433 	    {
434 	      g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
435 	      last_start = old_n_wc;
436 	    }
437 	}
438 
439       p = g_utf8_next_char (p);
440     }
441 
442   if (n_wc > 0)
443     {
444       g_unicode_canonical_ordering (wc_buffer + last_start, n_wc - last_start);
445       last_start = n_wc;
446     }
447 
448   wc_buffer[n_wc] = 0;
449 
450   /* All decomposed and reordered */
451 
452   if (do_compose && n_wc > 0)
453     {
454       gsize i, j;
455       int last_cc = 0;
456       last_start = 0;
457 
458       for (i = 0; i < n_wc; i++)
459 	{
460 	  int cc = COMBINING_CLASS (wc_buffer[i]);
461 
462 	  if (i > 0 &&
463 	      (last_cc == 0 || last_cc < cc) &&
464 	      combine (wc_buffer[last_start], wc_buffer[i],
465 		       &wc_buffer[last_start]))
466 	    {
467 	      for (j = i + 1; j < n_wc; j++)
468 		wc_buffer[j-1] = wc_buffer[j];
469 	      n_wc--;
470 	      i--;
471 
472 	      if (i == last_start)
473 		last_cc = 0;
474 	      else
475 		last_cc = COMBINING_CLASS (wc_buffer[i-1]);
476 
477 	      continue;
478 	    }
479 
480 	  if (cc == 0)
481 	    last_start = i;
482 
483 	  last_cc = cc;
484 	}
485     }
486 
487   wc_buffer[n_wc] = 0;
488 
489   return wc_buffer;
490 }
491 
492 /**
493  * g_utf8_normalize:
494  * @str: a UTF-8 encoded string.
495  * @len: length of @str, in bytes, or -1 if @str is nul-terminated.
496  * @mode: the type of normalization to perform.
497  *
498  * Converts a string into canonical form, standardizing
499  * such issues as whether a character with an accent
500  * is represented as a base character and combining
501  * accent or as a single precomposed character. The
502  * string has to be valid UTF-8, otherwise %NULL is
503  * returned. You should generally call g_utf8_normalize()
504  * before comparing two Unicode strings.
505  *
506  * The normalization mode %G_NORMALIZE_DEFAULT only
507  * standardizes differences that do not affect the
508  * text content, such as the above-mentioned accent
509  * representation. %G_NORMALIZE_ALL also standardizes
510  * the "compatibility" characters in Unicode, such
511  * as SUPERSCRIPT THREE to the standard forms
512  * (in this case DIGIT THREE). Formatting information
513  * may be lost but for most text operations such
514  * characters should be considered the same.
515  *
516  * %G_NORMALIZE_DEFAULT_COMPOSE and %G_NORMALIZE_ALL_COMPOSE
517  * are like %G_NORMALIZE_DEFAULT and %G_NORMALIZE_ALL,
518  * but returned a result with composed forms rather
519  * than a maximally decomposed form. This is often
520  * useful if you intend to convert the string to
521  * a legacy encoding or pass it to a system with
522  * less capable Unicode handling.
523  *
524  * Returns: (nullable): a newly allocated string, that
525  *   is the normalized form of @str, or %NULL if @str
526  *   is not valid UTF-8.
527  **/
528 gchar *
g_utf8_normalize(const gchar * str,gssize len,GNormalizeMode mode)529 g_utf8_normalize (const gchar    *str,
530 		  gssize          len,
531 		  GNormalizeMode  mode)
532 {
533   gunichar *result_wc = _g_utf8_normalize_wc (str, len, mode);
534   gchar *result;
535 
536   result = g_ucs4_to_utf8 (result_wc, -1, NULL, NULL, NULL);
537   g_free (result_wc);
538 
539   return result;
540 }
541 
542 static gboolean
decompose_hangul_step(gunichar ch,gunichar * a,gunichar * b)543 decompose_hangul_step (gunichar  ch,
544                        gunichar *a,
545                        gunichar *b)
546 {
547   gint SIndex, TIndex;
548 
549   if (ch < SBase || ch >= SBase + SCount)
550     return FALSE;  /* not a hangul syllable */
551 
552   SIndex = ch - SBase;
553   TIndex = SIndex % TCount;
554 
555   if (TIndex)
556     {
557       /* split LVT -> LV,T */
558       *a = ch - TIndex;
559       *b = TBase + TIndex;
560     }
561   else
562     {
563       /* split LV -> L,V */
564       *a = LBase + SIndex / NCount;
565       *b = VBase + (SIndex % NCount) / TCount;
566     }
567 
568   return TRUE;
569 }
570 
571 /**
572  * g_unichar_decompose:
573  * @ch: a Unicode character
574  * @a: (out) (not optional): return location for the first component of @ch
575  * @b: (out) (not optional): return location for the second component of @ch
576  *
577  * Performs a single decomposition step of the
578  * Unicode canonical decomposition algorithm.
579  *
580  * This function does not include compatibility
581  * decompositions. It does, however, include algorithmic
582  * Hangul Jamo decomposition, as well as 'singleton'
583  * decompositions which replace a character by a single
584  * other character. In the case of singletons *@b will
585  * be set to zero.
586  *
587  * If @ch is not decomposable, *@a is set to @ch and *@b
588  * is set to zero.
589  *
590  * Note that the way Unicode decomposition pairs are
591  * defined, it is guaranteed that @b would not decompose
592  * further, but @a may itself decompose.  To get the full
593  * canonical decomposition for @ch, one would need to
594  * recursively call this function on @a.  Or use
595  * g_unichar_fully_decompose().
596  *
597  * See
598  * [UAX#15](http://unicode.org/reports/tr15/)
599  * for details.
600  *
601  * Returns: %TRUE if the character could be decomposed
602  *
603  * Since: 2.30
604  */
605 gboolean
g_unichar_decompose(gunichar ch,gunichar * a,gunichar * b)606 g_unichar_decompose (gunichar  ch,
607                      gunichar *a,
608                      gunichar *b)
609 {
610   gint start = 0;
611   gint end = G_N_ELEMENTS (decomp_step_table);
612 
613   if (decompose_hangul_step (ch, a, b))
614     return TRUE;
615 
616   /* TODO use bsearch() */
617   if (ch >= decomp_step_table[start].ch &&
618       ch <= decomp_step_table[end - 1].ch)
619     {
620       while (TRUE)
621         {
622           gint half = (start + end) / 2;
623           const decomposition_step *p = &(decomp_step_table[half]);
624           if (ch == p->ch)
625             {
626               *a = p->a;
627               *b = p->b;
628               return TRUE;
629             }
630           else if (half == start)
631             break;
632           else if (ch > p->ch)
633             start = half;
634           else
635             end = half;
636         }
637     }
638 
639   *a = ch;
640   *b = 0;
641 
642   return FALSE;
643 }
644 
645 /**
646  * g_unichar_compose:
647  * @a: a Unicode character
648  * @b: a Unicode character
649  * @ch: (out) (not optional): return location for the composed character
650  *
651  * Performs a single composition step of the
652  * Unicode canonical composition algorithm.
653  *
654  * This function includes algorithmic Hangul Jamo composition,
655  * but it is not exactly the inverse of g_unichar_decompose().
656  * No composition can have either of @a or @b equal to zero.
657  * To be precise, this function composes if and only if
658  * there exists a Primary Composite P which is canonically
659  * equivalent to the sequence <@a,@b>.  See the Unicode
660  * Standard for the definition of Primary Composite.
661  *
662  * If @a and @b do not compose a new character, @ch is set to zero.
663  *
664  * See
665  * [UAX#15](http://unicode.org/reports/tr15/)
666  * for details.
667  *
668  * Returns: %TRUE if the characters could be composed
669  *
670  * Since: 2.30
671  */
672 gboolean
g_unichar_compose(gunichar a,gunichar b,gunichar * ch)673 g_unichar_compose (gunichar  a,
674                    gunichar  b,
675                    gunichar *ch)
676 {
677   if (combine (a, b, ch))
678     return TRUE;
679 
680   *ch = 0;
681   return FALSE;
682 }
683 
684 /**
685  * g_unichar_fully_decompose:
686  * @ch: a Unicode character.
687  * @compat: whether perform canonical or compatibility decomposition
688  * @result: (optional) (out caller-allocates): location to store decomposed result, or %NULL
689  * @result_len: length of @result
690  *
691  * Computes the canonical or compatibility decomposition of a
692  * Unicode character.  For compatibility decomposition,
693  * pass %TRUE for @compat; for canonical decomposition
694  * pass %FALSE for @compat.
695  *
696  * The decomposed sequence is placed in @result.  Only up to
697  * @result_len characters are written into @result.  The length
698  * of the full decomposition (irrespective of @result_len) is
699  * returned by the function.  For canonical decomposition,
700  * currently all decompositions are of length at most 4, but
701  * this may change in the future (very unlikely though).
702  * At any rate, Unicode does guarantee that a buffer of length
703  * 18 is always enough for both compatibility and canonical
704  * decompositions, so that is the size recommended. This is provided
705  * as %G_UNICHAR_MAX_DECOMPOSITION_LENGTH.
706  *
707  * See
708  * [UAX#15](http://unicode.org/reports/tr15/)
709  * for details.
710  *
711  * Returns: the length of the full decomposition.
712  *
713  * Since: 2.30
714  **/
715 gsize
g_unichar_fully_decompose(gunichar ch,gboolean compat,gunichar * result,gsize result_len)716 g_unichar_fully_decompose (gunichar  ch,
717 			   gboolean  compat,
718 			   gunichar *result,
719 			   gsize     result_len)
720 {
721   const gchar *decomp;
722   const gchar *p;
723 
724   /* Hangul syllable */
725   if (ch >= SBase && ch < SBase + SCount)
726     {
727       gsize len, i;
728       gunichar buffer[3];
729       decompose_hangul (ch, result ? buffer : NULL, &len);
730       if (result)
731         for (i = 0; i < len && i < result_len; i++)
732 	  result[i] = buffer[i];
733       return len;
734     }
735   else if ((decomp = find_decomposition (ch, compat)) != NULL)
736     {
737       /* Found it.  */
738       gsize len, i;
739 
740       len = g_utf8_strlen (decomp, -1);
741 
742       for (p = decomp, i = 0; i < len && i < result_len; p = g_utf8_next_char (p), i++)
743         result[i] = g_utf8_get_char (p);
744 
745       return len;
746     }
747 
748   /* Does not decompose */
749   if (result && result_len >= 1)
750     *result = ch;
751   return 1;
752 }
753