• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2009 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef _ANDROID_PHONETIC_STRING_UTILS_H
18 #define _ANDROID_PHONETIC_STRING_UTILS_H
19 
20 #include <string.h>  // For size_t.
21 #include <utils/String8.h>
22 
23 namespace android {
24 
25 // Returns the Unicode codepoint read from the UTF-8 string "src", and sets
26 // "*next" to the next index. Returns a negative value when the input is invalid.
27 int GetCodePointFromUtf8(const char *src, size_t len, size_t index, int *next);
28 
29 // Returns a "phonetically sortable" codepoint for "codepoint"; what that means
30 // depends on the locale. Note that this function currently considers only
31 // Japanese. "*next_is_consumed" is set to true if "next_codepoint" is
32 // "consumed" (e.g. a Japanese half-width katakana voiced mark is consumed
33 // when the preceding "codepoint" is appropriate). If the codepoint should not
34 // be considered when sorting (e.g. whitespace), -1 is returned.
35 int GetPhoneticallySortableCodePoint(char32_t codepoint,
36                                      char32_t next_codepoint,
37                                      bool *next_is_consumed);
38 
39 // Returns a "normalized" codepoint for "codepoint"; what that means depends
40 // on the locale. Note that this function currently normalizes only Japanese;
41 // other characters are left as is.
42 // "*next_is_consumed" is set to true if "next_codepoint" is "consumed"
43 // (e.g. a Japanese half-width katakana voiced mark is consumed when the
44 // preceding "codepoint" is appropriate, such as half-width "ka").
45 //
46 // In Japanese, "normalized" means that half-width and full-width katakana are
47 // appropriately converted to hiragana.
48 int GetNormalizedCodePoint(char32_t codepoint,
49                            char32_t next_codepoint,
50                            bool *next_is_consumed);
51 
52 // Writes the UTF-8 encoding of "codepoint" into "dst". Returns true on success.
53 // Returns false if the input is invalid or the destination is not long enough.
54 // NOTE(review): "*index" is presumably the write position within "dst" -- confirm.
55 bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index);
56 
57 // Creates a "phonetically sortable" UTF-8 string and stores it in "*dst".
58 // The caller is responsible for freeing "*dst" after use.
59 // If "src" is NULL or its length is 0, "*dst" is set to \uFFFF.
60 // NOTE(review): "*len" and the bool result are undocumented here -- confirm against the implementation.
61 // Note that this function currently considers only Japanese.
62 bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len);
63 
64 // Creates a "normalized" UTF-8 string and stores it in "*dst". The caller is
65 // responsible for freeing "*dst" after use.
66 // If "src" is NULL or its length is 0, "*dst" is set to \uFFFF.
67 // NOTE(review): "*len" and the bool result are undocumented here -- confirm against the implementation.
68 // Note that this function currently considers only Japanese.
69 bool GetNormalizedString(const char *src, char **dst, size_t *len);
70 
71 }  // namespace android
72 
73 #endif
74