1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef _ANDROID_PHONETIC_STRING_UTILS_H 18 #define _ANDROID_PHONETIC_STRING_UTILS_H 19 20 #include <string.h> // For size_t. 21 #include <utils/String8.h> 22 23 namespace android { 24 25 // Returns Unicode codepoint relevant to string "src", and set "next" to the 26 // next index. Returns negative value when input is invalid. 27 int GetCodePointFromUtf8(const char *src, size_t len, size_t index, int *next); 28 29 // Returns codepoint which is "phonetically sortable", whose definition 30 // depends on each Locale. Note that currently this function considers only 31 // Japanese. The variable "next_is_consumed" is set to true if "next_codepoint" 32 // is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed 33 // when previous "codepoint" is appropriate). If the codepoint should not be 34 // considered when sorting (e.g. whitespaces), -1 is returned. 35 int GetPhoneticallySortableCodePoint(char32_t codepoint, 36 char32_t next_codepoint, 37 bool *next_is_consumed); 38 39 // Returns codepoint which is "normalized", whose definition depends on each 40 // Locale. Note that currently this function normalizes only Japanese; the 41 // other characters are remained as is. 42 // The variable "next_is_consumed" is set to true if "next_codepoint" 43 // is "consumed" (e.g. Japanese halfwidth katakana's voiced mark is consumed 44 // when previous "codepoint" is appropriate, like half-width "ka"). 45 // 46 // In Japanese, "normalized" means that half-width and full-width katakana is 47 // appropriately converted to hiragana. 48 int GetNormalizedCodePoint(char32_t codepoint, 49 char32_t next_codepoint, 50 bool *next_is_consumed); 51 52 // Pushes Utf8 expression of "codepoint" to "dst". Returns true when successful. 53 // If input is invalid or the length of the destination is not enough, 54 // returns false. 55 bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index); 56 57 // Creates a "phonetically sortable" Utf8 string and push it into "dst". 58 // *dst must be freed after being used outside. 59 // If "src" is NULL or its length is 0, "dst" is set to \uFFFF. 60 // 61 // Note that currently this function considers only Japanese. 62 bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len); 63 64 // Creates a "normalized" Utf8 string and push it into "dst". *dst must be 65 // freed after being used outside. 66 // If "src" is NULL or its length is 0, "dst" is set to \uFFFF. 67 // 68 // Note that currently this function considers only Japanese. 69 bool GetNormalizedString(const char *src, char **dst, size_t *len); 70 71 } // namespace android 72 73 #endif 74