sqlite/android/PhoneticStringUtils.h

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef _ANDROID_PHONETIC_STRING_UTILS_H
#define _ANDROID_PHONETIC_STRING_UTILS_H

#include <string.h>  // For size_t.
#include <utils/String8.h>

namespace android {

// Returns the Unicode codepoint read from the UTF-8 string "src", and sets
// "*next" to the next index. Returns a negative value when the input is
// invalid.
//
// NOTE(review): "len" is presumably the length of "src" and "index" the
// position at which decoding starts; this header does not state either
// explicitly -- confirm against the implementation.
int GetCodePointFromUtf8(const char *src, size_t len, size_t index, int *next);

// Returns a "phonetically sortable" codepoint for "codepoint". What counts as
// phonetically sortable depends on the locale; currently this function
// considers only Japanese.
//
// "*next_is_consumed" is set to true if "next_codepoint" is "consumed" (e.g.
// a Japanese halfwidth katakana voiced mark is consumed when the preceding
// "codepoint" is an appropriate one).
//
// Returns -1 if the codepoint should not be considered when sorting (e.g.
// whitespace).
int GetPhoneticallySortableCodePoint(char32_t codepoint,
                                     char32_t next_codepoint,
                                     bool *next_is_consumed);

// Returns a "normalized" codepoint for "codepoint". What counts as normalized
// depends on the locale; currently this function normalizes only Japanese,
// and all other characters are left as they are.
//
// In Japanese, "normalized" means that half-width and full-width katakana are
// appropriately converted to hiragana.
//
// "*next_is_consumed" is set to true if "next_codepoint" is "consumed" (e.g.
// a Japanese halfwidth katakana voiced mark is consumed when the preceding
// "codepoint" is an appropriate one, such as half-width "ka").
int GetNormalizedCodePoint(char32_t codepoint,
                           char32_t next_codepoint,
                           bool *next_is_consumed);

// Writes the UTF-8 representation of "codepoint" into "dst".
// Returns true on success. Returns false if the input is invalid or the
// destination is not long enough.
//
// NOTE(review): "len" is presumably the capacity of "dst", and "*index" the
// write position that is advanced past the bytes written; this header does
// not state either explicitly -- confirm against the implementation.
bool GetUtf8FromCodePoint(int codepoint, char *dst, size_t len, size_t *index);

// Creates a "phonetically sortable" UTF-8 string from "src" and stores it in
// "*dst". The caller must free "*dst" once it is no longer needed.
// If "src" is NULL or its length is 0, "*dst" is set to \uFFFF.
//
// Note that currently this function considers only Japanese.
//
// NOTE(review): the meaning of the bool return value and of "*len"
// (presumably the length of the produced string) is not stated in this
// header -- confirm against the implementation.
bool GetPhoneticallySortableString(const char *src, char **dst, size_t *len);

// Creates a "normalized" UTF-8 string from "src" and stores it in "*dst".
// The caller must free "*dst" once it is no longer needed.
// If "src" is NULL or its length is 0, "*dst" is set to \uFFFF.
//
// Note that currently this function considers only Japanese.
//
// NOTE(review): the meaning of the bool return value and of "*len"
// (presumably the length of the produced string) is not stated in this
// header -- confirm against the implementation.
bool GetNormalizedString(const char *src, char **dst, size_t *len);

}  // namespace android

#endif