/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
// -*- c++ -*-
// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

//               O S C L _ S T R I N G _ U T F 8

// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

/*! \addtogroup osclutil OSCL Util
 *
 * @{
 */


/** \file oscl_string_utf8.h
    \brief Utilities to validate and truncate UTF-8 encoded strings.
*/

/*!
 * \par UTF-8 String Manipulation
 * These routines operate on UTF-8 encoded character strings.
 *
 */
#ifndef OSCL_STRING_UTF8_H
#define OSCL_STRING_UTF8_H

// - - Inclusion - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#ifndef OSCL_BASE_H_INCLUDED
#include "oscl_base.h"
#endif

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Function prototypes
/*!
    \brief Check whether the input string contains any illegal UTF-8 character.

    The function scans the string and validates that each character is a
    valid UTF-8 character. Scanning stops at the first null character, at the
    first invalid character, or once max_bytes bytes have been read.
    The string is valid if and only if every character scanned is a valid
    UTF-8 character and the scan stopped on a character boundary.

    \param[in]  str_buf              Pointer to the input string to be checked; it need
                                     not be null-terminated.
    \param[out] num_valid_characters The number of valid UTF-8 characters actually read.
    \param[in]  max_bytes            The maximum number of bytes to read (a zero value
                                     means read up to the first null character).
    \param[in]  max_char_2_valid     The number of UTF-8 characters the caller wants to
                                     validate.
                                     NOTE(review): the meaning of the default value 0 is
                                     not stated in this header -- presumably "no limit";
                                     confirm against the implementation.
    \param[out] num_byte_4_char      Receives the number of bytes used by the
                                     max_char_2_valid characters. Defaults to NULL.
    \return True if the string is valid and false otherwise.
*/
OSCL_IMPORT_REF bool oscl_str_is_valid_utf8(const uint8 *str_buf, uint32& num_valid_characters, uint32 max_bytes = 0,
        uint32 max_char_2_valid = 0, uint32 * num_byte_4_char = NULL);
/*!
    \brief Truncate a UTF-8 string to at most the requested number of characters.

    The function modifies str_buf so that it contains AT MOST max_char valid
    UTF-8 characters:
      - If a null character is found before max_char UTF-8 characters have been
        read, the string is not modified and the number of characters is returned.
      - If an invalid character is found, a null character is inserted after the
        last valid character and the length is returned.
      - Otherwise, a null character is inserted after max_char valid UTF-8
        characters and the length is returned.

    \param[in,out] str_buf   Pointer to the string to truncate; it need not be
                             null-terminated on entry.
    \param[in]     max_char  The maximum number of UTF-8 CHARACTERS (not bytes) to keep.
    \param[in]     max_bytes The maximum number of bytes to read (a zero value means
                             read up to the first null character).
                             NOTE(review): this header does not say whether the
                             inserted null character is guaranteed to lie within
                             max_bytes -- confirm against the implementation.
    \return The length of the truncated string, in UTF-8 characters.
*/
OSCL_IMPORT_REF int32 oscl_str_truncate_utf8(uint8 *str_buf, uint32 max_char, uint32 max_bytes = 0);

#endif // OSCL_STRING_UTF8_H

/*! @} */