• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 // -*- c++ -*-
19 // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
20 
21 //               O S C L _ S T R I N G _ U T F 8
22 
23 // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
24 
25 /*! \addtogroup osclutil OSCL Util
26  *
27  * @{
28  */
29 
30 
31 /** \file oscl_string_utf8.h
32     \brief Utilities to validate and truncate UTF-8 encoded strings.
33 */
34 
35 /*!
36  * \par UTF-8 String Manipulation
37  * These routines operate on UTF-8 character strings.
38  *
39  */
40 #ifndef OSCL_STRING_UTF8_H
41 #define OSCL_STRING_UTF8_H
42 
43 // - - Inclusion - - - - - - - - - - - - - - - - - - - - - - - - - - - -
44 #ifndef OSCL_BASE_H_INCLUDED
45 #include "oscl_base.h"
46 #endif
47 
48 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
49 // Function prototypes
50 /*!
51     \brief Check if the input string contains any illegal UTF-8 character.
52            The function scans the string and validates that each character is valid UTF-8.
53            It stops at the first NULL character, the first invalid character, or the max_bytes limit.
54            The string is valid if and only if every character is a valid UTF-8 character and
55            the scanning stopped on a character boundary.
56 
57     \param str_buf              Ptr to an input string, which may not terminate with null, to be checked
58     \param num_valid_characters This is an output parameter which is the number of valid UTF-8 characters actually read.
59     \param max_bytes            The maximum number of bytes to read (a zero value means read to the first NULL character).
60     \param max_char_2_valid     This is an input parameter.
61                                 Specify the number of UTF-8 characters the caller wants to validate.
62     \param num_byte_4_char      This is an output parameter (optional; defaults to NULL).
63                                 The number of bytes used by the max_char_2_valid characters
64     \return                     True if the string is valid and false otherwise.
65 */
66 OSCL_IMPORT_REF  bool  oscl_str_is_valid_utf8(const uint8 *str_buf, uint32& num_valid_characters, uint32 max_bytes = 0,
67         uint32 max_char_2_valid = 0, uint32 * num_byte_4_char = NULL);
68 /*!
69     \brief Truncates the UTF-8 string up to the required size.
70 
71            The function will modify the str_buf so that it contains AT MOST max_char valid
72            UTF-8 characters.  If a NULL character is found before reading max_char UTF-8
73            characters, then the function does not modify the string and simply returns
74            the number of characters.  If an invalid character is found, then it will insert
75            a NULL character after the last valid character and return the length.  Otherwise,
76            it will insert a NULL character after max_char valid UTF-8 characters and return the length.
77     \param str_buf          Ptr to an input string which may not terminate with null
78     \param max_char         The max number of the UTF-8 CHARACTERS
79     \param max_bytes        The maximum number of bytes to read (a zero value means read to the first NULL character).
80     \return                 It returns the length of the truncated string in UTF-8 characters.
81 */
82 OSCL_IMPORT_REF int32  oscl_str_truncate_utf8(uint8 *str_buf, uint32 max_char, uint32 max_bytes = 0);
83 
84 #endif
85 
86 /*! @} */
87