• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 // -*- c++ -*-
19 // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
20 
21 //               O S C L _ U T F 8 C O N V
22 
23 // = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
24 
25 /*! \addtogroup osclutil OSCL Util
26  *
27  * @{
28  */
29 
30 
31 /** \file oscl_utf8conv.h
32     \brief Utilities to convert unicode to utf8 and vice versa
33 */
34 
35 
36 /********************************************************************************
37                             UTF-8 Bit Distribution
38 
39 UTF-16                                  1st Byte 2nd Byte 3rd Byte 4th Byte
40 -------- -------- -------- --------     -------- -------- -------- --------
41 00000000 0xxxxxxx                       0xxxxxxx
42 00000yyy yyxxxxxx                       110yyyyy 10xxxxxx
43 zzzzyyyy yyxxxxxx                       1110zzzz 10yyyyyy 10xxxxxx
44 110110ww wwzzzzyy 110111yy yyxxxxxx     11110uuu 10uuzzzz 10yyyyyy 10xxxxxx
45 
46 NOTE:
47  uuuuu = wwww+1 (to account for addition of 0x10000 as in Section 3.7, Surrogates)
48 
49 **********************************************************************************/
50 
51 
52 #ifndef OSCL_UTF8CONV_H
53 #define OSCL_UTF8CONV_H
54 
55 #ifndef OSCL_BASE_INCLUDED_H
56 #include "oscl_base.h"
57 #endif
58 
59 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
60 // Function prototypes
61 /*!
62     \brief Convert a UTF8 byte sequence to a Unicode string
63 
64            The function converts a UTF8 byte sequence (or ASCII sequence) to a Unicode string.
65            The length of the input UTF8 byte sequence is specified. Conversion stops on one of two conditions:
66            (A) The whole input UTF8 byte sequence is successfully converted.
67            (B) The output buffer is too small for the output, or a parse error occurs.
68            In case of (A), it adds a terminating '\0' at the end of the output Unicode string,
69            and returns the length of the output Unicode string (without counting the terminating '\0').
70            In case of (B), it converts as much as possible to the output buffer, adds a terminating '\0'
71            at the end of the output Unicode string (no '\0' is added if outLength is less than or
72            equal to 0), and returns 0.
73 
74     \param input            Ptr to an input UTF8 byte sequence. '\0' termination is not necessary.
75     \param inLength         The length of the input UTF8 byte sequence, without counting the terminating '\0' (if any).
76     \param output           Ptr to an output buffer into which the output Unicode string is written.
77     \param outLength        The size of the output buffer, also the maximum number of oscl_wchar that can be written to it.
78     \return                 Length of output (excludes '\0') : all of the input was converted and '\0' was appended to the output;
79                             0 : insufficient buffer or error in conversion
80 */
81 
82 OSCL_IMPORT_REF int32 oscl_UTF8ToUnicode(const char *input, int32 inLength, oscl_wchar *output, int32 outLength);
83 
84 
85 // - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
86 // Function prototypes
87 /*!
88     \brief Convert a Unicode string to a UTF8 byte sequence
89 
90            The function converts a Unicode string to a UTF8 byte sequence.
91            The length of the input Unicode string is specified. Conversion stops on one of two conditions:
92            (A) The whole input Unicode string is successfully converted.
93            (B) The destination buffer is too small for the output.
94            In case of (A), it adds a terminating '\0' at the end of the output UTF8 byte sequence,
95            and returns the length of the output UTF8 byte sequence (without counting the terminating '\0').
96            In case of (B), it converts as much as possible to the output buffer, adds a terminating '\0'
97            at the end of the output UTF8 byte sequence (no '\0' is added if outLength is less than or
98            equal to 0), and returns 0.
99 
100     \param input            Ptr to an input Unicode string. '\0' termination is not necessary.
101     \param inLength         The length of the input Unicode string, without counting the terminating '\0' (if any).
102     \param output           Ptr to an output buffer into which the output UTF8 byte sequence is written.
103     \param outLength        The size of the output buffer, also the maximum number of char that can be written to it.
104     \return                 Length of output (excludes '\0') : all of the input was converted and '\0' was appended to the output;
105                             0 : insufficient buffer or error in conversion
106 */
107 
108 OSCL_IMPORT_REF int32 oscl_UnicodeToUTF8(const oscl_wchar *input, int32 inLength, char *output, int32 outLength);
109 
110 #endif /* OSCL_UTF8CONV_H */
111 
112 /*! @} */
113