/* ------------------------------------------------------------------
 * Copyright (C) 1998-2009 PacketVideo
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 * -------------------------------------------------------------------
 */
// -*- c++ -*-
// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

//               O S C L _ U T F 8 C O N V

// = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

/*! \addtogroup osclutil OSCL Util
 *
 * @{
 */


/** \file oscl_utf8conv.h
    \brief Utilities to convert Unicode to UTF8 and vice versa
*/


/********************************************************************************
                            UTF-8 Bit Distribution

UTF-16                                  1st Byte 2nd Byte 3rd Byte 4th Byte
-------- -------- -------- --------     -------- -------- -------- --------
00000000 0xxxxxxx                       0xxxxxxx
00000yyy yyxxxxxx                       110yyyyy 10xxxxxx
zzzzyyyy yyxxxxxx                       1110zzzz 10yyyyyy 10xxxxxx
110110ww wwzzzzyy 110111yy yyxxxxxx     11110uuu 10uuzzzz 10yyyyyy 10xxxxxx

NOTE:
 uuuuu = wwww+1 (to account for addition of 0x10000 as in Section 3.7, Surrogates)

**********************************************************************************/


#ifndef OSCL_UTF8CONV_H
#define OSCL_UTF8CONV_H

#ifndef OSCL_BASE_INCLUDED_H
#include "oscl_base.h"
#endif

// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Function prototypes
/*!
    \brief Convert UTF8 byte sequence to Unicode string

    The function converts a UTF8 byte sequence (or ASCII sequence) to a Unicode string.
    The length of the input UTF8 byte sequence is given explicitly. Conversion stops
    under one of two conditions:
    (A) The whole input UTF8 byte sequence has been successfully converted.
    (B) The output buffer is too small for the output, or a parse error occurs.
    In case of (A), it adds a terminating '\0' at the end of the output Unicode string
    and returns the length of the output Unicode string (not counting the terminating '\0').
    In case of (B), it converts as much as possible into the output buffer, adds a
    terminating '\0' at the end of the output Unicode string, and returns 0
    (if outLength is less than or equal to 0, no '\0' is added and 0 is returned).

    NOTE(review): under this contract an empty input (inLength == 0) would presumably
    also return 0, which a caller cannot tell apart from failure -- confirm against
    the implementation.

    \param input       Ptr to the input UTF8 byte sequence. '\0' termination is not necessary.
    \param inLength    The length of the input UTF8 byte sequence, not counting the terminating '\0' (if any).
    \param output      Ptr to the output buffer into which the output Unicode string is written.
    \param outLength   The size of the output buffer, i.e. the maximum number of oscl_wchar that can be written to it.
    \return            Length of output (excludes '\0') : all of the input was converted and '\0' was appended to the output;
                       0 : insufficient buffer or error in conversion
*/

OSCL_IMPORT_REF int32 oscl_UTF8ToUnicode(const char *input, int32 inLength, oscl_wchar *output, int32 outLength);


// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
// Function prototypes
/*!
    \brief Convert Unicode string to UTF8 byte sequence

    The function converts a Unicode string to a UTF8 byte sequence.
    The length of the input Unicode string is given explicitly. Conversion stops
    under one of two conditions:
    (A) The whole input Unicode string has been successfully converted.
    (B) The destination buffer is too small for the output.
    In case of (A), it adds a terminating '\0' at the end of the output UTF8 byte sequence
    and returns the length of the output UTF8 byte sequence (not counting the terminating '\0').
    In case of (B), it converts as much as possible into the output buffer, adds a
    terminating '\0' at the end of the output UTF8 byte sequence, and returns 0
    (if outLength is less than or equal to 0, no '\0' is added and 0 is returned).

    NOTE(review): under this contract an empty input (inLength == 0) would presumably
    also return 0, which a caller cannot tell apart from failure -- confirm against
    the implementation.

    NOTE(review): the \return text below also mentions "error in conversion", while the
    description above lists only the buffer-size condition -- confirm against the
    implementation which input errors, if any, return 0.

    \param input       Ptr to the input Unicode string. '\0' termination is not necessary.
    \param inLength    The length of the input Unicode string, not counting the terminating '\0' (if any).
    \param output      Ptr to the output buffer into which the output UTF8 byte sequence is written.
    \param outLength   The size of the output buffer, i.e. the maximum number of char that can be written to it.
    \return            Length of output (excludes '\0') : all of the input was converted and '\0' was appended to the output;
                       0 : insufficient buffer or error in conversion
*/

OSCL_IMPORT_REF int32 oscl_UnicodeToUTF8(const oscl_wchar *input, int32 inLength, char *output, int32 outLength);

#endif /* OSCL_UTF8CONV_H */

/*! @} */