• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2 // Copyright (C) 2022 Beken Corporation
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <common/bk_typedef.h>
19 #include <common/bk_include.h>
20 #include <os/mem.h>
21 
22 #if CONFIG_USE_CONV_UTF8
23 #include "conv_utf8_gb2312_table.h"
24 #include "conv_utf8_pub.h"
conv_utf8_for_gb2312(unsigned char * pin,unsigned char * pout)25 static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
26 {
27 	unsigned char hi_bits, low_bit;
28 	const unsigned char *utf8;
29 	unsigned short gb_code, dist_cnt;
30 
31 	hi_bits = *pin & 0xff;
32 	low_bit = *(pin + 1) & 0xff;
33 
34 	// chinese char start from 0xB0A1 to 0xF7FE
35 	if ((hi_bits < 0xB0) || (hi_bits > 0xF7))
36 		return 0;
37 	if ((low_bit < 0xA1) || (low_bit > 0xFE))
38 		return 0;
39 
40 	gb_code = (hi_bits << 8) + low_bit;
41 
42 	// there are 0xa2 banks between two district
43 	dist_cnt = hi_bits - 0xB0;
44 	dist_cnt = dist_cnt * (0xA1 + 0x01);
45 
46 	// shift pos in table
47 	gb_code = gb_code - 0xB0A1 - dist_cnt;
48 	utf8 = t_gb2312_utf8[gb_code];
49 
50 	memcpy(pout, utf8, 3);
51 
52 	return 1;
53 }
54 
/*
 * Convert a NUL-terminated string that mixes single bytes and double-byte
 * GB2312 Chinese characters into a newly allocated UTF-8 string.
 *
 * Every byte pair accepted by conv_utf8_for_gb2312() becomes 3 output
 * bytes; any other byte is copied through unchanged.
 *
 * input: NUL-terminated source string (not modified).
 *
 * Returns a buffer obtained from os_malloc() holding the NUL-terminated
 * result, which the caller must release, or NULL when input is NULL or
 * the allocation fails.
 */
unsigned char *conv_utf8(unsigned char *input)
{
	size_t in_len, out_size;
	unsigned char *ptr, *out_bak, *out;

	if (!input)
		return NULL;

	in_len = strlen((char *)input);

	/*
	 * Worst case: every byte pair expands to 3 bytes and an odd trailing
	 * byte is copied as-is, plus one byte for the terminator. Without the
	 * (in_len % 2) term odd-length inputs overflow the buffer by one byte.
	 */
	out_size = (in_len / 2) * 3 + (in_len % 2) + 1;

	out_bak = (unsigned char *)os_malloc(out_size);
	if (!out_bak)
		return NULL;

	ptr = input;
	out = out_bak;

	while (*ptr) {
		if (conv_utf8_for_gb2312(ptr, out)) {
			// one GB2312 character: 2 bytes in, 3 bytes out
			out += 3;
			ptr += 2;
		} else {
			// anything else is passed through byte for byte
			*out = *ptr;
			out++;
			ptr++;
		}
	}
	*out = 0;

	return out_bak;
}
90 
SearchCodeTable(unsigned short unicodeKey)91 unsigned short SearchCodeTable(unsigned short unicodeKey)
92 {
93 	int first = 0;
94 	int end = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;
95 	int mid = 0;
96 
97 	while (first <= end) {
98 		mid = (first + end) / 2;
99 
100 		if (unicode_to_gb2312[mid][0] == unicodeKey)
101 			return unicode_to_gb2312[mid][1];
102 		else if (unicode_to_gb2312[mid][0] > unicodeKey)
103 			end = mid - 1;
104 		else
105 			first = mid + 1;
106 	}
107 	return 0;
108 }
109 
110 
/*
 * Count the consecutive 1 bits at the top of firstCh, starting from bit 7.
 *
 * For a UTF-8 lead byte this is the length of the sequence in bytes
 * (2 for 110xxxxx, 3 for 1110xxxx, ...). A plain ASCII byte (0xxxxxxx)
 * gives 0 and a continuation byte (10xxxxxx) gives 1.
 *
 * Returns a value in the range 0..8.
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
	int ones = 0;
	int bit;

	for (bit = 7; bit >= 0; bit--) {
		if (((firstCh >> bit) & 0x01) == 0)
			break;
		ones++;
	}

	return ones;
}
122 
/*
 * Convert a NUL-terminated UTF-8 string to GB2312 in place.
 *
 * Handling per lead byte (as classified by GetUtf8ByteNumForWord()):
 *   - ASCII bytes and 2-byte sequences are copied through unchanged;
 *   - 3-byte sequences are decoded to a 16-bit code point and replaced by
 *     the 2-byte value returned by SearchCodeTable(), high byte first;
 *   - 4..6-byte sequences are dropped from the output;
 *   - stray continuation bytes, invalid lead bytes and 2/3-byte sequences
 *     cut short by the end of the string are dropped one byte at a time.
 *
 * NOTE(review): SearchCodeTable() returns 0 for an unmapped code point, in
 * which case two NUL bytes are written and the result appears truncated at
 * that position to C string consumers. Kept as in the original behavior.
 *
 * utf8: string to convert; it is overwritten with the result, which is
 *       never longer than the input.
 *
 * Returns utf8 on success, or NULL when utf8 is NULL, empty, or the
 * temporary buffer cannot be allocated (utf8 is then left unmodified).
 */
char *Utf8ToGb2312(char *utf8)
{
	const unsigned char *src = (const unsigned char *)utf8;
	char *temp = NULL;
	int len, i = 0, j = 0;

	if (!utf8)
		return NULL;

	len = strlen(utf8);
	if (len <= 0)
		return NULL;

	// Output is at most len bytes; one more byte is needed for the
	// terminator (an all-ASCII input produces exactly len bytes).
	temp = os_malloc((len + 1) * sizeof(char));
	if (!temp)
		return NULL;

	// Walk the input one UTF-8 sequence at a time
	while (i < len) {
		int nbyte = GetUtf8ByteNumForWord(src[i]);
		int byteCount = 1;
		uint16_t unicodeKey = 0;
		uint16_t gbKey = 0;

		// A 2- or 3-byte sequence that runs past the end of the string
		// must not be decoded: doing so would read beyond the terminator
		// and emit more bytes than were consumed.
		if ((nbyte == 2 || nbyte == 3) && (nbyte > len - i)) {
			i += 1;
			continue;
		}

		switch (nbyte) {
		case 0:
			// plain ASCII
			temp[j] = utf8[i];
			byteCount = 1;
			j += 1;
			break;

		case 2:
			// 2-byte sequence: copied through unchanged
			temp[j] = utf8[i];
			temp[j + 1] = utf8[i + 1];
			byteCount = 2;
			j += 2;
			break;

		case 3:
			// UTF-8 -> 16-bit code point: 1110xxxx 10yyyyyy 10zzzzzz
			// yields xxxxyyyy yyzzzzzz. Built arithmetically so the
			// result does not depend on the target's byte order.
			unicodeKey = (uint16_t)(((src[i] & 0x0F) << 12) |
						((src[i + 1] & 0x3F) << 6) |
						(src[i + 2] & 0x3F));

			// Look up the GB2312 code and store it high byte first
			gbKey = SearchCodeTable(unicodeKey);
			temp[j] = (gbKey & 0xff00) >> 8;
			temp[j + 1] = (gbKey & 0x00ff);

			byteCount = 3;
			j += 2;
			break;

		case 4:
		case 5:
		case 6:
			// 4..6-byte sequences are not converted; skip them
			byteCount = nbyte;
			break;

		default:
			// stray continuation byte or invalid lead byte; skip it
			byteCount = 1;
			break;
		}

		i += byteCount;
	}

	temp[j++] = '\0';
	memcpy(utf8, temp, j);

	os_free(temp);

	return utf8;
}
201 
202 
203 #endif // CONFIG_USE_CONV_UTF8
204 
205