1 /*
2 // Copyright (C) 2022 Beken Corporation
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15 #include <string.h>
16 #include <stdlib.h>
17 #include <stdio.h>
18 #include <common/bk_typedef.h>
19 #include <common/bk_include.h>
20 #include <os/mem.h>
21
22 #if CONFIG_USE_CONV_UTF8
23 #include "conv_utf8_gb2312_table.h"
24 #include "conv_utf8_pub.h"
conv_utf8_for_gb2312(unsigned char * pin,unsigned char * pout)25 static int conv_utf8_for_gb2312(unsigned char *pin, unsigned char *pout)
26 {
27 unsigned char hi_bits, low_bit;
28 const unsigned char *utf8;
29 unsigned short gb_code, dist_cnt;
30
31 hi_bits = *pin & 0xff;
32 low_bit = *(pin + 1) & 0xff;
33
34 // chinese char start from 0xB0A1 to 0xF7FE
35 if ((hi_bits < 0xB0) || (hi_bits > 0xF7))
36 return 0;
37 if ((low_bit < 0xA1) || (low_bit > 0xFE))
38 return 0;
39
40 gb_code = (hi_bits << 8) + low_bit;
41
42 // there are 0xa2 banks between two district
43 dist_cnt = hi_bits - 0xB0;
44 dist_cnt = dist_cnt * (0xA1 + 0x01);
45
46 // shift pos in table
47 gb_code = gb_code - 0xB0A1 - dist_cnt;
48 utf8 = t_gb2312_utf8[gb_code];
49
50 memcpy(pout, utf8, 3);
51
52 return 1;
53 }
54
/*
 * Produce a newly allocated copy of a NUL-terminated string in which every
 * two-byte GB2312 Chinese character (as recognised by conv_utf8_for_gb2312)
 * is replaced by its three-byte table translation; all other bytes are
 * copied through unchanged.
 *
 * input: NUL-terminated source string; not modified.
 *
 * Returns the converted, NUL-terminated buffer obtained from os_malloc
 * (ownership passes to the caller; presumably released with os_free),
 * or NULL when input is NULL or the allocation fails.
 */
unsigned char *conv_utf8(unsigned char *input)
{
	size_t in_len, out_size;
	unsigned char *ptr, *out_bak, *out;

	if (!input)
		return NULL;

	in_len = strlen((char *)input);

	// Worst case: every byte pair expands from 2 to 3 bytes, and an odd
	// trailing byte is copied as-is. The former "(len / 2) * 3 + 1" left
	// no room for that odd byte and overflowed (e.g. "a" needs 2 bytes).
	out_size = (in_len / 2) * 3 + (in_len % 2) + 1;

	out_bak = (unsigned char *)os_malloc(out_size);
	if (!out_bak)
		return NULL;

	ptr = input;
	out = out_bak;

	while (*ptr) {
		if (conv_utf8_for_gb2312(ptr, out)) {
			// one GB2312 pair consumed, three bytes produced
			out += 3;
			ptr += 2;
		} else {
			// not a convertible pair: pass the byte through
			*out++ = *ptr++;
		}
	}
	*out = 0;

	return out_bak;
}
90
SearchCodeTable(unsigned short unicodeKey)91 unsigned short SearchCodeTable(unsigned short unicodeKey)
92 {
93 int first = 0;
94 int end = sizeof(unicode_to_gb2312) / (2 * sizeof(uint16_t)) - 1;
95 int mid = 0;
96
97 while (first <= end) {
98 mid = (first + end) / 2;
99
100 if (unicode_to_gb2312[mid][0] == unicodeKey)
101 return unicode_to_gb2312[mid][1];
102 else if (unicode_to_gb2312[mid][0] > unicodeKey)
103 end = mid - 1;
104 else
105 first = mid + 1;
106 }
107 return 0;
108 }
109
110
/*
 * Count the consecutive 1 bits at the top of a byte, starting from the
 * most significant bit.
 *
 * firstCh: byte to inspect (the caller passes the first byte of a UTF-8
 *          sequence).
 *
 * Returns 0 for a byte whose top bit is clear, otherwise the number of
 * leading 1 bits (up to 8 for 0xFF).
 */
int GetUtf8ByteNumForWord(uint8_t firstCh)
{
	int ones = 0;
	uint8_t mask;

	// walk the mask down from bit 7 until a 0 bit (or the end) is hit
	for (mask = 0x80; (firstCh & mask) != 0; mask >>= 1)
		ones++;

	return ones;
}
122
/*
 * Convert a NUL-terminated UTF-8 string to GB2312 in place.
 *
 * Handling per lead byte (classified by GetUtf8ByteNumForWord):
 *   - single-byte (ASCII) characters are copied unchanged;
 *   - 2-byte sequences are copied through unchanged (not translated);
 *   - 3-byte sequences are decoded to a 16-bit code point and replaced by
 *     the two-byte value returned by SearchCodeTable, high byte first;
 *   - 4..6-byte sequences are skipped without producing output;
 *   - any other lead byte (stray continuation byte, 0xFE, 0xFF) is dropped.
 * A 2- or 3-byte sequence cut short by the end of the string ends the
 * conversion; the incomplete tail is discarded.
 *
 * NOTE: SearchCodeTable returns 0 for a code point it does not contain, in
 * which case two zero bytes are written and the result, read as a C
 * string, ends at that position.
 *
 * utf8: string to convert; overwritten with the result, which is never
 *       longer than the input.
 *
 * Returns utf8 on success, or NULL when utf8 is NULL, the string is empty,
 * or the scratch buffer cannot be allocated (utf8 is left unmodified).
 */
char *Utf8ToGb2312(char *utf8)
{
	const unsigned char *src = (const unsigned char *)utf8;
	char *temp = NULL;
	int len, i = 0, j = 0;

	if (!utf8)
		return NULL;

	len = strlen(utf8);
	if (len <= 0)
		return NULL;

	// Output is at most len bytes; the extra byte holds the terminator
	// (allocating only len bytes overflowed for all-ASCII input).
	temp = os_malloc((len + 1) * sizeof(char));
	if (!temp)
		return NULL;

	// walk the input one UTF-8 sequence at a time
	while (i < len) {
		int nbyte = GetUtf8ByteNumForWord(src[i]);
		int byteCount;
		uint16_t unicodeKey;
		uint16_t gbKey;

		// Never read past the terminator for a truncated sequence.
		if ((nbyte == 2 || nbyte == 3) && (nbyte > len - i))
			break;

		switch (nbyte) {
		case 0:
			temp[j] = utf8[i];
			byteCount = 1;
			j += 1;
			break;

		case 2:
			temp[j] = utf8[i];
			temp[j + 1] = utf8[i + 1];
			byteCount = 2;
			j += 2;
			break;

		case 3:
			// UTF-8 -> 16-bit code point, assembled arithmetically so the
			// result does not depend on the CPU byte order.
			unicodeKey = (uint16_t)(((src[i] & 0x0F) << 12) |
						((src[i + 1] & 0x3F) << 6) |
						(src[i + 2] & 0x3F));

			// look up the GB2312 value and store it high byte first
			gbKey = SearchCodeTable(unicodeKey);
			temp[j] = (char)((gbKey & 0xff00) >> 8);
			temp[j + 1] = (char)(gbKey & 0x00ff);

			byteCount = 3;
			j += 2;
			break;

		case 4:
		case 5:
		case 6:
			// long sequences are not converted: skip them entirely
			byteCount = nbyte;
			break;

		default:
			// not a valid lead byte: drop it and resynchronise
			byteCount = 1;
			break;
		}

		i += byteCount;
	}

	temp[j++] = '\0';
	memcpy(utf8, temp, j);

	os_free(temp);

	return utf8;
}
201
202
203 #endif // CONFIG_USE_CONV_UTF8
204
205