1 /** @file
2 Implementation of translation upon VT-UTF8.
3
4 Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
5 This program and the accompanying materials
6 are licensed and made available under the terms and conditions of the BSD License
7 which accompanies this distribution. The full text of the license may be found at
8 http://opensource.org/licenses/bsd-license.php
9
10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
12
13 **/
14
15 #include "Terminal.h"
16
17 /**
18 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
19 and insert them into Unicode FIFO.
20
21 @param TerminalDevice The terminal device.
22
23 **/
24 VOID
VTUTF8RawDataToUnicode(IN TERMINAL_DEV * TerminalDevice)25 VTUTF8RawDataToUnicode (
26 IN TERMINAL_DEV *TerminalDevice
27 )
28 {
29 UTF8_CHAR Utf8Char;
30 UINT8 ValidBytes;
31 UINT16 UnicodeChar;
32
33 ValidBytes = 0;
34 //
35 // pop the raw data out from the raw fifo,
36 // and translate it into unicode, then push
37 // the unicode into unicode fifo, until the raw fifo is empty.
38 //
39 while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
40
41 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
42
43 if (ValidBytes < 1 || ValidBytes > 3) {
44 continue;
45 }
46
47 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
48
49 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
50 }
51 }
52
53 /**
54 Get one valid VT-UTF8 characters set from Raw Data FIFO.
55
56 @param Utf8Device The terminal device.
57 @param Utf8Char Returned valid VT-UTF8 characters set.
58 @param ValidBytes The count of returned VT-VTF8 characters.
59 If ValidBytes is zero, no valid VT-UTF8 returned.
60
61 **/
62 VOID
GetOneValidUtf8Char(IN TERMINAL_DEV * Utf8Device,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)63 GetOneValidUtf8Char (
64 IN TERMINAL_DEV *Utf8Device,
65 OUT UTF8_CHAR *Utf8Char,
66 OUT UINT8 *ValidBytes
67 )
68 {
69 UINT8 Temp;
70 UINT8 Index;
71 BOOLEAN FetchFlag;
72
73 Temp = 0;
74 Index = 0;
75 FetchFlag = TRUE;
76
77 //
78 // if no valid Utf8 char is found in the RawFiFo,
79 // then *ValidBytes will be zero.
80 //
81 *ValidBytes = 0;
82
83 while (!IsRawFiFoEmpty (Utf8Device)) {
84
85 RawFiFoRemoveOneKey (Utf8Device, &Temp);
86
87 switch (*ValidBytes) {
88
89 case 0:
90 if ((Temp & 0x80) == 0) {
91 //
92 // one-byte utf8 char
93 //
94 *ValidBytes = 1;
95
96 Utf8Char->Utf8_1 = Temp;
97
98 FetchFlag = FALSE;
99
100 } else if ((Temp & 0xe0) == 0xc0) {
101 //
102 // two-byte utf8 char
103 //
104 *ValidBytes = 2;
105
106 Utf8Char->Utf8_2[1] = Temp;
107
108 } else if ((Temp & 0xf0) == 0xe0) {
109 //
110 // three-byte utf8 char
111 //
112 *ValidBytes = 3;
113
114 Utf8Char->Utf8_3[2] = Temp;
115
116 Index++;
117
118 } else {
119 //
120 // reset *ValidBytes to zero, let valid utf8 char search restart
121 //
122 *ValidBytes = 0;
123 }
124
125 break;
126
127 case 2:
128 //
129 // two-byte utf8 char go on
130 //
131 if ((Temp & 0xc0) == 0x80) {
132
133 Utf8Char->Utf8_2[0] = Temp;
134
135 FetchFlag = FALSE;
136
137 } else {
138
139 *ValidBytes = 0;
140 }
141 break;
142
143 case 3:
144 //
145 // three-byte utf8 char go on
146 //
147 if ((Temp & 0xc0) == 0x80) {
148 if (Index == 1) {
149 Utf8Char->Utf8_3[1] = Temp;
150 Index++;
151 } else {
152 Utf8Char->Utf8_3[0] = Temp;
153 FetchFlag = FALSE;
154 }
155 } else {
156 //
157 // reset *ValidBytes and Index to zero, let valid utf8 char search restart
158 //
159 *ValidBytes = 0;
160 Index = 0;
161 }
162 break;
163
164 default:
165 break;
166 }
167
168 if (!FetchFlag) {
169 break;
170 }
171 }
172
173 return ;
174 }
175
176 /**
177 Translate VT-UTF8 characters into one Unicode character.
178
179 UTF8 Encoding Table
180 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
181 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
182 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
183 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
184
185
186 @param Utf8Char VT-UTF8 character set needs translating.
187 @param ValidBytes The count of valid VT-UTF8 characters.
188 @param UnicodeChar Returned unicode character.
189
190 **/
191 VOID
Utf8ToUnicode(IN UTF8_CHAR Utf8Char,IN UINT8 ValidBytes,OUT CHAR16 * UnicodeChar)192 Utf8ToUnicode (
193 IN UTF8_CHAR Utf8Char,
194 IN UINT8 ValidBytes,
195 OUT CHAR16 *UnicodeChar
196 )
197 {
198 UINT8 UnicodeByte0;
199 UINT8 UnicodeByte1;
200 UINT8 Byte0;
201 UINT8 Byte1;
202 UINT8 Byte2;
203
204 *UnicodeChar = 0;
205
206 //
207 // translate utf8 code to unicode, in terminal standard,
208 // up to 3 bytes utf8 code is supported.
209 //
210 switch (ValidBytes) {
211 case 1:
212 //
213 // one-byte utf8 code
214 //
215 *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
216 break;
217
218 case 2:
219 //
220 // two-byte utf8 code
221 //
222 Byte0 = Utf8Char.Utf8_2[0];
223 Byte1 = Utf8Char.Utf8_2[1];
224
225 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
226 UnicodeByte1 = (UINT8) ((Byte1 >> 2) & 0x07);
227 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
228 break;
229
230 case 3:
231 //
232 // three-byte utf8 code
233 //
234 Byte0 = Utf8Char.Utf8_3[0];
235 Byte1 = Utf8Char.Utf8_3[1];
236 Byte2 = Utf8Char.Utf8_3[2];
237
238 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
239 UnicodeByte1 = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
240 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
241
242 default:
243 break;
244 }
245
246 return ;
247 }
248
249 /**
250 Translate one Unicode character into VT-UTF8 characters.
251
252 UTF8 Encoding Table
253 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding
254 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx
255 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx
256 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx
257
258
259 @param Unicode Unicode character need translating.
260 @param Utf8Char Return VT-UTF8 character set.
261 @param ValidBytes The count of valid VT-UTF8 characters. If
262 ValidBytes is zero, no valid VT-UTF8 returned.
263
264 **/
265 VOID
UnicodeToUtf8(IN CHAR16 Unicode,OUT UTF8_CHAR * Utf8Char,OUT UINT8 * ValidBytes)266 UnicodeToUtf8 (
267 IN CHAR16 Unicode,
268 OUT UTF8_CHAR *Utf8Char,
269 OUT UINT8 *ValidBytes
270 )
271 {
272 UINT8 UnicodeByte0;
273 UINT8 UnicodeByte1;
274 //
275 // translate unicode to utf8 code
276 //
277 UnicodeByte0 = (UINT8) Unicode;
278 UnicodeByte1 = (UINT8) (Unicode >> 8);
279
280 if (Unicode < 0x0080) {
281
282 Utf8Char->Utf8_1 = (UINT8) (UnicodeByte0 & 0x7f);
283 *ValidBytes = 1;
284
285 } else if (Unicode < 0x0800) {
286 //
287 // byte sequence: high -> low
288 // Utf8_2[0], Utf8_2[1]
289 //
290 Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
291 Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
292
293 *ValidBytes = 2;
294
295 } else {
296 //
297 // byte sequence: high -> low
298 // Utf8_3[0], Utf8_3[1], Utf8_3[2]
299 //
300 Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
301 Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
302 Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
303
304 *ValidBytes = 3;
305 }
306 }
307
308
309 /**
310 Check if input string is valid VT-UTF8 string.
311
312 @param TerminalDevice The terminal device.
313 @param WString The input string.
314
315 @retval EFI_SUCCESS If all input characters are valid.
316
317 **/
318 EFI_STATUS
VTUTF8TestString(IN TERMINAL_DEV * TerminalDevice,IN CHAR16 * WString)319 VTUTF8TestString (
320 IN TERMINAL_DEV *TerminalDevice,
321 IN CHAR16 *WString
322 )
323 {
324 //
325 // to utf8, all kind of characters are supported.
326 //
327 return EFI_SUCCESS;
328 }
329