• Home
  • Raw
  • Download

Lines Matching +full:is +full:- +full:wsl

13   // we define it if the system supports files with non-utf8 symbols:
19 …MY_UTF8_START(n) - is a base value for start byte (head), if there are (n) additional bytes after …
28 5 : 0xFC : 31 : UCS-4 : wcstombs() in ubuntu is limited to that value
29 6 : 0xFE : 36 : We can use it, if we want to encode any 32-bit value
33 #define MY_UTF8_START(n) (0x100 - (1 << (7 - (n))))
37 { numBytes = (n); val -= MY_UTF8_START(n); }
42 if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence,
43 when we convert wchar_t strings to UTF-8:
44 (_UTF8_NUM_TAIL_BYTES_MAX == 3) : (21-bits points) - Unicode
45 (_UTF8_NUM_TAIL_BYTES_MAX == 5) : (31-bits points) - UCS-4
46 (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack)
69 else { numBytes = 3; val -= MY_UTF8_START(3); }
78 /* we use 128 bytes block in 16-bit BMP-PLANE to encode non-UTF-8 Escapes
79 Also we can use additional HIGH-PLANE (we use 21-bit points above 0x1f0000)
81 RAW-UTF-8 <-> internal wchar_t utf-16 strings <-> RAW-UTF-8
93 ef 80 - ee be 80 (3-bytes utf-8) : similar to WSL
94 ef ff - ee bf bf
96 1f ef 80 - f7 be be 80 (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
97 1f ef ff - f7 be bf bf (4-bytes utf-8) : last 4-bytes utf-8 plane (out of Unicode)
105 if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is set)
107 if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH)
109 we can restore any 8-bit Escape from ESCAPE-PLANE-21 plane.
110 But ESCAPE-PLANE-21 point cannot be stored to utf-16 (7z archive)
111 So we still need a way to extract 8-bit Escapes and BMP-Escapes-8
112 from same BMP-Escapes-16 stored in 7z.
113 And if we want to restore any 8-bit from 7z archive,
114 we still must use Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT for (utf-8 -> utf-16)
115 Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21
119 we must convert original 3-bytes utf-8 BMP-Escape point to sequence
120 of 3 BMP-Escape-16 points with Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT
121 so we can extract original RAW-UTF-8 from UTF-16 later.
155 size--; in Check_Buf()
192 c2 -= 0x80; in Check_Buf()
202 while (--numBytes); in Check_Buf()
224 size -= pos; in Check_Buf()
283 val |= (c2 - 0x80);
286 while (--numBytes);
288 if (val < MY_UTF8_RANGE(pos - 1))
297 // in case of UTF-8 error we have two ways:
298 // 21.01- : old : 0xfffd: REPLACEMENT CHARACTER : old version
299 // 21.02+ : new : 0xef00 + (c) : similar to WSL scheme for low symbols
321 // we store UTF-16 in wchar_t strings. So we use surrogates for big unicode points:
323 // for debug purposes only we can store UTF-32 in wchar_t:
324 // #define START_POINT_FOR_SURROGATE ((UInt32)0 - 1)
328 WIN32 MultiByteToWideChar(CP_UTF8) emits 0xfffd point, if utf-8 error was found.
330 It doesn't emit single 0xfffd from 3-4 src bytes.
334 That scheme is similar to Escape scheme, but we emit 0xfffd
336 3) emit single 0xfffd from 1-2 incorrect bytes, as WIN32 MultiByteToWideChar scheme
378 c2 -= 0x80; in Utf8_To_Utf16()
398 while (--numBytes); in Utf8_To_Utf16()
412 if (val < MY_UTF8_RANGE(pos - 1)) in Utf8_To_Utf16()
421 // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes) in Utf8_To_Utf16()
428 We don't expect virtual Escape-21 points in UTF-8 stream. in Utf8_To_Utf16()
429 And we don't check for Escape-21. in Utf8_To_Utf16()
430 So utf8-Escape-21 will be converted to another 3 utf16-Escape-21 points. in Utf8_To_Utf16()
431 Maybe we could convert virtual utf8-Escape-21 to one utf16-Escape-21 point in some cases? in Utf8_To_Utf16()
440 // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes) in Utf8_To_Utf16()
453 // We will emit utf16-Escape-16-21 point from each source byte in Utf8_To_Utf16()
459 dest[destPos + 0] = (wchar_t)(0xd800 - (0x10000 >> 10) + (val >> 10)); in Utf8_To_Utf16()
475 size_t size = (size_t)(srcLim - src); in Utf16_To_Utf8_Calc()
508 // it's hack to UTF-8 encoding in Utf16_To_Utf8_Calc()
569 if (wchar_t is 32-bit) in Utf16_To_Utf8()
570 && (Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE is set) in Utf16_To_Utf8()
571 && (point is virtual escape plane) in Utf16_To_Utf8()
572 we extract 8-bit byte from virtual HIGH-ESCAPE PLANE. in Utf16_To_Utf8()
582 /* if (Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE is defined) in Utf16_To_Utf8()
583 we extract 8-bit byte from BMP-ESCAPE PLANE. */ in Utf16_To_Utf8()
596 // it's hack to UTF-8 encoding in Utf16_To_Utf8()
603 val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000; in Utf16_To_Utf8()
651 numBits -= 6; in Utf16_To_Utf8()
714 if (destLen != (size_t)(destEnd - destStart)) in ConvertUnicodeToUTF8_Flags()
717 // dest.ReleaseBuf_SetEnd((unsigned)(destEnd - destStart)); in ConvertUnicodeToUTF8_Flags()
719 printf("\n(destEnd - destStart) = %d\n", (unsigned)(destEnd - destStart)); in ConvertUnicodeToUTF8_Flags()
755 if (destLen != (size_t)(destEnd - (char *)(void *)(Byte *)dest)) in Convert_Unicode_To_UTF8_Buf()
773 // printf("\nSurrogate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
791 w -= 0x10000;
831 // it's hack to UTF-8 encoding