UTFConvert.cpp - OpenGrok cross reference for /external/lzma/CPP/Common/UTFConvert.cpp

Lines Matching +full:is +full:- +full:wsl
13   // we define it if the system supports files with non-utf8 symbols:
19 …MY_UTF8_START(n) - is a base value for start byte (head), if there are (n) additional bytes after …
28   5 : 0xFC : 31 : UCS-4 : wcstombs() in ubuntu is limited to that value
29   6 : 0xFE : 36 : We can use it, if we want to encode any 32-bit value
33 #define MY_UTF8_START(n) (0x100 - (1 << (7 - (n))))
37     { numBytes = (n); val -= MY_UTF8_START(n); }
42    if (wchar_t is 32-bit), we can support large points in long UTF-8 sequence,
43    when we convert wchar_t strings to UTF-8:
44      (_UTF8_NUM_TAIL_BYTES_MAX == 3) : (21-bits points) - Unicode
45      (_UTF8_NUM_TAIL_BYTES_MAX == 5) : (31-bits points) - UCS-4
46      (_UTF8_NUM_TAIL_BYTES_MAX == 6) : (36-bit hack)
69     else { numBytes = 3; val -= MY_UTF8_START(3); }
78 /* we use 128 bytes block in 16-bit BMP-PLANE to encode non-UTF-8 Escapes
79    Also we can use additional HIGH-PLANE (we use 21-bit points above 0x1f0000)
81    RAW-UTF-8 <-> internal wchar_t utf-16 strings <-> RAW-UTF-8
93    ef 80 -    ee be 80 (3-bytes utf-8) : similar to WSL
94    ef ff -    ee bf bf
96 1f ef 80 - f7 be be 80 (4-bytes utf-8) : last  4-bytes utf-8 plane (out of Unicode)
97 1f ef ff - f7 be bf bf (4-bytes utf-8) : last  4-bytes utf-8 plane (out of Unicode)
105   if (Z7_UTF_FLAG_FROM_UTF8_USE_ESCAPE is set)
107     if (UTF_ESCAPE_PLANE is UTF_ESCAPE_PLANE_HIGH)
109       we can restore any 8-bit Escape from ESCAPE-PLANE-21 plane.
110       But ESCAPE-PLANE-21 point cannot be stored to utf-16 (7z archive)
111       So we still need a way to extract 8-bit Escapes and BMP-Escapes-8
112       from same BMP-Escapes-16 stored in 7z.
113       And if we want to restore any 8-bit from 7z archive,
114       we still must use Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT for (utf-8 -> utf-16)
115       Also we need additional Conversions to transform from utf-16 to utf-16-With-Escapes-21
119       we must convert original 3-bytes utf-8 BMP-Escape point to sequence
120       of 3 BMP-Escape-16 points with Z7_UTF_FLAG_FROM_UTF8_BMP_ESCAPE_CONVERT
121       so we can extract original RAW-UTF-8 from UTF-16 later.
155     size--;  in Check_Buf()
192       c2 -= 0x80;  in Check_Buf()
202     while (--numBytes);  in Check_Buf()
224     size -= pos;  in Check_Buf()
283       val |= (c2 - 0x80);
286     while (--numBytes);
288     if (val < MY_UTF8_RANGE(pos - 1))
297 // in case of UTF-8 error we have two ways:
298 // 21.01- : old : 0xfffd: REPLACEMENT CHARACTER : old version
299 // 21.02+ : new : 0xef00 + (c) : similar to WSL scheme for low symbols
321 // we store UTF-16 in wchar_t strings. So we use surrogates for big unicode points:
323 // for debug purposes only we can store UTF-32 in wchar_t:
324 // #define START_POINT_FOR_SURROGATE ((UInt32)0 - 1)
328   WIN32 MultiByteToWideChar(CP_UTF8) emits 0xfffd point, if utf-8 error was found.
330   It doesn't emit single 0xfffd from 3-4 src bytes.
334        That scheme is similar to Escape scheme, but we emit 0xfffd
336     3) emit single 0xfffd from 1-2 incorrect bytes, as WIN32 MultiByteToWideChar scheme
378       c2 -= 0x80;  in Utf8_To_Utf16()
398     while (--numBytes);  in Utf8_To_Utf16()
412     if (val < MY_UTF8_RANGE(pos - 1))  in Utf8_To_Utf16()
421         // We will emit 3 utf16-Escape-16-21 points from one Escape-16 point (3 bytes)  in Utf8_To_Utf16()
428        We don't expect virtual Escape-21 points in UTF-8 stream.  in Utf8_To_Utf16()
429        And we don't check for Escape-21.  in Utf8_To_Utf16()
430        So utf8-Escape-21 will be converted to another 3 utf16-Escape-21 points.  in Utf8_To_Utf16()
431        Maybe we could convert virtual utf8-Escape-21 to one utf16-Escape-21 point in some cases?  in Utf8_To_Utf16()
440         // We will emit 3 utf16-Escape-16-21 points from one Surrogate-16 point (3 bytes)  in Utf8_To_Utf16()
453         // We will emit utf16-Escape-16-21 point from each source byte  in Utf8_To_Utf16()
459         dest[destPos + 0] = (wchar_t)(0xd800 - (0x10000 >> 10) + (val >> 10));  in Utf8_To_Utf16()
475   size_t size = (size_t)(srcLim - src);  in Utf16_To_Utf8_Calc()
508       // it's hack to UTF-8 encoding  in Utf16_To_Utf8_Calc()
569        if (wchar_t is 32-bit)  in Utf16_To_Utf8()
570             && (Z7_UTF_FLAG_TO_UTF8_PARSE_HIGH_ESCAPE is set)  in Utf16_To_Utf8()
571             && (point is virtual escape plane)  in Utf16_To_Utf8()
572           we extract 8-bit byte from virtual HIGH-ESCAPE PLANE.  in Utf16_To_Utf8()
582     /* if (Z7_UTF_FLAG_TO_UTF8_EXTRACT_BMP_ESCAPE is defined)  in Utf16_To_Utf8()
583           we extract 8-bit byte from BMP-ESCAPE PLANE. */  in Utf16_To_Utf8()
596       // it's hack to UTF-8 encoding  in Utf16_To_Utf8()
603           val = (((val - 0xd800) << 10) | (c2 - 0xdc00)) + 0x10000;  in Utf16_To_Utf8()
651       numBits -= 6;  in Utf16_To_Utf8()
714   if (destLen != (size_t)(destEnd - destStart))  in ConvertUnicodeToUTF8_Flags()
717     // dest.ReleaseBuf_SetEnd((unsigned)(destEnd - destStart));  in ConvertUnicodeToUTF8_Flags()
719     printf("\n(destEnd - destStart) = %d\n", (unsigned)(destEnd - destStart));  in ConvertUnicodeToUTF8_Flags()
755   if (destLen != (size_t)(destEnd - (char *)(void *)(Byte *)dest))  in Convert_Unicode_To_UTF8_Buf()
773         // printf("\nSurrogate [%d]: %4x %4x -> ", i, (int)c, (int)c2);
791       w -= 0x10000;
831       // it's hack to UTF-8 encoding