1 /*
2 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "ustring.h"
17
18 namespace panda::es2panda::util {
19
Mutf8() const20 std::string StringView::Mutf8() const noexcept
21 {
22 std::string mutf8;
23 mutf8.reserve(sv_.size());
24
25 Iterator iter(*this);
26
27 while (iter.HasNext()) {
28 Mutf8Encode(&mutf8, iter.Next());
29 }
30
31 return mutf8;
32 }
33
DecodeSurrogates(char32_t high,char32_t low)34 char32_t StringView::DecodeSurrogates(char32_t high, char32_t low)
35 {
36 constexpr auto OFFSET = 10;
37 char32_t result = (high - Constants::SURROGATE_HIGH_MIN) << OFFSET;
38 result += low - Constants::SURROGATE_LOW_MAX;
39 result += Constants::CELESTIAL_OFFSET;
40 return result;
41 }
42
EncodeSurrogate(char32_t cp)43 std::tuple<char32_t, char32_t> StringView::EncodeSurrogate(char32_t cp)
44 {
45 constexpr auto OFFSET = 10;
46 char32_t cu1 = ((cp - Constants::CELESTIAL_OFFSET) >> OFFSET) | Constants::SURROGATE_HIGH_MIN;
47 char32_t cu2 = (cp & Constants::SURROGATE_LOW_MARKER) | Constants::SURROGATE_HIGH_MAX;
48
49 return {cu1, cu2};
50 }
51
SkipCp() const52 void StringView::Iterator::SkipCp() const
53 {
54 if (!HasNext()) {
55 return;
56 }
57
58 char32_t cu0 = static_cast<uint8_t>(*iter_++);
59
60 if (cu0 < Constants::UTF8_1BYTE_LIMIT) {
61 return;
62 }
63
64 if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER) {
65 iter_ += 1U;
66 return;
67 }
68
69 if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER) {
70 iter_ += 2U;
71 return;
72 }
73
74 if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) &&
75 (cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT)) {
76 iter_ += 3U;
77 return;
78 }
79 }
80
81 } // namespace panda::es2panda::util
82
83 // NOLINTNEXTLINE(cert-dcl58-cpp)
84 namespace std {
85
operator <<(ostream & os,const panda::es2panda::util::StringView & us)86 ostream &operator<<(ostream &os, const panda::es2panda::util::StringView &us)
87 {
88 os << us.Utf8();
89 return os;
90 }
91
92 } // namespace std
93