• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "ustring.h"
17 
18 #include <iterator>
19 
20 namespace panda::es2panda::util {
21 
Mutf8() const22 std::string StringView::Mutf8() const noexcept
23 {
24     std::string mutf8;
25     mutf8.reserve(sv_.size());
26 
27     Iterator iter(*this);
28 
29     while (iter.HasNext()) {
30         Mutf8Encode(&mutf8, iter.Next());
31     }
32 
33     return mutf8;
34 }
35 
DecodeSurrogates(char32_t high,char32_t low)36 char32_t StringView::DecodeSurrogates(char32_t high, char32_t low)
37 {
38     constexpr auto OFFSET = 10;
39     char32_t result = (high - Constants::SURROGATE_HIGH_MIN) << OFFSET;
40     result += low - Constants::SURROGATE_LOW_MAX;
41     result += Constants::CELESTIAL_OFFSET;
42     return result;
43 }
44 
EncodeSurrogate(char32_t cp)45 std::tuple<char32_t, char32_t> StringView::EncodeSurrogate(char32_t cp)
46 {
47     constexpr auto OFFSET = 10;
48     char32_t cu1 = ((cp - Constants::CELESTIAL_OFFSET) >> OFFSET) | Constants::SURROGATE_HIGH_MIN;
49     char32_t cu2 = (cp & Constants::SURROGATE_LOW_MARKER) | Constants::SURROGATE_HIGH_MAX;
50 
51     return {cu1, cu2};
52 }
53 
SkipCp() const54 void StringView::Iterator::SkipCp() const
55 {
56     if (!HasNext()) {
57         return;
58     }
59 
60     char32_t cu0 = static_cast<uint8_t>(*iter_++);
61 
62     if (cu0 < Constants::UTF8_1BYTE_LIMIT) {
63         return;
64     }
65 
66     if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER) {
67         iter_ += 1U;
68         return;
69     }
70 
71     if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER) {
72         iter_ += 2U;
73         return;
74     }
75 
76     if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) &&
77         (cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT)) {
78         iter_ += 3U;
79         return;
80     }
81 }
82 
83 }  // namespace panda::es2panda::util
84 
85 // NOLINTNEXTLINE(cert-dcl58-cpp)
86 namespace std {
87 
operator <<(ostream & os,const panda::es2panda::util::StringView & us)88 ostream &operator<<(ostream &os, const panda::es2panda::util::StringView &us)
89 {
90     os << us.Utf8();
91     return os;
92 }
93 
94 }  // namespace std
95