• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /**
2  * Copyright (c) 2021-2025 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  * http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #ifndef ES2PANDA_UTIL_INCLUDE_USTRING_H
17 #define ES2PANDA_UTIL_INCLUDE_USTRING_H
18 
19 #include "util/es2pandaMacros.h"
20 #include "utils/arena_containers.h"
21 
22 #include <cstddef>
23 #include <limits>
24 #include <memory>
25 #include <string>
26 #include <string_view>
27 
28 namespace ark::es2panda::util {
29 class StringView {
30 public:
31     explicit StringView() noexcept = default;
StringView(const ArenaString * str)32     explicit StringView(const ArenaString *str) noexcept : sv_(*str) {}
33     // CC-OFFNXT(G.FMT.06-CPP,G.CLS.03-CPP) same as clang, project code style
34     // NOLINTNEXTLINE(google-explicit-constructor)
StringView(std::string_view sv)35     StringView(std::string_view sv) noexcept : sv_(sv) {}
36     // CC-OFFNXT(G.FMT.06-CPP, G.CLS.03-CPP) same as clang, project code style
37     // NOLINTNEXTLINE(google-explicit-constructor)
StringView(const char * str)38     StringView(const char *str) noexcept : sv_(str == nullptr ? "" : str) {}
39     DEFAULT_COPY_SEMANTIC(StringView);
40     DEFAULT_MOVE_SEMANTIC(StringView);
41     ~StringView() = default;
42 
43     bool operator==(const StringView &rhs) const noexcept
44     {
45         return sv_ == rhs.sv_;
46     }
47 
48     bool operator!=(const StringView &rhs) const noexcept
49     {
50         return sv_ != rhs.sv_;
51     }
52 
53     bool operator<(const StringView &rhs) const noexcept
54     {
55         return sv_ < rhs.sv_;
56     }
57 
58     bool operator>(const StringView &rhs) const noexcept
59     {
60         return sv_ > rhs.sv_;
61     }
62 
63     bool operator<=(const StringView &rhs) const noexcept
64     {
65         return sv_ <= rhs.sv_;
66     }
67 
68     bool operator>=(const StringView &rhs) const noexcept
69     {
70         return sv_ >= rhs.sv_;
71     }
72 
Compare(const StringView & other)73     int Compare(const StringView &other) const noexcept
74     {
75         return sv_.compare(other.sv_);
76     }
77 
Compare(const std::string_view & other)78     int Compare(const std::string_view &other) const noexcept
79     {
80         return sv_.compare(other);
81     }
82 
Is(const char * str)83     bool Is(const char *str) const noexcept
84     {
85         return sv_ == str;
86     }
87 
Is(const std::string_view str)88     bool Is(const std::string_view str) const noexcept
89     {
90         return sv_ == str;
91     }
92 
StartsWith(const std::string_view str)93     bool StartsWith(const std::string_view str) const noexcept
94     {
95         auto const length = str.size();
96         return sv_.size() >= length && sv_.substr(0U, length) == str;
97     }
98 
EndsWith(const std::string_view str)99     bool EndsWith(const std::string_view str) const noexcept
100     {
101         auto const myLength = sv_.size();
102         auto const strLength = str.size();
103         return myLength >= strLength && sv_.substr(myLength - strLength, strLength) == str;
104     }
105 
Length()106     size_t Length() const noexcept
107     {
108         return sv_.length();
109     }
110 
Empty()111     bool Empty() const noexcept
112     {
113         return sv_.empty();
114     }
115 
Utf8()116     std::string_view Utf8() const noexcept
117     {
118         return sv_;
119     }
120 
string()121     explicit operator std::string() const noexcept
122     {
123         return std::string {sv_};
124     }
125 
Bytes()126     const char *Bytes() const noexcept
127     {
128         return sv_.data();
129     }
130 
Substr(size_t begin,size_t end)131     StringView Substr(size_t begin, size_t end) const noexcept
132     {
133         // NOLINTNEXTLINE(cppcoreguidelines-pro-bounds-pointer-arithmetic)
134         return StringView(std::string_view(sv_.data() + begin, end - begin));
135     }
136 
IsHighSurrogate(char32_t cp)137     static bool IsHighSurrogate(char32_t cp)
138     {
139         return (cp >= Constants::SURROGATE_HIGH_MIN && cp < Constants::SURROGATE_HIGH_MAX);
140     }
141 
IsLowSurrogate(char32_t cp)142     static bool IsLowSurrogate(char32_t cp)
143     {
144         return (cp >= Constants::SURROGATE_LOW_MIN && cp < Constants::SURROGATE_LOW_MAX);
145     }
146 
147     std::string Mutf8() const noexcept;
148     static char32_t DecodeSurrogates(char32_t high, char32_t low);
149     static std::tuple<char32_t, char32_t> EncodeSurrogate(char32_t cp);
150 
151     template <void ENCODER(std::string *, char32_t)>
152     std::string EscapeSymbol() const;
153 
154     template <typename T>
155     static void Utf8Encode(T *str, char32_t cu);
156     template <typename T>
157     static void Mutf8Encode(T *str, char32_t cu);
158 
159     class Iterator {
160     public:
161         static char32_t constexpr INVALID_CP = std::numeric_limits<char32_t>::max();
162 
Iterator(const StringView & sv)163         explicit Iterator(const StringView &sv) noexcept : sv_(sv.sv_), iter_(sv_.begin()) {}
164         DEFAULT_COPY_SEMANTIC(Iterator);
165         DEFAULT_MOVE_SEMANTIC(Iterator);
166         ~Iterator() = default;
167 
Index()168         inline size_t Index() const
169         {
170             return static_cast<size_t>(iter_ - sv_.begin());
171         }
172 
Next()173         inline char32_t Next()
174         {
175             return DecodeCP<true>(nullptr);
176         }
177 
Peek()178         inline char32_t Peek() const
179         {
180             return HasNext() ? *iter_ : INVALID_CP;
181         }
182 
PeekCp()183         inline char32_t PeekCp() const
184         {
185             return DecodeCP<false>(nullptr);
186         }
187 
PeekCp(size_t * cpSize)188         inline char32_t PeekCp(size_t *cpSize) const
189         {
190             return DecodeCP<false, true>(cpSize);
191         }
192 
Forward(size_t offset)193         inline void Forward(size_t offset)
194         {
195             iter_ += offset;
196         }
197 
Backward(size_t offset)198         inline void Backward(size_t offset)
199         {
200             iter_ -= offset;
201         }
202 
Reset(size_t offset)203         inline void Reset(size_t offset)
204         {
205             ES2PANDA_ASSERT(sv_.begin() + offset <= sv_.end());
206             iter_ = sv_.begin() + offset;
207         }
208 
Rewind(std::string_view::const_iterator pos)209         inline void Rewind(std::string_view::const_iterator pos)
210         {
211             iter_ = pos;
212         }
213 
Save()214         inline std::string_view::const_iterator Save() const
215         {
216             return iter_;
217         }
218 
HasNext()219         inline bool HasNext() const
220         {
221             return iter_ < sv_.end();
222         }
223 
224         void SkipCp();
225 
226     private:
227         template <bool MOVE_ITER, bool SET_CP_SIZE = false>
228         char32_t DecodeCP([[maybe_unused]] size_t *cpSize) const;
229 
230         std::string_view sv_;
231         mutable std::string_view::const_iterator iter_;
232     };
233 
234     class Constants {
235     public:
236         static constexpr uint16_t UTF8_2BYTE_REQUIRED = 2;
237         static constexpr uint16_t UTF8_3BYTE_REQUIRED = 3;
238         static constexpr uint16_t UTF8_4BYTE_REQUIRED = 4;
239 
240         static constexpr uint16_t UTF8_1BYTE_LIMIT = 0x80;
241         static constexpr uint16_t UTF8_2BYTE_LIMIT = 0x800;
242         static constexpr uint32_t UTF8_3BYTE_LIMIT = 0x10000;
243 
244         static constexpr uint16_t UTF8_2BYTE_MASK = 0x1F;
245         static constexpr uint16_t UTF8_3BYTE_MASK = 0x0F;
246         static constexpr uint16_t UTF8_4BYTE_MASK = 0x07;
247 
248         static constexpr uint16_t UTF8_DECODE_4BYTE_MASK = 0xf8;
249         static constexpr uint16_t UTF8_DECODE_4BYTE_LIMIT = 0xf4;
250 
251         static constexpr uint16_t UTF8_2BYTE_HEADER = 0xC0;
252         static constexpr uint16_t UTF8_3BYTE_HEADER = 0xE0;
253         static constexpr uint16_t UTF8_4BYTE_HEADER = 0xF0;
254 
255         static constexpr uint16_t UTF8_2BYTE_SHIFT = 6U;
256         static constexpr uint16_t UTF8_3BYTE_SHIFT = 12U;
257         static constexpr uint16_t UTF8_4BYTE_SHIFT = 18U;
258 
259         static constexpr uint16_t UTF8_CONT_MASK = 0x3F;
260         static constexpr uint16_t UTF8_CONT_HEADER = 0x80;
261 
262         static constexpr char32_t SURROGATE_HIGH_MIN = 0xD800;
263         static constexpr char32_t SURROGATE_HIGH_MAX = 0xDC00;
264         static constexpr char32_t SURROGATE_LOW_MIN = 0xDC00;
265         static constexpr char32_t SURROGATE_LOW_MAX = 0xE000;
266         static constexpr char32_t SURROGATE_LOW_MARKER = 0x3ff;
267         static constexpr char32_t CELESTIAL_OFFSET = UTF8_3BYTE_LIMIT;
268     };
269 
270 private:
271     friend class Iterator;
272     std::string_view sv_;
273 };
274 
275 class UString {
276 public:
277     UString() = default;
UString(ArenaAllocator * allocator)278     explicit UString(ArenaAllocator *allocator) : allocator_(allocator) {}
UString(const std::string & str,ArenaAllocator * allocator)279     explicit UString(const std::string &str, ArenaAllocator *allocator) : UString(allocator)
280     {
281         Alloc();
282         *str_ = str;
283     }
284 
UString(const std::string_view & str,ArenaAllocator * allocator)285     explicit UString(const std::string_view &str, ArenaAllocator *allocator) : UString(allocator)
286     {
287         Alloc();
288         *str_ = str;
289     }
290 
UString(const util::StringView & str,ArenaAllocator * allocator)291     explicit UString(const util::StringView &str, ArenaAllocator *allocator) : UString(str.Utf8(), allocator) {}
292 
293     DEFAULT_COPY_SEMANTIC(UString);
294     DEFAULT_MOVE_SEMANTIC(UString);
295     ~UString() = default;
296 
View()297     util::StringView View() const
298     {
299         if (str_ == nullptr) {
300             return util::StringView();
301         }
302 
303         return util::StringView(str_);
304     }
305 
View()306     util::StringView View()
307     {
308         if (str_ == nullptr) {
309             return util::StringView();
310         }
311 
312         return util::StringView(str_);
313     }
314 
Append(char32_t ch)315     util::UString &Append(char32_t ch) noexcept
316     {
317         if (str_ == nullptr) {
318             Alloc();
319         }
320 
321         StringView::Utf8Encode<ArenaString>(str_, ch);
322         return *this;
323     }
324 
Append(StringView other)325     util::UString &Append(StringView other) noexcept
326     {
327         if (str_ == nullptr) {
328             Alloc();
329         }
330 
331         *str_ += other.Utf8();
332         return *this;
333     }
334 
Append(std::string_view other)335     util::UString &Append(std::string_view other) noexcept
336     {
337         return Append(StringView(other));
338     }
339 
Append(const char * other)340     util::UString &Append(const char *other) noexcept
341     {
342         if (str_ == nullptr) {
343             Alloc();
344         }
345 
346         *str_ += other;
347         return *this;
348     }
349 
Append(const std::string & other)350     util::UString &Append(const std::string &other) noexcept
351     {
352         if (str_ == nullptr) {
353             Alloc();
354         }
355 
356         *str_ += other;
357         return *this;
358     }
359 
360 private:
Alloc()361     void Alloc()
362     {
363         str_ = allocator_->New<ArenaString>(allocator_->Adapter());
364         ES2PANDA_ASSERT(str_ != nullptr);
365     }
366 
367 protected:
368     // NOLINTBEGIN(misc-non-private-member-variables-in-classes)
369     ArenaString *str_ {};
370     ArenaAllocator *allocator_ {};
371     // NOLINTEND(misc-non-private-member-variables-in-classes)
372 };
373 
374 template <bool MOVE_ITER, bool SET_CP_SIZE>
DecodeCP(size_t * cpSize)375 char32_t StringView::Iterator::DecodeCP([[maybe_unused]] size_t *cpSize) const
376 {
377     if (!HasNext()) {
378         return INVALID_CP;
379     }
380 
381     const auto *iterNext = iter_;
382     const auto remain = static_cast<size_t>(sv_.end() - iterNext);
383 
384     char32_t cu0 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
385     char32_t res {};
386 
387     if (cu0 < Constants::UTF8_1BYTE_LIMIT) {
388         res = cu0;
389     } else if ((cu0 & Constants::UTF8_3BYTE_HEADER) == Constants::UTF8_2BYTE_HEADER &&
390                remain >= Constants::UTF8_2BYTE_REQUIRED) {
391         char32_t cu1 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
392         res = ((cu0 & Constants::UTF8_2BYTE_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu1 & Constants::UTF8_CONT_MASK);
393     } else if ((cu0 & Constants::UTF8_4BYTE_HEADER) == Constants::UTF8_3BYTE_HEADER &&
394                remain >= Constants::UTF8_3BYTE_REQUIRED) {
395         char32_t cu1 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
396         char32_t cu2 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
397         res = ((cu0 & Constants::UTF8_3BYTE_MASK) << Constants::UTF8_3BYTE_SHIFT) |
398               ((cu1 & Constants::UTF8_CONT_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu2 & Constants::UTF8_CONT_MASK);
399     } else if (((cu0 & Constants::UTF8_DECODE_4BYTE_MASK) == Constants::UTF8_4BYTE_HEADER) &&
400                cu0 <= Constants::UTF8_DECODE_4BYTE_LIMIT && remain >= Constants::UTF8_4BYTE_REQUIRED) {
401         char32_t cu1 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
402         char32_t cu2 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
403         char32_t cu3 = static_cast<uint8_t>(*iterNext++);  // NOLINT(cppcoreguidelines-pro-bounds-pointer-arithmetic)
404         res = ((cu0 & Constants::UTF8_4BYTE_MASK) << Constants::UTF8_4BYTE_SHIFT) |
405               ((cu1 & Constants::UTF8_CONT_MASK) << Constants::UTF8_3BYTE_SHIFT) |
406               ((cu2 & Constants::UTF8_CONT_MASK) << Constants::UTF8_2BYTE_SHIFT) | (cu3 & Constants::UTF8_CONT_MASK);
407     } else {
408         res = INVALID_CP;
409     }
410 
411     // NOLINTNEXTLINE(readability-braces-around-statements,bugprone-suspicious-semicolon)
412     if constexpr (MOVE_ITER) {
413         iter_ = iterNext;
414         return res;
415     }
416 
417     // NOLINTNEXTLINE(readability-braces-around-statements,bugprone-suspicious-semicolon)
418     if constexpr (SET_CP_SIZE) {
419         *cpSize = iterNext - iter_;
420     }
421 
422     return res;
423 }
424 
425 template <void ENCODER(std::string *, char32_t)>
EscapeSymbol()426 std::string StringView::EscapeSymbol() const
427 {
428     std::string str;
429     str.reserve(Length());
430 
431     auto skipNewLine = [](auto &iter) {
432         if (iter.HasNext()) {
433             iter.Forward(1);
434 
435             if (iter.Peek() != '\n') {
436                 iter.Backward(1);
437             }
438         }
439     };
440 
441     Iterator iter(*this);
442     while (iter.HasNext()) {
443         auto cp = iter.Next();
444 
445         switch (cp) {
446             case '\r': {
447                 skipNewLine(iter);
448                 [[fallthrough]];
449             }
450             case '\n': {
451                 str += "\\n";
452                 break;
453             }
454             case '\b': {
455                 str += "\\b";
456                 break;
457             }
458             case '\t': {
459                 str += "\\t";
460                 break;
461             }
462             case '\f': {
463                 str += "\\f";
464                 break;
465             }
466             case '"': {
467                 str += "\\\"";
468                 break;
469             }
470             case '\\': {
471                 str += "\\\\";
472                 break;
473             }
474             default: {
475                 ENCODER(&str, cp);
476             }
477         }
478     }
479 
480     return str;
481 }
482 
483 template <typename T>
Utf8Encode(T * str,char32_t cu)484 void StringView::Utf8Encode(T *str, char32_t cu)
485 {
486     ES2PANDA_ASSERT(str != nullptr);
487     if (cu < Constants::UTF8_1BYTE_LIMIT) {
488         str->push_back(static_cast<char>(cu));
489     } else if (cu < Constants::UTF8_2BYTE_LIMIT) {
490         str->push_back(static_cast<char>(((cu >> Constants::UTF8_2BYTE_SHIFT) & Constants::UTF8_2BYTE_MASK) |
491                                          Constants::UTF8_2BYTE_HEADER));
492         str->push_back(static_cast<char>((cu & Constants::UTF8_CONT_MASK) | Constants::UTF8_CONT_HEADER));
493     } else if (cu < Constants::UTF8_3BYTE_LIMIT) {
494         str->push_back(static_cast<char>(((cu >> Constants::UTF8_3BYTE_SHIFT) & Constants::UTF8_3BYTE_MASK) |
495                                          Constants::UTF8_3BYTE_HEADER));
496         str->push_back(static_cast<char>(((cu >> Constants::UTF8_2BYTE_SHIFT) & Constants::UTF8_CONT_MASK) |
497                                          Constants::UTF8_CONT_HEADER));
498         str->push_back(static_cast<char>((cu & Constants::UTF8_CONT_MASK) | Constants::UTF8_CONT_HEADER));
499     } else {
500         str->push_back(static_cast<char>(((cu >> Constants::UTF8_4BYTE_SHIFT) & Constants::UTF8_4BYTE_MASK) |
501                                          Constants::UTF8_4BYTE_HEADER));
502         str->push_back(static_cast<char>(((cu >> Constants::UTF8_3BYTE_SHIFT) & Constants::UTF8_CONT_MASK) |
503                                          Constants::UTF8_CONT_HEADER));
504         str->push_back(static_cast<char>(((cu >> Constants::UTF8_2BYTE_SHIFT) & Constants::UTF8_CONT_MASK) |
505                                          Constants::UTF8_CONT_HEADER));
506         str->push_back(static_cast<char>((cu & Constants::UTF8_CONT_MASK) | Constants::UTF8_CONT_HEADER));
507     }
508 }
509 
510 template <typename T>
Mutf8Encode(T * str,char32_t cu)511 void StringView::Mutf8Encode(T *str, char32_t cu)
512 {
513     if (cu == 0) {
514         str->push_back(static_cast<char>(Constants::UTF8_2BYTE_HEADER));
515         str->push_back(static_cast<char>(Constants::UTF8_CONT_HEADER));
516     } else if (cu < Constants::UTF8_1BYTE_LIMIT) {
517         str->push_back(static_cast<char>(cu));
518     } else if (cu < Constants::UTF8_2BYTE_LIMIT) {
519         str->push_back(static_cast<char>((cu >> Constants::UTF8_2BYTE_SHIFT) | Constants::UTF8_2BYTE_HEADER));
520         str->push_back(static_cast<char>((cu & Constants::UTF8_CONT_MASK) | Constants::UTF8_CONT_HEADER));
521     } else if (cu < Constants::UTF8_3BYTE_LIMIT) {
522         str->push_back(static_cast<char>((cu >> Constants::UTF8_3BYTE_SHIFT) | Constants::UTF8_3BYTE_HEADER));
523         str->push_back(static_cast<char>(((cu >> Constants::UTF8_2BYTE_SHIFT) & Constants::UTF8_CONT_MASK) |
524                                          Constants::UTF8_CONT_HEADER));
525         str->push_back(static_cast<char>((cu & Constants::UTF8_CONT_MASK) | Constants::UTF8_CONT_HEADER));
526     } else {
527         auto [cu1, cu2] = EncodeSurrogate(cu);
528         Mutf8Encode(str, cu1);
529         Mutf8Encode(str, cu2);
530     }
531 }
532 }  // namespace ark::es2panda::util
533 
534 // NOLINTNEXTLINE(cert-dcl58-cpp)
535 namespace std {
536 
537 template <>
538 // NOLINTNEXTLINE(altera-struct-pack-align)
539 struct hash<ark::es2panda::util::StringView> {
540     std::size_t operator()(const ark::es2panda::util::StringView &str) const
541     {
542         return std::hash<std::string_view> {}(str.Utf8());
543     }
544 };
545 
546 ostream &operator<<(ostream &os, const ark::es2panda::util::StringView &us);
547 
548 }  // namespace std
549 
550 #endif
551