• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2023 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "namemangler.h"
17 #include <map>
18 
19 namespace namemangler {
// DEBUG_ASSERT(f) expands to assert(f) only when __MRT_DEBUG is defined;
// in every other build it expands to a no-op expression.
#ifndef __MRT_DEBUG
#define DEBUG_ASSERT(f) ((void)0)
#else
#define DEBUG_ASSERT(f) assert(f)
#endif

// Map a value below 16 to its hexadecimal digit character:
// GETHEXCHAR yields lowercase letters, GETHEXCHARU yields uppercase letters.
#define GETHEXCHAR(n) static_cast<char>(((n) < 10) ? ((n) + '0') : ((n) - 10 + 'a'))
#define GETHEXCHARU(n) static_cast<char>(((n) < 10) ? ((n) + '0') : ((n) - 10 + 'A'))

// Ordered string-to-string map with immutable keys and values.
using StringMap = std::map<const std::string, const std::string>;
30 
31 #ifdef ARK_LITECG_DEBUG
// Tracks whether the decoder is currently inside a primitive-type position.
//   primType: the current state, returned unchanged when ch is not a marker
//   splitNo:  number of '|' separators seen so far
//   ch:       the character that was just decoded
// 'L' always switches the state off; ';', '(' and ')' switch it back on,
// but only once more than one separator has been seen.
static inline bool UpdatePrimType(bool primType, int splitNo, uint32_t ch)
{
    switch (ch) {
        case 'L':
            return false;
        case ';':
        case '(':
        case ')':
            return (splitNo > 1) ? true : primType;
        default:
            return primType;
    }
}
44 
// File-local numeric constants shared by the helpers in this file.
namespace {
constexpr int kNumLimit{10};       // value added to a letter hex digit ('a'/'A' -> 10)
constexpr int kCodeOffset3{12};    // bit position of the first of four hex digits
constexpr int kCodeOffset2{8};     // bit position of the second of four hex digits
constexpr int kCodeOffset{4};      // bit position of the upper digit of a hex pair
constexpr size_t k64BitShift{6};   // 64 is 1 << 6
}  // namespace
52 
DecodeName(const std::string & name)53 std::string DecodeName(const std::string &name)
54 {
55     if (name.find(';') != std::string::npos) {  // no need Decoding a non-encoded string
56         return name;
57     }
58     std::string decompressedName;
59     const char *namePtr = nullptr;
60     size_t nameLen;
61     namePtr = name.c_str();
62     nameLen = name.length();
63 
64     // Demangled name is supposed to be shorter. No buffer overflow issue here.
65     std::string newName(nameLen, '\0');
66 
67     bool primType = true;
68     int splitNo = 0;  // split: class 0 | method 1 | signature 2
69     size_t pos = 0;
70     std::string str;
71     std::u16string str16;
72     for (size_t i = 0; i < nameLen;) {
73         unsigned char c = static_cast<unsigned char>(namePtr[i]);
74         ++i;
75         if (c == '_') {  // _XX: '_' followed by ascii code in hex
76             if (i >= nameLen) {
77                 break;
78             }
79             if (namePtr[i] == '_') {
80                 newName[pos++] = namePtr[i++];
81             } else if (namePtr[i] == 'u') {
82                 str.clear();
83                 str16.clear();
84                 i++;
85                 c = static_cast<unsigned char>(namePtr[i++]);
86                 uint8_t b1 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
87                 c = static_cast<unsigned char>(namePtr[i++]);
88                 uint8_t b2 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
89                 c = static_cast<unsigned char>(namePtr[i++]);
90                 uint8_t b3 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
91                 c = static_cast<unsigned char>(namePtr[i++]);
92                 uint8_t b4 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
93                 uint32_t codepoint = (b1 << kCodeOffset3) | (b2 << kCodeOffset2) | (b3 << kCodeOffset) | b4;
94                 str16 += static_cast<char16_t>(codepoint);
95                 unsigned int count = UTF16ToUTF8(str, str16, 1, false) >> 16; // shift 16 to get count
96                 if (count == 2) {  // the count of str equal 2 to 4, use array to save the utf8
97                     newName[pos++] = str[0];
98                     newName[pos++] = str[1];
99                 } else if (count == 3) {  // the count of str equal 2 to 4, deal 3 new
100                     newName[pos++] = str[0];
101                     newName[pos++] = str[1];
102                     newName[pos++] = str[2];  // 2 is index of third char
103                 } else if (count == 4) {      // the count of str equal 2 to 4
104                     newName[pos++] = str[0];
105                     newName[pos++] = str[1];
106                     newName[pos++] = str[2];  // 2 is index of third char
107                     newName[pos++] = str[3];  // 3 is index of fourth char
108                 }
109             } else {
110                 c = static_cast<unsigned char>(namePtr[i++]);
111                 unsigned int v = (c <= '9') ? c - '0' : c - 'A' + kNumLimit;
112                 unsigned int asc = v << kCodeOffset;
113                 if (i >= nameLen) {
114                     break;
115                 }
116                 c = static_cast<unsigned char>(namePtr[i++]);
117                 v = (c <= '9') ? c - '0' : c - 'A' + kNumLimit;
118                 asc += v;
119 
120                 newName[pos++] = static_cast<char>(asc);
121 
122                 if (asc == '|') {
123                     splitNo++;
124                 }
125 
126                 primType = UpdatePrimType(primType, splitNo, asc);
127             }
128         } else {
129             if (splitNo < 2) {  // split: class 0 | method 1 | signature 2
130                 newName[pos++] = static_cast<char>(c);
131                 continue;
132             }
133 
134             primType = UpdatePrimType(primType, splitNo, c);
135             if (primType) {
136                 newName[pos++] = (c == 'A') ? '[' : c;
137             } else {
138                 newName[pos++] = static_cast<char>(c);
139             }
140         }
141     }
142 
143     newName.resize(pos);
144     return newName;
145 }
146 
147 // input: maple name
148 // output: Lj/lang/Object;  [Lj/lang/Object;
DecodeMapleNameToJDescriptor(const std::string & nameIn,std::string & nameOut)149 void DecodeMapleNameToJDescriptor(const std::string &nameIn, std::string &nameOut)
150 {
151     nameOut = DecodeName(nameIn);
152     if (nameOut[0] == 'A') {
153         size_t i = 0;
154         while (nameOut[i] == 'A') {
155             nameOut[i++] = '[';
156         }
157     }
158 }
159 
// Swaps the two bytes of a 16-bit value.
static uint16_t ChangeEndian16(uint16_t u16)
{
    constexpr unsigned kByteBits = 8;
    return static_cast<uint16_t>(((u16 & 0xFF00) >> kByteBits) | ((u16 & 0xFF) << kByteBits));
}

/* UTF8
 * U+0000 - U+007F   0xxxxxxx
 * U+0080 - U+07FF   110xxxxx 10xxxxxx
 * U+0800 - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
 * U+10000- U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * UTF16
 * U+0000 - U+D7FF   codePoint
 * U+E000 - U+FFFF   codePoint
 * U+10000- U+10FFFF XXXX YYYY
 *   code = codePoint - 0x010000, ie, 20-bit number in the range 0x000000..0x0FFFFF
 *   XXXX: top 10 bits of code + 0xD800: 0xD800..0xDBFF
 *   YYYY: low 10 bits of code + 0xDC00: 0xDC00..0xDFFF
 *
 * Convert up to num code points read from str16 and append their UTF-8 bytes to str.
 *
 * Code units are used as stored when isBigEndian is true or num is 1; otherwise each
 * unit is byte-swapped before it is interpreted.
 *
 * A high surrogate immediately followed by a low surrogate is combined into a single
 * code point and written as four bytes. An unpaired surrogate is written as a
 * three-byte sequence like any other 16-bit value.
 *
 * Return value:
 *   - when num code points have been converted:
 *       (number of UTF-8 bytes appended << 16) | number of UTF-16 units consumed
 *   - when the input ends first (including num == 0):
 *       only the number of UTF-16 units consumed
 */
const int kCodepointOffset1 = 6;   // U+0080 - U+07FF   110xxxxx 10xxxxxx
const int kCodepointOffset2 = 12;  // U+0800 - U+FFFF   1110xxxx 10xxxxxx 10xxxxxx
const int kCodepointOffset3 = 18;  // U+10000- U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
const int kCountOffset = 16;

unsigned UTF16ToUTF8(std::string &str, const std::u16string &str16, unsigned short num, bool isBigEndian)
{
    constexpr uint32_t kHighSurrogateMin = 0xD800;
    constexpr uint32_t kHighSurrogateMax = 0xDBFF;
    constexpr uint32_t kLowSurrogateMin = 0xDC00;
    constexpr uint32_t kLowSurrogateMax = 0xDFFF;
    constexpr uint32_t kSurrogateMask = 0x3FF;        // low 10 bits of a surrogate
    constexpr uint32_t kSurrogateShift = 10;          // each surrogate carries 10 bits
    constexpr uint32_t kSupplementaryBase = 0x10000;  // first code point beyond the BMP

    // The same byte-order rule applies to every unit of the input.
    const bool useRawUnits = isBigEndian || num == 1;
    auto unitAt = [&str16, useRawUnits](uint32_t idx) -> uint32_t {
        const uint16_t raw = static_cast<uint16_t>(str16[idx]);
        return useRawUnits ? raw : ChangeEndian16(raw);
    };

    uint32_t i = 0;
    unsigned short count = 0;
    unsigned short retNum = 0;
    while (i < str16.length()) {
        uint32_t codePoint = unitAt(i++);

        // A 16-bit unit can never exceed 0xFFFF, so supplementary code points have to
        // be recognised by their surrogate ranges. Peek at the next unit (if any) and
        // consume it only when it really is the matching low surrogate.
        if (codePoint >= kHighSurrogateMin && codePoint <= kHighSurrogateMax && i < str16.length()) {
            const uint32_t low = unitAt(i);
            if (low >= kLowSurrogateMin && low <= kLowSurrogateMax) {
                codePoint = kSupplementaryBase + ((codePoint & kSurrogateMask) << kSurrogateShift) +
                            (low & kSurrogateMask);
                ++i;
            }
        }

        if (codePoint <= 0x7F) {
            str += static_cast<char>(codePoint);
            retNum += 1;  // 1 UTF8 char
        } else if (codePoint <= 0x7FF) {
            str += static_cast<char>(0xC0 + (codePoint >> kCodepointOffset1));
            str += static_cast<char>(0x80 + (codePoint & 0x3F));
            retNum += 2;  // 2 UTF8 chars
        } else if (codePoint <= 0xFFFF) {
            str += static_cast<char>(0xE0 + ((codePoint >> kCodepointOffset2) & 0xF));
            str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset1) & 0x3F));
            str += static_cast<char>(0x80 + (codePoint & 0x3F));
            retNum += 3;  // 3 UTF8 chars
        } else {
            str += static_cast<char>(0xF0 + ((codePoint >> kCodepointOffset3) & 0x7));
            str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset2) & 0x3F));
            str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset1) & 0x3F));
            str += static_cast<char>(0x80 + (codePoint & 0x3F));
            retNum += 4;  // 4 UTF8 chars
        }
        count++;
        if (num == count) {
            return ((static_cast<unsigned>(retNum)) << kCountOffset) | static_cast<unsigned>(i);
        }
    }
    return i;
}
234 
// Number of payload bits carried by each LEB128 byte.
const uint32_t kGreybackOffset = 7;

// Returns the number of bytes needed to encode v as unsigned LEB128.
// Zero still occupies one byte; it is handled explicitly because
// __builtin_clzll(0) is undefined.
size_t GetUleb128Size(uint64_t v)
{
    if (v == 0) {
        return 1;
    }
    constexpr size_t kBitsInValue = 64;
    const size_t significantBits = kBitsInValue - static_cast<size_t>(__builtin_clzll(v));
    // number of 7-bit groups, rounded up: (64 - clz + 6) / 7
    return (significantBits + kGreybackOffset - 1) / kGreybackOffset;
}
244 
GetSleb128Size(int32_t v)245 size_t GetSleb128Size(int32_t v)
246 {
247     size_t size = 0;
248     int rem = v >> kGreybackOffset;
249     bool hasMore = true;
250     int end = ((v >= 0) ? 0 : -1);
251 
252     while (hasMore) {
253         hasMore = (rem != end) || ((rem & 1) != ((v >> k64BitShift) & 1));  // judege whether has More valid rem
254         size++;
255         v = rem;
256         rem >>= static_cast<int>(kGreybackOffset);  // intended signed shift: block codedex here
257     }
258     return size;
259 }
260 #endif
261 }  // namespace namemangler
262