1 /*
2 * Copyright (c) 2023 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16 #include "namemangler.h"
17 #include <map>
18
19 namespace namemangler {
// Debug-only assertion: forwards to assert() when __MRT_DEBUG is defined and
// expands to a harmless no-op expression otherwise.
#if defined(__MRT_DEBUG)
#define DEBUG_ASSERT(f) assert(f)
#else
#define DEBUG_ASSERT(f) ((void)0)
#endif

// Convert a small number to its hexadecimal digit character: values below 10
// become '0'..'9', larger values are offset from 'a' (GETHEXCHAR) or 'A' (GETHEXCHARU).
#define GETHEXCHAR(n) static_cast<char>(((n) < 10) ? ((n) + '0') : ((n) - 10 + 'a'))
#define GETHEXCHARU(n) static_cast<char>(((n) < 10) ? ((n) + '0') : ((n) - 10 + 'A'))

// Ordered string-to-string table whose keys and values are both immutable.
using StringMap = std::map<const std::string, const std::string>;
30
31 #ifdef ARK_LITECG_DEBUG
// Computes the next value of the decoder's primType flag after seeing character ch.
//   - 'L' always clears the flag.
//   - ';', '(' and ')' set the flag once splitNo is greater than 1.
//   - any other character (or the three above while splitNo <= 1) leaves it unchanged.
// NOTE(review): presumably 'L' opens a class reference inside a signature - confirm with the encoder.
static inline bool UpdatePrimType(bool primType, int splitNo, uint32_t ch)
{
    switch (ch) {
        case 'L':
            return false;
        case ';':
        case '(':
        case ')':
            return (splitNo > 1) ? true : primType;
        default:
            return primType;
    }
}
44
namespace {
// Numeric value of the first hex letter: added to (c - 'a') or (c - 'A') when parsing hex digits.
constexpr int kNumLimit{10};
// Bit offsets of the hex nibbles inside a 16-bit value, from most to least significant.
constexpr int kCodeOffset3{12};
constexpr int kCodeOffset2{8};
constexpr int kCodeOffset{4};
// Shift that selects bit 6 of a value (64 is 1 << 6).
constexpr size_t k64BitShift{6};
}  // namespace
52
DecodeName(const std::string & name)53 std::string DecodeName(const std::string &name)
54 {
55 if (name.find(';') != std::string::npos) { // no need Decoding a non-encoded string
56 return name;
57 }
58 std::string decompressedName;
59 const char *namePtr = nullptr;
60 size_t nameLen;
61 namePtr = name.c_str();
62 nameLen = name.length();
63
64 // Demangled name is supposed to be shorter. No buffer overflow issue here.
65 std::string newName(nameLen, '\0');
66
67 bool primType = true;
68 int splitNo = 0; // split: class 0 | method 1 | signature 2
69 size_t pos = 0;
70 std::string str;
71 std::u16string str16;
72 for (size_t i = 0; i < nameLen;) {
73 unsigned char c = static_cast<unsigned char>(namePtr[i]);
74 ++i;
75 if (c == '_') { // _XX: '_' followed by ascii code in hex
76 if (i >= nameLen) {
77 break;
78 }
79 if (namePtr[i] == '_') {
80 newName[pos++] = namePtr[i++];
81 } else if (namePtr[i] == 'u') {
82 str.clear();
83 str16.clear();
84 i++;
85 c = static_cast<unsigned char>(namePtr[i++]);
86 uint8_t b1 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
87 c = static_cast<unsigned char>(namePtr[i++]);
88 uint8_t b2 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
89 c = static_cast<unsigned char>(namePtr[i++]);
90 uint8_t b3 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
91 c = static_cast<unsigned char>(namePtr[i++]);
92 uint8_t b4 = (c <= '9') ? c - '0' : c - 'a' + kNumLimit;
93 uint32_t codepoint = (b1 << kCodeOffset3) | (b2 << kCodeOffset2) | (b3 << kCodeOffset) | b4;
94 str16 += (char16_t)codepoint;
95 unsigned int count = UTF16ToUTF8(str, str16, 1, false) >> 16; // shift 16 to get count
96 if (count == 2) { // the count of str equal 2 to 4, use array to save the utf8
97 newName[pos++] = str[0];
98 newName[pos++] = str[1];
99 } else if (count == 3) {
100 newName[pos++] = str[0];
101 newName[pos++] = str[1];
102 newName[pos++] = str[2]; // 2 is index of third char
103 } else if (count == 4) {
104 newName[pos++] = str[0];
105 newName[pos++] = str[1];
106 newName[pos++] = str[2]; // 2 is index of third char
107 newName[pos++] = str[3]; // 3 is index of fourth char
108 }
109 } else {
110 c = static_cast<unsigned char>(namePtr[i++]);
111 unsigned int v = (c <= '9') ? c - '0' : c - 'A' + kNumLimit;
112 unsigned int asc = v << kCodeOffset;
113 if (i >= nameLen) {
114 break;
115 }
116 c = static_cast<unsigned char>(namePtr[i++]);
117 v = (c <= '9') ? c - '0' : c - 'A' + kNumLimit;
118 asc += v;
119
120 newName[pos++] = static_cast<char>(asc);
121
122 if (asc == '|') {
123 splitNo++;
124 }
125
126 primType = UpdatePrimType(primType, splitNo, asc);
127 }
128 } else {
129 if (splitNo < 2) { // split: class 0 | method 1 | signature 2
130 newName[pos++] = static_cast<char>(c);
131 continue;
132 }
133
134 primType = UpdatePrimType(primType, splitNo, c);
135 if (primType) {
136 newName[pos++] = (c == 'A') ? '[' : c;
137 } else {
138 newName[pos++] = static_cast<char>(c);
139 }
140 }
141 }
142
143 newName.resize(pos);
144 return newName;
145 }
146
147 // input: maple name
148 // output: Lj/lang/Object; [Lj/lang/Object;
DecodeMapleNameToJDescriptor(const std::string & nameIn,std::string & nameOut)149 void DecodeMapleNameToJDescriptor(const std::string &nameIn, std::string &nameOut)
150 {
151 nameOut = DecodeName(nameIn);
152 if (nameOut[0] == 'A') {
153 size_t i = 0;
154 while (nameOut[i] == 'A') {
155 nameOut[i++] = '[';
156 }
157 }
158 }
159
ChangeEndian16(uint16_t u16)160 static uint16_t ChangeEndian16(uint16_t u16)
161 {
162 return ((u16 & 0xFF00) >> kCodeOffset2) | ((u16 & 0xFF) << kCodeOffset2);
163 }
164
165 /* UTF8
166 * U+0000 - U+007F 0xxxxxxx
167 * U+0080 - U+07FF 110xxxxx 10xxxxxx
168 * U+0800 - U+FFFF 1110xxxx 10xxxxxx 10xxxxxx
169 * U+10000- U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
170 *
171 * UTF16
172 * U+0000 - U+D7FF codePoint
173 * U+E000 - U+FFFF codePoint
174 * U+10000- U+10FFFF XXXX YYYY
175 * code = codePoint - 0x010000, ie, 20-bit number in the range 0x000000..0x0FFFFF
176 * XXXX: top 10 bits of code + 0xD800: 0xD800..0xDBFF
177 * YYYY: low 10 bits of code + 0xDC00: 0xDC00..0xDFFF
178 *
 * convert up to num characters from UTF16 to UTF8
 * return two 16-bit values: (number_of_UTF8_bytes_produced << 16) | number_of_UTF16_elements_consumed
181 */
// Right-shift amounts that select the 6-bit payload groups of a code point for UTF-8 output.
constexpr int kCodepointOffset1 = 6;   // U+0080 - U+07FF    110xxxxx 10xxxxxx
constexpr int kCodepointOffset2 = 12;  // U+0800 - U+FFFF    1110xxxx 10xxxxxx 10xxxxxx
constexpr int kCodepointOffset3 = 18;  // U+10000 - U+10FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
// Bit position of the produced-count half inside the packed 32-bit conversion result.
constexpr int kCountOffset = 16;
186
UTF16ToUTF8(std::string & str,const std::u16string & str16,unsigned short num,bool isBigEndian)187 unsigned UTF16ToUTF8(std::string &str, const std::u16string &str16, unsigned short num, bool isBigEndian)
188 {
189 uint32_t codePoint = 0;
190 uint32_t i = 0;
191 unsigned short count = 0;
192 unsigned short retNum = 0;
193 while (i < str16.length()) {
194 if (isBigEndian || num == 1) {
195 codePoint = str16[i++];
196 } else {
197 codePoint = ChangeEndian16(str16[i++]);
198 }
199 if (codePoint > 0xFFFF) {
200 codePoint &= 0x3FF;
201 codePoint <<= kNumLimit;
202 if (isBigEndian) {
203 codePoint += str16[i++] & 0x3FF;
204 } else {
205 codePoint += ChangeEndian16(str16[i++]) & 0x3FF;
206 }
207 }
208 if (codePoint <= 0x7F) {
209 str += static_cast<char>(codePoint);
210 retNum += 1; // 1 UTF8 char
211 } else if (codePoint <= 0x7FF) {
212 str += static_cast<char>(0xC0 + (codePoint >> kCodepointOffset1));
213 str += static_cast<char>(0x80 + (codePoint & 0x3F));
214 retNum += 2; // 2 UTF8 chars
215 } else if (codePoint <= 0xFFFF) {
216 str += static_cast<char>(0xE0 + ((codePoint >> kCodepointOffset2) & 0xF));
217 str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset1) & 0x3F));
218 str += static_cast<char>(0x80 + (codePoint & 0x3F));
219 retNum += 3; // 3 UTF8 chars
220 } else {
221 str += static_cast<char>(0xF0 + ((codePoint >> kCodepointOffset3) & 0x7));
222 str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset2) & 0x3F));
223 str += static_cast<char>(0x80 + ((codePoint >> kCodepointOffset1) & 0x3F));
224 str += static_cast<char>(0x80 + (codePoint & 0x3F));
225 retNum += 4; // 4 UTF8 chars
226 }
227 count++;
228 if (num == count) {
229 return ((static_cast<unsigned>(retNum)) << kCountOffset) | static_cast<unsigned>(i);
230 }
231 }
232 return i;
233 }
234
// Shift width used by GetSleb128Size to step through a value one 7-bit group at a time.
constexpr uint32_t kGreybackOffset = 7;
236
// Returns the number of bytes needed to store v as an unsigned LEB128 value,
// i.e. the number of 7-bit groups required to hold its significant bits.
//
// Zero is handled explicitly: it occupies a single byte, and __builtin_clzll(0)
// is undefined, so it must not reach the bit-count computation below.
size_t GetUleb128Size(uint64_t v)
{
    constexpr size_t kValueBits = 64;
    constexpr size_t kBitsPerGroup = 7;
    if (v == 0) {
        return 1;
    }
    const size_t clz = static_cast<size_t>(__builtin_clzll(v));
    // num of 7-bit groups, rounded up: (64 - clz + 6) / 7
    return (kValueBits - clz + (kBitsPerGroup - 1)) / kBitsPerGroup;
}
244
GetSleb128Size(int32_t v)245 size_t GetSleb128Size(int32_t v)
246 {
247 size_t size = 0;
248 int rem = v >> kGreybackOffset;
249 bool hasMore = true;
250 int end = ((v >= 0) ? 0 : -1);
251
252 while (hasMore) {
253 hasMore = (rem != end) || ((rem & 1) != ((v >> k64BitShift) & 1)); // judege whether has More valid rem
254 size++;
255 v = rem;
256 rem >>= static_cast<int>(kGreybackOffset); // intended signed shift: block codedex here
257 }
258 return size;
259 }
260 #endif
261 } // namespace namemangler
262