1 /*
2 * Copyright (c) 2016 GitHub, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16 #include <unordered_map>
17 #include <regex>
18
19 #include "syms.h"
20 #include "usdt.h"
21 #include "vendor/tinyformat.hpp"
22
23 #include "bcc_elf.h"
24 #include "bcc_syms.h"
25
26 namespace USDT {
27
Argument()28 Argument::Argument() {}
~Argument()29 Argument::~Argument() {}
30
ctype() const31 std::string Argument::ctype() const {
32 const int s = arg_size() * 8;
33 return (s < 0) ? tfm::format("int%d_t", -s) : tfm::format("uint%d_t", s);
34 }
35
get_global_address(uint64_t * address,const std::string & binpath,const optional<int> & pid) const36 bool Argument::get_global_address(uint64_t *address, const std::string &binpath,
37 const optional<int> &pid) const {
38 if (pid) {
39 static struct bcc_symbol_option default_option = {
40 .use_debug_file = 1,
41 .check_debug_file_crc = 1,
42 .use_symbol_type = BCC_SYM_ALL_TYPES
43 };
44 return ProcSyms(*pid, &default_option)
45 .resolve_name(binpath.c_str(), deref_ident_->c_str(), address);
46 }
47
48 if (!bcc_elf_is_shared_obj(binpath.c_str())) {
49 struct bcc_symbol sym;
50 if (bcc_resolve_symname(binpath.c_str(), deref_ident_->c_str(), 0x0, -1, nullptr, &sym) == 0) {
51 *address = sym.offset;
52 if (sym.module)
53 ::free(const_cast<char*>(sym.module));
54 return true;
55 }
56 }
57
58 return false;
59 }
60
assign_to_local(std::ostream & stream,const std::string & local_name,const std::string & binpath,const optional<int> & pid) const61 bool Argument::assign_to_local(std::ostream &stream,
62 const std::string &local_name,
63 const std::string &binpath,
64 const optional<int> &pid) const {
65 if (constant_) {
66 tfm::format(stream, "%s = %d;", local_name, *constant_);
67 return true;
68 }
69
70 if (!deref_offset_) {
71 tfm::format(stream, "%s = ctx->%s;", local_name, *base_register_name_);
72 // Put a compiler barrier to prevent optimization
73 // like llvm SimplifyCFG SinkThenElseCodeToEnd
74 // Volatile marking is not sufficient to prevent such optimization.
75 tfm::format(stream, " %s", COMPILER_BARRIER);
76 return true;
77 }
78
79 if (deref_offset_ && !deref_ident_) {
80 tfm::format(stream, "{ u64 __addr = ctx->%s + %d",
81 *base_register_name_, *deref_offset_);
82 if (index_register_name_) {
83 int scale = scale_.value_or(1);
84 tfm::format(stream, " + (ctx->%s * %d);", *index_register_name_, scale);
85 } else {
86 tfm::format(stream, ";");
87 }
88 // Theoretically, llvm SimplifyCFG SinkThenElseCodeToEnd may still
89 // sink bpf_probe_read call, so put a barrier here to prevent sinking
90 // of ctx->#fields.
91 tfm::format(stream, " %s ", COMPILER_BARRIER);
92 tfm::format(stream,
93 "%s __res = 0x0; "
94 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
95 "%s = __res; }",
96 ctype(), local_name);
97 return true;
98 }
99
100 if (deref_offset_ && deref_ident_ && *base_register_name_ == "ip") {
101 uint64_t global_address;
102 if (!get_global_address(&global_address, binpath, pid))
103 return false;
104
105 tfm::format(stream,
106 "{ u64 __addr = 0x%xull + %d; %s __res = 0x0; "
107 "bpf_probe_read(&__res, sizeof(__res), (void *)__addr); "
108 "%s = __res; }",
109 global_address, *deref_offset_, ctype(), local_name);
110 return true;
111 }
112
113 return false;
114 }
115
print_error(ssize_t pos)116 void ArgumentParser::print_error(ssize_t pos) {
117 fprintf(stderr, "Parse error:\n %s\n", arg_);
118 for (ssize_t i = 0; i < pos + 4; ++i) fputc('-', stderr);
119 fputc('^', stderr);
120 fputc('\n', stderr);
121 }
122
skip_whitespace_from(size_t pos)123 void ArgumentParser::skip_whitespace_from(size_t pos) {
124 while (isspace(arg_[pos])) pos++;
125 cur_pos_ = pos;
126 }
127
skip_until_whitespace_from(size_t pos)128 void ArgumentParser::skip_until_whitespace_from(size_t pos) {
129 while (arg_[pos] != '\0' && !isspace(arg_[pos]))
130 pos++;
131 cur_pos_ = pos;
132 }
133
parse_register(ssize_t pos,ssize_t & new_pos,optional<int> * reg_num)134 bool ArgumentParser_aarch64::parse_register(ssize_t pos, ssize_t &new_pos,
135 optional<int> *reg_num) {
136 new_pos = parse_number(pos, reg_num);
137 if (new_pos == pos || *reg_num < 0 || *reg_num > 31)
138 return error_return(pos, pos);
139 return true;
140 }
141
parse_size(ssize_t pos,ssize_t & new_pos,optional<int> * arg_size)142 bool ArgumentParser_aarch64::parse_size(ssize_t pos, ssize_t &new_pos,
143 optional<int> *arg_size) {
144 int abs_arg_size;
145
146 new_pos = parse_number(pos, arg_size);
147 if (new_pos == pos)
148 return error_return(pos, pos);
149
150 abs_arg_size = abs(arg_size->value());
151 if (abs_arg_size != 1 && abs_arg_size != 2 && abs_arg_size != 4 &&
152 abs_arg_size != 8)
153 return error_return(pos, pos);
154 return true;
155 }
156
parse_mem(ssize_t pos,ssize_t & new_pos,optional<int> * reg_num,optional<int> * offset)157 bool ArgumentParser_aarch64::parse_mem(ssize_t pos, ssize_t &new_pos,
158 optional<int> *reg_num,
159 optional<int> *offset) {
160 if (arg_[pos] != 'x')
161 return error_return(pos, pos);
162 if (parse_register(pos + 1, new_pos, reg_num) == false)
163 return false;
164
165 if (arg_[new_pos] == ',') {
166 pos = new_pos + 1;
167 new_pos = parse_number(pos, offset);
168 if (new_pos == pos)
169 return error_return(pos, pos);
170 }
171 if (arg_[new_pos] != ']')
172 return error_return(new_pos, new_pos);
173 new_pos++;
174 return true;
175 }
176
parse(Argument * dest)177 bool ArgumentParser_aarch64::parse(Argument *dest) {
178 if (done())
179 return false;
180
181 // Support the following argument patterns:
182 // [-]<size>@<value>, [-]<size>@<reg>, [-]<size>@[<reg>], or
183 // [-]<size>@[<reg>,<offset>]
184 ssize_t cur_pos = cur_pos_, new_pos;
185 optional<int> arg_size;
186
187 // Parse [-]<size>
188 if (parse_size(cur_pos, new_pos, &arg_size) == false)
189 return false;
190 dest->arg_size_ = arg_size;
191
192 // Make sure '@' present
193 if (arg_[new_pos] != '@')
194 return error_return(new_pos, new_pos);
195 cur_pos = new_pos + 1;
196
197 if (arg_[cur_pos] == 'x') {
198 // Parse ...@<reg>
199 optional<int> reg_num;
200 if (parse_register(cur_pos + 1, new_pos, ®_num) == false)
201 return false;
202 cur_pos_ = new_pos;
203 dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
204 } else if (arg_[cur_pos] == '[') {
205 // Parse ...@[<reg>] and ...@[<reg,<offset>]
206 optional<int> reg_num, offset = 0;
207 if (parse_mem(cur_pos + 1, new_pos, ®_num, &offset) == false)
208 return false;
209 cur_pos_ = new_pos;
210 dest->base_register_name_ = "regs[" + std::to_string(reg_num.value()) + "]";
211 dest->deref_offset_ = offset;
212 } else {
213 // Parse ...@<value>
214 optional<int> val;
215 new_pos = parse_number(cur_pos, &val);
216 if (cur_pos == new_pos)
217 return error_return(cur_pos, cur_pos);
218 cur_pos_ = new_pos;
219 dest->constant_ = val;
220 }
221
222 skip_whitespace_from(cur_pos_);
223 return true;
224 }
225
parse(Argument * dest)226 bool ArgumentParser_powerpc64::parse(Argument *dest) {
227 if (done())
228 return false;
229
230 bool matched;
231 std::smatch matches;
232 std::string arg_str(&arg_[cur_pos_]);
233 std::regex arg_n_regex("^(\\-?[1248])\\@");
234 // Operands with constants of form iNUM or i-NUM
235 std::regex arg_op_regex_const("^i(\\-?[0-9]+)( +|$)");
236 // Operands with register only of form REG or %rREG
237 std::regex arg_op_regex_reg("^(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
238 // Operands with a base register and an offset of form
239 // NUM(REG) or -NUM(REG) or NUM(%rREG) or -NUM(%rREG)
240 std::regex arg_op_regex_breg_off(
241 "^(\\-?[0-9]+)\\((?:%r)?([1-2]?[0-9]|3[0-1])\\)( +|$)");
242 // Operands with a base register and an index register
243 // of form REG,REG or %rREG,%rREG
244 std::regex arg_op_regex_breg_ireg(
245 "^(?:%r)?([1-2]?[0-9]|3[0-1])\\,(?:%r)?([1-2]?[0-9]|3[0-1])( +|$)");
246
247 matched = std::regex_search(arg_str, matches, arg_n_regex);
248 if (matched) {
249 dest->arg_size_ = stoi(matches.str(1));
250 cur_pos_ += matches.length(0);
251 arg_str = &arg_[cur_pos_];
252
253 if (std::regex_search(arg_str, matches, arg_op_regex_const)) {
254 dest->constant_ = stoi(matches.str(1));
255 } else if (std::regex_search(arg_str, matches, arg_op_regex_reg)) {
256 dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
257 } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_off)) {
258 dest->deref_offset_ = stoi(matches.str(1));
259 dest->base_register_name_ = "gpr[" + matches.str(2) + "]";
260 } else if (std::regex_search(arg_str, matches, arg_op_regex_breg_ireg)) {
261 dest->deref_offset_ = 0; // In powerpc64, such operands contain a base
262 // register and an index register which are
263 // part of an indexed load/store operation.
264 // Even if no offset value is present, this
265 // is required by Argument::assign_to_local()
266 // in order to generate code for reading the
267 // argument. So, this is set to zero.
268 dest->base_register_name_ = "gpr[" + matches.str(1) + "]";
269 dest->index_register_name_ = "gpr[" + matches.str(2) + "]";
270 dest->scale_ = abs(*dest->arg_size_);
271 } else {
272 matched = false;
273 }
274 }
275
276 if (!matched) {
277 print_error(cur_pos_);
278 skip_until_whitespace_from(cur_pos_);
279 skip_whitespace_from(cur_pos_);
280 return false;
281 }
282
283 cur_pos_ += matches.length(0);
284 skip_whitespace_from(cur_pos_);
285 return true;
286 }
287
parse_identifier(ssize_t pos,optional<std::string> * result)288 ssize_t ArgumentParser_x64::parse_identifier(ssize_t pos,
289 optional<std::string> *result) {
290 if (isalpha(arg_[pos]) || arg_[pos] == '_') {
291 ssize_t start = pos++;
292 while (isalnum(arg_[pos]) || arg_[pos] == '_') pos++;
293 if (pos - start)
294 result->emplace(arg_ + start, pos - start);
295 }
296 return pos;
297 }
298
parse_register(ssize_t pos,std::string & name,int & size)299 ssize_t ArgumentParser_x64::parse_register(ssize_t pos, std::string &name,
300 int &size) {
301 ssize_t start = ++pos;
302 if (arg_[start - 1] != '%')
303 return -start;
304
305 while (isalnum(arg_[pos])) pos++;
306
307 std::string regname(arg_ + start, pos - start);
308 if (!normalize_register(®name, &size))
309 return -start;
310
311 name = regname;
312 return pos;
313 }
314
parse_base_register(ssize_t pos,Argument * dest)315 ssize_t ArgumentParser_x64::parse_base_register(ssize_t pos, Argument *dest) {
316 int size;
317 std::string name;
318 ssize_t res = parse_register(pos, name, size);
319 if (res < 0)
320 return res;
321
322 dest->base_register_name_ = name;
323 if (!dest->arg_size_)
324 dest->arg_size_ = size;
325
326 return res;
327 }
328
parse_index_register(ssize_t pos,Argument * dest)329 ssize_t ArgumentParser_x64::parse_index_register(ssize_t pos, Argument *dest) {
330 int size;
331 std::string name;
332 ssize_t res = parse_register(pos, name, size);
333 if (res < 0)
334 return res;
335
336 dest->index_register_name_ = name;
337
338 return res;
339 }
340
parse_scale(ssize_t pos,Argument * dest)341 ssize_t ArgumentParser_x64::parse_scale(ssize_t pos, Argument *dest) {
342 return parse_number(pos, &dest->scale_);
343 }
344
parse_expr(ssize_t pos,Argument * dest)345 ssize_t ArgumentParser_x64::parse_expr(ssize_t pos, Argument *dest) {
346 if (arg_[pos] == '$')
347 return parse_number(pos + 1, &dest->constant_);
348
349 if (arg_[pos] == '%')
350 return parse_base_register(pos, dest);
351
352 if (isdigit(arg_[pos]) || arg_[pos] == '-') {
353 pos = parse_number(pos, &dest->deref_offset_);
354 if (arg_[pos] == '+') {
355 pos = parse_identifier(pos + 1, &dest->deref_ident_);
356 if (!dest->deref_ident_)
357 return -pos;
358 }
359 } else {
360 dest->deref_offset_ = 0;
361 pos = parse_identifier(pos, &dest->deref_ident_);
362 if (arg_[pos] == '+' || arg_[pos] == '-') {
363 pos = parse_number(pos, &dest->deref_offset_);
364 }
365 }
366
367 if (arg_[pos] != '(')
368 return -pos;
369
370 pos = parse_base_register(pos + 1, dest);
371 if (pos < 0)
372 return pos;
373
374 if (arg_[pos] == ',') {
375 pos = parse_index_register(pos + 1, dest);
376 if (pos < 0)
377 return pos;
378
379 if (arg_[pos] == ',') {
380 pos = parse_scale(pos + 1, dest);
381 if (pos < 0)
382 return pos;
383 }
384 }
385
386 return (arg_[pos] == ')') ? pos + 1 : -pos;
387 }
388
parse_1(ssize_t pos,Argument * dest)389 ssize_t ArgumentParser_x64::parse_1(ssize_t pos, Argument *dest) {
390 if (isdigit(arg_[pos]) || arg_[pos] == '-') {
391 optional<int> asize;
392 ssize_t m = parse_number(pos, &asize);
393 if (arg_[m] == '@' && asize) {
394 dest->arg_size_ = asize;
395 return parse_expr(m + 1, dest);
396 }
397 }
398 return parse_expr(pos, dest);
399 }
400
parse(Argument * dest)401 bool ArgumentParser_x64::parse(Argument *dest) {
402 if (done())
403 return false;
404
405 ssize_t res = parse_1(cur_pos_, dest);
406 if (res < 0)
407 return error_return(-res, -res + 1);
408 if (!isspace(arg_[res]) && arg_[res] != '\0')
409 return error_return(res, res);
410 skip_whitespace_from(res);
411 return true;
412 }
413
414 const std::unordered_map<std::string, ArgumentParser_x64::RegInfo>
415 ArgumentParser_x64::registers_ = {
416 {"rax", {REG_A, 8}}, {"eax", {REG_A, 4}},
417 {"ax", {REG_A, 2}}, {"al", {REG_A, 1}},
418
419 {"rbx", {REG_B, 8}}, {"ebx", {REG_B, 4}},
420 {"bx", {REG_B, 2}}, {"bl", {REG_B, 1}},
421
422 {"rcx", {REG_C, 8}}, {"ecx", {REG_C, 4}},
423 {"cx", {REG_C, 2}}, {"cl", {REG_C, 1}},
424
425 {"rdx", {REG_D, 8}}, {"edx", {REG_D, 4}},
426 {"dx", {REG_D, 2}}, {"dl", {REG_D, 1}},
427
428 {"rsi", {REG_SI, 8}}, {"esi", {REG_SI, 4}},
429 {"si", {REG_SI, 2}}, {"sil", {REG_SI, 1}},
430
431 {"rdi", {REG_DI, 8}}, {"edi", {REG_DI, 4}},
432 {"di", {REG_DI, 2}}, {"dil", {REG_DI, 1}},
433
434 {"rbp", {REG_BP, 8}}, {"ebp", {REG_BP, 4}},
435 {"bp", {REG_BP, 2}}, {"bpl", {REG_BP, 1}},
436
437 {"rsp", {REG_SP, 8}}, {"esp", {REG_SP, 4}},
438 {"sp", {REG_SP, 2}}, {"spl", {REG_SP, 1}},
439
440 {"r8", {REG_8, 8}}, {"r8d", {REG_8, 4}},
441 {"r8w", {REG_8, 2}}, {"r8b", {REG_8, 1}},
442
443 {"r9", {REG_9, 8}}, {"r9d", {REG_9, 4}},
444 {"r9w", {REG_9, 2}}, {"r9b", {REG_9, 1}},
445
446 {"r10", {REG_10, 8}}, {"r10d", {REG_10, 4}},
447 {"r10w", {REG_10, 2}}, {"r10b", {REG_10, 1}},
448
449 {"r11", {REG_11, 8}}, {"r11d", {REG_11, 4}},
450 {"r11w", {REG_11, 2}}, {"r11b", {REG_11, 1}},
451
452 {"r12", {REG_12, 8}}, {"r12d", {REG_12, 4}},
453 {"r12w", {REG_12, 2}}, {"r12b", {REG_12, 1}},
454
455 {"r13", {REG_13, 8}}, {"r13d", {REG_13, 4}},
456 {"r13w", {REG_13, 2}}, {"r13b", {REG_13, 1}},
457
458 {"r14", {REG_14, 8}}, {"r14d", {REG_14, 4}},
459 {"r14w", {REG_14, 2}}, {"r14b", {REG_14, 1}},
460
461 {"r15", {REG_15, 8}}, {"r15d", {REG_15, 4}},
462 {"r15w", {REG_15, 2}}, {"r15b", {REG_15, 1}},
463
464 {"rip", {REG_RIP, 8}},
465 };
466
reg_to_name(std::string * norm,Register reg)467 void ArgumentParser_x64::reg_to_name(std::string *norm, Register reg) {
468 switch (reg) {
469 case REG_A:
470 *norm = "ax";
471 break;
472 case REG_B:
473 *norm = "bx";
474 break;
475 case REG_C:
476 *norm = "cx";
477 break;
478 case REG_D:
479 *norm = "dx";
480 break;
481
482 case REG_SI:
483 *norm = "si";
484 break;
485 case REG_DI:
486 *norm = "di";
487 break;
488 case REG_BP:
489 *norm = "bp";
490 break;
491 case REG_SP:
492 *norm = "sp";
493 break;
494
495 case REG_8:
496 *norm = "r8";
497 break;
498 case REG_9:
499 *norm = "r9";
500 break;
501 case REG_10:
502 *norm = "r10";
503 break;
504 case REG_11:
505 *norm = "r11";
506 break;
507 case REG_12:
508 *norm = "r12";
509 break;
510 case REG_13:
511 *norm = "r13";
512 break;
513 case REG_14:
514 *norm = "r14";
515 break;
516 case REG_15:
517 *norm = "r15";
518 break;
519
520 case REG_RIP:
521 *norm = "ip";
522 break;
523 }
524 }
525
normalize_register(std::string * reg,int * reg_size)526 bool ArgumentParser_x64::normalize_register(std::string *reg, int *reg_size) {
527 auto it = registers_.find(*reg);
528 if (it == registers_.end())
529 return false;
530
531 *reg_size = it->second.size;
532 reg_to_name(reg, it->second.reg);
533 return true;
534 }
535 }
536