• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2015 PLUMgrid, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <linux/bpf.h>
17 #include <linux/version.h>
18 #include <sys/utsname.h>
19 #include <unistd.h>
20 #include <stdlib.h>
21 
22 #include <clang/AST/ASTConsumer.h>
23 #include <clang/AST/ASTContext.h>
24 #include <clang/AST/RecordLayout.h>
25 #include <clang/Frontend/CompilerInstance.h>
26 #include <clang/Frontend/MultiplexConsumer.h>
27 #include <clang/Rewrite/Core/Rewriter.h>
28 #include <clang/Lex/Lexer.h>
29 
30 #include "frontend_action_common.h"
31 #include "b_frontend_action.h"
32 #include "bpf_module.h"
33 #include "common.h"
34 #include "loader.h"
35 #include "table_storage.h"
36 #include "arch_helper.h"
37 #include "bcc_libbpf_inc.h"
38 
39 #include "libbpf.h"
40 #include "bcc_syms.h"
41 
42 namespace ebpf {
43 
// Number of entries in the full-size register tables below.
constexpr int MAX_CALLING_CONV_REGS = 6;

// Per-architecture tables of pt_regs field names, one entry per function
// argument in argument order.  get_call_conv_cb() picks one of these.

// x86: regular function calls.
const char *calling_conv_regs_x86[] = {"di", "si", "dx", "cx", "r8", "r9"};

// x86: table selected when the caller asks for the syscall convention.
const char *calling_conv_syscall_regs_x86[] = {"di", "si", "dx",
                                               "r10", "r8", "r9"};

const char *calling_conv_regs_ppc[] = {
  "gpr[3]", "gpr[4]", "gpr[5]", "gpr[6]", "gpr[7]", "gpr[8]"
};

// NOTE(review): this table has five entries while the others have
// MAX_CALLING_CONV_REGS (six) -- confirm callers never index past it.
const char *calling_conv_regs_s390x[] = {
  "gprs[2]", "gprs[3]", "gprs[4]", "gprs[5]", "gprs[6]"
};

const char *calling_conv_regs_arm64[] = {
  "regs[0]", "regs[1]", "regs[2]", "regs[3]", "regs[4]", "regs[5]"
};

const char *calling_conv_regs_mips[] = {
  "regs[4]", "regs[5]", "regs[6]", "regs[7]", "regs[8]", "regs[9]"
};

62 
get_call_conv_cb(bcc_arch_t arch,bool for_syscall)63 void *get_call_conv_cb(bcc_arch_t arch, bool for_syscall)
64 {
65   const char **ret;
66 
67   switch(arch) {
68     case BCC_ARCH_PPC:
69     case BCC_ARCH_PPC_LE:
70       ret = calling_conv_regs_ppc;
71       break;
72     case BCC_ARCH_S390X:
73       ret = calling_conv_regs_s390x;
74       break;
75     case BCC_ARCH_ARM64:
76       ret = calling_conv_regs_arm64;
77       break;
78     case BCC_ARCH_MIPS:
79       ret = calling_conv_regs_mips;
80       break;
81     default:
82       if (for_syscall)
83         ret = calling_conv_syscall_regs_x86;
84       else
85         ret = calling_conv_regs_x86;
86   }
87 
88   return (void *)ret;
89 }
90 
get_call_conv(bool for_syscall=false)91 const char **get_call_conv(bool for_syscall = false) {
92   const char **ret;
93 
94   ret = (const char **)run_arch_callback(get_call_conv_cb, for_syscall);
95   return ret;
96 }
97 
98 /* Use resolver only once per translation */
99 static void *kresolver = NULL;
get_symbol_resolver(void)100 static void *get_symbol_resolver(void) {
101   if (!kresolver)
102     kresolver = bcc_symcache_new(-1, nullptr);
103   return kresolver;
104 }
105 
check_bpf_probe_read_kernel(void)106 static std::string check_bpf_probe_read_kernel(void) {
107   bool is_probe_read_kernel;
108   void *resolver = get_symbol_resolver();
109   uint64_t addr = 0;
110   is_probe_read_kernel = bcc_symcache_resolve_name(resolver, nullptr,
111                           "bpf_probe_read_kernel", &addr) >= 0 ? true: false;
112 
113   /* If bpf_probe_read is not found (ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE) is
114    * not set in newer kernel, then bcc would anyway fail */
115   if (is_probe_read_kernel)
116     return "bpf_probe_read_kernel";
117   else
118     return "bpf_probe_read";
119 }
120 
check_bpf_probe_read_user(llvm::StringRef probe,bool & overlap_addr)121 static std::string check_bpf_probe_read_user(llvm::StringRef probe,
122         bool& overlap_addr) {
123   if (probe.str() == "bpf_probe_read_user" ||
124       probe.str() == "bpf_probe_read_user_str") {
125     // Check for probe_user symbols in backported kernel before fallback
126     void *resolver = get_symbol_resolver();
127     uint64_t addr = 0;
128     bool found = bcc_symcache_resolve_name(resolver, nullptr,
129                   "bpf_probe_read_user", &addr) >= 0 ? true: false;
130     if (found)
131       return probe.str();
132 
133     /* For arch with overlapping address space, dont use bpf_probe_read for
134      * user read. Just error out */
135 #if defined(__s390x__)
136     overlap_addr = true;
137     return "";
138 #endif
139 
140     if (probe.str() == "bpf_probe_read_user")
141       return "bpf_probe_read";
142     else
143       return "bpf_probe_read_str";
144   }
145   return "";
146 }
147 
148 using std::map;
149 using std::move;
150 using std::set;
151 using std::tuple;
152 using std::make_tuple;
153 using std::string;
154 using std::to_string;
155 using std::unique_ptr;
156 using std::vector;
157 using namespace clang;
158 
159 class ProbeChecker : public RecursiveASTVisitor<ProbeChecker> {
160  public:
ProbeChecker(Expr * arg,const set<tuple<Decl *,int>> & ptregs,bool track_helpers,bool is_assign)161   explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
162                         bool track_helpers, bool is_assign)
163       : needs_probe_(false), is_transitive_(false), ptregs_(ptregs),
164         track_helpers_(track_helpers), nb_derefs_(0), is_assign_(is_assign) {
165     if (arg) {
166       TraverseStmt(arg);
167       if (arg->getType()->isPointerType())
168         is_transitive_ = needs_probe_;
169     }
170   }
ProbeChecker(Expr * arg,const set<tuple<Decl *,int>> & ptregs,bool is_transitive)171   explicit ProbeChecker(Expr *arg, const set<tuple<Decl *, int>> &ptregs,
172                         bool is_transitive)
173       : ProbeChecker(arg, ptregs, is_transitive, false) {}
VisitCallExpr(CallExpr * E)174   bool VisitCallExpr(CallExpr *E) {
175     needs_probe_ = false;
176 
177     if (is_assign_) {
178       // We're looking for a function that returns an external pointer,
179       // regardless of the number of dereferences.
180       for(auto p : ptregs_) {
181         if (std::get<0>(p) == E->getDirectCallee()) {
182           needs_probe_ = true;
183           // ptregs_ stores the number of dereferences needed to get the external
184           // pointer, while nb_derefs_ stores the number of dereferences
185           // encountered.  So, any dereference encountered is one less
186           // dereference needed to get the external pointer.
187           nb_derefs_ -= std::get<1>(p);
188           return false;
189         }
190       }
191     } else {
192       tuple<Decl *, int> pt = make_tuple(E->getDirectCallee(), nb_derefs_);
193       if (ptregs_.find(pt) != ptregs_.end())
194         needs_probe_ = true;
195     }
196 
197     if (!track_helpers_)
198       return false;
199     if (VarDecl *V = dyn_cast<VarDecl>(E->getCalleeDecl()))
200       needs_probe_ = V->getName() == "bpf_get_current_task";
201     return false;
202   }
VisitMemberExpr(MemberExpr * M)203   bool VisitMemberExpr(MemberExpr *M) {
204     tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
205     if (ptregs_.find(pt) != ptregs_.end()) {
206       needs_probe_ = true;
207       return false;
208     }
209     if (M->isArrow()) {
210       /* In A->b, if A is an external pointer, then A->b should be considered
211        * one too.  However, if we're taking the address of A->b
212        * (nb_derefs_ < 0), we should take it into account for the number of
213        * indirections; &A->b is a pointer to A with an offset. */
214       if (nb_derefs_ >= 0) {
215         ProbeChecker checker = ProbeChecker(M->getBase(), ptregs_,
216                                             track_helpers_, is_assign_);
217         if (checker.needs_probe() && checker.get_nb_derefs() == 0) {
218           needs_probe_ = true;
219           return false;
220         }
221       }
222       nb_derefs_++;
223     }
224     return true;
225   }
VisitUnaryOperator(UnaryOperator * E)226   bool VisitUnaryOperator(UnaryOperator *E) {
227     if (E->getOpcode() == UO_Deref) {
228       /* In *A, if A is an external pointer, then *A should be considered one
229        * too. */
230       ProbeChecker checker = ProbeChecker(E->getSubExpr(), ptregs_,
231                                           track_helpers_, is_assign_);
232       if (checker.needs_probe() && checker.get_nb_derefs() == 0) {
233         needs_probe_ = true;
234         return false;
235       }
236       nb_derefs_++;
237     } else if (E->getOpcode() == UO_AddrOf) {
238       nb_derefs_--;
239     }
240     return true;
241   }
VisitDeclRefExpr(DeclRefExpr * E)242   bool VisitDeclRefExpr(DeclRefExpr *E) {
243     if (is_assign_) {
244       // We're looking for an external pointer, regardless of the number of
245       // dereferences.
246       for(auto p : ptregs_) {
247         if (std::get<0>(p) == E->getDecl()) {
248           needs_probe_ = true;
249           // ptregs_ stores the number of dereferences needed to get the external
250           // pointer, while nb_derefs_ stores the number of dereferences
251           // encountered.  So, any dereference encountered is one less
252           // dereference needed to get the external pointer.
253           nb_derefs_ -= std::get<1>(p);
254           return false;
255         }
256       }
257     } else {
258       tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
259       if (ptregs_.find(pt) != ptregs_.end())
260         needs_probe_ = true;
261     }
262     return true;
263   }
needs_probe() const264   bool needs_probe() const { return needs_probe_; }
is_transitive() const265   bool is_transitive() const { return is_transitive_; }
get_nb_derefs() const266   int get_nb_derefs() const { return nb_derefs_; }
267  private:
268   bool needs_probe_;
269   bool is_transitive_;
270   const set<tuple<Decl *, int>> &ptregs_;
271   bool track_helpers_;
272   // Nb of dereferences we go through before finding the external pointer.
273   // A negative number counts the number of addrof.
274   int nb_derefs_;
275   bool is_assign_;
276 };
277 
278 // Visit a piece of the AST and mark it as needing probe reads
279 class ProbeSetter : public RecursiveASTVisitor<ProbeSetter> {
280  public:
ProbeSetter(set<tuple<Decl *,int>> * ptregs,int nb_derefs)281   explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs, int nb_derefs)
282       : ptregs_(ptregs), nb_derefs_(nb_derefs) {}
VisitDeclRefExpr(DeclRefExpr * E)283   bool VisitDeclRefExpr(DeclRefExpr *E) {
284     tuple<Decl *, int> pt = make_tuple(E->getDecl(), nb_derefs_);
285     ptregs_->insert(pt);
286     return true;
287   }
ProbeSetter(set<tuple<Decl *,int>> * ptregs)288   explicit ProbeSetter(set<tuple<Decl *, int>> *ptregs)
289       : ProbeSetter(ptregs, 0) {}
VisitUnaryOperator(UnaryOperator * E)290   bool VisitUnaryOperator(UnaryOperator *E) {
291     if (E->getOpcode() == UO_Deref)
292       nb_derefs_++;
293     return true;
294   }
VisitMemberExpr(MemberExpr * M)295   bool VisitMemberExpr(MemberExpr *M) {
296     tuple<Decl *, int> pt = make_tuple(M->getMemberDecl(), nb_derefs_);
297     ptregs_->insert(pt);
298     return false;
299   }
300  private:
301   set<tuple<Decl *, int>> *ptregs_;
302   // Nb of dereferences we go through before getting to the actual variable.
303   int nb_derefs_;
304 };
305 
MapVisitor(set<Decl * > & m)306 MapVisitor::MapVisitor(set<Decl *> &m) : m_(m) {}
307 
VisitCallExpr(CallExpr * Call)308 bool MapVisitor::VisitCallExpr(CallExpr *Call) {
309   if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
310     StringRef memb_name = Memb->getMemberDecl()->getName();
311     if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
312       if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
313         if (!A->getName().startswith("maps"))
314           return true;
315 
316         if (memb_name == "update" || memb_name == "insert") {
317           ProbeChecker checker = ProbeChecker(Call->getArg(1), ptregs_, true,
318                                               true);
319           if (checker.needs_probe())
320             m_.insert(Ref->getDecl());
321         }
322       }
323     }
324   }
325   return true;
326 }
327 
ProbeVisitor(ASTContext & C,Rewriter & rewriter,set<Decl * > & m,bool track_helpers)328 ProbeVisitor::ProbeVisitor(ASTContext &C, Rewriter &rewriter,
329                            set<Decl *> &m, bool track_helpers) :
330   C(C), rewriter_(rewriter), m_(m), ctx_(nullptr), track_helpers_(track_helpers),
331   addrof_stmt_(nullptr), is_addrof_(false) {
332   const char **calling_conv_regs = get_call_conv();
333   has_overlap_kuaddr_ = calling_conv_regs == calling_conv_regs_s390x;
334 }
335 
assignsExtPtr(Expr * E,int * nbDerefs)336 bool ProbeVisitor::assignsExtPtr(Expr *E, int *nbDerefs) {
337   if (IsContextMemberExpr(E)) {
338     *nbDerefs = 0;
339     return true;
340   }
341 
342   /* If the expression contains a call to another function, we need to visit
343   * that function first to know if a rewrite is necessary (i.e., if the
344   * function returns an external pointer). */
345   if (!TraverseStmt(E))
346     return false;
347 
348   ProbeChecker checker = ProbeChecker(E, ptregs_, track_helpers_,
349                                       true);
350   if (checker.is_transitive()) {
351     // The negative of the number of dereferences is the number of addrof.  In
352     // an assignment, if we went through n addrof before getting the external
353     // pointer, then we'll need n dereferences on the left-hand side variable
354     // to get to the external pointer.
355     *nbDerefs = -checker.get_nb_derefs();
356     return true;
357   }
358 
359   if (E->IgnoreParenCasts()->getStmtClass() == Stmt::CallExprClass) {
360     CallExpr *Call = dyn_cast<CallExpr>(E->IgnoreParenCasts());
361     if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
362       StringRef memb_name = Memb->getMemberDecl()->getName();
363       if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
364         if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
365           if (!A->getName().startswith("maps"))
366             return false;
367 
368           if (memb_name == "lookup" || memb_name == "lookup_or_init" ||
369               memb_name == "lookup_or_try_init") {
370             if (m_.find(Ref->getDecl()) != m_.end()) {
371               // Retrieved an ext. pointer from a map, mark LHS as ext. pointer.
372               // Pointers from maps always need a single dereference to get the
373               // actual value.  The value may be an external pointer but cannot
374               // be a pointer to an external pointer as the verifier prohibits
375               // storing known pointers (to map values, context, the stack, or
376               // the packet) in maps.
377               *nbDerefs = 1;
378               return true;
379             }
380           }
381         }
382       }
383     }
384   }
385   return false;
386 }
VisitVarDecl(VarDecl * D)387 bool ProbeVisitor::VisitVarDecl(VarDecl *D) {
388   if (Expr *E = D->getInit()) {
389     int nbDerefs;
390     if (assignsExtPtr(E, &nbDerefs)) {
391       // The negative of the number of addrof is the number of dereferences.
392       tuple<Decl *, int> pt = make_tuple(D, nbDerefs);
393       set_ptreg(pt);
394     }
395   }
396   return true;
397 }
398 
TraverseStmt(Stmt * S)399 bool ProbeVisitor::TraverseStmt(Stmt *S) {
400   if (whitelist_.find(S) != whitelist_.end())
401     return true;
402   auto ret = RecursiveASTVisitor<ProbeVisitor>::TraverseStmt(S);
403   if (addrof_stmt_ == S) {
404     addrof_stmt_ = nullptr;
405     is_addrof_ = false;
406   }
407   return ret;
408 }
409 
VisitCallExpr(CallExpr * Call)410 bool ProbeVisitor::VisitCallExpr(CallExpr *Call) {
411   // Skip bpf_probe_read for the third argument if it is an AddrOf.
412   if (VarDecl *V = dyn_cast<VarDecl>(Call->getCalleeDecl())) {
413     if (V->getName() == "bpf_probe_read" && Call->getNumArgs() >= 3) {
414       const Expr *E = Call->getArg(2)->IgnoreParenCasts();
415       whitelist_.insert(E);
416       return true;
417     }
418   }
419 
420   if (FunctionDecl *F = dyn_cast<FunctionDecl>(Call->getCalleeDecl())) {
421     if (F->hasBody()) {
422       unsigned i = 0;
423       for (auto arg : Call->arguments()) {
424         ProbeChecker checker = ProbeChecker(arg, ptregs_, track_helpers_,
425                                             true);
426         if (checker.needs_probe()) {
427           tuple<Decl *, int> pt = make_tuple(F->getParamDecl(i),
428                                              -checker.get_nb_derefs());
429           ptregs_.insert(pt);
430         }
431         ++i;
432       }
433       if (fn_visited_.find(F) == fn_visited_.end()) {
434         fn_visited_.insert(F);
435         /* Maintains a stack of the number of dereferences for the external
436          * pointers returned by each function in the call stack or -1 if the
437          * function didn't return an external pointer. */
438         ptregs_returned_.push_back(-1);
439         TraverseDecl(F);
440         int nb_derefs = ptregs_returned_.back();
441         ptregs_returned_.pop_back();
442         if (nb_derefs != -1) {
443           tuple<Decl *, int> pt = make_tuple(F, nb_derefs);
444           ptregs_.insert(pt);
445         }
446       }
447     }
448   }
449   return true;
450 }
VisitReturnStmt(ReturnStmt * R)451 bool ProbeVisitor::VisitReturnStmt(ReturnStmt *R) {
452   /* If this function wasn't called by another, there's no need to check the
453    * return statement for external pointers. */
454   if (ptregs_returned_.size() == 0)
455     return true;
456 
457   /* Reverse order of traversals.  This is needed if, in the return statement,
458    * we're calling a function that's returning an external pointer: we need to
459    * know what the function is returning to decide what this function is
460    * returning. */
461   if (!TraverseStmt(R->getRetValue()))
462     return false;
463 
464   ProbeChecker checker = ProbeChecker(R->getRetValue(), ptregs_,
465                                       track_helpers_, true);
466   if (checker.needs_probe()) {
467     int curr_nb_derefs = ptregs_returned_.back();
468     int nb_derefs = -checker.get_nb_derefs();
469     /* If the function returns external pointers with different levels of
470      * indirection, we handle the case with the highest level of indirection
471      * and leave it to the user to manually handle other cases. */
472     if (nb_derefs > curr_nb_derefs) {
473       ptregs_returned_.pop_back();
474       ptregs_returned_.push_back(nb_derefs);
475     }
476   }
477   return true;
478 }
VisitBinaryOperator(BinaryOperator * E)479 bool ProbeVisitor::VisitBinaryOperator(BinaryOperator *E) {
480   if (!E->isAssignmentOp())
481     return true;
482 
483   // copy probe attribute from RHS to LHS if present
484   int nbDerefs;
485   if (assignsExtPtr(E->getRHS(), &nbDerefs)) {
486     ProbeSetter setter(&ptregs_, nbDerefs);
487     setter.TraverseStmt(E->getLHS());
488   }
489   return true;
490 }
VisitUnaryOperator(UnaryOperator * E)491 bool ProbeVisitor::VisitUnaryOperator(UnaryOperator *E) {
492   if (E->getOpcode() == UO_AddrOf) {
493     addrof_stmt_ = E;
494     is_addrof_ = true;
495   }
496   if (E->getOpcode() != UO_Deref)
497     return true;
498   if (memb_visited_.find(E) != memb_visited_.end())
499     return true;
500   Expr *sub = E->getSubExpr();
501   if (!ProbeChecker(sub, ptregs_, track_helpers_).needs_probe())
502     return true;
503   memb_visited_.insert(E);
504   string pre, post;
505   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
506   if (has_overlap_kuaddr_)
507     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)";
508   else
509     pre += " bpf_probe_read(&_val, sizeof(_val), (u64)";
510   post = "); _val; })";
511   rewriter_.ReplaceText(expansionLoc(E->getOperatorLoc()), 1, pre);
512   rewriter_.InsertTextAfterToken(expansionLoc(GET_ENDLOC(sub)), post);
513   return true;
514 }
VisitMemberExpr(MemberExpr * E)515 bool ProbeVisitor::VisitMemberExpr(MemberExpr *E) {
516   if (memb_visited_.find(E) != memb_visited_.end()) return true;
517 
518   Expr *base;
519   SourceLocation rhs_start, member;
520   bool found = false;
521   for (MemberExpr *M = E; M; M = dyn_cast<MemberExpr>(M->getBase())) {
522     memb_visited_.insert(M);
523     rhs_start = GET_ENDLOC(M);
524     base = M->getBase();
525     member = M->getMemberLoc();
526     if (M->isArrow()) {
527       found = true;
528       break;
529     }
530   }
531   if (!found)
532     return true;
533   if (member.isInvalid()) {
534     error(GET_ENDLOC(base), "internal error: MemberLoc is invalid while preparing probe rewrite");
535     return false;
536   }
537 
538   if (!rewriter_.isRewritable(GET_BEGINLOC(E)))
539     return true;
540 
541   // parent expr has addrof, skip the rewrite, set is_addrof_ to flase so
542   // it won't affect next level of indirect address
543   if (is_addrof_) {
544     is_addrof_ = false;
545     return true;
546   }
547 
548   /* If the base of the dereference is a call to another function, we need to
549    * visit that function first to know if a rewrite is necessary (i.e., if the
550    * function returns an external pointer). */
551   if (base->IgnoreParenCasts()->getStmtClass() == Stmt::CallExprClass) {
552     CallExpr *Call = dyn_cast<CallExpr>(base->IgnoreParenCasts());
553     if (!TraverseStmt(Call))
554       return false;
555   }
556 
557   // Checks to see if the expression references something that needs to be run
558   // through bpf_probe_read.
559   if (!ProbeChecker(base, ptregs_, track_helpers_).needs_probe())
560     return true;
561 
562   // If the base is an array, we will skip rewriting. See issue #2352.
563   if (E->getType()->isArrayType())
564     return true;
565 
566   string rhs = rewriter_.getRewrittenText(expansionRange(SourceRange(rhs_start, GET_ENDLOC(E))));
567   string base_type = base->getType()->getPointeeType().getAsString();
568   string pre, post;
569   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
570   if (has_overlap_kuaddr_)
571     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)&";
572   else
573     pre += " bpf_probe_read(&_val, sizeof(_val), (u64)&";
574   post = rhs + "); _val; })";
575   rewriter_.InsertText(expansionLoc(GET_BEGINLOC(E)), pre);
576   rewriter_.ReplaceText(expansionRange(SourceRange(member, GET_ENDLOC(E))), post);
577   return true;
578 }
VisitArraySubscriptExpr(ArraySubscriptExpr * E)579 bool ProbeVisitor::VisitArraySubscriptExpr(ArraySubscriptExpr *E) {
580   if (memb_visited_.find(E) != memb_visited_.end()) return true;
581   if (!ProbeChecker(E, ptregs_, track_helpers_).needs_probe())
582     return true;
583 
584   // Parent expr has addrof, skip the rewrite.
585   if (is_addrof_)
586     return true;
587 
588   // If the base is an array, we will skip rewriting. See issue #2352.
589   if (E->getType()->isArrayType())
590     return true;
591 
592   if (!rewriter_.isRewritable(GET_BEGINLOC(E)))
593     return true;
594 
595   Expr *base = E->getBase();
596   Expr *idx = E->getIdx();
597   memb_visited_.insert(E);
598 
599   if (!rewriter_.isRewritable(GET_BEGINLOC(base)))
600     return true;
601   if (!rewriter_.isRewritable(GET_BEGINLOC(idx)))
602     return true;
603 
604 
605   string pre, lbracket, rbracket;
606   LangOptions opts;
607   SourceLocation lbracket_start, lbracket_end;
608   SourceRange lbracket_range;
609 
610   /* For cases like daddr->s6_addr[4], clang encodes the end location of "base"
611    * as "]". This makes it hard to rewrite the expression like
612    * "daddr->s6_addr  [ 4 ]" since we do not know the end location
613    * of "addr->s6_addr". Let us abort the operation if this is the case.
614    */
615   lbracket_start = Lexer::getLocForEndOfToken(GET_ENDLOC(base), 1,
616                                               rewriter_.getSourceMgr(),
617                                               opts).getLocWithOffset(1);
618   lbracket_end = GET_BEGINLOC(idx).getLocWithOffset(-1);
619   lbracket_range = expansionRange(SourceRange(lbracket_start, lbracket_end));
620   if (rewriter_.getRewrittenText(lbracket_range).size() == 0)
621     return true;
622 
623   pre = "({ typeof(" + E->getType().getAsString() + ") _val; __builtin_memset(&_val, 0, sizeof(_val));";
624   if (has_overlap_kuaddr_)
625     pre += " bpf_probe_read_kernel(&_val, sizeof(_val), (u64)((";
626   else
627     pre += " bpf_probe_read(&_val, sizeof(_val), (u64)((";
628   if (isMemberDereference(base)) {
629     pre += "&";
630     // If the base of the array subscript is a member dereference, we'll rewrite
631     // both at the same time.
632     addrof_stmt_ = base;
633     is_addrof_ = true;
634   }
635   rewriter_.InsertText(expansionLoc(GET_BEGINLOC(base)), pre);
636 
637   /* Replace left bracket and any space around it.  Since Clang doesn't provide
638    * a method to retrieve the left bracket, replace everything from the end of
639    * the base to the start of the index. */
640   lbracket = ") + (";
641   rewriter_.ReplaceText(lbracket_range, lbracket);
642 
643   rbracket = "))); _val; })";
644   rewriter_.ReplaceText(expansionLoc(E->getRBracketLoc()), 1, rbracket);
645 
646   return true;
647 }
648 
isMemberDereference(Expr * E)649 bool ProbeVisitor::isMemberDereference(Expr *E) {
650   if (E->IgnoreParenCasts()->getStmtClass() != Stmt::MemberExprClass)
651     return false;
652   for (MemberExpr *M = dyn_cast<MemberExpr>(E->IgnoreParenCasts()); M;
653        M = dyn_cast<MemberExpr>(M->getBase()->IgnoreParenCasts())) {
654     if (M->isArrow())
655       return true;
656   }
657   return false;
658 }
IsContextMemberExpr(Expr * E)659 bool ProbeVisitor::IsContextMemberExpr(Expr *E) {
660   if (!E->getType()->isPointerType())
661     return false;
662 
663   Expr *base;
664   SourceLocation member;
665   bool found = false;
666   MemberExpr *M;
667   Expr *Ex = E->IgnoreParenCasts();
668   while (Ex->getStmtClass() == Stmt::ArraySubscriptExprClass
669          || Ex->getStmtClass() == Stmt::MemberExprClass) {
670     if (Ex->getStmtClass() == Stmt::ArraySubscriptExprClass) {
671       Ex = dyn_cast<ArraySubscriptExpr>(Ex)->getBase()->IgnoreParenCasts();
672     } else if (Ex->getStmtClass() == Stmt::MemberExprClass) {
673       M = dyn_cast<MemberExpr>(Ex);
674       base = M->getBase()->IgnoreParenCasts();
675       member = M->getMemberLoc();
676       if (M->isArrow()) {
677         found = true;
678         break;
679       }
680       Ex = base;
681     }
682   }
683   if (!found) {
684     return false;
685   }
686   if (member.isInvalid()) {
687     return false;
688   }
689 
690   if (DeclRefExpr *base_expr = dyn_cast<DeclRefExpr>(base)) {
691     if (base_expr->getDecl() == ctx_) {
692       return true;
693     }
694   }
695   return false;
696 }
697 
698 SourceRange
expansionRange(SourceRange range)699 ProbeVisitor::expansionRange(SourceRange range) {
700 #if LLVM_MAJOR_VERSION >= 7
701   return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
702 #else
703   return rewriter_.getSourceMgr().getExpansionRange(range);
704 #endif
705 }
706 
707 SourceLocation
expansionLoc(SourceLocation loc)708 ProbeVisitor::expansionLoc(SourceLocation loc) {
709   return rewriter_.getSourceMgr().getExpansionLoc(loc);
710 }
711 
712 template <unsigned N>
error(SourceLocation loc,const char (& fmt)[N])713 DiagnosticBuilder ProbeVisitor::error(SourceLocation loc, const char (&fmt)[N]) {
714   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, fmt);
715   return C.getDiagnostics().Report(loc, diag_id);
716 }
717 
BTypeVisitor(ASTContext & C,BFrontendAction & fe)718 BTypeVisitor::BTypeVisitor(ASTContext &C, BFrontendAction &fe)
719     : C(C), diag_(C.getDiagnostics()), fe_(fe), rewriter_(fe.rewriter()), out_(llvm::errs()) {
720   const char **calling_conv_regs = get_call_conv();
721   has_overlap_kuaddr_ = calling_conv_regs == calling_conv_regs_s390x;
722 }
723 
genParamDirectAssign(FunctionDecl * D,string & preamble,const char ** calling_conv_regs)724 void BTypeVisitor::genParamDirectAssign(FunctionDecl *D, string& preamble,
725                                         const char **calling_conv_regs) {
726   for (size_t idx = 0; idx < fn_args_.size(); idx++) {
727     ParmVarDecl *arg = fn_args_[idx];
728 
729     if (idx >= 1) {
730       // Move the args into a preamble section where the same params are
731       // declared and initialized from pt_regs.
732       // Todo: this init should be done only when the program requests it.
733       string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange()));
734       arg->addAttr(UnavailableAttr::CreateImplicit(C, "ptregs"));
735       size_t d = idx - 1;
736       const char *reg = calling_conv_regs[d];
737       preamble += " " + text + " = " + fn_args_[0]->getName().str() + "->" +
738                   string(reg) + ";";
739     }
740   }
741 }
742 
genParamIndirectAssign(FunctionDecl * D,string & preamble,const char ** calling_conv_regs)743 void BTypeVisitor::genParamIndirectAssign(FunctionDecl *D, string& preamble,
744                                           const char **calling_conv_regs) {
745   string new_ctx;
746 
747   for (size_t idx = 0; idx < fn_args_.size(); idx++) {
748     ParmVarDecl *arg = fn_args_[idx];
749 
750     if (idx == 0) {
751       new_ctx = "__" + arg->getName().str();
752       preamble += " struct pt_regs * " + new_ctx + " = " +
753                   arg->getName().str() + "->" +
754                   string(calling_conv_regs[0]) + ";";
755     } else {
756       // Move the args into a preamble section where the same params are
757       // declared and initialized from pt_regs.
758       // Todo: this init should be done only when the program requests it.
759       string text = rewriter_.getRewrittenText(expansionRange(arg->getSourceRange()));
760       size_t d = idx - 1;
761       const char *reg = calling_conv_regs[d];
762       preamble += "\n " + text + ";";
763       if (has_overlap_kuaddr_)
764         preamble += " bpf_probe_read_kernel";
765       else
766         preamble += " bpf_probe_read";
767       preamble += "(&" + arg->getName().str() + ", sizeof(" +
768                   arg->getName().str() + "), &" + new_ctx + "->" +
769                   string(reg) + ");";
770     }
771   }
772 }
773 
rewriteFuncParam(FunctionDecl * D)774 void BTypeVisitor::rewriteFuncParam(FunctionDecl *D) {
775   string preamble = "{\n";
776   if (D->param_size() > 1) {
777     bool is_syscall = false;
778     if (strncmp(D->getName().str().c_str(), "syscall__", 9) == 0 ||
779         strncmp(D->getName().str().c_str(), "kprobe____x64_sys_", 18) == 0)
780       is_syscall = true;
781     const char **calling_conv_regs = get_call_conv(is_syscall);
782 
783     // If function prefix is "syscall__" or "kprobe____x64_sys_",
784     // the function will attach to a kprobe syscall function.
785     // Guard parameter assiggnment with CONFIG_ARCH_HAS_SYSCALL_WRAPPER.
786     // For __x64_sys_* syscalls, this is always true, but we guard
787     // it in case of "syscall__" for other architectures.
788     if (is_syscall) {
789       preamble += "#if defined(CONFIG_ARCH_HAS_SYSCALL_WRAPPER) && !defined(__s390x__)\n";
790       genParamIndirectAssign(D, preamble, calling_conv_regs);
791       preamble += "\n#else\n";
792       genParamDirectAssign(D, preamble, calling_conv_regs);
793       preamble += "\n#endif\n";
794     } else {
795       genParamDirectAssign(D, preamble, calling_conv_regs);
796     }
797     rewriter_.ReplaceText(
798         expansionRange(SourceRange(GET_ENDLOC(D->getParamDecl(0)),
799                     GET_ENDLOC(D->getParamDecl(D->getNumParams() - 1)))),
800         fn_args_[0]->getName());
801   }
802   // for each trace argument, convert the variable from ptregs to something on stack
803   if (CompoundStmt *S = dyn_cast<CompoundStmt>(D->getBody()))
804     rewriter_.ReplaceText(S->getLBracLoc(), 1, preamble);
805 }
806 
VisitFunctionDecl(FunctionDecl * D)807 bool BTypeVisitor::VisitFunctionDecl(FunctionDecl *D) {
808   // put each non-static non-inline function decl in its own section, to be
809   // extracted by the MemoryManager
810   auto real_start_loc = rewriter_.getSourceMgr().getFileLoc(GET_BEGINLOC(D));
811   if (fe_.is_rewritable_ext_func(D)) {
812     current_fn_ = string(D->getName());
813     string bd = rewriter_.getRewrittenText(expansionRange(D->getSourceRange()));
814     auto func_info = fe_.prog_func_info_.add_func(current_fn_);
815     if (!func_info) {
816       // We should only reach add_func above once per function seen, but the
817       // BPF_PROG-helper using macros in export/helpers.h (KFUNC_PROBE ..
818       // LSM_PROBE) break this logic. TODO: adjust export/helpers.h to not
819       // do so and bail out here, or find a better place to do add_func
820       func_info = fe_.prog_func_info_.get_func(current_fn_);
821       //error(GET_BEGINLOC(D), "redefinition of existing function");
822       //return false;
823     }
824     func_info->src_ = bd;
825     fe_.func_range_[current_fn_] = expansionRange(D->getSourceRange());
826     if (!D->getAttr<SectionAttr>()) {
827       string attr = string("__attribute__((section(\"") + BPF_FN_PREFIX +
828                     D->getName().str() + "\")))\n";
829       rewriter_.InsertText(real_start_loc, attr);
830     }
831     if (D->param_size() > MAX_CALLING_CONV_REGS + 1) {
832       error(GET_BEGINLOC(D->getParamDecl(MAX_CALLING_CONV_REGS + 1)),
833             "too many arguments, bcc only supports in-register parameters");
834       return false;
835     }
836 
837     fn_args_.clear();
838     for (auto arg_it = D->param_begin(); arg_it != D->param_end(); arg_it++) {
839       auto *arg = *arg_it;
840       if (arg->getName() == "") {
841         error(GET_ENDLOC(arg), "arguments to BPF program definition must be named");
842         return false;
843       }
844       fn_args_.push_back(arg);
845     }
846     rewriteFuncParam(D);
847   } else if (D->hasBody() &&
848              rewriter_.getSourceMgr().getFileID(real_start_loc)
849                == rewriter_.getSourceMgr().getMainFileID()) {
850     // rewritable functions that are static should be always treated as helper
851     rewriter_.InsertText(real_start_loc, "__attribute__((always_inline))\n");
852   }
853   return true;
854 }
855 
856 // Reverse the order of call traversal so that parameters inside of
857 // function calls will get rewritten before the call itself, otherwise
858 // text mangling will result.
TraverseCallExpr(CallExpr * Call)859 bool BTypeVisitor::TraverseCallExpr(CallExpr *Call) {
860   for (auto child : Call->children())
861     if (!TraverseStmt(child))
862       return false;
863   if (!WalkUpFromCallExpr(Call))
864     return false;
865   return true;
866 }
867 
868 // convert calls of the type:
869 //  table.foo(&key)
870 // to:
871 //  bpf_table_foo_elem(bpf_pseudo_fd(table), &key [,&leaf])
VisitCallExpr(CallExpr * Call)872 bool BTypeVisitor::VisitCallExpr(CallExpr *Call) {
873   // make sure node is a reference to a bpf table, which is assured by the
874   // presence of the section("maps/<typename>") GNU __attribute__
875   if (MemberExpr *Memb = dyn_cast<MemberExpr>(Call->getCallee()->IgnoreImplicit())) {
876     StringRef memb_name = Memb->getMemberDecl()->getName();
877     if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Memb->getBase())) {
878       if (SectionAttr *A = Ref->getDecl()->getAttr<SectionAttr>()) {
879         if (!A->getName().startswith("maps"))
880           return true;
881 
882         string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
883                                                    GET_ENDLOC(Call->getArg(Call->getNumArgs() - 1)))));
884 
885         // find the table fd, which was opened at declaration time
886         TableStorage::iterator desc;
887         Path local_path({fe_.id(), string(Ref->getDecl()->getName())});
888         Path global_path({string(Ref->getDecl()->getName())});
889         if (!fe_.table_storage().Find(local_path, desc)) {
890           if (!fe_.table_storage().Find(global_path, desc)) {
891             error(GET_ENDLOC(Ref), "bpf_table %0 failed to open") << Ref->getDecl()->getName();
892             return false;
893           }
894         }
895         string fd = to_string(desc->second.fd >= 0 ? desc->second.fd : desc->second.fake_fd);
896         string prefix, suffix;
897         string txt;
898         auto rewrite_start = GET_BEGINLOC(Call);
899         auto rewrite_end = GET_ENDLOC(Call);
900         if (memb_name == "lookup_or_init" || memb_name == "lookup_or_try_init") {
901           string name = string(Ref->getDecl()->getName());
902           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
903           string arg1 = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
904           string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
905           string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
906           txt  = "({typeof(" + name + ".leaf) *leaf = " + lookup + ", " + arg0 + "); ";
907           txt += "if (!leaf) {";
908           txt += " " + update + ", " + arg0 + ", " + arg1 + ", BPF_NOEXIST);";
909           txt += " leaf = " + lookup + ", " + arg0 + ");";
910           if (memb_name == "lookup_or_init") {
911             txt += " if (!leaf) return 0;";
912           }
913           txt += "}";
914           txt += "leaf;})";
915         } else if (memb_name == "increment" || memb_name == "atomic_increment") {
916           string name = string(Ref->getDecl()->getName());
917           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
918 
919           string increment_value = "1";
920           if (Call->getNumArgs() == 2) {
921             increment_value = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
922 
923           }
924 
925           string lookup = "bpf_map_lookup_elem_(bpf_pseudo_fd(1, " + fd + ")";
926           string update = "bpf_map_update_elem_(bpf_pseudo_fd(1, " + fd + ")";
927           txt  = "({ typeof(" + name + ".key) _key = " + arg0 + "; ";
928           txt += "typeof(" + name + ".leaf) *_leaf = " + lookup + ", &_key); ";
929           txt += "if (_leaf) ";
930 
931           if (memb_name == "atomic_increment") {
932             txt += "lock_xadd(_leaf, " + increment_value + ");";
933           } else {
934             txt += "(*_leaf) += " + increment_value + ";";
935           }
936           if (desc->second.type == BPF_MAP_TYPE_HASH) {
937             txt += "else { typeof(" + name + ".leaf) _zleaf; __builtin_memset(&_zleaf, 0, sizeof(_zleaf)); ";
938             txt += "_zleaf += " + increment_value + ";";
939             txt += update + ", &_key, &_zleaf, BPF_NOEXIST); } ";
940           }
941           txt += "})";
942         } else if (memb_name == "perf_submit") {
943           string name = string(Ref->getDecl()->getName());
944           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
945           string args_other = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(1)),
946                                                            GET_ENDLOC(Call->getArg(2)))));
947           txt = "bpf_perf_event_output(" + arg0 + ", bpf_pseudo_fd(1, " + fd + ")";
948           txt += ", CUR_CPU_IDENTIFIER, " + args_other + ")";
949 
950           // e.g.
951           // struct data_t { u32 pid; }; data_t data;
952           // events.perf_submit(ctx, &data, sizeof(data));
953           // ...
954           //                       &data   ->     data    ->  typeof(data)        ->   data_t
955           auto type_arg1 = Call->getArg(1)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtrOrNull();
956           if (type_arg1 && type_arg1->isStructureType()) {
957             auto event_type = type_arg1->getAsTagDecl();
958             const auto *r = dyn_cast<RecordDecl>(event_type);
959             std::vector<std::string> perf_event;
960 
961             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
962               // After LLVM commit aee49255074f
963               // (https://github.com/llvm/llvm-project/commit/aee49255074fd4ef38d97e6e70cbfbf2f9fd0fa7)
964               // array type change from `comm#char [16]` to `comm#char[16]`
965               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
966             }
967             fe_.perf_events_[name] = perf_event;
968           }
969         } else if (memb_name == "perf_submit_skb") {
970           string skb = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
971           string skb_len = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
972           string meta = rewriter_.getRewrittenText(expansionRange(Call->getArg(2)->getSourceRange()));
973           string meta_len = rewriter_.getRewrittenText(expansionRange(Call->getArg(3)->getSourceRange()));
974           txt = "bpf_perf_event_output(" +
975             skb + ", " +
976             "bpf_pseudo_fd(1, " + fd + "), " +
977             "((__u64)" + skb_len + " << 32) | BPF_F_CURRENT_CPU, " +
978             meta + ", " +
979             meta_len + ");";
980         } else if (memb_name == "get_stackid") {
981           if (desc->second.type == BPF_MAP_TYPE_STACK_TRACE) {
982             string arg0 =
983                 rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
984             txt = "bcc_get_stackid(";
985             txt += "bpf_pseudo_fd(1, " + fd + "), " + arg0;
986             rewrite_end = GET_ENDLOC(Call->getArg(0));
987             } else {
988               error(GET_BEGINLOC(Call), "get_stackid only available on stacktrace maps");
989               return false;
990             }
991         } else if (memb_name == "sock_map_update" || memb_name == "sock_hash_update") {
992           string ctx = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
993           string keyp = rewriter_.getRewrittenText(expansionRange(Call->getArg(1)->getSourceRange()));
994           string flag = rewriter_.getRewrittenText(expansionRange(Call->getArg(2)->getSourceRange()));
995           txt = "bpf_" + string(memb_name) + "(" + ctx + ", " +
996             "bpf_pseudo_fd(1, " + fd + "), " + keyp + ", " + flag + ");";
997         } else if (memb_name == "ringbuf_output") {
998           string name = string(Ref->getDecl()->getName());
999           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1000                                                            GET_ENDLOC(Call->getArg(2)))));
1001           txt = "bpf_ringbuf_output(bpf_pseudo_fd(1, " + fd + ")";
1002           txt += ", " + args + ")";
1003 
1004           // e.g.
1005           // struct data_t { u32 pid; }; data_t data;
1006           // events.ringbuf_output(&data, sizeof(data), 0);
1007           // ...
1008           //                       &data   ->     data    ->  typeof(data)        ->   data_t
1009           auto type_arg0 = Call->getArg(0)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtr();
1010           if (type_arg0->isStructureType()) {
1011             auto event_type = type_arg0->getAsTagDecl();
1012             const auto *r = dyn_cast<RecordDecl>(event_type);
1013             std::vector<std::string> perf_event;
1014 
1015             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
1016               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
1017             }
1018             fe_.perf_events_[name] = perf_event;
1019           }
1020         } else if (memb_name == "ringbuf_reserve") {
1021           string name = string(Ref->getDecl()->getName());
1022           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1023           txt = "bpf_ringbuf_reserve(bpf_pseudo_fd(1, " + fd + ")";
1024           txt += ", " + arg0 + ", 0)"; // Flags in reserve are meaningless
1025         } else if (memb_name == "ringbuf_discard") {
1026           string name = string(Ref->getDecl()->getName());
1027           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1028                                                            GET_ENDLOC(Call->getArg(1)))));
1029           txt = "bpf_ringbuf_discard(" + args + ")";
1030         } else if (memb_name == "ringbuf_submit") {
1031           string name = string(Ref->getDecl()->getName());
1032           string args = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(0)),
1033                                                            GET_ENDLOC(Call->getArg(1)))));
1034           txt = "bpf_ringbuf_submit(" + args + ")";
1035 
1036           // e.g.
1037           // struct data_t { u32 pid; };
1038           // data_t *data = events.ringbuf_reserve(sizeof(data_t));
1039           // events.ringbuf_submit(data, 0);
1040           // ...
1041           //                       &data   ->     data    ->  typeof(data)        ->   data_t
1042           auto type_arg0 = Call->getArg(0)->IgnoreCasts()->getType().getTypePtr()->getPointeeType().getTypePtr();
1043           if (type_arg0->isStructureType()) {
1044             auto event_type = type_arg0->getAsTagDecl();
1045             const auto *r = dyn_cast<RecordDecl>(event_type);
1046             std::vector<std::string> perf_event;
1047 
1048             for (auto it = r->field_begin(); it != r->field_end(); ++it) {
1049               perf_event.push_back(it->getNameAsString() + "#" + it->getType().getAsString()); //"pid#u32"
1050             }
1051             fe_.perf_events_[name] = perf_event;
1052           }
1053         } else if (memb_name == "msg_redirect_hash" || memb_name == "sk_redirect_hash") {
1054           string arg0 = rewriter_.getRewrittenText(expansionRange(Call->getArg(0)->getSourceRange()));
1055           string args_other = rewriter_.getRewrittenText(expansionRange(SourceRange(GET_BEGINLOC(Call->getArg(1)),
1056                                                            GET_ENDLOC(Call->getArg(2)))));
1057 
1058           txt = "bpf_" + string(memb_name) + "(" + arg0 + ", (void *)bpf_pseudo_fd(1, " + fd + "), ";
1059           txt += args_other + ")";
1060         } else {
1061           if (memb_name == "lookup") {
1062             prefix = "bpf_map_lookup_elem";
1063             suffix = ")";
1064           } else if (memb_name == "update") {
1065             prefix = "bpf_map_update_elem";
1066             suffix = ", BPF_ANY)";
1067           } else if (memb_name == "insert") {
1068             if (desc->second.type == BPF_MAP_TYPE_ARRAY) {
1069               warning(GET_BEGINLOC(Call), "all element of an array already exist; insert() will have no effect");
1070             }
1071             prefix = "bpf_map_update_elem";
1072             suffix = ", BPF_NOEXIST)";
1073           } else if (memb_name == "delete") {
1074             prefix = "bpf_map_delete_elem";
1075             suffix = ")";
1076           } else if (memb_name == "call") {
1077             prefix = "bpf_tail_call_";
1078             suffix = ")";
1079           } else if (memb_name == "perf_read") {
1080             prefix = "bpf_perf_event_read";
1081             suffix = ")";
1082           } else if (memb_name == "perf_counter_value") {
1083             prefix = "bpf_perf_event_read_value";
1084             suffix = ")";
1085           } else if (memb_name == "check_current_task") {
1086             prefix = "bpf_current_task_under_cgroup";
1087             suffix = ")";
1088           } else if (memb_name == "redirect_map") {
1089             prefix = "bpf_redirect_map";
1090             suffix = ")";
1091           } else if (memb_name == "sk_storage_get") {
1092             prefix = "bpf_sk_storage_get";
1093             suffix = ")";
1094           } else if (memb_name == "sk_storage_delete") {
1095             prefix = "bpf_sk_storage_delete";
1096             suffix = ")";
1097           } else if (memb_name == "inode_storage_get") {
1098             prefix = "bpf_inode_storage_get";
1099             suffix = ")";
1100           } else if (memb_name == "inode_storage_delete") {
1101             prefix = "bpf_inode_storage_delete";
1102             suffix = ")";
1103           } else if (memb_name == "task_storage_get") {
1104             prefix = "bpf_task_storage_get";
1105             suffix = ")";
1106           } else if (memb_name == "task_storage_delete") {
1107             prefix = "bpf_task_storage_delete";
1108             suffix = ")";
1109           } else if (memb_name == "get_local_storage") {
1110             prefix = "bpf_get_local_storage";
1111             suffix = ")";
1112           } else if (memb_name == "push") {
1113             prefix = "bpf_map_push_elem";
1114             suffix = ")";
1115           } else if (memb_name == "pop") {
1116             prefix = "bpf_map_pop_elem";
1117             suffix = ")";
1118           } else if (memb_name == "peek") {
1119             prefix = "bpf_map_peek_elem";
1120             suffix = ")";
1121            } else {
1122             error(GET_BEGINLOC(Call), "invalid bpf_table operation %0") << memb_name;
1123             return false;
1124           }
1125           prefix += "((void *)bpf_pseudo_fd(1, " + fd + "), ";
1126 
1127           txt = prefix + args + suffix;
1128         }
1129         if (!rewriter_.isRewritable(rewrite_start) || !rewriter_.isRewritable(rewrite_end)) {
1130           error(GET_BEGINLOC(Call), "cannot use map function inside a macro");
1131           return false;
1132         }
1133         rewriter_.ReplaceText(expansionRange(SourceRange(rewrite_start, rewrite_end)), txt);
1134         return true;
1135       }
1136     }
1137   } else if (Call->getCalleeDecl()) {
1138     NamedDecl *Decl = dyn_cast<NamedDecl>(Call->getCalleeDecl());
1139     if (!Decl) return true;
1140 
1141     string text;
1142 
1143     // Bail out when bpf_probe_read_user is unavailable for overlapping address
1144     // space arch.
1145     bool overlap_addr = false;
1146     std::string probe = check_bpf_probe_read_user(Decl->getName(),
1147                           overlap_addr);
1148     if (overlap_addr) {
1149       error(GET_BEGINLOC(Call), "bpf_probe_read_user not found. Use latest kernel");
1150       return false;
1151     }
1152 
1153     if (AsmLabelAttr *A = Decl->getAttr<AsmLabelAttr>()) {
1154       // Functions with the tag asm("llvm.bpf.extra") are implemented in the
1155       // rewriter rather than as a macro since they may also include nested
1156       // rewrites, and clang::Rewriter does not support rewrites in macros,
1157       // unless one preprocesses the entire source file.
1158       if (A->getLabel() == "llvm.bpf.extra") {
1159         if (!rewriter_.isRewritable(GET_BEGINLOC(Call))) {
1160           error(GET_BEGINLOC(Call), "cannot use builtin inside a macro");
1161           return false;
1162         }
1163 
1164         vector<string> args;
1165 
1166         for (auto arg : Call->arguments())
1167           args.push_back(rewriter_.getRewrittenText(expansionRange(arg->getSourceRange())));
1168 
1169         if (Decl->getName() == "incr_cksum_l3") {
1170           text = "bpf_l3_csum_replace_(" + fn_args_[0]->getName().str() + ", (u64)";
1171           text += args[0] + ", " + args[1] + ", " + args[2] + ", sizeof(" + args[2] + "))";
1172           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1173         } else if (Decl->getName() == "incr_cksum_l4") {
1174           text = "bpf_l4_csum_replace_(" + fn_args_[0]->getName().str() + ", (u64)";
1175           text += args[0] + ", " + args[1] + ", " + args[2];
1176           text += ", ((" + args[3] + " & 0x1) << 4) | sizeof(" + args[2] + "))";
1177           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1178         } else if (Decl->getName() == "bpf_trace_printk") {
1179           checkFormatSpecifiers(args[0], GET_BEGINLOC(Call->getArg(0)));
1180           //  #define bpf_trace_printk(fmt, args...)
1181           //    ({ char _fmt[] = fmt; bpf_trace_printk_(_fmt, sizeof(_fmt), args...); })
1182           text = "({ char _fmt[] = " + args[0] + "; bpf_trace_printk_(_fmt, sizeof(_fmt)";
1183           if (args.size() <= 1) {
1184             text += "); })";
1185             rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1186           } else {
1187             rewriter_.ReplaceText(expansionRange(SourceRange(GET_BEGINLOC(Call), GET_ENDLOC(Call->getArg(0)))), text);
1188             rewriter_.InsertTextAfter(GET_ENDLOC(Call), "); }");
1189           }
1190         } else if (Decl->getName() == "bpf_num_cpus") {
1191           int numcpu = sysconf(_SC_NPROCESSORS_ONLN);
1192           if (numcpu <= 0)
1193             numcpu = 1;
1194           text = to_string(numcpu);
1195           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1196         } else if (Decl->getName() == "bpf_usdt_readarg_p") {
1197           text = "({ u64 __addr = 0x0; ";
1198           text += "_bpf_readarg_" + current_fn_ + "_" + args[0] + "(" +
1199                   args[1] + ", &__addr, sizeof(__addr));";
1200 
1201           bool overlap_addr = false;
1202           text += check_bpf_probe_read_user(StringRef("bpf_probe_read_user"),
1203                   overlap_addr);
1204           if (overlap_addr) {
1205             error(GET_BEGINLOC(Call), "bpf_probe_read_user not found. Use latest kernel");
1206             return false;
1207           }
1208 
1209           text += "(" + args[2] + ", " + args[3] + ", (void *)__addr);";
1210           text += "})";
1211           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1212         } else if (Decl->getName() == "bpf_usdt_readarg") {
1213           text = "_bpf_readarg_" + current_fn_ + "_" + args[0] + "(" + args[1] +
1214                  ", " + args[2] + ", sizeof(*(" + args[2] + ")))";
1215           rewriter_.ReplaceText(expansionRange(Call->getSourceRange()), text);
1216         }
1217       }
1218     } else if (FunctionDecl *F = dyn_cast<FunctionDecl>(Decl)) {
1219       if (F->isExternallyVisible() && !F->getBuiltinID()) {
1220         auto start_loc = rewriter_.getSourceMgr().getFileLoc(GET_BEGINLOC(Decl));
1221         if (rewriter_.getSourceMgr().getFileID(start_loc)
1222             == rewriter_.getSourceMgr().getMainFileID()) {
1223           error(GET_BEGINLOC(Call), "cannot call non-static helper function");
1224           return false;
1225         }
1226       }
1227     }
1228   }
1229   return true;
1230 }
1231 
checkFormatSpecifiers(const string & fmt,SourceLocation loc)1232 bool BTypeVisitor::checkFormatSpecifiers(const string& fmt, SourceLocation loc) {
1233   unsigned nb_specifiers = 0, i, j;
1234   bool has_s = false;
1235   for (i = 0; i < fmt.length(); i++) {
1236     if (!isascii(fmt[i]) || (!isprint(fmt[i]) && !isspace(fmt[i]))) {
1237       warning(loc.getLocWithOffset(i), "unrecognized character");
1238       return false;
1239     }
1240     if (fmt[i] != '%')
1241       continue;
1242     if (nb_specifiers >= 3) {
1243       warning(loc.getLocWithOffset(i), "cannot use more than 3 conversion specifiers");
1244       return false;
1245     }
1246     nb_specifiers++;
1247     i++;
1248     if (fmt[i] == 'l') {
1249       i++;
1250     } else if (fmt[i] == 'p' || fmt[i] == 's') {
1251       i++;
1252       if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0) {
1253         warning(loc.getLocWithOffset(i - 2),
1254                 "only %%d %%u %%x %%ld %%lu %%lx %%lld %%llu %%llx %%p %%s conversion specifiers allowed");
1255         return false;
1256       }
1257       if (fmt[i - 1] == 's') {
1258         if (has_s) {
1259           warning(loc.getLocWithOffset(i - 2), "cannot use several %%s conversion specifiers");
1260           return false;
1261         }
1262         has_s = true;
1263       }
1264       continue;
1265     }
1266     j = 1;
1267     if (fmt[i] == 'l') {
1268       i++;
1269       j++;
1270     }
1271     if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x') {
1272       warning(loc.getLocWithOffset(i - j),
1273               "only %%d %%u %%x %%ld %%lu %%lx %%lld %%llu %%llx %%p %%s conversion specifiers allowed");
1274       return false;
1275     }
1276   }
1277   return true;
1278 }
1279 
VisitBinaryOperator(BinaryOperator * E)1280 bool BTypeVisitor::VisitBinaryOperator(BinaryOperator *E) {
1281   if (!E->isAssignmentOp())
1282     return true;
1283   Expr *LHS = E->getLHS()->IgnoreImplicit();
1284   if (MemberExpr *Memb = dyn_cast<MemberExpr>(LHS)) {
1285     if (DeclRefExpr *Base = dyn_cast<DeclRefExpr>(Memb->getBase()->IgnoreImplicit())) {
1286       if (DeprecatedAttr *A = Base->getDecl()->getAttr<DeprecatedAttr>()) {
1287         if (A->getMessage() == "packet") {
1288           if (FieldDecl *F = dyn_cast<FieldDecl>(Memb->getMemberDecl())) {
1289             if (!rewriter_.isRewritable(GET_BEGINLOC(E))) {
1290               error(GET_BEGINLOC(E), "cannot use \"packet\" header type inside a macro");
1291               return false;
1292             }
1293 
1294             auto EndLoc = GET_ENDLOC(E);
1295             if (EndLoc.isMacroID()) {
1296               error(EndLoc, "cannot have macro at the end of expresssion, "
1297                             "workaround: put perentheses around macro \"(MARCO)\"");
1298               return false;
1299             }
1300 
1301             uint64_t ofs = C.getFieldOffset(F);
1302             uint64_t sz = F->isBitField() ? F->getBitWidthValue(C) : C.getTypeSize(F->getType());
1303             string base = rewriter_.getRewrittenText(expansionRange(Base->getSourceRange()));
1304             string text = "bpf_dins_pkt(" + fn_args_[0]->getName().str() + ", (u64)" + base + "+" + to_string(ofs >> 3)
1305                 + ", " + to_string(ofs & 0x7) + ", " + to_string(sz) + ",";
1306             rewriter_.ReplaceText(expansionRange(SourceRange(GET_BEGINLOC(E), E->getOperatorLoc())), text);
1307             rewriter_.InsertTextAfterToken(EndLoc, ")");
1308           }
1309         }
1310       }
1311     }
1312   }
1313   return true;
1314 }
VisitImplicitCastExpr(ImplicitCastExpr * E)1315 bool BTypeVisitor::VisitImplicitCastExpr(ImplicitCastExpr *E) {
1316   // use dext only for RValues
1317   if (E->getCastKind() != CK_LValueToRValue)
1318     return true;
1319   MemberExpr *Memb = dyn_cast<MemberExpr>(E->IgnoreImplicit());
1320   if (!Memb)
1321     return true;
1322   Expr *Base = Memb->getBase()->IgnoreImplicit();
1323   if (DeclRefExpr *Ref = dyn_cast<DeclRefExpr>(Base)) {
1324     if (DeprecatedAttr *A = Ref->getDecl()->getAttr<DeprecatedAttr>()) {
1325       if (A->getMessage() == "packet") {
1326         if (FieldDecl *F = dyn_cast<FieldDecl>(Memb->getMemberDecl())) {
1327           if (!rewriter_.isRewritable(GET_BEGINLOC(E))) {
1328             error(GET_BEGINLOC(E), "cannot use \"packet\" header type inside a macro");
1329             return false;
1330           }
1331           uint64_t ofs = C.getFieldOffset(F);
1332           uint64_t sz = F->isBitField() ? F->getBitWidthValue(C) : C.getTypeSize(F->getType());
1333           string text = "bpf_dext_pkt(" + fn_args_[0]->getName().str() + ", (u64)" + Ref->getDecl()->getName().str() + "+"
1334               + to_string(ofs >> 3) + ", " + to_string(ofs & 0x7) + ", " + to_string(sz) + ")";
1335           rewriter_.ReplaceText(expansionRange(E->getSourceRange()), text);
1336         }
1337       }
1338     }
1339   }
1340   return true;
1341 }
1342 
1343 SourceRange
expansionRange(SourceRange range)1344 BTypeVisitor::expansionRange(SourceRange range) {
1345 #if LLVM_MAJOR_VERSION >= 7
1346   return rewriter_.getSourceMgr().getExpansionRange(range).getAsRange();
1347 #else
1348   return rewriter_.getSourceMgr().getExpansionRange(range);
1349 #endif
1350 }
1351 
1352 template <unsigned N>
error(SourceLocation loc,const char (& fmt)[N])1353 DiagnosticBuilder BTypeVisitor::error(SourceLocation loc, const char (&fmt)[N]) {
1354   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Error, fmt);
1355   return C.getDiagnostics().Report(loc, diag_id);
1356 }
1357 
1358 template <unsigned N>
warning(SourceLocation loc,const char (& fmt)[N])1359 DiagnosticBuilder BTypeVisitor::warning(SourceLocation loc, const char (&fmt)[N]) {
1360   unsigned int diag_id = C.getDiagnostics().getCustomDiagID(DiagnosticsEngine::Warning, fmt);
1361   return C.getDiagnostics().Report(loc, diag_id);
1362 }
1363 
getFieldValue(VarDecl * Decl,FieldDecl * FDecl,int64_t OrigFValue)1364 int64_t BTypeVisitor::getFieldValue(VarDecl *Decl, FieldDecl *FDecl, int64_t OrigFValue) {
1365   unsigned idx = FDecl->getFieldIndex();
1366 
1367   if (auto I = dyn_cast_or_null<InitListExpr>(Decl->getInit())) {
1368 #if LLVM_MAJOR_VERSION >= 8
1369     Expr::EvalResult res;
1370     if (I->getInit(idx)->EvaluateAsInt(res, C)) {
1371       return res.Val.getInt().getExtValue();
1372     }
1373 #else
1374     llvm::APSInt res;
1375     if (I->getInit(idx)->EvaluateAsInt(res, C)) {
1376       return res.getExtValue();
1377     }
1378 #endif
1379   }
1380 
1381   return OrigFValue;
1382 }
1383 
1384 // Open table FDs when bpf tables (as denoted by section("maps*") attribute)
1385 // are declared.
VisitVarDecl(VarDecl * Decl)1386 bool BTypeVisitor::VisitVarDecl(VarDecl *Decl) {
1387   const RecordType *R = Decl->getType()->getAs<RecordType>();
1388   if (SectionAttr *A = Decl->getAttr<SectionAttr>()) {
1389     if (!A->getName().startswith("maps"))
1390       return true;
1391     if (!R) {
1392       error(GET_ENDLOC(Decl), "invalid type for bpf_table, expect struct");
1393       return false;
1394     }
1395     const RecordDecl *RD = R->getDecl()->getDefinition();
1396 
1397     TableDesc table;
1398     TableStorage::iterator table_it;
1399     table.name = string(Decl->getName());
1400     Path local_path({fe_.id(), table.name});
1401     Path maps_ns_path({"ns", fe_.maps_ns(), table.name});
1402     Path global_path({table.name});
1403     QualType key_type, leaf_type;
1404 
1405     unsigned i = 0;
1406     for (auto F : RD->fields()) {
1407       if (F->getType().getTypePtr()->isIncompleteType()) {
1408         error(GET_BEGINLOC(F), "unknown type");
1409         return false;
1410       }
1411 
1412       size_t sz = C.getTypeSize(F->getType()) >> 3;
1413       if (F->getName() == "key") {
1414         if (sz == 0) {
1415           error(GET_BEGINLOC(F), "invalid zero-sized leaf");
1416           return false;
1417         }
1418         table.key_size = sz;
1419         key_type = F->getType();
1420       } else if (F->getName() == "leaf") {
1421         if (sz == 0) {
1422           error(GET_BEGINLOC(F), "invalid zero-sized leaf");
1423           return false;
1424         }
1425         table.leaf_size = sz;
1426         leaf_type = F->getType();
1427       } else if (F->getName() == "max_entries") {
1428             table.max_entries = getFieldValue(Decl, F, table.max_entries);
1429       } else if (F->getName() == "flags") {
1430             table.flags = getFieldValue(Decl, F, table.flags);
1431       }
1432       ++i;
1433     }
1434 
1435     std::string section_attr = string(A->getName()), pinned;
1436     size_t pinned_path_pos = section_attr.find(":");
1437     // 0 is not a valid map ID, -1 is to create and pin it to file
1438     int pinned_id = 0;
1439 
1440     if (pinned_path_pos != std::string::npos) {
1441       pinned = section_attr.substr(pinned_path_pos + 1);
1442       section_attr = section_attr.substr(0, pinned_path_pos);
1443       int fd = bpf_obj_get(pinned.c_str());
1444       if (fd < 0) {
1445         if (bcc_make_parent_dir(pinned.c_str()) ||
1446             bcc_check_bpffs_path(pinned.c_str())) {
1447           return false;
1448         }
1449 
1450         pinned_id = -1;
1451       } else {
1452         struct bpf_map_info info = {};
1453         unsigned int info_len = sizeof(info);
1454 
1455         if (bpf_obj_get_info_by_fd(fd, &info, &info_len)) {
1456           error(GET_BEGINLOC(Decl), "get map info failed: %0")
1457                 << strerror(errno);
1458           return false;
1459         }
1460 
1461         pinned_id = info.id;
1462       }
1463 
1464       close(fd);
1465     }
1466 
1467     // Additional map specific information
1468     size_t map_info_pos = section_attr.find("$");
1469     std::string inner_map_name;
1470 
1471     if (map_info_pos != std::string::npos) {
1472       std::string map_info = section_attr.substr(map_info_pos + 1);
1473       section_attr = section_attr.substr(0, map_info_pos);
1474       if (section_attr == "maps/array_of_maps" ||
1475           section_attr == "maps/hash_of_maps") {
1476         inner_map_name = map_info;
1477       }
1478     }
1479 
1480     bpf_map_type map_type = BPF_MAP_TYPE_UNSPEC;
1481     if (section_attr == "maps/hash") {
1482       map_type = BPF_MAP_TYPE_HASH;
1483     } else if (section_attr == "maps/array") {
1484       map_type = BPF_MAP_TYPE_ARRAY;
1485     } else if (section_attr == "maps/percpu_hash") {
1486       map_type = BPF_MAP_TYPE_PERCPU_HASH;
1487     } else if (section_attr == "maps/percpu_array") {
1488       map_type = BPF_MAP_TYPE_PERCPU_ARRAY;
1489     } else if (section_attr == "maps/lru_hash") {
1490       map_type = BPF_MAP_TYPE_LRU_HASH;
1491     } else if (section_attr == "maps/lru_percpu_hash") {
1492       map_type = BPF_MAP_TYPE_LRU_PERCPU_HASH;
1493     } else if (section_attr == "maps/lpm_trie") {
1494       map_type = BPF_MAP_TYPE_LPM_TRIE;
1495     } else if (section_attr == "maps/histogram") {
1496       map_type = BPF_MAP_TYPE_HASH;
1497       if (key_type->isSpecificBuiltinType(BuiltinType::Int))
1498         map_type = BPF_MAP_TYPE_ARRAY;
1499       if (!leaf_type->isSpecificBuiltinType(BuiltinType::ULongLong))
1500         error(GET_BEGINLOC(Decl), "histogram leaf type must be u64, got %0") << leaf_type;
1501     } else if (section_attr == "maps/prog") {
1502       map_type = BPF_MAP_TYPE_PROG_ARRAY;
1503     } else if (section_attr == "maps/perf_output") {
1504       map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY;
1505       int numcpu = get_possible_cpus().size();
1506       if (numcpu <= 0)
1507         numcpu = 1;
1508       table.max_entries = numcpu;
1509     } else if (section_attr == "maps/ringbuf") {
1510       map_type = BPF_MAP_TYPE_RINGBUF;
1511       // values from libbpf/src/libbpf_probes.c
1512       table.key_size = 0;
1513       table.leaf_size = 0;
1514     } else if (section_attr == "maps/perf_array") {
1515       map_type = BPF_MAP_TYPE_PERF_EVENT_ARRAY;
1516     } else if (section_attr == "maps/queue") {
1517       table.key_size = 0;
1518       map_type = BPF_MAP_TYPE_QUEUE;
1519     } else if (section_attr == "maps/stack") {
1520       table.key_size = 0;
1521       map_type = BPF_MAP_TYPE_STACK;
1522     } else if (section_attr == "maps/cgroup_array") {
1523       map_type = BPF_MAP_TYPE_CGROUP_ARRAY;
1524     } else if (section_attr == "maps/stacktrace") {
1525       map_type = BPF_MAP_TYPE_STACK_TRACE;
1526     } else if (section_attr == "maps/devmap") {
1527       map_type = BPF_MAP_TYPE_DEVMAP;
1528     } else if (section_attr == "maps/cpumap") {
1529       map_type = BPF_MAP_TYPE_CPUMAP;
1530     } else if (section_attr == "maps/xskmap") {
1531       map_type = BPF_MAP_TYPE_XSKMAP;
1532     } else if (section_attr == "maps/hash_of_maps") {
1533       map_type = BPF_MAP_TYPE_HASH_OF_MAPS;
1534     } else if (section_attr == "maps/array_of_maps") {
1535       map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
1536     } else if (section_attr == "maps/sk_storage") {
1537       map_type = BPF_MAP_TYPE_SK_STORAGE;
1538     } else if (section_attr == "maps/inode_storage") {
1539       map_type = BPF_MAP_TYPE_INODE_STORAGE;
1540     } else if (section_attr == "maps/task_storage") {
1541       map_type = BPF_MAP_TYPE_TASK_STORAGE;
1542     } else if (section_attr == "maps/sockmap") {
1543       map_type = BPF_MAP_TYPE_SOCKMAP;
1544     } else if (section_attr == "maps/sockhash") {
1545       map_type = BPF_MAP_TYPE_SOCKHASH;
1546     } else if (section_attr == "maps/cgroup_storage") {
1547       map_type = BPF_MAP_TYPE_CGROUP_STORAGE;
1548     } else if (section_attr == "maps/percpu_cgroup_storage") {
1549       map_type = BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
1550     } else if (section_attr == "maps/extern") {
1551       if (!fe_.table_storage().Find(maps_ns_path, table_it)) {
1552         if (!fe_.table_storage().Find(global_path, table_it)) {
1553           error(GET_BEGINLOC(Decl), "reference to undefined table");
1554           return false;
1555         }
1556       }
1557       table = table_it->second.dup();
1558       table.is_extern = true;
1559     } else if (section_attr == "maps/export") {
1560       if (table.name.substr(0, 2) == "__")
1561         table.name = table.name.substr(2);
1562       Path local_path({fe_.id(), table.name});
1563       Path global_path({table.name});
1564       if (!fe_.table_storage().Find(local_path, table_it)) {
1565         error(GET_BEGINLOC(Decl), "reference to undefined table");
1566         return false;
1567       }
1568       fe_.table_storage().Insert(global_path, table_it->second.dup());
1569       return true;
1570     } else if(section_attr == "maps/shared") {
1571       if (table.name.substr(0, 2) == "__")
1572         table.name = table.name.substr(2);
1573       Path local_path({fe_.id(), table.name});
1574       Path maps_ns_path({"ns", fe_.maps_ns(), table.name});
1575       if (!fe_.table_storage().Find(local_path, table_it)) {
1576         error(GET_BEGINLOC(Decl), "reference to undefined table");
1577         return false;
1578       }
1579       fe_.table_storage().Insert(maps_ns_path, table_it->second.dup());
1580       return true;
1581     }
1582 
1583     if (!table.is_extern) {
1584       if (map_type == BPF_MAP_TYPE_UNSPEC) {
1585         error(GET_BEGINLOC(Decl), "unsupported map type: %0") << section_attr;
1586         return false;
1587       }
1588 
1589       table.type = map_type;
1590       table.fake_fd = fe_.get_next_fake_fd();
1591       fe_.add_map_def(table.fake_fd, std::make_tuple((int)map_type, std::string(table.name),
1592                       (int)table.key_size, (int)table.leaf_size,
1593                       (int)table.max_entries, table.flags, pinned_id,
1594                       inner_map_name, pinned));
1595     }
1596 
1597     if (!table.is_extern)
1598       fe_.table_storage().VisitMapType(table, C, key_type, leaf_type);
1599     fe_.table_storage().Insert(local_path, move(table));
1600   } else if (const PointerType *P = Decl->getType()->getAs<PointerType>()) {
1601     // if var is a pointer to a packet type, clone the annotation into the var
1602     // decl so that the packet dext/dins rewriter can catch it
1603     if (const RecordType *RT = P->getPointeeType()->getAs<RecordType>()) {
1604       if (const RecordDecl *RD = RT->getDecl()->getDefinition()) {
1605         if (DeprecatedAttr *DA = RD->getAttr<DeprecatedAttr>()) {
1606           if (DA->getMessage() == "packet") {
1607             Decl->addAttr(DA->clone(C));
1608           }
1609         }
1610       }
1611     }
1612   }
1613   return true;
1614 }
1615 
1616 // First traversal of AST to retrieve maps with external pointers.
BTypeConsumer(ASTContext & C,BFrontendAction & fe,Rewriter & rewriter,set<Decl * > & m)1617 BTypeConsumer::BTypeConsumer(ASTContext &C, BFrontendAction &fe,
1618                              Rewriter &rewriter, set<Decl *> &m)
1619     : fe_(fe),
1620       map_visitor_(m),
1621       btype_visitor_(C, fe),
1622       probe_visitor1_(C, rewriter, m, true),
1623       probe_visitor2_(C, rewriter, m, false) {}
1624 
HandleTranslationUnit(ASTContext & Context)1625 void BTypeConsumer::HandleTranslationUnit(ASTContext &Context) {
1626   DeclContext::decl_iterator it;
1627   DeclContext *DC = TranslationUnitDecl::castToDeclContext(Context.getTranslationUnitDecl());
1628 
1629   /**
1630    * In a first traversal, ProbeVisitor tracks external pointers identified
1631    * through each function's arguments and replaces their dereferences with
1632    * calls to bpf_probe_read. It also passes all identified pointers to
1633    * external addresses to MapVisitor.
1634    */
1635   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1636     Decl *D = *it;
1637     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1638       if (fe_.is_rewritable_ext_func(F)) {
1639         for (auto arg : F->parameters()) {
1640           if (arg == F->getParamDecl(0)) {
1641             /**
1642              * Limit tracing of pointers from context to tracing contexts.
1643              * We're whitelisting instead of blacklisting to avoid issues with
1644              * existing programs if new context types are added in the future.
1645              */
1646             string type = arg->getType().getAsString();
1647             if (type == "struct pt_regs *" ||
1648                 type == "struct bpf_raw_tracepoint_args *" ||
1649                 type.substr(0, 19) == "struct tracepoint__")
1650               probe_visitor1_.set_ctx(arg);
1651           } else if (!arg->getType()->isFundamentalType()) {
1652             tuple<Decl *, int> pt = make_tuple(arg, 0);
1653             probe_visitor1_.set_ptreg(pt);
1654           }
1655         }
1656 
1657         probe_visitor1_.TraverseDecl(D);
1658         for (auto ptreg : probe_visitor1_.get_ptregs()) {
1659           map_visitor_.set_ptreg(ptreg);
1660         }
1661       }
1662     }
1663   }
1664 
1665   /**
1666    * MapVisitor uses external pointers identified by the first ProbeVisitor
1667    * traversal to identify all maps with external pointers as values.
1668    * MapVisitor runs only after ProbeVisitor finished its traversal of the
1669    * whole translation unit to clearly separate the role of each ProbeVisitor's
1670    * traversal: the first tracks external pointers from function arguments,
1671    * whereas the second tracks external pointers from maps. Without this clear
1672    * separation, ProbeVisitor might attempt to replace several times the same
1673    * dereferences.
1674    */
1675   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1676     Decl *D = *it;
1677     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1678       if (fe_.is_rewritable_ext_func(F)) {
1679         map_visitor_.TraverseDecl(D);
1680       }
1681     }
1682   }
1683 
1684   /**
1685    * In a second traversal, ProbeVisitor tracks pointers passed through the
1686    * maps identified by MapVisitor and replaces their dereferences with calls
1687    * to bpf_probe_read.
1688    * This last traversal runs after MapVisitor went through an entire
1689    * translation unit, to ensure maps with external pointers have all been
1690    * identified.
1691    */
1692   for (it = DC->decls_begin(); it != DC->decls_end(); it++) {
1693     Decl *D = *it;
1694     if (FunctionDecl *F = dyn_cast<FunctionDecl>(D)) {
1695       if (fe_.is_rewritable_ext_func(F)) {
1696         probe_visitor2_.TraverseDecl(D);
1697       }
1698     }
1699 
1700     btype_visitor_.TraverseDecl(D);
1701   }
1702 
1703 }
1704 
BFrontendAction(llvm::raw_ostream & os,unsigned flags,TableStorage & ts,const std::string & id,const std::string & main_path,ProgFuncInfo & prog_func_info,std::string & mod_src,const std::string & maps_ns,fake_fd_map_def & fake_fd_map,std::map<std::string,std::vector<std::string>> & perf_events)1705 BFrontendAction::BFrontendAction(
1706     llvm::raw_ostream &os, unsigned flags, TableStorage &ts,
1707     const std::string &id, const std::string &main_path,
1708     ProgFuncInfo &prog_func_info, std::string &mod_src,
1709     const std::string &maps_ns, fake_fd_map_def &fake_fd_map,
1710     std::map<std::string, std::vector<std::string>> &perf_events)
1711     : os_(os),
1712       flags_(flags),
1713       ts_(ts),
1714       id_(id),
1715       maps_ns_(maps_ns),
1716       rewriter_(new Rewriter),
1717       main_path_(main_path),
1718       prog_func_info_(prog_func_info),
1719       mod_src_(mod_src),
1720       next_fake_fd_(-1),
1721       fake_fd_map_(fake_fd_map),
1722       perf_events_(perf_events) {}
1723 
is_rewritable_ext_func(FunctionDecl * D)1724 bool BFrontendAction::is_rewritable_ext_func(FunctionDecl *D) {
1725   StringRef file_name = rewriter_->getSourceMgr().getFilename(GET_BEGINLOC(D));
1726   return (D->isExternallyVisible() && D->hasBody() &&
1727           (file_name.empty() || file_name == main_path_));
1728 }
1729 
DoMiscWorkAround()1730 void BFrontendAction::DoMiscWorkAround() {
1731   // In 4.16 and later, CONFIG_CC_STACKPROTECTOR is moved out of Kconfig and into
1732   // Makefile. It will be set depending on CONFIG_CC_STACKPROTECTOR_{AUTO|REGULAR|STRONG}.
1733   // CONFIG_CC_STACKPROTECTOR is still used in various places, e.g., struct task_struct,
1734   // to guard certain fields. The workaround here intends to define
1735   // CONFIG_CC_STACKPROTECTOR properly based on other configs, so it relieved any bpf
1736   // program (using task_struct, etc.) of patching the below code.
1737   std::string probefunc = check_bpf_probe_read_kernel();
1738   if (kresolver) {
1739     bcc_free_symcache(kresolver, -1);
1740     kresolver = NULL;
1741   }
1742   if (probefunc == "bpf_probe_read") {
1743     probefunc = "#define bpf_probe_read_kernel bpf_probe_read\n"
1744       "#define bpf_probe_read_kernel_str bpf_probe_read_str\n"
1745       "#define bpf_probe_read_user bpf_probe_read\n"
1746       "#define bpf_probe_read_user_str bpf_probe_read_str\n";
1747   }
1748   else {
1749     probefunc = "";
1750   }
1751   std::string prologue = "#if defined(BPF_LICENSE)\n"
1752     "#error BPF_LICENSE cannot be specified through cflags\n"
1753     "#endif\n"
1754     "#if !defined(CONFIG_CC_STACKPROTECTOR)\n"
1755     "#if defined(CONFIG_CC_STACKPROTECTOR_AUTO) \\\n"
1756     "    || defined(CONFIG_CC_STACKPROTECTOR_REGULAR) \\\n"
1757     "    || defined(CONFIG_CC_STACKPROTECTOR_STRONG)\n"
1758     "#define CONFIG_CC_STACKPROTECTOR\n"
1759     "#endif\n"
1760     "#endif\n";
1761   prologue = prologue + probefunc;
1762   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).InsertText(0,
1763     prologue,
1764     false);
1765 
1766   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).InsertTextAfter(
1767 #if LLVM_MAJOR_VERSION >= 12
1768     rewriter_->getSourceMgr().getBufferOrFake(rewriter_->getSourceMgr().getMainFileID()).getBufferSize(),
1769 #else
1770     rewriter_->getSourceMgr().getBuffer(rewriter_->getSourceMgr().getMainFileID())->getBufferSize(),
1771 #endif
1772     "\n#include <bcc/footer.h>\n");
1773 }
1774 
EndSourceFileAction()1775 void BFrontendAction::EndSourceFileAction() {
1776   // Additional misc rewrites
1777   DoMiscWorkAround();
1778 
1779   if (flags_ & DEBUG_PREPROCESSOR)
1780     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(llvm::errs());
1781 #if LLVM_MAJOR_VERSION >= 9
1782   llvm::raw_string_ostream tmp_os(mod_src_);
1783   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID())
1784       .write(tmp_os);
1785 #else
1786   if (flags_ & DEBUG_SOURCE) {
1787     llvm::raw_string_ostream tmp_os(mod_src_);
1788     rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID())
1789         .write(tmp_os);
1790   }
1791 #endif
1792 
1793   for (auto func : func_range_) {
1794     auto f = func.first;
1795     string bd = rewriter_->getRewrittenText(func_range_[f]);
1796     auto fn = prog_func_info_.get_func(f);
1797     if (fn)
1798       fn->src_rewritten_ = bd;
1799   }
1800   rewriter_->getEditBuffer(rewriter_->getSourceMgr().getMainFileID()).write(os_);
1801   os_.flush();
1802 }
1803 
CreateASTConsumer(CompilerInstance & Compiler,llvm::StringRef InFile)1804 unique_ptr<ASTConsumer> BFrontendAction::CreateASTConsumer(CompilerInstance &Compiler, llvm::StringRef InFile) {
1805   rewriter_->setSourceMgr(Compiler.getSourceManager(), Compiler.getLangOpts());
1806   vector<unique_ptr<ASTConsumer>> consumers;
1807   consumers.push_back(unique_ptr<ASTConsumer>(new BTypeConsumer(Compiler.getASTContext(), *this, *rewriter_, m_)));
1808   return unique_ptr<ASTConsumer>(new MultiplexConsumer(std::move(consumers)));
1809 }
1810 
1811 }
1812