1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * BPF JIT compiler for ARM64
4  *
5  * Copyright (C) 2014-2016 Zi Shen Lim <zlim.lnx@gmail.com>
6  */
7 
8 #define pr_fmt(fmt) "bpf_jit: " fmt
9 
10 #include <linux/arm-smccc.h>
11 #include <linux/bitfield.h>
12 #include <linux/bpf.h>
13 #include <linux/filter.h>
14 #include <linux/memory.h>
15 #include <linux/printk.h>
16 #include <linux/slab.h>
17 
18 #include <asm/asm-extable.h>
19 #include <asm/byteorder.h>
20 #include <asm/cacheflush.h>
21 #include <asm/cfi.h>
22 #include <asm/cpufeature.h>
23 #include <asm/debug-monitors.h>
24 #include <asm/insn.h>
25 #include <asm/patching.h>
26 #include <asm/set_memory.h>
27 
28 #include "bpf_jit.h"
29 
30 #define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
31 #define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
32 #define TCCNT_PTR (MAX_BPF_JIT_REG + 2)
33 #define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
34 #define ARENA_VM_START (MAX_BPF_JIT_REG + 5)
35 
36 #define check_imm(bits, imm) do {				\
37 	if ((((imm) > 0) && ((imm) >> (bits))) ||		\
38 	    (((imm) < 0) && (~(imm) >> (bits)))) {		\
39 		pr_info("[%2d] imm=%d(0x%x) out of range\n",	\
40 			i, imm, imm);				\
41 		return -EINVAL;					\
42 	}							\
43 } while (0)
44 #define check_imm19(imm) check_imm(19, imm)
45 #define check_imm26(imm) check_imm(26, imm)
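/*
 * Illustrative note: these are used on computed branch displacements below,
 * e.g. check_imm26(jmp_offset) aborts the JIT with -EINVAL (and logs the
 * BPF instruction index i) if a B/BL displacement no longer fits its
 * immediate field.
 */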
46 
47 /* Map BPF registers to A64 registers */
48 static const int bpf2a64[] = {
49 	/* return value from in-kernel function, and exit value from eBPF */
50 	[BPF_REG_0] = A64_R(7),
51 	/* arguments from eBPF program to in-kernel function */
52 	[BPF_REG_1] = A64_R(0),
53 	[BPF_REG_2] = A64_R(1),
54 	[BPF_REG_3] = A64_R(2),
55 	[BPF_REG_4] = A64_R(3),
56 	[BPF_REG_5] = A64_R(4),
57 	/* callee saved registers that in-kernel function will preserve */
58 	[BPF_REG_6] = A64_R(19),
59 	[BPF_REG_7] = A64_R(20),
60 	[BPF_REG_8] = A64_R(21),
61 	[BPF_REG_9] = A64_R(22),
62 	/* read-only frame pointer to access stack */
63 	[BPF_REG_FP] = A64_R(25),
64 	/* temporary registers for BPF JIT */
65 	[TMP_REG_1] = A64_R(10),
66 	[TMP_REG_2] = A64_R(11),
67 	[TMP_REG_3] = A64_R(12),
68 	/* tail_call_cnt_ptr */
69 	[TCCNT_PTR] = A64_R(26),
70 	/* temporary register for blinding constants */
71 	[BPF_REG_AX] = A64_R(9),
72 	/* callee saved register for kern_vm_start address */
73 	[ARENA_VM_START] = A64_R(28),
74 };
75 
76 struct jit_ctx {
77 	const struct bpf_prog *prog;
78 	int idx;
79 	int epilogue_offset;
80 	int *offset;
81 	int exentry_idx;
82 	int nr_used_callee_reg;
83 	u8 used_callee_reg[8]; /* r6~r9, fp, arena_vm_start */
84 	__le32 *image;
85 	__le32 *ro_image;
86 	u32 stack_size;
87 	u64 user_vm_start;
88 	u64 arena_vm_start;
89 	bool fp_used;
90 	bool write;
91 };
92 
93 struct bpf_plt {
94 	u32 insn_ldr; /* load target */
95 	u32 insn_br;  /* branch to target */
96 	u64 target;   /* target value */
97 };
98 
99 #define PLT_TARGET_SIZE   sizeof_field(struct bpf_plt, target)
100 #define PLT_TARGET_OFFSET offsetof(struct bpf_plt, target)
101 
102 static inline void emit(const u32 insn, struct jit_ctx *ctx)
103 {
104 	if (ctx->image != NULL && ctx->write)
105 		ctx->image[ctx->idx] = cpu_to_le32(insn);
106 
107 	ctx->idx++;
108 }
109 
110 static inline void emit_a64_mov_i(const int is64, const int reg,
111 				  const s32 val, struct jit_ctx *ctx)
112 {
113 	u16 hi = val >> 16;
114 	u16 lo = val & 0xffff;
115 
116 	if (hi & 0x8000) {
117 		if (hi == 0xffff) {
118 			emit(A64_MOVN(is64, reg, (u16)~lo, 0), ctx);
119 		} else {
120 			emit(A64_MOVN(is64, reg, (u16)~hi, 16), ctx);
121 			if (lo != 0xffff)
122 				emit(A64_MOVK(is64, reg, lo, 0), ctx);
123 		}
124 	} else {
125 		emit(A64_MOVZ(is64, reg, lo, 0), ctx);
126 		if (hi)
127 			emit(A64_MOVK(is64, reg, hi, 16), ctx);
128 	}
129 }
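/*
 * Rough worked example of the helper above (values are illustrative only):
 *
 *   emit_a64_mov_i(0, reg, 0x00123456, ctx) emits
 *       movz reg, #0x3456            // low 16 bits
 *       movk reg, #0x0012, lsl #16   // patch in the high 16 bits
 *
 *   emit_a64_mov_i(0, reg, -5, ctx), i.e. 0xfffffffb with hi == 0xffff, emits
 *       movn reg, #0x0004            // ~0x0004 == 0xfffffffb
 */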
130 
131 static int i64_i16_blocks(const u64 val, bool inverse)
132 {
133 	return (((val >>  0) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
134 	       (((val >> 16) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
135 	       (((val >> 32) & 0xffff) != (inverse ? 0xffff : 0x0000)) +
136 	       (((val >> 48) & 0xffff) != (inverse ? 0xffff : 0x0000));
137 }
138 
139 static inline void emit_a64_mov_i64(const int reg, const u64 val,
140 				    struct jit_ctx *ctx)
141 {
142 	u64 nrm_tmp = val, rev_tmp = ~val;
143 	bool inverse;
144 	int shift;
145 
146 	if (!(nrm_tmp >> 32))
147 		return emit_a64_mov_i(0, reg, (u32)val, ctx);
148 
149 	inverse = i64_i16_blocks(nrm_tmp, true) < i64_i16_blocks(nrm_tmp, false);
150 	shift = max(round_down((inverse ? (fls64(rev_tmp) - 1) :
151 					  (fls64(nrm_tmp) - 1)), 16), 0);
152 	if (inverse)
153 		emit(A64_MOVN(1, reg, (rev_tmp >> shift) & 0xffff, shift), ctx);
154 	else
155 		emit(A64_MOVZ(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
156 	shift -= 16;
157 	while (shift >= 0) {
158 		if (((nrm_tmp >> shift) & 0xffff) != (inverse ? 0xffff : 0x0000))
159 			emit(A64_MOVK(1, reg, (nrm_tmp >> shift) & 0xffff, shift), ctx);
160 		shift -= 16;
161 	}
162 }
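/*
 * Rough examples of the block selection above (illustrative values only):
 *
 *   val = 0x1234567800000000 has two non-zero 16-bit blocks, so it becomes
 *       movz reg, #0x1234, lsl #48
 *       movk reg, #0x5678, lsl #32
 *
 *   val = 0xffffffff1234ffff is mostly ones, so the inverted form is chosen:
 *       movn reg, #0xedcb, lsl #16   // ~(0xedcb << 16) == val
 */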
163 
164 static inline void emit_bti(u32 insn, struct jit_ctx *ctx)
165 {
166 	if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL))
167 		emit(insn, ctx);
168 }
169 
170 static inline void emit_kcfi(u32 hash, struct jit_ctx *ctx)
171 {
172 	if (IS_ENABLED(CONFIG_CFI_CLANG))
173 		emit(hash, ctx);
174 }
175 
176 /*
177  * Kernel addresses in the vmalloc space use at most 48 bits, and the
178  * remaining bits are guaranteed to be 0x1. So we can compose the address
179  * with a fixed length movn/movk/movk sequence.
180  */
181 static inline void emit_addr_mov_i64(const int reg, const u64 val,
182 				     struct jit_ctx *ctx)
183 {
184 	u64 tmp = val;
185 	int shift = 0;
186 
187 	emit(A64_MOVN(1, reg, ~tmp & 0xffff, shift), ctx);
188 	while (shift < 32) {
189 		tmp >>= 16;
190 		shift += 16;
191 		emit(A64_MOVK(1, reg, tmp & 0xffff, shift), ctx);
192 	}
193 }
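/*
 * Illustrative example (made-up vmalloc address): val = 0xffff800012345678
 * yields the fixed three-instruction sequence
 *     movn reg, #0xa987              // reg = 0xffffffffffff5678
 *     movk reg, #0x1234, lsl #16     // reg = 0xffffffff12345678
 *     movk reg, #0x8000, lsl #32     // reg = 0xffff800012345678
 * with the top 16 bits left as all-ones by the initial movn.
 */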
194 
195 static bool should_emit_indirect_call(long target, const struct jit_ctx *ctx)
196 {
197 	long offset;
198 
199 	/* when ctx->ro_image is not allocated or the target is unknown,
200 	 * emit indirect call
201 	 */
202 	if (!ctx->ro_image || !target)
203 		return true;
204 
205 	offset = target - (long)&ctx->ro_image[ctx->idx];
206 	return offset < -SZ_128M || offset >= SZ_128M;
207 }
208 
209 static void emit_direct_call(u64 target, struct jit_ctx *ctx)
210 {
211 	u32 insn;
212 	unsigned long pc;
213 
214 	pc = (unsigned long)&ctx->ro_image[ctx->idx];
215 	insn = aarch64_insn_gen_branch_imm(pc, target, AARCH64_INSN_BRANCH_LINK);
216 	emit(insn, ctx);
217 }
218 
219 static void emit_indirect_call(u64 target, struct jit_ctx *ctx)
220 {
221 	u8 tmp;
222 
223 	tmp = bpf2a64[TMP_REG_1];
224 	emit_addr_mov_i64(tmp, target, ctx);
225 	emit(A64_BLR(tmp), ctx);
226 }
227 
228 static void emit_call(u64 target, struct jit_ctx *ctx)
229 {
230 	if (should_emit_indirect_call((long)target, ctx))
231 		emit_indirect_call(target, ctx);
232 	else
233 		emit_direct_call(target, ctx);
234 }
235 
236 static inline int bpf2a64_offset(int bpf_insn, int off,
237 				 const struct jit_ctx *ctx)
238 {
239 	/* BPF JMP offset is relative to the next instruction */
240 	bpf_insn++;
241 	/*
242 	 * Whereas arm64 branch instructions encode the offset
243 	 * from the branch itself, so we must subtract 1 from the
244 	 * instruction offset.
245 	 */
246 	return ctx->offset[bpf_insn + off] - (ctx->offset[bpf_insn] - 1);
247 }
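/*
 * Worked example (made-up offsets): a branch in BPF insn 3 with off = 2
 * targets BPF insn 6. If ctx->offset[] maps insns 4 and 6 to A64 indices 10
 * and 17, the branch instruction itself sits at A64 index 10 - 1 = 9, so the
 * returned displacement is 17 - 9 = 8 A64 instructions.
 */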
248 
249 static void jit_fill_hole(void *area, unsigned int size)
250 {
251 	__le32 *ptr;
252 	/* We are guaranteed to have aligned memory. */
253 	for (ptr = area; size >= sizeof(u32); size -= sizeof(u32))
254 		*ptr++ = cpu_to_le32(AARCH64_BREAK_FAULT);
255 }
256 
257 int bpf_arch_text_invalidate(void *dst, size_t len)
258 {
259 	if (!aarch64_insn_set(dst, AARCH64_BREAK_FAULT, len))
260 		return -EINVAL;
261 
262 	return 0;
263 }
264 
265 static inline int epilogue_offset(const struct jit_ctx *ctx)
266 {
267 	int to = ctx->epilogue_offset;
268 	int from = ctx->idx;
269 
270 	return to - from;
271 }
272 
273 static bool is_addsub_imm(u32 imm)
274 {
275 	/* Either imm12 or shifted imm12. */
276 	return !(imm & ~0xfff) || !(imm & ~0xfff000);
277 }
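/*
 * For instance (illustrative values): 0xabc and 0xabc000 both qualify, as
 * plain and shifted imm12 respectively, while 0xabc001 fits neither form and
 * must be materialized in a temporary register first.
 */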
278 
279 /*
280  * There are 3 types of AArch64 LDR/STR (immediate) instruction:
281  * Post-index, Pre-index, Unsigned offset.
282  *
283  * For BPF ldr/str, the "unsigned offset" type is sufficient.
284  *
285  * "Unsigned offset" type LDR(immediate) format:
286  *
287  *    3                   2                   1                   0
288  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
289  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
290  * |x x|1 1 1 0 0 1 0 1|         imm12         |    Rn   |    Rt   |
291  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
292  * scale
293  *
294  * "Unsigned offset" type STR(immediate) format:
295  *    3                   2                   1                   0
296  *  1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0 9 8 7 6 5 4 3 2 1 0
297  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
298  * |x x|1 1 1 0 0 1 0 0|         imm12         |    Rn   |    Rt   |
299  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
300  * scale
301  *
302  * The offset is calculated from imm12 and scale in the following way:
303  *
304  * offset = (u64)imm12 << scale
305  */
306 static bool is_lsi_offset(int offset, int scale)
307 {
308 	if (offset < 0)
309 		return false;
310 
311 	if (offset > (0xFFF << scale))
312 		return false;
313 
314 	if (offset & ((1 << scale) - 1))
315 		return false;
316 
317 	return true;
318 }
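/*
 * Rough examples for scale = 3, i.e. BPF_DW (illustrative values only):
 * offsets 32 and 32760 (0xfff << 3) are accepted, offset 12 is rejected
 * because it is not 8-byte aligned, and any negative offset is rejected.
 */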
319 
320 /* generated main prog prologue:
321  *      bti c // if CONFIG_ARM64_BTI_KERNEL
322  *      mov x9, lr
323  *      nop  // POKE_OFFSET
324  *      paciasp // if CONFIG_ARM64_PTR_AUTH_KERNEL
325  *      stp x29, lr, [sp, #-16]!
326  *      mov x29, sp
327  *      stp xzr, x26, [sp, #-16]!
328  *      mov x26, sp
329  *      // PROLOGUE_OFFSET
330  *	// save callee-saved registers
331  */
332 static void prepare_bpf_tail_call_cnt(struct jit_ctx *ctx)
333 {
334 	const bool is_main_prog = !bpf_is_subprog(ctx->prog);
335 	const u8 ptr = bpf2a64[TCCNT_PTR];
336 
337 	if (is_main_prog) {
338 		/* Initialize tail_call_cnt. */
339 		emit(A64_PUSH(A64_ZR, ptr, A64_SP), ctx);
340 		emit(A64_MOV(1, ptr, A64_SP), ctx);
341 	} else
342 		emit(A64_PUSH(ptr, ptr, A64_SP), ctx);
343 }
344 
345 static void find_used_callee_regs(struct jit_ctx *ctx)
346 {
347 	int i;
348 	const struct bpf_prog *prog = ctx->prog;
349 	const struct bpf_insn *insn = &prog->insnsi[0];
350 	int reg_used = 0;
351 
352 	for (i = 0; i < prog->len; i++, insn++) {
353 		if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6)
354 			reg_used |= 1;
355 
356 		if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7)
357 			reg_used |= 2;
358 
359 		if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8)
360 			reg_used |= 4;
361 
362 		if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9)
363 			reg_used |= 8;
364 
365 		if (insn->dst_reg == BPF_REG_FP || insn->src_reg == BPF_REG_FP) {
366 			ctx->fp_used = true;
367 			reg_used |= 16;
368 		}
369 	}
370 
371 	i = 0;
372 	if (reg_used & 1)
373 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_6];
374 
375 	if (reg_used & 2)
376 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_7];
377 
378 	if (reg_used & 4)
379 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_8];
380 
381 	if (reg_used & 8)
382 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_9];
383 
384 	if (reg_used & 16)
385 		ctx->used_callee_reg[i++] = bpf2a64[BPF_REG_FP];
386 
387 	if (ctx->arena_vm_start)
388 		ctx->used_callee_reg[i++] = bpf2a64[ARENA_VM_START];
389 
390 	ctx->nr_used_callee_reg = i;
391 }
392 
393 /* Save callee-saved registers */
394 static void push_callee_regs(struct jit_ctx *ctx)
395 {
396 	int reg1, reg2, i;
397 
398 	/*
399 	 * A program acting as an exception boundary should save all ARM64
400 	 * callee-saved registers as the exception callback needs to recover
401 	 * all ARM64 callee-saved registers in its epilogue.
402 	 */
403 	if (ctx->prog->aux->exception_boundary) {
404 		emit(A64_PUSH(A64_R(19), A64_R(20), A64_SP), ctx);
405 		emit(A64_PUSH(A64_R(21), A64_R(22), A64_SP), ctx);
406 		emit(A64_PUSH(A64_R(23), A64_R(24), A64_SP), ctx);
407 		emit(A64_PUSH(A64_R(25), A64_R(26), A64_SP), ctx);
408 		emit(A64_PUSH(A64_R(27), A64_R(28), A64_SP), ctx);
409 		ctx->fp_used = true;
410 	} else {
411 		find_used_callee_regs(ctx);
412 		for (i = 0; i + 1 < ctx->nr_used_callee_reg; i += 2) {
413 			reg1 = ctx->used_callee_reg[i];
414 			reg2 = ctx->used_callee_reg[i + 1];
415 			emit(A64_PUSH(reg1, reg2, A64_SP), ctx);
416 		}
417 		if (i < ctx->nr_used_callee_reg) {
418 			reg1 = ctx->used_callee_reg[i];
419 			/* keep SP 16-byte aligned */
420 			emit(A64_PUSH(reg1, A64_ZR, A64_SP), ctx);
421 		}
422 	}
423 }
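/*
 * For instance (illustrative): a program that touches only r6, r7 and the
 * frame pointer pushes the pair (x19, x20) followed by (x25, xzr), the zero
 * register padding the odd count so SP stays 16-byte aligned.
 */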
424 
425 /* Restore callee-saved registers */
426 static void pop_callee_regs(struct jit_ctx *ctx)
427 {
428 	struct bpf_prog_aux *aux = ctx->prog->aux;
429 	int reg1, reg2, i;
430 
431 	/*
432 	 * A program acting as an exception boundary pushes R23 and R24 in
433 	 * addition to the BPF callee-saved registers. The exception callback
434 	 * uses the boundary program's stack frame, so recover these extra
435 	 * registers in both of the above cases.
436 	 */
437 	if (aux->exception_boundary || aux->exception_cb) {
438 		emit(A64_POP(A64_R(27), A64_R(28), A64_SP), ctx);
439 		emit(A64_POP(A64_R(25), A64_R(26), A64_SP), ctx);
440 		emit(A64_POP(A64_R(23), A64_R(24), A64_SP), ctx);
441 		emit(A64_POP(A64_R(21), A64_R(22), A64_SP), ctx);
442 		emit(A64_POP(A64_R(19), A64_R(20), A64_SP), ctx);
443 	} else {
444 		i = ctx->nr_used_callee_reg - 1;
445 		if (ctx->nr_used_callee_reg % 2 != 0) {
446 			reg1 = ctx->used_callee_reg[i];
447 			emit(A64_POP(reg1, A64_ZR, A64_SP), ctx);
448 			i--;
449 		}
450 		while (i > 0) {
451 			reg1 = ctx->used_callee_reg[i - 1];
452 			reg2 = ctx->used_callee_reg[i];
453 			emit(A64_POP(reg1, reg2, A64_SP), ctx);
454 			i -= 2;
455 		}
456 	}
457 }
458 
459 #define BTI_INSNS (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) ? 1 : 0)
460 #define PAC_INSNS (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL) ? 1 : 0)
461 
462 /* Offset of nop instruction in bpf prog entry to be poked */
463 #define POKE_OFFSET (BTI_INSNS + 1)
464 
465 /* Tail call offset to jump into */
466 #define PROLOGUE_OFFSET (BTI_INSNS + 2 + PAC_INSNS + 4)
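/*
 * For example, with both CONFIG_ARM64_BTI_KERNEL and
 * CONFIG_ARM64_PTR_AUTH_KERNEL enabled this evaluates to 1 + 2 + 1 + 4 = 8
 * instructions, matching the prologue sketch above (bti c, the mov x9/nop
 * pair, paciasp, and the four stp/mov instructions); with both disabled it
 * is 6.
 */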
467 
468 static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
469 {
470 	const struct bpf_prog *prog = ctx->prog;
471 	const bool is_main_prog = !bpf_is_subprog(prog);
472 	const u8 fp = bpf2a64[BPF_REG_FP];
473 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
474 	int cur_offset;
475 
476 	/*
477 	 * BPF prog stack layout
478 	 *
479 	 *                         high
480 	 * original A64_SP =>   0:+-----+ BPF prologue
481 	 *                        |FP/LR|
482 	 * current A64_FP =>  -16:+-----+
483 	 *                        | ... | callee saved registers
484 	 * BPF fp register => -64:+-----+ <= (BPF_FP)
485 	 *                        |     |
486 	 *                        | ... | BPF prog stack
487 	 *                        |     |
488 	 *                        +-----+ <= (BPF_FP - prog->aux->stack_depth)
489 	 *                        |RSVD | padding
490 	 * current A64_SP =>      +-----+ <= (BPF_FP - ctx->stack_size)
491 	 *                        |     |
492 	 *                        | ... | Function call stack
493 	 *                        |     |
494 	 *                        +-----+
495 	 *                          low
496 	 *
497 	 */
498 
499 	emit_kcfi(is_main_prog ? cfi_bpf_hash : cfi_bpf_subprog_hash, ctx);
500 	const int idx0 = ctx->idx;
501 
502 	/* bpf function may be invoked by 3 instruction types:
503 	 * 1. bl, attached via freplace to bpf prog via short jump
504 	 * 2. br, attached via freplace to bpf prog via long jump
505 	 * 3. blr, working as a function pointer, used by emit_call.
506 	 * So BTI_JC should be used here to support both br and blr.
507 	 */
508 	emit_bti(A64_BTI_JC, ctx);
509 
510 	emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
511 	emit(A64_NOP, ctx);
512 
513 	if (!prog->aux->exception_cb) {
514 		/* Sign lr */
515 		if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
516 			emit(A64_PACIASP, ctx);
517 
518 		/* Save FP and LR registers to stay aligned with the ARM64 AAPCS */
519 		emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
520 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
521 
522 		prepare_bpf_tail_call_cnt(ctx);
523 
524 		if (!ebpf_from_cbpf && is_main_prog) {
525 			cur_offset = ctx->idx - idx0;
526 			if (cur_offset != PROLOGUE_OFFSET) {
527 				pr_err_once("PROLOGUE_OFFSET = %d, expected %d!\n",
528 						cur_offset, PROLOGUE_OFFSET);
529 				return -1;
530 			}
531 			/* BTI landing pad for the tail call, done with a BR */
532 			emit_bti(A64_BTI_J, ctx);
533 		}
534 		push_callee_regs(ctx);
535 	} else {
536 		/*
537 		 * Exception callback receives FP of Main Program as third
538 		 * parameter
539 		 */
540 		emit(A64_MOV(1, A64_FP, A64_R(2)), ctx);
541 		/*
542 		 * Main Program already pushed the frame record and the
543 		 * callee-saved registers. The exception callback will not push
544 		 * anything and re-use the main program's stack.
545 		 *
546 		 * 12 registers are on the stack
547 		 */
548 		emit(A64_SUB_I(1, A64_SP, A64_FP, 96), ctx);
549 	}
550 
551 	if (ctx->fp_used)
552 		/* Set up BPF prog stack base register */
553 		emit(A64_MOV(1, fp, A64_SP), ctx);
554 
555 	/* Stack must be multiples of 16B */
556 	ctx->stack_size = round_up(prog->aux->stack_depth, 16);
557 
558 	/* Set up function call stack */
559 	if (ctx->stack_size)
560 		emit(A64_SUB_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
561 
562 	if (ctx->arena_vm_start)
563 		emit_a64_mov_i64(arena_vm_base, ctx->arena_vm_start, ctx);
564 
565 	return 0;
566 }
567 
568 static int emit_bpf_tail_call(struct jit_ctx *ctx)
569 {
570 	/* bpf_tail_call(void *prog_ctx, struct bpf_array *array, u64 index) */
571 	const u8 r2 = bpf2a64[BPF_REG_2];
572 	const u8 r3 = bpf2a64[BPF_REG_3];
573 
574 	const u8 tmp = bpf2a64[TMP_REG_1];
575 	const u8 prg = bpf2a64[TMP_REG_2];
576 	const u8 tcc = bpf2a64[TMP_REG_3];
577 	const u8 ptr = bpf2a64[TCCNT_PTR];
578 	size_t off;
579 	__le32 *branch1 = NULL;
580 	__le32 *branch2 = NULL;
581 	__le32 *branch3 = NULL;
582 
583 	/* if (index >= array->map.max_entries)
584 	 *     goto out;
585 	 */
586 	off = offsetof(struct bpf_array, map.max_entries);
587 	emit_a64_mov_i64(tmp, off, ctx);
588 	emit(A64_LDR32(tmp, r2, tmp), ctx);
589 	emit(A64_MOV(0, r3, r3), ctx);
590 	emit(A64_CMP(0, r3, tmp), ctx);
591 	branch1 = ctx->image + ctx->idx;
592 	emit(A64_NOP, ctx);
593 
594 	/*
595 	 * if ((*tail_call_cnt_ptr) >= MAX_TAIL_CALL_CNT)
596 	 *     goto out;
597 	 */
598 	emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx);
599 	emit(A64_LDR64I(tcc, ptr, 0), ctx);
600 	emit(A64_CMP(1, tcc, tmp), ctx);
601 	branch2 = ctx->image + ctx->idx;
602 	emit(A64_NOP, ctx);
603 
604 	/* (*tail_call_cnt_ptr)++; */
605 	emit(A64_ADD_I(1, tcc, tcc, 1), ctx);
606 
607 	/* prog = array->ptrs[index];
608 	 * if (prog == NULL)
609 	 *     goto out;
610 	 */
611 	off = offsetof(struct bpf_array, ptrs);
612 	emit_a64_mov_i64(tmp, off, ctx);
613 	emit(A64_ADD(1, tmp, r2, tmp), ctx);
614 	emit(A64_LSL(1, prg, r3, 3), ctx);
615 	emit(A64_LDR64(prg, tmp, prg), ctx);
616 	branch3 = ctx->image + ctx->idx;
617 	emit(A64_NOP, ctx);
618 
619 	/* Update tail_call_cnt if the slot is populated. */
620 	emit(A64_STR64I(tcc, ptr, 0), ctx);
621 
622 	/* restore SP */
623 	if (ctx->stack_size)
624 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
625 
626 	pop_callee_regs(ctx);
627 
628 	/* goto *(prog->bpf_func + prologue_offset); */
629 	off = offsetof(struct bpf_prog, bpf_func);
630 	emit_a64_mov_i64(tmp, off, ctx);
631 	emit(A64_LDR64(tmp, prg, tmp), ctx);
632 	emit(A64_ADD_I(1, tmp, tmp, sizeof(u32) * PROLOGUE_OFFSET), ctx);
633 	emit(A64_BR(tmp), ctx);
634 
635 	if (ctx->image) {
636 		off = &ctx->image[ctx->idx] - branch1;
637 		*branch1 = cpu_to_le32(A64_B_(A64_COND_CS, off));
638 
639 		off = &ctx->image[ctx->idx] - branch2;
640 		*branch2 = cpu_to_le32(A64_B_(A64_COND_CS, off));
641 
642 		off = &ctx->image[ctx->idx] - branch3;
643 		*branch3 = cpu_to_le32(A64_CBZ(1, prg, off));
644 	}
645 
646 	return 0;
647 }
648 
649 #ifdef CONFIG_ARM64_LSE_ATOMICS
650 static int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
651 {
652 	const u8 code = insn->code;
653 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
654 	const u8 dst = bpf2a64[insn->dst_reg];
655 	const u8 src = bpf2a64[insn->src_reg];
656 	const u8 tmp = bpf2a64[TMP_REG_1];
657 	const u8 tmp2 = bpf2a64[TMP_REG_2];
658 	const bool isdw = BPF_SIZE(code) == BPF_DW;
659 	const bool arena = BPF_MODE(code) == BPF_PROBE_ATOMIC;
660 	const s16 off = insn->off;
661 	u8 reg = dst;
662 
663 	if (off || arena) {
664 		if (off) {
665 			emit_a64_mov_i(1, tmp, off, ctx);
666 			emit(A64_ADD(1, tmp, tmp, dst), ctx);
667 			reg = tmp;
668 		}
669 		if (arena) {
670 			emit(A64_ADD(1, tmp, reg, arena_vm_base), ctx);
671 			reg = tmp;
672 		}
673 	}
674 
675 	switch (insn->imm) {
676 	/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
677 	case BPF_ADD:
678 		emit(A64_STADD(isdw, reg, src), ctx);
679 		break;
680 	case BPF_AND:
681 		emit(A64_MVN(isdw, tmp2, src), ctx);
682 		emit(A64_STCLR(isdw, reg, tmp2), ctx);
683 		break;
684 	case BPF_OR:
685 		emit(A64_STSET(isdw, reg, src), ctx);
686 		break;
687 	case BPF_XOR:
688 		emit(A64_STEOR(isdw, reg, src), ctx);
689 		break;
690 	/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
691 	case BPF_ADD | BPF_FETCH:
692 		emit(A64_LDADDAL(isdw, src, reg, src), ctx);
693 		break;
694 	case BPF_AND | BPF_FETCH:
695 		emit(A64_MVN(isdw, tmp2, src), ctx);
696 		emit(A64_LDCLRAL(isdw, src, reg, tmp2), ctx);
697 		break;
698 	case BPF_OR | BPF_FETCH:
699 		emit(A64_LDSETAL(isdw, src, reg, src), ctx);
700 		break;
701 	case BPF_XOR | BPF_FETCH:
702 		emit(A64_LDEORAL(isdw, src, reg, src), ctx);
703 		break;
704 	/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
705 	case BPF_XCHG:
706 		emit(A64_SWPAL(isdw, src, reg, src), ctx);
707 		break;
708 	/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
709 	case BPF_CMPXCHG:
710 		emit(A64_CASAL(isdw, src, reg, bpf2a64[BPF_REG_0]), ctx);
711 		break;
712 	default:
713 		pr_err_once("unknown atomic op code %02x\n", insn->imm);
714 		return -EINVAL;
715 	}
716 
717 	return 0;
718 }
719 #else
720 static inline int emit_lse_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
721 {
722 	return -EINVAL;
723 }
724 #endif
725 
726 static int emit_ll_sc_atomic(const struct bpf_insn *insn, struct jit_ctx *ctx)
727 {
728 	const u8 code = insn->code;
729 	const u8 dst = bpf2a64[insn->dst_reg];
730 	const u8 src = bpf2a64[insn->src_reg];
731 	const u8 tmp = bpf2a64[TMP_REG_1];
732 	const u8 tmp2 = bpf2a64[TMP_REG_2];
733 	const u8 tmp3 = bpf2a64[TMP_REG_3];
734 	const int i = insn - ctx->prog->insnsi;
735 	const s32 imm = insn->imm;
736 	const s16 off = insn->off;
737 	const bool isdw = BPF_SIZE(code) == BPF_DW;
738 	u8 reg;
739 	s32 jmp_offset;
740 
741 	if (BPF_MODE(code) == BPF_PROBE_ATOMIC) {
742 		/* ll_sc based atomics don't support unsafe pointers yet. */
743 		pr_err_once("unknown atomic opcode %02x\n", code);
744 		return -EINVAL;
745 	}
746 
747 	if (!off) {
748 		reg = dst;
749 	} else {
750 		emit_a64_mov_i(1, tmp, off, ctx);
751 		emit(A64_ADD(1, tmp, tmp, dst), ctx);
752 		reg = tmp;
753 	}
754 
755 	if (imm == BPF_ADD || imm == BPF_AND ||
756 	    imm == BPF_OR || imm == BPF_XOR) {
757 		/* lock *(u32/u64 *)(dst_reg + off) <op>= src_reg */
758 		emit(A64_LDXR(isdw, tmp2, reg), ctx);
759 		if (imm == BPF_ADD)
760 			emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
761 		else if (imm == BPF_AND)
762 			emit(A64_AND(isdw, tmp2, tmp2, src), ctx);
763 		else if (imm == BPF_OR)
764 			emit(A64_ORR(isdw, tmp2, tmp2, src), ctx);
765 		else
766 			emit(A64_EOR(isdw, tmp2, tmp2, src), ctx);
767 		emit(A64_STXR(isdw, tmp2, reg, tmp3), ctx);
768 		jmp_offset = -3;
769 		check_imm19(jmp_offset);
770 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
771 	} else if (imm == (BPF_ADD | BPF_FETCH) ||
772 		   imm == (BPF_AND | BPF_FETCH) ||
773 		   imm == (BPF_OR | BPF_FETCH) ||
774 		   imm == (BPF_XOR | BPF_FETCH)) {
775 		/* src_reg = atomic_fetch_<op>(dst_reg + off, src_reg) */
776 		const u8 ax = bpf2a64[BPF_REG_AX];
777 
778 		emit(A64_MOV(isdw, ax, src), ctx);
779 		emit(A64_LDXR(isdw, src, reg), ctx);
780 		if (imm == (BPF_ADD | BPF_FETCH))
781 			emit(A64_ADD(isdw, tmp2, src, ax), ctx);
782 		else if (imm == (BPF_AND | BPF_FETCH))
783 			emit(A64_AND(isdw, tmp2, src, ax), ctx);
784 		else if (imm == (BPF_OR | BPF_FETCH))
785 			emit(A64_ORR(isdw, tmp2, src, ax), ctx);
786 		else
787 			emit(A64_EOR(isdw, tmp2, src, ax), ctx);
788 		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
789 		jmp_offset = -3;
790 		check_imm19(jmp_offset);
791 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
792 		emit(A64_DMB_ISH, ctx);
793 	} else if (imm == BPF_XCHG) {
794 		/* src_reg = atomic_xchg(dst_reg + off, src_reg); */
795 		emit(A64_MOV(isdw, tmp2, src), ctx);
796 		emit(A64_LDXR(isdw, src, reg), ctx);
797 		emit(A64_STLXR(isdw, tmp2, reg, tmp3), ctx);
798 		jmp_offset = -2;
799 		check_imm19(jmp_offset);
800 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
801 		emit(A64_DMB_ISH, ctx);
802 	} else if (imm == BPF_CMPXCHG) {
803 		/* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */
804 		const u8 r0 = bpf2a64[BPF_REG_0];
805 
806 		emit(A64_MOV(isdw, tmp2, r0), ctx);
807 		emit(A64_LDXR(isdw, r0, reg), ctx);
808 		emit(A64_EOR(isdw, tmp3, r0, tmp2), ctx);
809 		jmp_offset = 4;
810 		check_imm19(jmp_offset);
811 		emit(A64_CBNZ(isdw, tmp3, jmp_offset), ctx);
812 		emit(A64_STLXR(isdw, src, reg, tmp3), ctx);
813 		jmp_offset = -4;
814 		check_imm19(jmp_offset);
815 		emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
816 		emit(A64_DMB_ISH, ctx);
817 	} else {
818 		pr_err_once("unknown atomic op code %02x\n", imm);
819 		return -EINVAL;
820 	}
821 
822 	return 0;
823 }
824 
825 void dummy_tramp(void);
826 
827 asm (
828 "	.pushsection .text, \"ax\", @progbits\n"
829 "	.global dummy_tramp\n"
830 "	.type dummy_tramp, %function\n"
831 "dummy_tramp:"
832 #if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)
833 "	bti j\n" /* dummy_tramp is called via "br x10" */
834 #endif
835 "	mov x10, x30\n"
836 "	mov x30, x9\n"
837 "	ret x10\n"
838 "	.size dummy_tramp, .-dummy_tramp\n"
839 "	.popsection\n"
840 );
841 
842 /* build a plt initialized like this:
843  *
844  * plt:
845  *      ldr tmp, target
846  *      br tmp
847  * target:
848  *      .quad dummy_tramp
849  *
850  * when a long jump trampoline is attached, target is filled with the
851  * trampoline address, and when the trampoline is removed, target is
852  * restored to dummy_tramp address.
853  */
854 static void build_plt(struct jit_ctx *ctx)
855 {
856 	const u8 tmp = bpf2a64[TMP_REG_1];
857 	struct bpf_plt *plt = NULL;
858 
859 	/* make sure target is 64-bit aligned */
860 	if ((ctx->idx + PLT_TARGET_OFFSET / AARCH64_INSN_SIZE) % 2)
861 		emit(A64_NOP, ctx);
862 
863 	plt = (struct bpf_plt *)(ctx->image + ctx->idx);
864 	/* plt is called via bl, no BTI needed here */
865 	emit(A64_LDR64LIT(tmp, 2 * AARCH64_INSN_SIZE), ctx);
866 	emit(A64_BR(tmp), ctx);
867 
868 	if (ctx->image)
869 		plt->target = (u64)&dummy_tramp;
870 }
871 
872 /* Clobbers BPF registers 1-4, aka x0-x3 */
873 static void __maybe_unused build_bhb_mitigation(struct jit_ctx *ctx)
874 {
875 	const u8 r1 = bpf2a64[BPF_REG_1]; /* aka x0 */
876 	u8 k = get_spectre_bhb_loop_value();
877 
878 	if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY) ||
879 	    cpu_mitigations_off() || __nospectre_bhb ||
880 	    arm64_get_spectre_v2_state() == SPECTRE_VULNERABLE)
881 		return;
882 
883 	if (capable(CAP_SYS_ADMIN))
884 		return;
885 
886 	if (supports_clearbhb(SCOPE_SYSTEM)) {
887 		emit(aarch64_insn_gen_hint(AARCH64_INSN_HINT_CLEARBHB), ctx);
888 		return;
889 	}
890 
891 	if (k) {
892 		emit_a64_mov_i64(r1, k, ctx);
893 		emit(A64_B(1), ctx);
894 		emit(A64_SUBS_I(true, r1, r1, 1), ctx);
895 		emit(A64_B_(A64_COND_NE, -2), ctx);
896 		emit(aarch64_insn_gen_dsb(AARCH64_INSN_MB_ISH), ctx);
897 		emit(aarch64_insn_get_isb_value(), ctx);
898 	}
899 
900 	if (is_spectre_bhb_fw_mitigated()) {
901 		emit(A64_ORR_I(false, r1, AARCH64_INSN_REG_ZR,
902 			       ARM_SMCCC_ARCH_WORKAROUND_3), ctx);
903 		switch (arm_smccc_1_1_get_conduit()) {
904 		case SMCCC_CONDUIT_HVC:
905 			emit(aarch64_insn_get_hvc_value(), ctx);
906 			break;
907 		case SMCCC_CONDUIT_SMC:
908 			emit(aarch64_insn_get_smc_value(), ctx);
909 			break;
910 		default:
911 			pr_err_once("Firmware mitigation enabled with unknown conduit\n");
912 		}
913 	}
914 }
915 
916 static void build_epilogue(struct jit_ctx *ctx, bool was_classic)
917 {
918 	const u8 r0 = bpf2a64[BPF_REG_0];
919 	const u8 ptr = bpf2a64[TCCNT_PTR];
920 
921 	/* We're done with BPF stack */
922 	if (ctx->stack_size)
923 		emit(A64_ADD_I(1, A64_SP, A64_SP, ctx->stack_size), ctx);
924 
925 	pop_callee_regs(ctx);
926 
927 	emit(A64_POP(A64_ZR, ptr, A64_SP), ctx);
928 
929 	if (was_classic)
930 		build_bhb_mitigation(ctx);
931 
932 	/* Restore FP/LR registers */
933 	emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
934 
935 	/* Move the return value from bpf:r0 (aka x7) to x0 */
936 	emit(A64_MOV(1, A64_R(0), r0), ctx);
937 
938 	/* Authenticate lr */
939 	if (IS_ENABLED(CONFIG_ARM64_PTR_AUTH_KERNEL))
940 		emit(A64_AUTIASP, ctx);
941 
942 	emit(A64_RET(A64_LR), ctx);
943 }
944 
945 #define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
946 #define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
947 #define DONT_CLEAR 5 /* Unused ARM64 register from BPF's POV */
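/*
 * Layout sketch of ex->fixup as used below (inferred from the two masks):
 * bits [26:0] hold the distance from the instruction following the faulting
 * access to the fixup entry, and bits [31:27] hold the A64 register to
 * clear, or DONT_CLEAR to leave all registers untouched.
 */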
948 
949 bool ex_handler_bpf(const struct exception_table_entry *ex,
950 		    struct pt_regs *regs)
951 {
952 	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
953 	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
954 
955 	if (dst_reg != DONT_CLEAR)
956 		regs->regs[dst_reg] = 0;
957 	regs->pc = (unsigned long)&ex->fixup - offset;
958 	return true;
959 }
960 
961 /* For accesses to BTF pointers, add an entry to the exception table */
962 static int add_exception_handler(const struct bpf_insn *insn,
963 				 struct jit_ctx *ctx,
964 				 int dst_reg)
965 {
966 	off_t ins_offset;
967 	off_t fixup_offset;
968 	unsigned long pc;
969 	struct exception_table_entry *ex;
970 
971 	if (!ctx->image)
972 		/* First pass */
973 		return 0;
974 
975 	if (BPF_MODE(insn->code) != BPF_PROBE_MEM &&
976 		BPF_MODE(insn->code) != BPF_PROBE_MEMSX &&
977 			BPF_MODE(insn->code) != BPF_PROBE_MEM32 &&
978 				BPF_MODE(insn->code) != BPF_PROBE_ATOMIC)
979 		return 0;
980 
981 	if (!ctx->prog->aux->extable ||
982 	    WARN_ON_ONCE(ctx->exentry_idx >= ctx->prog->aux->num_exentries))
983 		return -EINVAL;
984 
985 	ex = &ctx->prog->aux->extable[ctx->exentry_idx];
986 	pc = (unsigned long)&ctx->ro_image[ctx->idx - 1];
987 
988 	/*
989 	 * This is the relative offset of the instruction that may fault from
990 	 * the exception table itself. This will be written to the exception
991 	 * table and if this instruction faults, the destination register will
992 	 * be set to '0' and the execution will jump to the next instruction.
993 	 */
994 	ins_offset = pc - (long)&ex->insn;
995 	if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
996 		return -ERANGE;
997 
998 	/*
999 	 * Since the extable follows the program, the fixup offset is always
1000 	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
1001 	 * to keep things simple, and put the destination register in the upper
1002 	 * bits. We don't need to worry about buildtime or runtime sort
1003 	 * modifying the upper bits because the table is already sorted, and
1004 	 * isn't part of the main exception table.
1005 	 *
1006 	 * The fixup_offset is set to the next instruction from the instruction
1007 	 * that may fault. The execution will jump to this after handling the
1008 	 * fault.
1009 	 */
1010 	fixup_offset = (long)&ex->fixup - (pc + AARCH64_INSN_SIZE);
1011 	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
1012 		return -ERANGE;
1013 
1014 	/*
1015 	 * The offsets above have been calculated using the RO buffer but we
1016 	 * need to use the R/W buffer for writes.
1017 	 * switch ex to rw buffer for writing.
1018 	 */
1019 	ex = (void *)ctx->image + ((void *)ex - (void *)ctx->ro_image);
1020 
1021 	ex->insn = ins_offset;
1022 
1023 	if (BPF_CLASS(insn->code) != BPF_LDX)
1024 		dst_reg = DONT_CLEAR;
1025 
1026 	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
1027 		    FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
1028 
1029 	ex->type = EX_TYPE_BPF;
1030 
1031 	ctx->exentry_idx++;
1032 	return 0;
1033 }
1034 
1035 /* JITs an eBPF instruction.
1036  * Returns:
1037  * 0  - successfully JITed an 8-byte eBPF instruction.
1038  * >0 - successfully JITed a 16-byte eBPF instruction.
1039  * <0 - failed to JIT.
1040  */
1041 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
1042 		      bool extra_pass)
1043 {
1044 	const u8 code = insn->code;
1045 	u8 dst = bpf2a64[insn->dst_reg];
1046 	u8 src = bpf2a64[insn->src_reg];
1047 	const u8 tmp = bpf2a64[TMP_REG_1];
1048 	const u8 tmp2 = bpf2a64[TMP_REG_2];
1049 	const u8 fp = bpf2a64[BPF_REG_FP];
1050 	const u8 arena_vm_base = bpf2a64[ARENA_VM_START];
1051 	const s16 off = insn->off;
1052 	const s32 imm = insn->imm;
1053 	const int i = insn - ctx->prog->insnsi;
1054 	const bool is64 = BPF_CLASS(code) == BPF_ALU64 ||
1055 			  BPF_CLASS(code) == BPF_JMP;
1056 	u8 jmp_cond;
1057 	s32 jmp_offset;
1058 	u32 a64_insn;
1059 	u8 src_adj;
1060 	u8 dst_adj;
1061 	int off_adj;
1062 	int ret;
1063 	bool sign_extend;
1064 
1065 	switch (code) {
1066 	/* dst = src */
1067 	case BPF_ALU | BPF_MOV | BPF_X:
1068 	case BPF_ALU64 | BPF_MOV | BPF_X:
1069 		if (insn_is_cast_user(insn)) {
1070 			emit(A64_MOV(0, tmp, src), ctx); // 32-bit mov clears the upper 32 bits
1071 			emit_a64_mov_i(0, dst, ctx->user_vm_start >> 32, ctx);
1072 			emit(A64_LSL(1, dst, dst, 32), ctx);
1073 			emit(A64_CBZ(1, tmp, 2), ctx);
1074 			emit(A64_ORR(1, tmp, dst, tmp), ctx);
1075 			emit(A64_MOV(1, dst, tmp), ctx);
1076 			break;
1077 		} else if (insn_is_mov_percpu_addr(insn)) {
1078 			if (dst != src)
1079 				emit(A64_MOV(1, dst, src), ctx);
1080 			if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
1081 				emit(A64_MRS_TPIDR_EL2(tmp), ctx);
1082 			else
1083 				emit(A64_MRS_TPIDR_EL1(tmp), ctx);
1084 			emit(A64_ADD(1, dst, dst, tmp), ctx);
1085 			break;
1086 		}
1087 		switch (insn->off) {
1088 		case 0:
1089 			emit(A64_MOV(is64, dst, src), ctx);
1090 			break;
1091 		case 8:
1092 			emit(A64_SXTB(is64, dst, src), ctx);
1093 			break;
1094 		case 16:
1095 			emit(A64_SXTH(is64, dst, src), ctx);
1096 			break;
1097 		case 32:
1098 			emit(A64_SXTW(is64, dst, src), ctx);
1099 			break;
1100 		}
1101 		break;
1102 	/* dst = dst OP src */
1103 	case BPF_ALU | BPF_ADD | BPF_X:
1104 	case BPF_ALU64 | BPF_ADD | BPF_X:
1105 		emit(A64_ADD(is64, dst, dst, src), ctx);
1106 		break;
1107 	case BPF_ALU | BPF_SUB | BPF_X:
1108 	case BPF_ALU64 | BPF_SUB | BPF_X:
1109 		emit(A64_SUB(is64, dst, dst, src), ctx);
1110 		break;
1111 	case BPF_ALU | BPF_AND | BPF_X:
1112 	case BPF_ALU64 | BPF_AND | BPF_X:
1113 		emit(A64_AND(is64, dst, dst, src), ctx);
1114 		break;
1115 	case BPF_ALU | BPF_OR | BPF_X:
1116 	case BPF_ALU64 | BPF_OR | BPF_X:
1117 		emit(A64_ORR(is64, dst, dst, src), ctx);
1118 		break;
1119 	case BPF_ALU | BPF_XOR | BPF_X:
1120 	case BPF_ALU64 | BPF_XOR | BPF_X:
1121 		emit(A64_EOR(is64, dst, dst, src), ctx);
1122 		break;
1123 	case BPF_ALU | BPF_MUL | BPF_X:
1124 	case BPF_ALU64 | BPF_MUL | BPF_X:
1125 		emit(A64_MUL(is64, dst, dst, src), ctx);
1126 		break;
1127 	case BPF_ALU | BPF_DIV | BPF_X:
1128 	case BPF_ALU64 | BPF_DIV | BPF_X:
1129 		if (!off)
1130 			emit(A64_UDIV(is64, dst, dst, src), ctx);
1131 		else
1132 			emit(A64_SDIV(is64, dst, dst, src), ctx);
1133 		break;
1134 	case BPF_ALU | BPF_MOD | BPF_X:
1135 	case BPF_ALU64 | BPF_MOD | BPF_X:
1136 		if (!off)
1137 			emit(A64_UDIV(is64, tmp, dst, src), ctx);
1138 		else
1139 			emit(A64_SDIV(is64, tmp, dst, src), ctx);
1140 		emit(A64_MSUB(is64, dst, dst, tmp, src), ctx);
1141 		break;
1142 	case BPF_ALU | BPF_LSH | BPF_X:
1143 	case BPF_ALU64 | BPF_LSH | BPF_X:
1144 		emit(A64_LSLV(is64, dst, dst, src), ctx);
1145 		break;
1146 	case BPF_ALU | BPF_RSH | BPF_X:
1147 	case BPF_ALU64 | BPF_RSH | BPF_X:
1148 		emit(A64_LSRV(is64, dst, dst, src), ctx);
1149 		break;
1150 	case BPF_ALU | BPF_ARSH | BPF_X:
1151 	case BPF_ALU64 | BPF_ARSH | BPF_X:
1152 		emit(A64_ASRV(is64, dst, dst, src), ctx);
1153 		break;
1154 	/* dst = -dst */
1155 	case BPF_ALU | BPF_NEG:
1156 	case BPF_ALU64 | BPF_NEG:
1157 		emit(A64_NEG(is64, dst, dst), ctx);
1158 		break;
1159 	/* dst = BSWAP##imm(dst) */
1160 	case BPF_ALU | BPF_END | BPF_FROM_LE:
1161 	case BPF_ALU | BPF_END | BPF_FROM_BE:
1162 	case BPF_ALU64 | BPF_END | BPF_FROM_LE:
1163 #ifdef CONFIG_CPU_BIG_ENDIAN
1164 		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE)
1165 			goto emit_bswap_uxt;
1166 #else /* !CONFIG_CPU_BIG_ENDIAN */
1167 		if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE)
1168 			goto emit_bswap_uxt;
1169 #endif
1170 		switch (imm) {
1171 		case 16:
1172 			emit(A64_REV16(is64, dst, dst), ctx);
1173 			/* zero-extend 16 bits into 64 bits */
1174 			emit(A64_UXTH(is64, dst, dst), ctx);
1175 			break;
1176 		case 32:
1177 			emit(A64_REV32(0, dst, dst), ctx);
1178 			/* upper 32 bits already cleared */
1179 			break;
1180 		case 64:
1181 			emit(A64_REV64(dst, dst), ctx);
1182 			break;
1183 		}
1184 		break;
1185 emit_bswap_uxt:
1186 		switch (imm) {
1187 		case 16:
1188 			/* zero-extend 16 bits into 64 bits */
1189 			emit(A64_UXTH(is64, dst, dst), ctx);
1190 			break;
1191 		case 32:
1192 			/* zero-extend 32 bits into 64 bits */
1193 			emit(A64_UXTW(is64, dst, dst), ctx);
1194 			break;
1195 		case 64:
1196 			/* nop */
1197 			break;
1198 		}
1199 		break;
1200 	/* dst = imm */
1201 	case BPF_ALU | BPF_MOV | BPF_K:
1202 	case BPF_ALU64 | BPF_MOV | BPF_K:
1203 		emit_a64_mov_i(is64, dst, imm, ctx);
1204 		break;
1205 	/* dst = dst OP imm */
1206 	case BPF_ALU | BPF_ADD | BPF_K:
1207 	case BPF_ALU64 | BPF_ADD | BPF_K:
1208 		if (is_addsub_imm(imm)) {
1209 			emit(A64_ADD_I(is64, dst, dst, imm), ctx);
1210 		} else if (is_addsub_imm(-imm)) {
1211 			emit(A64_SUB_I(is64, dst, dst, -imm), ctx);
1212 		} else {
1213 			emit_a64_mov_i(is64, tmp, imm, ctx);
1214 			emit(A64_ADD(is64, dst, dst, tmp), ctx);
1215 		}
1216 		break;
1217 	case BPF_ALU | BPF_SUB | BPF_K:
1218 	case BPF_ALU64 | BPF_SUB | BPF_K:
1219 		if (is_addsub_imm(imm)) {
1220 			emit(A64_SUB_I(is64, dst, dst, imm), ctx);
1221 		} else if (is_addsub_imm(-imm)) {
1222 			emit(A64_ADD_I(is64, dst, dst, -imm), ctx);
1223 		} else {
1224 			emit_a64_mov_i(is64, tmp, imm, ctx);
1225 			emit(A64_SUB(is64, dst, dst, tmp), ctx);
1226 		}
1227 		break;
1228 	case BPF_ALU | BPF_AND | BPF_K:
1229 	case BPF_ALU64 | BPF_AND | BPF_K:
1230 		a64_insn = A64_AND_I(is64, dst, dst, imm);
1231 		if (a64_insn != AARCH64_BREAK_FAULT) {
1232 			emit(a64_insn, ctx);
1233 		} else {
1234 			emit_a64_mov_i(is64, tmp, imm, ctx);
1235 			emit(A64_AND(is64, dst, dst, tmp), ctx);
1236 		}
1237 		break;
1238 	case BPF_ALU | BPF_OR | BPF_K:
1239 	case BPF_ALU64 | BPF_OR | BPF_K:
1240 		a64_insn = A64_ORR_I(is64, dst, dst, imm);
1241 		if (a64_insn != AARCH64_BREAK_FAULT) {
1242 			emit(a64_insn, ctx);
1243 		} else {
1244 			emit_a64_mov_i(is64, tmp, imm, ctx);
1245 			emit(A64_ORR(is64, dst, dst, tmp), ctx);
1246 		}
1247 		break;
1248 	case BPF_ALU | BPF_XOR | BPF_K:
1249 	case BPF_ALU64 | BPF_XOR | BPF_K:
1250 		a64_insn = A64_EOR_I(is64, dst, dst, imm);
1251 		if (a64_insn != AARCH64_BREAK_FAULT) {
1252 			emit(a64_insn, ctx);
1253 		} else {
1254 			emit_a64_mov_i(is64, tmp, imm, ctx);
1255 			emit(A64_EOR(is64, dst, dst, tmp), ctx);
1256 		}
1257 		break;
1258 	case BPF_ALU | BPF_MUL | BPF_K:
1259 	case BPF_ALU64 | BPF_MUL | BPF_K:
1260 		emit_a64_mov_i(is64, tmp, imm, ctx);
1261 		emit(A64_MUL(is64, dst, dst, tmp), ctx);
1262 		break;
1263 	case BPF_ALU | BPF_DIV | BPF_K:
1264 	case BPF_ALU64 | BPF_DIV | BPF_K:
1265 		emit_a64_mov_i(is64, tmp, imm, ctx);
1266 		if (!off)
1267 			emit(A64_UDIV(is64, dst, dst, tmp), ctx);
1268 		else
1269 			emit(A64_SDIV(is64, dst, dst, tmp), ctx);
1270 		break;
1271 	case BPF_ALU | BPF_MOD | BPF_K:
1272 	case BPF_ALU64 | BPF_MOD | BPF_K:
1273 		emit_a64_mov_i(is64, tmp2, imm, ctx);
1274 		if (!off)
1275 			emit(A64_UDIV(is64, tmp, dst, tmp2), ctx);
1276 		else
1277 			emit(A64_SDIV(is64, tmp, dst, tmp2), ctx);
1278 		emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx);
1279 		break;
1280 	case BPF_ALU | BPF_LSH | BPF_K:
1281 	case BPF_ALU64 | BPF_LSH | BPF_K:
1282 		emit(A64_LSL(is64, dst, dst, imm), ctx);
1283 		break;
1284 	case BPF_ALU | BPF_RSH | BPF_K:
1285 	case BPF_ALU64 | BPF_RSH | BPF_K:
1286 		emit(A64_LSR(is64, dst, dst, imm), ctx);
1287 		break;
1288 	case BPF_ALU | BPF_ARSH | BPF_K:
1289 	case BPF_ALU64 | BPF_ARSH | BPF_K:
1290 		emit(A64_ASR(is64, dst, dst, imm), ctx);
1291 		break;
1292 
1293 	/* JUMP off */
1294 	case BPF_JMP | BPF_JA:
1295 	case BPF_JMP32 | BPF_JA:
1296 		if (BPF_CLASS(code) == BPF_JMP)
1297 			jmp_offset = bpf2a64_offset(i, off, ctx);
1298 		else
1299 			jmp_offset = bpf2a64_offset(i, imm, ctx);
1300 		check_imm26(jmp_offset);
1301 		emit(A64_B(jmp_offset), ctx);
1302 		break;
1303 	/* IF (dst COND src) JUMP off */
1304 	case BPF_JMP | BPF_JEQ | BPF_X:
1305 	case BPF_JMP | BPF_JGT | BPF_X:
1306 	case BPF_JMP | BPF_JLT | BPF_X:
1307 	case BPF_JMP | BPF_JGE | BPF_X:
1308 	case BPF_JMP | BPF_JLE | BPF_X:
1309 	case BPF_JMP | BPF_JNE | BPF_X:
1310 	case BPF_JMP | BPF_JSGT | BPF_X:
1311 	case BPF_JMP | BPF_JSLT | BPF_X:
1312 	case BPF_JMP | BPF_JSGE | BPF_X:
1313 	case BPF_JMP | BPF_JSLE | BPF_X:
1314 	case BPF_JMP32 | BPF_JEQ | BPF_X:
1315 	case BPF_JMP32 | BPF_JGT | BPF_X:
1316 	case BPF_JMP32 | BPF_JLT | BPF_X:
1317 	case BPF_JMP32 | BPF_JGE | BPF_X:
1318 	case BPF_JMP32 | BPF_JLE | BPF_X:
1319 	case BPF_JMP32 | BPF_JNE | BPF_X:
1320 	case BPF_JMP32 | BPF_JSGT | BPF_X:
1321 	case BPF_JMP32 | BPF_JSLT | BPF_X:
1322 	case BPF_JMP32 | BPF_JSGE | BPF_X:
1323 	case BPF_JMP32 | BPF_JSLE | BPF_X:
1324 		emit(A64_CMP(is64, dst, src), ctx);
1325 emit_cond_jmp:
1326 		jmp_offset = bpf2a64_offset(i, off, ctx);
1327 		check_imm19(jmp_offset);
1328 		switch (BPF_OP(code)) {
1329 		case BPF_JEQ:
1330 			jmp_cond = A64_COND_EQ;
1331 			break;
1332 		case BPF_JGT:
1333 			jmp_cond = A64_COND_HI;
1334 			break;
1335 		case BPF_JLT:
1336 			jmp_cond = A64_COND_CC;
1337 			break;
1338 		case BPF_JGE:
1339 			jmp_cond = A64_COND_CS;
1340 			break;
1341 		case BPF_JLE:
1342 			jmp_cond = A64_COND_LS;
1343 			break;
1344 		case BPF_JSET:
1345 		case BPF_JNE:
1346 			jmp_cond = A64_COND_NE;
1347 			break;
1348 		case BPF_JSGT:
1349 			jmp_cond = A64_COND_GT;
1350 			break;
1351 		case BPF_JSLT:
1352 			jmp_cond = A64_COND_LT;
1353 			break;
1354 		case BPF_JSGE:
1355 			jmp_cond = A64_COND_GE;
1356 			break;
1357 		case BPF_JSLE:
1358 			jmp_cond = A64_COND_LE;
1359 			break;
1360 		default:
1361 			return -EFAULT;
1362 		}
1363 		emit(A64_B_(jmp_cond, jmp_offset), ctx);
1364 		break;
1365 	case BPF_JMP | BPF_JSET | BPF_X:
1366 	case BPF_JMP32 | BPF_JSET | BPF_X:
1367 		emit(A64_TST(is64, dst, src), ctx);
1368 		goto emit_cond_jmp;
1369 	/* IF (dst COND imm) JUMP off */
1370 	case BPF_JMP | BPF_JEQ | BPF_K:
1371 	case BPF_JMP | BPF_JGT | BPF_K:
1372 	case BPF_JMP | BPF_JLT | BPF_K:
1373 	case BPF_JMP | BPF_JGE | BPF_K:
1374 	case BPF_JMP | BPF_JLE | BPF_K:
1375 	case BPF_JMP | BPF_JNE | BPF_K:
1376 	case BPF_JMP | BPF_JSGT | BPF_K:
1377 	case BPF_JMP | BPF_JSLT | BPF_K:
1378 	case BPF_JMP | BPF_JSGE | BPF_K:
1379 	case BPF_JMP | BPF_JSLE | BPF_K:
1380 	case BPF_JMP32 | BPF_JEQ | BPF_K:
1381 	case BPF_JMP32 | BPF_JGT | BPF_K:
1382 	case BPF_JMP32 | BPF_JLT | BPF_K:
1383 	case BPF_JMP32 | BPF_JGE | BPF_K:
1384 	case BPF_JMP32 | BPF_JLE | BPF_K:
1385 	case BPF_JMP32 | BPF_JNE | BPF_K:
1386 	case BPF_JMP32 | BPF_JSGT | BPF_K:
1387 	case BPF_JMP32 | BPF_JSLT | BPF_K:
1388 	case BPF_JMP32 | BPF_JSGE | BPF_K:
1389 	case BPF_JMP32 | BPF_JSLE | BPF_K:
1390 		if (is_addsub_imm(imm)) {
1391 			emit(A64_CMP_I(is64, dst, imm), ctx);
1392 		} else if (is_addsub_imm(-imm)) {
1393 			emit(A64_CMN_I(is64, dst, -imm), ctx);
1394 		} else {
1395 			emit_a64_mov_i(is64, tmp, imm, ctx);
1396 			emit(A64_CMP(is64, dst, tmp), ctx);
1397 		}
1398 		goto emit_cond_jmp;
1399 	case BPF_JMP | BPF_JSET | BPF_K:
1400 	case BPF_JMP32 | BPF_JSET | BPF_K:
1401 		a64_insn = A64_TST_I(is64, dst, imm);
1402 		if (a64_insn != AARCH64_BREAK_FAULT) {
1403 			emit(a64_insn, ctx);
1404 		} else {
1405 			emit_a64_mov_i(is64, tmp, imm, ctx);
1406 			emit(A64_TST(is64, dst, tmp), ctx);
1407 		}
1408 		goto emit_cond_jmp;
1409 	/* function call */
1410 	case BPF_JMP | BPF_CALL:
1411 	{
1412 		const u8 r0 = bpf2a64[BPF_REG_0];
1413 		bool func_addr_fixed;
1414 		u64 func_addr;
1415 		u32 cpu_offset;
1416 
1417 		/* Implement helper call to bpf_get_smp_processor_id() inline */
1418 		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
1419 			cpu_offset = offsetof(struct thread_info, cpu);
1420 
1421 			emit(A64_MRS_SP_EL0(tmp), ctx);
1422 			if (is_lsi_offset(cpu_offset, 2)) {
1423 				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
1424 			} else {
1425 				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
1426 				emit(A64_LDR32(r0, tmp, tmp2), ctx);
1427 			}
1428 			break;
1429 		}
1430 
1431 		/* Implement helper call to bpf_get_current_task/_btf() inline */
1432 		if (insn->src_reg == 0 && (insn->imm == BPF_FUNC_get_current_task ||
1433 					   insn->imm == BPF_FUNC_get_current_task_btf)) {
1434 			emit(A64_MRS_SP_EL0(r0), ctx);
1435 			break;
1436 		}
1437 
1438 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
1439 					    &func_addr, &func_addr_fixed);
1440 		if (ret < 0)
1441 			return ret;
1442 		emit_call(func_addr, ctx);
1443 		emit(A64_MOV(1, r0, A64_R(0)), ctx);
1444 		break;
1445 	}
1446 	/* tail call */
1447 	case BPF_JMP | BPF_TAIL_CALL:
1448 		if (emit_bpf_tail_call(ctx))
1449 			return -EFAULT;
1450 		break;
1451 	/* function return */
1452 	case BPF_JMP | BPF_EXIT:
1453 		/* Optimization: when the last instruction is EXIT,
1454 		   simply fall through to the epilogue. */
1455 		if (i == ctx->prog->len - 1)
1456 			break;
1457 		jmp_offset = epilogue_offset(ctx);
1458 		check_imm26(jmp_offset);
1459 		emit(A64_B(jmp_offset), ctx);
1460 		break;
1461 
1462 	/* dst = imm64 */
1463 	case BPF_LD | BPF_IMM | BPF_DW:
1464 	{
1465 		const struct bpf_insn insn1 = insn[1];
1466 		u64 imm64;
1467 
1468 		imm64 = (u64)insn1.imm << 32 | (u32)imm;
1469 		if (bpf_pseudo_func(insn))
1470 			emit_addr_mov_i64(dst, imm64, ctx);
1471 		else
1472 			emit_a64_mov_i64(dst, imm64, ctx);
1473 
1474 		return 1;
1475 	}
1476 
1477 	/* LDX: dst = (u64)*(unsigned size *)(src + off) */
1478 	case BPF_LDX | BPF_MEM | BPF_W:
1479 	case BPF_LDX | BPF_MEM | BPF_H:
1480 	case BPF_LDX | BPF_MEM | BPF_B:
1481 	case BPF_LDX | BPF_MEM | BPF_DW:
1482 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
1483 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
1484 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
1485 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
1486 	/* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */
1487 	case BPF_LDX | BPF_MEMSX | BPF_B:
1488 	case BPF_LDX | BPF_MEMSX | BPF_H:
1489 	case BPF_LDX | BPF_MEMSX | BPF_W:
1490 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_B:
1491 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_H:
1492 	case BPF_LDX | BPF_PROBE_MEMSX | BPF_W:
1493 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_B:
1494 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
1495 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
1496 	case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
1497 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1498 			emit(A64_ADD(1, tmp2, src, arena_vm_base), ctx);
1499 			src = tmp2;
1500 		}
1501 		if (src == fp) {
1502 			src_adj = A64_SP;
1503 			off_adj = off + ctx->stack_size;
1504 		} else {
1505 			src_adj = src;
1506 			off_adj = off;
1507 		}
1508 		sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX ||
1509 				BPF_MODE(insn->code) == BPF_PROBE_MEMSX);
1510 		switch (BPF_SIZE(code)) {
1511 		case BPF_W:
1512 			if (is_lsi_offset(off_adj, 2)) {
1513 				if (sign_extend)
1514 					emit(A64_LDRSWI(dst, src_adj, off_adj), ctx);
1515 				else
1516 					emit(A64_LDR32I(dst, src_adj, off_adj), ctx);
1517 			} else {
1518 				emit_a64_mov_i(1, tmp, off, ctx);
1519 				if (sign_extend)
1520 					emit(A64_LDRSW(dst, src, tmp), ctx);
1521 				else
1522 					emit(A64_LDR32(dst, src, tmp), ctx);
1523 			}
1524 			break;
1525 		case BPF_H:
1526 			if (is_lsi_offset(off_adj, 1)) {
1527 				if (sign_extend)
1528 					emit(A64_LDRSHI(dst, src_adj, off_adj), ctx);
1529 				else
1530 					emit(A64_LDRHI(dst, src_adj, off_adj), ctx);
1531 			} else {
1532 				emit_a64_mov_i(1, tmp, off, ctx);
1533 				if (sign_extend)
1534 					emit(A64_LDRSH(dst, src, tmp), ctx);
1535 				else
1536 					emit(A64_LDRH(dst, src, tmp), ctx);
1537 			}
1538 			break;
1539 		case BPF_B:
1540 			if (is_lsi_offset(off_adj, 0)) {
1541 				if (sign_extend)
1542 					emit(A64_LDRSBI(dst, src_adj, off_adj), ctx);
1543 				else
1544 					emit(A64_LDRBI(dst, src_adj, off_adj), ctx);
1545 			} else {
1546 				emit_a64_mov_i(1, tmp, off, ctx);
1547 				if (sign_extend)
1548 					emit(A64_LDRSB(dst, src, tmp), ctx);
1549 				else
1550 					emit(A64_LDRB(dst, src, tmp), ctx);
1551 			}
1552 			break;
1553 		case BPF_DW:
1554 			if (is_lsi_offset(off_adj, 3)) {
1555 				emit(A64_LDR64I(dst, src_adj, off_adj), ctx);
1556 			} else {
1557 				emit_a64_mov_i(1, tmp, off, ctx);
1558 				emit(A64_LDR64(dst, src, tmp), ctx);
1559 			}
1560 			break;
1561 		}
1562 
1563 		ret = add_exception_handler(insn, ctx, dst);
1564 		if (ret)
1565 			return ret;
1566 		break;
1567 
1568 	/* speculation barrier */
1569 	case BPF_ST | BPF_NOSPEC:
1570 		/*
1571 		 * Nothing required here.
1572 		 *
1573 		 * In case of arm64, we rely on the firmware mitigation of
1574 		 * Speculative Store Bypass as controlled via the ssbd kernel
1575 		 * parameter. Whenever the mitigation is enabled, it works
1576 		 * for all of the kernel code with no need to provide any
1577 		 * additional instructions.
1578 		 */
1579 		break;
1580 
1581 	/* ST: *(size *)(dst + off) = imm */
1582 	case BPF_ST | BPF_MEM | BPF_W:
1583 	case BPF_ST | BPF_MEM | BPF_H:
1584 	case BPF_ST | BPF_MEM | BPF_B:
1585 	case BPF_ST | BPF_MEM | BPF_DW:
1586 	case BPF_ST | BPF_PROBE_MEM32 | BPF_B:
1587 	case BPF_ST | BPF_PROBE_MEM32 | BPF_H:
1588 	case BPF_ST | BPF_PROBE_MEM32 | BPF_W:
1589 	case BPF_ST | BPF_PROBE_MEM32 | BPF_DW:
1590 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1591 			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
1592 			dst = tmp2;
1593 		}
1594 		if (dst == fp) {
1595 			dst_adj = A64_SP;
1596 			off_adj = off + ctx->stack_size;
1597 		} else {
1598 			dst_adj = dst;
1599 			off_adj = off;
1600 		}
1601 		/* Load imm to a register then store it */
1602 		emit_a64_mov_i(1, tmp, imm, ctx);
1603 		switch (BPF_SIZE(code)) {
1604 		case BPF_W:
1605 			if (is_lsi_offset(off_adj, 2)) {
1606 				emit(A64_STR32I(tmp, dst_adj, off_adj), ctx);
1607 			} else {
1608 				emit_a64_mov_i(1, tmp2, off, ctx);
1609 				emit(A64_STR32(tmp, dst, tmp2), ctx);
1610 			}
1611 			break;
1612 		case BPF_H:
1613 			if (is_lsi_offset(off_adj, 1)) {
1614 				emit(A64_STRHI(tmp, dst_adj, off_adj), ctx);
1615 			} else {
1616 				emit_a64_mov_i(1, tmp2, off, ctx);
1617 				emit(A64_STRH(tmp, dst, tmp2), ctx);
1618 			}
1619 			break;
1620 		case BPF_B:
1621 			if (is_lsi_offset(off_adj, 0)) {
1622 				emit(A64_STRBI(tmp, dst_adj, off_adj), ctx);
1623 			} else {
1624 				emit_a64_mov_i(1, tmp2, off, ctx);
1625 				emit(A64_STRB(tmp, dst, tmp2), ctx);
1626 			}
1627 			break;
1628 		case BPF_DW:
1629 			if (is_lsi_offset(off_adj, 3)) {
1630 				emit(A64_STR64I(tmp, dst_adj, off_adj), ctx);
1631 			} else {
1632 				emit_a64_mov_i(1, tmp2, off, ctx);
1633 				emit(A64_STR64(tmp, dst, tmp2), ctx);
1634 			}
1635 			break;
1636 		}
1637 
1638 		ret = add_exception_handler(insn, ctx, dst);
1639 		if (ret)
1640 			return ret;
1641 		break;
1642 
1643 	/* STX: *(size *)(dst + off) = src */
1644 	case BPF_STX | BPF_MEM | BPF_W:
1645 	case BPF_STX | BPF_MEM | BPF_H:
1646 	case BPF_STX | BPF_MEM | BPF_B:
1647 	case BPF_STX | BPF_MEM | BPF_DW:
1648 	case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
1649 	case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
1650 	case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
1651 	case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
1652 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) {
1653 			emit(A64_ADD(1, tmp2, dst, arena_vm_base), ctx);
1654 			dst = tmp2;
1655 		}
1656 		if (dst == fp) {
1657 			dst_adj = A64_SP;
1658 			off_adj = off + ctx->stack_size;
1659 		} else {
1660 			dst_adj = dst;
1661 			off_adj = off;
1662 		}
1663 		switch (BPF_SIZE(code)) {
1664 		case BPF_W:
1665 			if (is_lsi_offset(off_adj, 2)) {
1666 				emit(A64_STR32I(src, dst_adj, off_adj), ctx);
1667 			} else {
1668 				emit_a64_mov_i(1, tmp, off, ctx);
1669 				emit(A64_STR32(src, dst, tmp), ctx);
1670 			}
1671 			break;
1672 		case BPF_H:
1673 			if (is_lsi_offset(off_adj, 1)) {
1674 				emit(A64_STRHI(src, dst_adj, off_adj), ctx);
1675 			} else {
1676 				emit_a64_mov_i(1, tmp, off, ctx);
1677 				emit(A64_STRH(src, dst, tmp), ctx);
1678 			}
1679 			break;
1680 		case BPF_B:
1681 			if (is_lsi_offset(off_adj, 0)) {
1682 				emit(A64_STRBI(src, dst_adj, off_adj), ctx);
1683 			} else {
1684 				emit_a64_mov_i(1, tmp, off, ctx);
1685 				emit(A64_STRB(src, dst, tmp), ctx);
1686 			}
1687 			break;
1688 		case BPF_DW:
1689 			if (is_lsi_offset(off_adj, 3)) {
1690 				emit(A64_STR64I(src, dst_adj, off_adj), ctx);
1691 			} else {
1692 				emit_a64_mov_i(1, tmp, off, ctx);
1693 				emit(A64_STR64(src, dst, tmp), ctx);
1694 			}
1695 			break;
1696 		}
1697 
1698 		ret = add_exception_handler(insn, ctx, dst);
1699 		if (ret)
1700 			return ret;
1701 		break;
1702 
1703 	case BPF_STX | BPF_ATOMIC | BPF_W:
1704 	case BPF_STX | BPF_ATOMIC | BPF_DW:
1705 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
1706 	case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
1707 		if (cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
1708 			ret = emit_lse_atomic(insn, ctx);
1709 		else
1710 			ret = emit_ll_sc_atomic(insn, ctx);
1711 		if (ret)
1712 			return ret;
1713 
1714 		ret = add_exception_handler(insn, ctx, dst);
1715 		if (ret)
1716 			return ret;
1717 		break;
1718 
1719 	default:
1720 		pr_err_once("unknown opcode %02x\n", code);
1721 		return -EINVAL;
1722 	}
1723 
1724 	return 0;
1725 }
1726 
1727 static int build_body(struct jit_ctx *ctx, bool extra_pass)
1728 {
1729 	const struct bpf_prog *prog = ctx->prog;
1730 	int i;
1731 
1732 	/*
1733 	 * - offset[0] - offset of the end of the prologue,
1734 	 *   start of the 1st instruction.
1735 	 * - offset[1] - offset of the end of 1st instruction,
1736 	 *   start of the 2nd instruction
1737 	 * [....]
1738 	 * - offset[3] - offset of the end of 3rd instruction,
1739 	 *   start of 4th instruction
1740 	 */
1741 	for (i = 0; i < prog->len; i++) {
1742 		const struct bpf_insn *insn = &prog->insnsi[i];
1743 		int ret;
1744 
1745 		ctx->offset[i] = ctx->idx;
1746 		ret = build_insn(insn, ctx, extra_pass);
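		/*
		 * A positive return value means the instruction occupies two
		 * bpf_insn slots (BPF_LD | BPF_IMM | BPF_DW), so give the
		 * second slot the same jited offset and skip it.
		 */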
1747 		if (ret > 0) {
1748 			i++;
1749 			ctx->offset[i] = ctx->idx;
1750 			continue;
1751 		}
1752 		if (ret)
1753 			return ret;
1754 	}
1755 	/*
1756 	 * offset is allocated with prog->len + 1 so fill in
1757 	 * the last element with the offset after the last
1758 	 * instruction (end of program)
1759 	 */
1760 	ctx->offset[i] = ctx->idx;
1761 
1762 	return 0;
1763 }
1764 
1765 static int validate_code(struct jit_ctx *ctx)
1766 {
1767 	int i;
1768 
1769 	for (i = 0; i < ctx->idx; i++) {
1770 		u32 a64_insn = le32_to_cpu(ctx->image[i]);
1771 
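		/*
		 * The instruction encoders return AARCH64_BREAK_FAULT (a BRK
		 * encoding) when handed an operand they cannot encode, so any
		 * such value left in the image means an earlier emit failed.
		 */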
1772 		if (a64_insn == AARCH64_BREAK_FAULT)
1773 			return -1;
1774 	}
1775 	return 0;
1776 }
1777 
1778 static int validate_ctx(struct jit_ctx *ctx)
1779 {
1780 	if (validate_code(ctx))
1781 		return -1;
1782 
1783 	if (WARN_ON_ONCE(ctx->exentry_idx != ctx->prog->aux->num_exentries))
1784 		return -1;
1785 
1786 	return 0;
1787 }
1788 
1789 static inline void bpf_flush_icache(void *start, void *end)
1790 {
1791 	flush_icache_range((unsigned long)start, (unsigned long)end);
1792 }
1793 
1794 struct arm64_jit_data {
1795 	struct bpf_binary_header *header;
1796 	u8 *ro_image;
1797 	struct bpf_binary_header *ro_header;
1798 	struct jit_ctx ctx;
1799 };
1800 
1801 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
1802 {
1803 	int image_size, prog_size, extable_size, extable_align, extable_offset;
1804 	struct bpf_prog *tmp, *orig_prog = prog;
1805 	struct bpf_binary_header *header;
1806 	struct bpf_binary_header *ro_header;
1807 	struct arm64_jit_data *jit_data;
1808 	bool was_classic = bpf_prog_was_classic(prog);
1809 	bool tmp_blinded = false;
1810 	bool extra_pass = false;
1811 	struct jit_ctx ctx;
1812 	u8 *image_ptr;
1813 	u8 *ro_image_ptr;
1814 	int body_idx;
1815 	int exentry_idx;
1816 
1817 	if (!prog->jit_requested)
1818 		return orig_prog;
1819 
1820 	tmp = bpf_jit_blind_constants(prog);
1821 	/* If blinding was requested and we failed during blinding,
1822 	 * we must fall back to the interpreter.
1823 	 */
1824 	if (IS_ERR(tmp))
1825 		return orig_prog;
1826 	if (tmp != prog) {
1827 		tmp_blinded = true;
1828 		prog = tmp;
1829 	}
1830 
1831 	jit_data = prog->aux->jit_data;
1832 	if (!jit_data) {
1833 		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
1834 		if (!jit_data) {
1835 			prog = orig_prog;
1836 			goto out;
1837 		}
1838 		prog->aux->jit_data = jit_data;
1839 	}
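	/*
	 * A saved ctx.offset means this is the extra pass for a
	 * multi-function program: reuse the context and image from the
	 * previous JIT pass and only re-emit the instructions, now that the
	 * addresses of all subprogs are known.
	 */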
1840 	if (jit_data->ctx.offset) {
1841 		ctx = jit_data->ctx;
1842 		ro_image_ptr = jit_data->ro_image;
1843 		ro_header = jit_data->ro_header;
1844 		header = jit_data->header;
1845 		image_ptr = (void *)header + ((void *)ro_image_ptr
1846 						 - (void *)ro_header);
1847 		extra_pass = true;
1848 		prog_size = sizeof(u32) * ctx.idx;
1849 		goto skip_init_ctx;
1850 	}
1851 	memset(&ctx, 0, sizeof(ctx));
1852 	ctx.prog = prog;
1853 
1854 	ctx.offset = kvcalloc(prog->len + 1, sizeof(int), GFP_KERNEL);
1855 	if (ctx.offset == NULL) {
1856 		prog = orig_prog;
1857 		goto out_off;
1858 	}
1859 
1860 	ctx.user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena);
1861 	ctx.arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena);
1862 
1863 	/* Pass 1: Estimate the maximum image size.
1864 	 *
1865 	 * BPF line info needs ctx->offset[i] to be the offset of
1866 	 * instruction[i] in the jited image, so build the prologue first.
1867 	 */
1868 	if (build_prologue(&ctx, was_classic)) {
1869 		prog = orig_prog;
1870 		goto out_off;
1871 	}
1872 
1873 	if (build_body(&ctx, extra_pass)) {
1874 		prog = orig_prog;
1875 		goto out_off;
1876 	}
1877 
1878 	ctx.epilogue_offset = ctx.idx;
1879 	build_epilogue(&ctx, was_classic);
1880 	build_plt(&ctx);
1881 
1882 	extable_align = __alignof__(struct exception_table_entry);
1883 	extable_size = prog->aux->num_exentries *
1884 		sizeof(struct exception_table_entry);
1885 
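	/*
	 * Image layout: the instructions emitted via emit() (prologue, body,
	 * epilogue and the two plt instructions) come first, followed by the
	 * 8-byte plt target, padding up to extable_align, and finally the
	 * exception table.
	 */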
1886 	/* Now we know the maximum image size. */
1887 	prog_size = sizeof(u32) * ctx.idx;
1888 	/* also allocate space for plt target */
1889 	extable_offset = round_up(prog_size + PLT_TARGET_SIZE, extable_align);
1890 	image_size = extable_offset + extable_size;
1891 	ro_header = bpf_jit_binary_pack_alloc(image_size, &ro_image_ptr,
1892 					      sizeof(u32), &header, &image_ptr,
1893 					      jit_fill_hole);
1894 	if (!ro_header) {
1895 		prog = orig_prog;
1896 		goto out_off;
1897 	}
1898 
1899 	/* Pass 2: Determine jited position and result for each instruction */
1900 
1901 	/*
1902 	 * Use the RW image for writing the JITed instructions, but also keep
1903 	 * the RX ro_image for calculating offsets within the image. The RW
1904 	 * image is later copied to the RX image, from where the program will
1905 	 * run; bpf_jit_binary_pack_finalize() performs this copy as the
1906 	 * final step.
1907 	 */
1908 	ctx.image = (__le32 *)image_ptr;
1909 	ctx.ro_image = (__le32 *)ro_image_ptr;
1910 	if (extable_size)
1911 		prog->aux->extable = (void *)ro_image_ptr + extable_offset;
1912 skip_init_ctx:
1913 	ctx.idx = 0;
1914 	ctx.exentry_idx = 0;
1915 	ctx.write = true;
1916 
1917 	build_prologue(&ctx, was_classic);
1918 
1919 	/* Record exentry_idx and body_idx before first build_body */
1920 	exentry_idx = ctx.exentry_idx;
1921 	body_idx = ctx.idx;
1922 	/* Don't write body instructions to memory for now */
1923 	ctx.write = false;
1924 
1925 	if (build_body(&ctx, extra_pass)) {
1926 		prog = orig_prog;
1927 		goto out_free_hdr;
1928 	}
1929 
1930 	ctx.epilogue_offset = ctx.idx;
1931 	ctx.exentry_idx = exentry_idx;
1932 	ctx.idx = body_idx;
1933 	ctx.write = true;
1934 
1935 	/* Pass 3: Adjust jump offset and write final image */
1936 	if (build_body(&ctx, extra_pass) ||
1937 		WARN_ON_ONCE(ctx.idx != ctx.epilogue_offset)) {
1938 		prog = orig_prog;
1939 		goto out_free_hdr;
1940 	}
1941 
1942 	build_epilogue(&ctx, was_classic);
1943 	build_plt(&ctx);
1944 
1945 	/* Extra pass to validate JITed code. */
1946 	if (validate_ctx(&ctx)) {
1947 		prog = orig_prog;
1948 		goto out_free_hdr;
1949 	}
1950 
1951 	/* update the real prog size */
1952 	prog_size = sizeof(u32) * ctx.idx;
1953 
1954 	/* And we're done. */
1955 	if (bpf_jit_enable > 1)
1956 		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
1957 
1958 	if (!prog->is_func || extra_pass) {
1959 		/* The jited image may shrink, since the jited result for a
1960 		 * BPF_CALL to a subprog may change from an indirect call
1961 		 * to a direct call.
1962 		 */
1963 		if (extra_pass && ctx.idx > jit_data->ctx.idx) {
1964 			pr_err_once("multi-func JIT bug %d > %d\n",
1965 				    ctx.idx, jit_data->ctx.idx);
1966 			prog->bpf_func = NULL;
1967 			prog->jited = 0;
1968 			prog->jited_len = 0;
1969 			goto out_free_hdr;
1970 		}
1971 		if (WARN_ON(bpf_jit_binary_pack_finalize(ro_header, header))) {
1972 			/* ro_header has been freed */
1973 			ro_header = NULL;
1974 			prog = orig_prog;
1975 			goto out_off;
1976 		}
1977 		/*
1978 		 * The instructions have now been copied to the ROX region from
1979 		 * where they will execute. Now the data cache has to be cleaned to
1980 		 * the PoU and the I-cache has to be invalidated for the VAs.
1981 		 */
1982 		bpf_flush_icache(ro_header, ctx.ro_image + ctx.idx);
1983 	} else {
1984 		jit_data->ctx = ctx;
1985 		jit_data->ro_image = ro_image_ptr;
1986 		jit_data->header = header;
1987 		jit_data->ro_header = ro_header;
1988 	}
1989 
1990 	prog->bpf_func = (void *)ctx.ro_image + cfi_get_offset();
1991 	prog->jited = 1;
1992 	prog->jited_len = prog_size - cfi_get_offset();
1993 
1994 	if (!prog->is_func || extra_pass) {
1995 		int i;
1996 
1997 		/* offset[prog->len] is the size of program */
1998 		for (i = 0; i <= prog->len; i++)
1999 			ctx.offset[i] *= AARCH64_INSN_SIZE;
2000 		bpf_prog_fill_jited_linfo(prog, ctx.offset + 1);
2001 out_off:
2002 		kvfree(ctx.offset);
2003 		kfree(jit_data);
2004 		prog->aux->jit_data = NULL;
2005 	}
2006 out:
2007 	if (tmp_blinded)
2008 		bpf_jit_prog_release_other(prog, prog == orig_prog ?
2009 					   tmp : orig_prog);
2010 	return prog;
2011 
2012 out_free_hdr:
2013 	if (header) {
2014 		bpf_arch_text_copy(&ro_header->size, &header->size,
2015 				   sizeof(header->size));
2016 		bpf_jit_binary_pack_free(ro_header, header);
2017 	}
2018 	goto out_off;
2019 }
2020 
2021 bool bpf_jit_supports_kfunc_call(void)
2022 {
2023 	return true;
2024 }
2025 
2026 void *bpf_arch_text_copy(void *dst, void *src, size_t len)
2027 {
2028 	if (!aarch64_insn_copy(dst, src, len))
2029 		return ERR_PTR(-EINVAL);
2030 	return dst;
2031 }
2032 
2033 u64 bpf_jit_alloc_exec_limit(void)
2034 {
2035 	return VMALLOC_END - VMALLOC_START;
2036 }
2037 
2038 /* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
2039 bool bpf_jit_supports_subprog_tailcalls(void)
2040 {
2041 	return true;
2042 }
2043 
2044 static void invoke_bpf_prog(struct jit_ctx *ctx, struct bpf_tramp_link *l,
2045 			    int args_off, int retval_off, int run_ctx_off,
2046 			    bool save_ret)
2047 {
2048 	__le32 *branch;
2049 	u64 enter_prog;
2050 	u64 exit_prog;
2051 	struct bpf_prog *p = l->link.prog;
2052 	int cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
2053 
2054 	enter_prog = (u64)bpf_trampoline_enter(p);
2055 	exit_prog = (u64)bpf_trampoline_exit(p);
2056 
2057 	if (l->cookie == 0) {
2058 		/* if cookie is zero, one instruction is enough to store it */
2059 		emit(A64_STR64I(A64_ZR, A64_SP, run_ctx_off + cookie_off), ctx);
2060 	} else {
2061 		emit_a64_mov_i64(A64_R(10), l->cookie, ctx);
2062 		emit(A64_STR64I(A64_R(10), A64_SP, run_ctx_off + cookie_off),
2063 		     ctx);
2064 	}
2065 
2066 	/* save p to callee saved register x19 to avoid loading p with mov_i64
2067 	 * each time.
2068 	 */
2069 	emit_addr_mov_i64(A64_R(19), (const u64)p, ctx);
2070 
2071 	/* arg1: prog */
2072 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2073 	/* arg2: &run_ctx */
2074 	emit(A64_ADD_I(1, A64_R(1), A64_SP, run_ctx_off), ctx);
2075 
2076 	emit_call(enter_prog, ctx);
2077 
2078 	/* save return value to callee saved register x20 */
2079 	emit(A64_MOV(1, A64_R(20), A64_R(0)), ctx);
2080 
2081 	/* if (__bpf_prog_enter(prog) == 0)
2082 	 *         goto skip_exec_of_prog;
2083 	 */
2084 	branch = ctx->image + ctx->idx;
2085 	emit(A64_NOP, ctx);
2086 
2087 	emit(A64_ADD_I(1, A64_R(0), A64_SP, args_off), ctx);
2088 	if (!p->jited)
2089 		emit_addr_mov_i64(A64_R(1), (const u64)p->insnsi, ctx);
2090 
2091 	emit_call((const u64)p->bpf_func, ctx);
2092 
2093 	if (save_ret)
2094 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
2095 
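	/*
	 * ctx->image is NULL during the sizing pass, so only back-patch the
	 * reserved nop once instructions are actually being written: it
	 * becomes a cbz that skips the call to the bpf prog when
	 * __bpf_prog_enter() returned 0.
	 */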
2096 	if (ctx->image) {
2097 		int offset = &ctx->image[ctx->idx] - branch;
2098 		*branch = cpu_to_le32(A64_CBZ(1, A64_R(0), offset));
2099 	}
2100 
2101 	/* arg1: prog */
2102 	emit(A64_MOV(1, A64_R(0), A64_R(19)), ctx);
2103 	/* arg2: start time */
2104 	emit(A64_MOV(1, A64_R(1), A64_R(20)), ctx);
2105 	/* arg3: &run_ctx */
2106 	emit(A64_ADD_I(1, A64_R(2), A64_SP, run_ctx_off), ctx);
2107 
2108 	emit_call(exit_prog, ctx);
2109 }
2110 
2111 static void invoke_bpf_mod_ret(struct jit_ctx *ctx, struct bpf_tramp_links *tl,
2112 			       int args_off, int retval_off, int run_ctx_off,
2113 			       __le32 **branches)
2114 {
2115 	int i;
2116 
2117 	/* The first fmod_ret program will receive a garbage return value.
2118 	 * Set this to 0 to avoid confusing the program.
2119 	 */
2120 	emit(A64_STR64I(A64_ZR, A64_SP, retval_off), ctx);
2121 	for (i = 0; i < tl->nr_links; i++) {
2122 		invoke_bpf_prog(ctx, tl->links[i], args_off, retval_off,
2123 				run_ctx_off, true);
2124 		/* if (*(u64 *)(sp + retval_off) !=  0)
2125 		 *	goto do_fexit;
2126 		 */
2127 		emit(A64_LDR64I(A64_R(10), A64_SP, retval_off), ctx);
2128 		/* Save the location of the branch and generate a nop.
2129 		 * This nop will be replaced with a cbnz later.
2130 		 */
2131 		branches[i] = ctx->image + ctx->idx;
2132 		emit(A64_NOP, ctx);
2133 	}
2134 }
2135 
2136 static void save_args(struct jit_ctx *ctx, int args_off, int nregs)
2137 {
2138 	int i;
2139 
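	/* spill argument registers x0..x(nregs-1) to the stack in order */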
2140 	for (i = 0; i < nregs; i++) {
2141 		emit(A64_STR64I(i, A64_SP, args_off), ctx);
2142 		args_off += 8;
2143 	}
2144 }
2145 
2146 static void restore_args(struct jit_ctx *ctx, int args_off, int nregs)
2147 {
2148 	int i;
2149 
2150 	for (i = 0; i < nregs; i++) {
2151 		emit(A64_LDR64I(i, A64_SP, args_off), ctx);
2152 		args_off += 8;
2153 	}
2154 }
2155 
2156 static bool is_struct_ops_tramp(const struct bpf_tramp_links *fentry_links)
2157 {
2158 	return fentry_links->nr_links == 1 &&
2159 		fentry_links->links[0]->link.type == BPF_LINK_TYPE_STRUCT_OPS;
2160 }
2161 
2162 /* Based on the x86's implementation of arch_prepare_bpf_trampoline().
2163  *
2164  * bpf prog and function entry before bpf trampoline hooked:
2165  *   mov x9, lr
2166  *   nop
2167  *
2168  * bpf prog and function entry after bpf trampoline hooked:
2169  *   mov x9, lr
2170  *   bl  <bpf_trampoline or plt>
2171  *
2172  */
2173 static int prepare_trampoline(struct jit_ctx *ctx, struct bpf_tramp_image *im,
2174 			      struct bpf_tramp_links *tlinks, void *func_addr,
2175 			      int nregs, u32 flags)
2176 {
2177 	int i;
2178 	int stack_size;
2179 	int retaddr_off;
2180 	int regs_off;
2181 	int retval_off;
2182 	int args_off;
2183 	int nregs_off;
2184 	int ip_off;
2185 	int run_ctx_off;
2186 	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
2187 	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
2188 	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
2189 	bool save_ret;
2190 	__le32 **branches = NULL;
2191 	bool is_struct_ops = is_struct_ops_tramp(fentry);
2192 
2193 	/* trampoline stack layout:
2194 	 *                  [ parent ip         ]
2195 	 *                  [ FP                ]
2196 	 * SP + retaddr_off [ self ip           ]
2197 	 *                  [ FP                ]
2198 	 *
2199 	 *                  [ padding           ] align SP to multiples of 16
2200 	 *
2201 	 *                  [ x20               ] callee saved reg x20
2202 	 * SP + regs_off    [ x19               ] callee saved reg x19
2203 	 *
2204 	 * SP + retval_off  [ return value      ] BPF_TRAMP_F_CALL_ORIG or
2205 	 *                                        BPF_TRAMP_F_RET_FENTRY_RET
2206 	 *
2207 	 *                  [ arg reg N         ]
2208 	 *                  [ ...               ]
2209 	 * SP + args_off    [ arg reg 1         ]
2210 	 *
2211 	 * SP + nregs_off   [ arg regs count    ]
2212 	 *
2213 	 * SP + ip_off      [ traced function   ] BPF_TRAMP_F_IP_ARG flag
2214 	 *
2215 	 * SP + run_ctx_off [ bpf_tramp_run_ctx ]
2216 	 */
2217 
2218 	stack_size = 0;
2219 	run_ctx_off = stack_size;
2220 	/* room for bpf_tramp_run_ctx */
2221 	stack_size += round_up(sizeof(struct bpf_tramp_run_ctx), 8);
2222 
2223 	ip_off = stack_size;
2224 	/* room for IP address argument */
2225 	if (flags & BPF_TRAMP_F_IP_ARG)
2226 		stack_size += 8;
2227 
2228 	nregs_off = stack_size;
2229 	/* room for args count */
2230 	stack_size += 8;
2231 
2232 	args_off = stack_size;
2233 	/* room for args */
2234 	stack_size += nregs * 8;
2235 
2236 	/* room for return value */
2237 	retval_off = stack_size;
2238 	save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET);
2239 	if (save_ret)
2240 		stack_size += 8;
2241 
2242 	/* room for callee saved registers, currently x19 and x20 are used */
2243 	regs_off = stack_size;
2244 	stack_size += 16;
2245 
2246 	/* round up to multiples of 16 to avoid SPAlignmentFault */
2247 	stack_size = round_up(stack_size, 16);
2248 
2249 	/* the return address is located just above the saved FP */
2250 	retaddr_off = stack_size + 8;
2251 
2252 	if (flags & BPF_TRAMP_F_INDIRECT) {
2253 		/*
2254 		 * Indirect call for bpf_struct_ops
2255 		 */
2256 		emit_kcfi(cfi_get_func_hash(func_addr), ctx);
2257 	}
2258 	/* bpf trampoline may be invoked by 3 instruction types:
2259 	 * 1. bl, attached to bpf prog or kernel function via short jump
2260 	 * 2. br, attached to bpf prog or kernel function via long jump
2261 	 * 3. blr, working as a function pointer, used by struct_ops.
2262 	 * So BTI_JC should be used here to support both br and blr.
2263 	 */
2264 	emit_bti(A64_BTI_JC, ctx);
2265 
2266 	/* x9 is not set for struct_ops */
2267 	if (!is_struct_ops) {
2268 		/* frame for parent function */
2269 		emit(A64_PUSH(A64_FP, A64_R(9), A64_SP), ctx);
2270 		emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2271 	}
2272 
2273 	/* frame for patched function for tracing, or caller for struct_ops */
2274 	emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx);
2275 	emit(A64_MOV(1, A64_FP, A64_SP), ctx);
2276 
2277 	/* allocate stack space */
2278 	emit(A64_SUB_I(1, A64_SP, A64_SP, stack_size), ctx);
2279 
2280 	if (flags & BPF_TRAMP_F_IP_ARG) {
2281 		/* save ip address of the traced function */
2282 		emit_addr_mov_i64(A64_R(10), (const u64)func_addr, ctx);
2283 		emit(A64_STR64I(A64_R(10), A64_SP, ip_off), ctx);
2284 	}
2285 
2286 	/* save arg regs count */
2287 	emit(A64_MOVZ(1, A64_R(10), nregs, 0), ctx);
2288 	emit(A64_STR64I(A64_R(10), A64_SP, nregs_off), ctx);
2289 
2290 	/* save arg regs */
2291 	save_args(ctx, args_off, nregs);
2292 
2293 	/* save callee saved registers */
2294 	emit(A64_STR64I(A64_R(19), A64_SP, regs_off), ctx);
2295 	emit(A64_STR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2296 
2297 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2298 		/* for the first pass, assume the worst case */
2299 		if (!ctx->image)
2300 			ctx->idx += 4;
2301 		else
2302 			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
2303 		emit_call((const u64)__bpf_tramp_enter, ctx);
2304 	}
2305 
2306 	for (i = 0; i < fentry->nr_links; i++)
2307 		invoke_bpf_prog(ctx, fentry->links[i], args_off,
2308 				retval_off, run_ctx_off,
2309 				flags & BPF_TRAMP_F_RET_FENTRY_RET);
2310 
2311 	if (fmod_ret->nr_links) {
2312 		branches = kcalloc(fmod_ret->nr_links, sizeof(__le32 *),
2313 				   GFP_KERNEL);
2314 		if (!branches)
2315 			return -ENOMEM;
2316 
2317 		invoke_bpf_mod_ret(ctx, fmod_ret, args_off, retval_off,
2318 				   run_ctx_off, branches);
2319 	}
2320 
2321 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2322 		restore_args(ctx, args_off, nregs);
2323 		/* call original func */
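		/*
		 * The return address saved at retaddr_off points into the
		 * patched function just after the patchsite. Set LR to the
		 * instruction following the ret below, then "return" to x10
		 * so the original function runs and comes back here.
		 */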
2324 		emit(A64_LDR64I(A64_R(10), A64_SP, retaddr_off), ctx);
2325 		emit(A64_ADR(A64_LR, AARCH64_INSN_SIZE * 2), ctx);
2326 		emit(A64_RET(A64_R(10)), ctx);
2327 		/* store return value */
2328 		emit(A64_STR64I(A64_R(0), A64_SP, retval_off), ctx);
2329 		/* reserve a nop for bpf_tramp_image_put */
2330 		im->ip_after_call = ctx->ro_image + ctx->idx;
2331 		emit(A64_NOP, ctx);
2332 	}
2333 
2334 	/* update the branches saved in invoke_bpf_mod_ret with cbnz */
2335 	for (i = 0; i < fmod_ret->nr_links && ctx->image != NULL; i++) {
2336 		int offset = &ctx->image[ctx->idx] - branches[i];
2337 		*branches[i] = cpu_to_le32(A64_CBNZ(1, A64_R(10), offset));
2338 	}
2339 
2340 	for (i = 0; i < fexit->nr_links; i++)
2341 		invoke_bpf_prog(ctx, fexit->links[i], args_off, retval_off,
2342 				run_ctx_off, false);
2343 
2344 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
2345 		im->ip_epilogue = ctx->ro_image + ctx->idx;
2346 		/* for the first pass, assume the worst case */
2347 		if (!ctx->image)
2348 			ctx->idx += 4;
2349 		else
2350 			emit_a64_mov_i64(A64_R(0), (const u64)im, ctx);
2351 		emit_call((const u64)__bpf_tramp_exit, ctx);
2352 	}
2353 
2354 	if (flags & BPF_TRAMP_F_RESTORE_REGS)
2355 		restore_args(ctx, args_off, nregs);
2356 
2357 	/* restore callee-saved registers x19 and x20 */
2358 	emit(A64_LDR64I(A64_R(19), A64_SP, regs_off), ctx);
2359 	emit(A64_LDR64I(A64_R(20), A64_SP, regs_off + 8), ctx);
2360 
2361 	if (save_ret)
2362 		emit(A64_LDR64I(A64_R(0), A64_SP, retval_off), ctx);
2363 
2364 	/* reset SP */
2365 	emit(A64_MOV(1, A64_SP, A64_FP), ctx);
2366 
2367 	if (is_struct_ops) {
2368 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2369 		emit(A64_RET(A64_LR), ctx);
2370 	} else {
2371 		/* pop frames */
2372 		emit(A64_POP(A64_FP, A64_LR, A64_SP), ctx);
2373 		emit(A64_POP(A64_FP, A64_R(9), A64_SP), ctx);
2374 
2375 		if (flags & BPF_TRAMP_F_SKIP_FRAME) {
2376 			/* skip patched function, return to parent */
2377 			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2378 			emit(A64_RET(A64_R(9)), ctx);
2379 		} else {
2380 			/* return to patched function */
2381 			emit(A64_MOV(1, A64_R(10), A64_LR), ctx);
2382 			emit(A64_MOV(1, A64_LR, A64_R(9)), ctx);
2383 			emit(A64_RET(A64_R(10)), ctx);
2384 		}
2385 	}
2386 
2387 	kfree(branches);
2388 
2389 	return ctx->idx;
2390 }
2391 
2392 static int btf_func_model_nregs(const struct btf_func_model *m)
2393 {
2394 	int nregs = m->nr_args;
2395 	int i;
2396 
2397 	/* extra registers needed for struct argument */
2398 	for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) {
2399 		/* The arg_size is at most 16 bytes, enforced by the verifier. */
2400 		if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
2401 			nregs += (m->arg_size[i] + 7) / 8 - 1;
2402 	}
2403 
2404 	return nregs;
2405 }
2406 
2407 int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags,
2408 			     struct bpf_tramp_links *tlinks, void *func_addr)
2409 {
2410 	struct jit_ctx ctx = {
2411 		.image = NULL,
2412 		.idx = 0,
2413 	};
2414 	struct bpf_tramp_image im;
2415 	int nregs, ret;
2416 
2417 	nregs = btf_func_model_nregs(m);
2418 	/* the first 8 registers are used for arguments */
2419 	if (nregs > 8)
2420 		return -ENOTSUPP;
2421 
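	/*
	 * With ctx.image == NULL, emit() only advances ctx.idx, so this is a
	 * dry run of the trampoline generation that just counts instructions.
	 */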
2422 	ret = prepare_trampoline(&ctx, &im, tlinks, func_addr, nregs, flags);
2423 	if (ret < 0)
2424 		return ret;
2425 
2426 	return ret * AARCH64_INSN_SIZE;
2427 }
2428 
2429 void *arch_alloc_bpf_trampoline(unsigned int size)
2430 {
2431 	return bpf_prog_pack_alloc(size, jit_fill_hole);
2432 }
2433 
2434 void arch_free_bpf_trampoline(void *image, unsigned int size)
2435 {
2436 	bpf_prog_pack_free(image, size);
2437 }
2438 
2439 int arch_protect_bpf_trampoline(void *image, unsigned int size)
2440 {
2441 	return 0;
2442 }
2443 
2444 int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *ro_image,
2445 				void *ro_image_end, const struct btf_func_model *m,
2446 				u32 flags, struct bpf_tramp_links *tlinks,
2447 				void *func_addr)
2448 {
2449 	int ret, nregs;
2450 	void *image, *tmp;
2451 	u32 size = ro_image_end - ro_image;
2452 
2453 	/* image doesn't need to be in module memory range, so we can
2454 	 * use kvmalloc.
2455 	 */
2456 	image = kvmalloc(size, GFP_KERNEL);
2457 	if (!image)
2458 		return -ENOMEM;
2459 
2460 	struct jit_ctx ctx = {
2461 		.image = image,
2462 		.ro_image = ro_image,
2463 		.idx = 0,
2464 		.write = true,
2465 	};
2466 
2467 	nregs = btf_func_model_nregs(m);
2468 	/* the first 8 registers are used for arguments */
2469 	if (nregs > 8) {
2470 		ret = -ENOTSUPP;
		/* free the RW image allocated above */
		goto out;
	}
2471 
2472 	jit_fill_hole(image, (unsigned int)(ro_image_end - ro_image));
2473 	ret = prepare_trampoline(&ctx, im, tlinks, func_addr, nregs, flags);
2474 
2475 	if (ret > 0 && validate_code(&ctx) < 0) {
2476 		ret = -EINVAL;
2477 		goto out;
2478 	}
2479 
2480 	if (ret > 0)
2481 		ret *= AARCH64_INSN_SIZE;
2482 
2483 	tmp = bpf_arch_text_copy(ro_image, image, size);
2484 	if (IS_ERR(tmp)) {
2485 		ret = PTR_ERR(tmp);
2486 		goto out;
2487 	}
2488 
2489 	bpf_flush_icache(ro_image, ro_image + size);
2490 out:
2491 	kvfree(image);
2492 	return ret;
2493 }
2494 
2495 static bool is_long_jump(void *ip, void *target)
2496 {
2497 	long offset;
2498 
2499 	/* NULL target means this is a NOP */
2500 	if (!target)
2501 		return false;
2502 
2503 	offset = (long)target - (long)ip;
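	/* b/bl immediates are 26 bits, i.e. a range of +/-128MB around ip */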
2504 	return offset < -SZ_128M || offset >= SZ_128M;
2505 }
2506 
2507 static int gen_branch_or_nop(enum aarch64_insn_branch_type type, void *ip,
2508 			     void *addr, void *plt, u32 *insn)
2509 {
2510 	void *target;
2511 
2512 	if (!addr) {
2513 		*insn = aarch64_insn_gen_nop();
2514 		return 0;
2515 	}
2516 
2517 	if (is_long_jump(ip, addr))
2518 		target = plt;
2519 	else
2520 		target = addr;
2521 
2522 	*insn = aarch64_insn_gen_branch_imm((unsigned long)ip,
2523 					    (unsigned long)target,
2524 					    type);
2525 
2526 	return *insn != AARCH64_BREAK_FAULT ? 0 : -EFAULT;
2527 }
2528 
2529 /* Replace the branch instruction from @ip to @old_addr in a bpf prog or a bpf
2530  * trampoline with the branch instruction from @ip to @new_addr. If @old_addr
2531  * or @new_addr is NULL, the old or new instruction is NOP.
2532  *
2533  * When @ip is the bpf prog entry, a bpf trampoline is being attached or
2534  * detached. Since bpf trampoline and bpf prog are allocated separately with
2535  * vmalloc, the address distance may exceed 128MB, the maximum branch range.
2536  * So long jumps need to be handled.
2537  *
2538  * When a bpf prog is constructed, a plt pointing to empty trampoline
2539  * dummy_tramp is placed at the end:
2540  *
2541  *      bpf_prog:
2542  *              mov x9, lr
2543  *              nop // patchsite
2544  *              ...
2545  *              ret
2546  *
2547  *      plt:
2548  *              ldr x10, target
2549  *              br x10
2550  *      target:
2551  *              .quad dummy_tramp // plt target
2552  *
2553  * This is also the state when no trampoline is attached.
2554  *
2555  * When a short-jump bpf trampoline is attached, the patchsite is patched
2556  * to a bl instruction to the trampoline directly:
2557  *
2558  *      bpf_prog:
2559  *              mov x9, lr
2560  *              bl <short-jump bpf trampoline address> // patchsite
2561  *              ...
2562  *              ret
2563  *
2564  *      plt:
2565  *              ldr x10, target
2566  *              br x10
2567  *      target:
2568  *              .quad dummy_tramp // plt target
2569  *
2570  * When a long-jump bpf trampoline is attached, the plt target is filled with
2571  * the trampoline address and the patchsite is patched to a bl instruction to
2572  * the plt:
2573  *
2574  *      bpf_prog:
2575  *              mov x9, lr
2576  *              bl plt // patchsite
2577  *              ...
2578  *              ret
2579  *
2580  *      plt:
2581  *              ldr x10, target
2582  *              br x10
2583  *      target:
2584  *              .quad <long-jump bpf trampoline address> // plt target
2585  *
2586  * The dummy_tramp is used to prevent another CPU from jumping to unknown
2587  * locations during patching, which keeps the patching process simple.
2588  */
2589 int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
2590 		       void *old_addr, void *new_addr)
2591 {
2592 	int ret;
2593 	u32 old_insn;
2594 	u32 new_insn;
2595 	u32 replaced;
2596 	struct bpf_plt *plt = NULL;
2597 	unsigned long size = 0UL;
2598 	unsigned long offset = ~0UL;
2599 	enum aarch64_insn_branch_type branch_type;
2600 	char namebuf[KSYM_NAME_LEN];
2601 	void *image = NULL;
2602 	u64 plt_target = 0ULL;
2603 	bool poking_bpf_entry;
2604 
2605 	if (!__bpf_address_lookup((unsigned long)ip, &size, &offset, namebuf))
2606 		/* Only poking bpf text is supported. Since kernel function
2607 		 * entry is set up by ftrace, we rely on ftrace to poke kernel
2608 		 * functions.
2609 		 */
2610 		return -ENOTSUPP;
2611 
2612 	image = ip - offset;
2613 	/* zero offset means we're poking bpf prog entry */
2614 	poking_bpf_entry = (offset == 0UL);
2615 
2616 	/* bpf prog entry, find plt and the real patchsite */
2617 	if (poking_bpf_entry) {
2618 		/* plt locates at the end of bpf prog */
2619 		plt = image + size - PLT_TARGET_OFFSET;
2620 
2621 		/* skip to the nop instruction in bpf prog entry:
2622 		 * bti c // if BTI enabled
2623 		 * mov x9, x30
2624 		 * nop
2625 		 */
2626 		ip = image + POKE_OFFSET * AARCH64_INSN_SIZE;
2627 	}
2628 
2629 	/* long jump is only possible at bpf prog entry */
2630 	if (WARN_ON((is_long_jump(ip, new_addr) || is_long_jump(ip, old_addr)) &&
2631 		    !poking_bpf_entry))
2632 		return -EINVAL;
2633 
2634 	if (poke_type == BPF_MOD_CALL)
2635 		branch_type = AARCH64_INSN_BRANCH_LINK;
2636 	else
2637 		branch_type = AARCH64_INSN_BRANCH_NOLINK;
2638 
2639 	if (gen_branch_or_nop(branch_type, ip, old_addr, plt, &old_insn) < 0)
2640 		return -EFAULT;
2641 
2642 	if (gen_branch_or_nop(branch_type, ip, new_addr, plt, &new_insn) < 0)
2643 		return -EFAULT;
2644 
2645 	if (is_long_jump(ip, new_addr))
2646 		plt_target = (u64)new_addr;
2647 	else if (is_long_jump(ip, old_addr))
2648 		/* if the old target is a long jump and the new target is not,
2649 		 * restore the plt target to dummy_tramp, so there is always a
2650 		 * legal and harmless address stored in plt target, and we'll
2651 		 * never jump from plt to an unknown place.
2652 		 */
2653 		plt_target = (u64)&dummy_tramp;
2654 
2655 	if (plt_target) {
2656 		/* non-zero plt_target indicates we're patching a bpf prog,
2657 		 * which is read only.
2658 		 */
2659 		if (set_memory_rw(PAGE_MASK & ((uintptr_t)&plt->target), 1))
2660 			return -EFAULT;
2661 		WRITE_ONCE(plt->target, plt_target);
2662 		set_memory_ro(PAGE_MASK & ((uintptr_t)&plt->target), 1);
2663 		/* since plt target points to either the new trampoline
2664 		 * or dummy_tramp, even if another CPU reads the old plt
2665 		 * target value before fetching the bl instruction to plt,
2666 		 * it will be brought back by dummy_tramp, so no barrier is
2667 		 * required here.
2668 		 */
2669 	}
2670 
2671 	/* if the old target and the new target are both long jumps, no
2672 	 * patching is required
2673 	 */
2674 	if (old_insn == new_insn)
2675 		return 0;
2676 
2677 	mutex_lock(&text_mutex);
2678 	if (aarch64_insn_read(ip, &replaced)) {
2679 		ret = -EFAULT;
2680 		goto out;
2681 	}
2682 
2683 	if (replaced != old_insn) {
2684 		ret = -EFAULT;
2685 		goto out;
2686 	}
2687 
2688 	/* We call aarch64_insn_patch_text_nosync() to replace instruction
2689 	 * atomically, so no other CPUs will fetch a half-new and half-old
2690 	 * instruction. But there is a chance that another CPU executes the
2691 	 * old instruction after the patching operation finishes (e.g.,
2692 	 * pipeline not flushed, or icache not synchronized yet).
2693 	 *
2694 	 * 1. when a new trampoline is attached, it is not a problem for
2695 	 *    different CPUs to jump to different trampolines temporarily.
2696 	 *
2697 	 * 2. when an old trampoline is freed, we must wait for all other
2698 	 *    CPUs to exit the trampoline and make sure it is no longer
2699 	 *    reachable. bpf_tramp_image_put() already uses percpu_ref and
2700 	 *    task-based RCU to do that synchronization, so there is no need
2701 	 *    to call the sync version here; see bpf_tramp_image_put() for details.
2702 	 */
2703 	ret = aarch64_insn_patch_text_nosync(ip, new_insn);
2704 out:
2705 	mutex_unlock(&text_mutex);
2706 
2707 	return ret;
2708 }
2709 
2710 bool bpf_jit_supports_ptr_xchg(void)
2711 {
2712 	return true;
2713 }
2714 
2715 bool bpf_jit_supports_exceptions(void)
2716 {
2717 	/* We unwind through both kernel frames (starting from within the
2718 	 * bpf_throw call) and BPF frames, so the frame pointer unwinder must
2719 	 * be enabled to walk kernel frames and reach BPF frames in the stack
2720 	 * trace. The ARM64 kernel is always compiled with CONFIG_FRAME_POINTER=y.
2721 	 */
2722 	return true;
2723 }
2724 
2725 bool bpf_jit_supports_arena(void)
2726 {
2727 	return true;
2728 }
2729 
2730 bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
2731 {
2732 	if (!in_arena)
2733 		return true;
2734 	switch (insn->code) {
2735 	case BPF_STX | BPF_ATOMIC | BPF_W:
2736 	case BPF_STX | BPF_ATOMIC | BPF_DW:
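		/*
		 * Atomics to the arena are only JITed with the
		 * single-instruction LSE forms; the LL/SC fallback is not
		 * used for PROBE_ATOMIC accesses.
		 */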
2737 		if (!cpus_have_cap(ARM64_HAS_LSE_ATOMICS))
2738 			return false;
2739 	}
2740 	return true;
2741 }
2742 
2743 bool bpf_jit_supports_percpu_insn(void)
2744 {
2745 	return true;
2746 }
2747 
2748 bool bpf_jit_inlines_helper_call(s32 imm)
2749 {
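	/*
	 * These helpers are expanded by build_insn() into short inline
	 * instruction sequences instead of being emitted as calls.
	 */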
2750 	switch (imm) {
2751 	case BPF_FUNC_get_smp_processor_id:
2752 	case BPF_FUNC_get_current_task:
2753 	case BPF_FUNC_get_current_task_btf:
2754 		return true;
2755 	default:
2756 		return false;
2757 	}
2758 }
2759 
2760 void bpf_jit_free(struct bpf_prog *prog)
2761 {
2762 	if (prog->jited) {
2763 		struct arm64_jit_data *jit_data = prog->aux->jit_data;
2764 		struct bpf_binary_header *hdr;
2765 
2766 		/*
2767 		 * If we fail the final pass of JIT (from jit_subprogs),
2768 		 * the program may not be finalized yet. Call finalize here
2769 		 * before freeing it.
2770 		 */
2771 		if (jit_data) {
2772 			bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
2773 					   sizeof(jit_data->header->size));
2774 			kfree(jit_data);
2775 		}
2776 		prog->bpf_func -= cfi_get_offset();
2777 		hdr = bpf_jit_binary_pack_hdr(prog);
2778 		bpf_jit_binary_pack_free(hdr, NULL);
2779 		WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
2780 	}
2781 
2782 	bpf_prog_unlock_free(prog);
2783 }
2784