/*
 * Copyright (C) 2016-2018 Netronome Systems, Inc.
 *
 * This software is dual licensed under the GNU General License Version 2,
 * June 1991 as shown in the file COPYING in the top-level directory of this
 * source tree or the BSD 2-Clause License provided below. You have the
 * option to license this software under the complete terms of either license.
 *
 * The BSD 2-Clause License:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      1. Redistributions of source code must retain the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer.
 *
 *      2. Redistributions in binary form must reproduce the above
 *         copyright notice, this list of conditions and the following
 *         disclaimer in the documentation and/or other materials
 *         provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt)	"NFP net bpf: " fmt

#include <linux/bug.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/pkt_cls.h>
#include <linux/reciprocal_div.h>
#include <linux/unistd.h>

#include "main.h"
#include "../nfp_asm.h"
#include "../nfp_net_ctrl.h"

/* --- NFP prog --- */
/* The for-each macros over "multiple" entries provide pos and next<n>
 * pointers.  It's safe to modify the next pointers (but not pos).
 */
#define nfp_for_each_insn_walk2(nfp_prog, pos, next)			\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos))

#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2)		\
	for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \
	     next = list_next_entry(pos, l),				\
	     next2 = list_next_entry(next, l);				\
	     &(nfp_prog)->insns != &pos->l &&				\
	     &(nfp_prog)->insns != &next->l &&				\
	     &(nfp_prog)->insns != &next2->l;				\
	     pos = nfp_meta_next(pos),					\
	     next = nfp_meta_next(pos),					\
	     next2 = nfp_meta_next(next))

static bool
nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	return meta->l.prev != &nfp_prog->insns;
}

static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn)
{
	if (nfp_prog->__prog_alloc_len / sizeof(u64) == nfp_prog->prog_len) {
		pr_warn("instruction limit reached (%u NFP instructions)\n",
			nfp_prog->prog_len);
		nfp_prog->error = -ENOSPC;
		return;
	}

	nfp_prog->prog[nfp_prog->prog_len] = insn;
	nfp_prog->prog_len++;
}

static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog)
{
	return nfp_prog->prog_len;
}

static bool
nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off)
{
	/* If there is a recorded error we may have dropped instructions;
	 * that doesn't have to be due to translator bug, and the translation
	 * will fail anyway, so just return OK.
	 */
	if (nfp_prog->error)
		return true;
	return !WARN_ON_ONCE(nfp_prog_current_offset(nfp_prog) != off);
}

/* --- Emitters --- */
static void
__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op,
	   u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx,
	   bool indir)
{
	u64 insn;

	insn = FIELD_PREP(OP_CMD_A_SRC, areg) |
		FIELD_PREP(OP_CMD_CTX, ctx) |
		FIELD_PREP(OP_CMD_B_SRC, breg) |
		FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) |
		FIELD_PREP(OP_CMD_XFER, xfer) |
		FIELD_PREP(OP_CMD_CNT, size) |
		FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) |
		FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) |
		FIELD_PREP(OP_CMD_INDIR, indir) |
		FIELD_PREP(OP_CMD_MODE, mode);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	     swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(reg_none(), lreg, rreg, &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}
	if (reg.swap) {
		pr_err("cmd can't swap arguments\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	if (reg.dst_lmextn || reg.src_lmextn) {
		pr_err("cmd can't use LMextn\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx,
		   indir);
}

static void
emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	 swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false);
}

static void
emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
	       swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx)
{
	emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true);
}

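/* Branch targets are encoded as a low-address field plus a single high bit;
 * addr_hi below is set when the target lies outside the OP_BR_ADDR_LO window.
 */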
static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
	  enum br_ctx_signal_state css, u16 addr, u8 defer)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BASE |
		FIELD_PREP(OP_BR_MASK, mask) |
		FIELD_PREP(OP_BR_EV_PIP, ev_pip) |
		FIELD_PREP(OP_BR_CSS, css) |
		FIELD_PREP(OP_BR_DEFBR, defer) |
		FIELD_PREP(OP_BR_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_ADDR_HI, addr_hi);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_br_relo(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer,
	     enum nfp_relo_type relo)
{
	if (mask == BR_UNC && defer > 2) {
		pr_err("BUG: branch defer out of bounds %d\n", defer);
		nfp_prog->error = -EFAULT;
		return;
	}

	__emit_br(nfp_prog, mask,
		  mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND,
		  BR_CSS_NONE, addr, defer);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer)
{
	emit_br_relo(nfp_prog, mask, addr, defer, RELO_BR_REL);
}

static void
__emit_br_bit(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 addr, u8 defer,
	      bool set, bool src_lmextn)
{
	u16 addr_lo, addr_hi;
	u64 insn;

	addr_lo = addr & (OP_BR_BIT_ADDR_LO >> __bf_shf(OP_BR_BIT_ADDR_LO));
	addr_hi = addr != addr_lo;

	insn = OP_BR_BIT_BASE |
		FIELD_PREP(OP_BR_BIT_A_SRC, areg) |
		FIELD_PREP(OP_BR_BIT_B_SRC, breg) |
		FIELD_PREP(OP_BR_BIT_BV, set) |
		FIELD_PREP(OP_BR_BIT_DEFBR, defer) |
		FIELD_PREP(OP_BR_BIT_ADDR_LO, addr_lo) |
		FIELD_PREP(OP_BR_BIT_ADDR_HI, addr_hi) |
		FIELD_PREP(OP_BR_BIT_SRC_LMEXTN, src_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_br_bit_relo(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr,
		 u8 defer, bool set, enum nfp_relo_type relo)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* NOTE: The bit to test is specified as a rotation amount, such that
	 * the bit to test ends up in the MSB of the result when doing a
	 * rotate right.  For bit X we need a right rotate of X + 1.
	 */
	bit += 1;

	err = swreg_to_restricted(reg_none(), src, reg_imm(bit), &reg, false);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_br_bit(nfp_prog, reg.areg, reg.breg, addr, defer, set,
		      reg.src_lmextn);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

static void
emit_br_bset(struct nfp_prog *nfp_prog, swreg src, u8 bit, u16 addr, u8 defer)
{
	emit_br_bit_relo(nfp_prog, src, bit, addr, defer, true, RELO_BR_REL);
}

static void
__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi,
	     enum immed_width width, bool invert,
	     enum immed_shift shift, bool wr_both,
	     bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_IMMED_BASE |
		FIELD_PREP(OP_IMMED_A_SRC, areg) |
		FIELD_PREP(OP_IMMED_B_SRC, breg) |
		FIELD_PREP(OP_IMMED_IMM, imm_hi) |
		FIELD_PREP(OP_IMMED_WIDTH, width) |
		FIELD_PREP(OP_IMMED_INV, invert) |
		FIELD_PREP(OP_IMMED_SHIFT, shift) |
		FIELD_PREP(OP_IMMED_WR_AB, wr_both) |
		FIELD_PREP(OP_IMMED_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_IMMED_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm,
	   enum immed_width width, bool invert, enum immed_shift shift)
{
	struct nfp_insn_ur_regs reg;
	int err;

	if (swreg_type(dst) == NN_REG_IMM) {
		nfp_prog->error = -EFAULT;
		return;
	}

	err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	/* Use reg.dst when destination is No-Dest. */
	__emit_immed(nfp_prog,
		     swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg,
		     reg.breg, imm >> 8, width, invert, shift,
		     reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   enum shf_sc sc, u8 shift,
	   u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	if (!FIELD_FIT(OP_SHF_SHIFT, shift)) {
		nfp_prog->error = -EFAULT;
		return;
	}

	/* The NFP shift instruction has a quirk: for a left shift, an amount
	 * of 1 to 31 is encoded as 32 minus the amount to shift.
	 *
	 * No need to do this for indirect shifts, which use a shift amount of
	 * 0.  Even after the subtraction, an amount of 0 would become 32 and
	 * eventually be encoded the same as 0 (only the low 5 bits are
	 * encoded), but 32 would fail the later FIELD_PREP check against the
	 * shift mask (0x1f) since it is out of range.
	 */
	if (sc == SHF_SC_L_SHF && shift)
		shift = 32 - shift;

	insn = OP_SHF_BASE |
		FIELD_PREP(OP_SHF_A_SRC, areg) |
		FIELD_PREP(OP_SHF_SC, sc) |
		FIELD_PREP(OP_SHF_B_SRC, breg) |
		FIELD_PREP(OP_SHF_I8, i8) |
		FIELD_PREP(OP_SHF_SW, sw) |
		FIELD_PREP(OP_SHF_DST, dst) |
		FIELD_PREP(OP_SHF_SHIFT, shift) |
		FIELD_PREP(OP_SHF_OP, op) |
		FIELD_PREP(OP_SHF_DST_AB, dst_ab) |
		FIELD_PREP(OP_SHF_WR_AB, wr_both) |
		FIELD_PREP(OP_SHF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_SHF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_shf(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc, u8 shift)
{
	struct nfp_insn_re_regs reg;
	int err;

	err = swreg_to_restricted(dst, lreg, rreg, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift,
		   reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_shf_indir(struct nfp_prog *nfp_prog, swreg dst,
	       swreg lreg, enum shf_op op, swreg rreg, enum shf_sc sc)
{
	if (sc == SHF_SC_R_ROT) {
		pr_err("indirect shift is not allowed on rotation\n");
		nfp_prog->error = -EFAULT;
		return;
	}

	emit_shf(nfp_prog, dst, lreg, op, rreg, sc, 0);
}

static void
__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab,
	   u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both,
	   bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_ALU_BASE |
		FIELD_PREP(OP_ALU_A_SRC, areg) |
		FIELD_PREP(OP_ALU_B_SRC, breg) |
		FIELD_PREP(OP_ALU_DST, dst) |
		FIELD_PREP(OP_ALU_SW, swap) |
		FIELD_PREP(OP_ALU_OP, op) |
		FIELD_PREP(OP_ALU_DST_AB, dst_ab) |
		FIELD_PREP(OP_ALU_WR_AB, wr_both) |
		FIELD_PREP(OP_ALU_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_ALU_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_alu(struct nfp_prog *nfp_prog, swreg dst,
	 swreg lreg, enum alu_op op, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	int err;

	err = swreg_to_unrestricted(dst, lreg, rreg, &reg);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_alu(nfp_prog, reg.dst, reg.dst_ab,
		   reg.areg, op, reg.breg, reg.swap, reg.wr_both,
		   reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_mul(struct nfp_prog *nfp_prog, enum alu_dst_ab dst_ab, u16 areg,
	   enum mul_type type, enum mul_step step, u16 breg, bool swap,
	   bool wr_both, bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_MUL_BASE |
		FIELD_PREP(OP_MUL_A_SRC, areg) |
		FIELD_PREP(OP_MUL_B_SRC, breg) |
		FIELD_PREP(OP_MUL_STEP, step) |
		FIELD_PREP(OP_MUL_DST_AB, dst_ab) |
		FIELD_PREP(OP_MUL_SW, swap) |
		FIELD_PREP(OP_MUL_TYPE, type) |
		FIELD_PREP(OP_MUL_WR_AB, wr_both) |
		FIELD_PREP(OP_MUL_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_MUL_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_mul(struct nfp_prog *nfp_prog, swreg lreg, enum mul_type type,
	 enum mul_step step, swreg rreg)
{
	struct nfp_insn_ur_regs reg;
	u16 areg;
	int err;

	if (type == MUL_TYPE_START && step != MUL_STEP_NONE) {
		nfp_prog->error = -EINVAL;
		return;
	}

	if (step == MUL_LAST || step == MUL_LAST_2) {
		/* When the type is a step and the step number is LAST or
		 * LAST_2, the left source is used as the destination.
		 */
		err = swreg_to_unrestricted(lreg, reg_none(), rreg, &reg);
		areg = reg.dst;
	} else {
		err = swreg_to_unrestricted(reg_none(), lreg, rreg, &reg);
		areg = reg.areg;
	}

	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_mul(nfp_prog, reg.dst_ab, areg, type, step, reg.breg, reg.swap,
		   reg.wr_both, reg.dst_lmextn, reg.src_lmextn);
}

static void
__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc,
		u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8,
		bool zero, bool swap, bool wr_both,
		bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LDF_BASE |
		FIELD_PREP(OP_LDF_A_SRC, areg) |
		FIELD_PREP(OP_LDF_SC, sc) |
		FIELD_PREP(OP_LDF_B_SRC, breg) |
		FIELD_PREP(OP_LDF_I8, imm8) |
		FIELD_PREP(OP_LDF_SW, swap) |
		FIELD_PREP(OP_LDF_ZF, zero) |
		FIELD_PREP(OP_LDF_BMASK, bmask) |
		FIELD_PREP(OP_LDF_SHF, shift) |
		FIELD_PREP(OP_LDF_WR_AB, wr_both) |
		FIELD_PREP(OP_LDF_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LDF_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void
emit_ld_field_any(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
		  enum shf_sc sc, u8 shift, bool zero)
{
	struct nfp_insn_re_regs reg;
	int err;

	/* Note: ld_field is special as it uses one of the src regs as dst */
	err = swreg_to_restricted(dst, dst, src, &reg, true);
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift,
			reg.i8, zero, reg.swap, reg.wr_both,
			reg.dst_lmextn, reg.src_lmextn);
}

static void
emit_ld_field(struct nfp_prog *nfp_prog, swreg dst, u8 bmask, swreg src,
	      enum shf_sc sc, u8 shift)
{
	emit_ld_field_any(nfp_prog, dst, bmask, src, sc, shift, false);
}

static void
__emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr,
	    bool dst_lmextn, bool src_lmextn)
{
	u64 insn;

	insn = OP_LCSR_BASE |
		FIELD_PREP(OP_LCSR_A_SRC, areg) |
		FIELD_PREP(OP_LCSR_B_SRC, breg) |
		FIELD_PREP(OP_LCSR_WRITE, wr) |
		FIELD_PREP(OP_LCSR_ADDR, addr / 4) |
		FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) |
		FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn);

	nfp_prog_push(nfp_prog, insn);
}

static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr)
{
	struct nfp_insn_ur_regs reg;
	int err;

	/* This instruction takes immeds instead of reg_none() for the ignored
	 * operand, but we can't encode 2 immeds in one instr with our normal
	 * swreg infra so if param is an immed, we encode as reg_none() and
	 * copy the immed to both operands.
	 */
	if (swreg_type(src) == NN_REG_IMM) {
		err = swreg_to_unrestricted(reg_none(), src, reg_none(), &reg);
		reg.breg = reg.areg;
	} else {
		err = swreg_to_unrestricted(reg_none(), src, reg_imm(0), &reg);
	}
	if (err) {
		nfp_prog->error = err;
		return;
	}

	__emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr,
		    false, reg.src_lmextn);
}

/* CSR value is read in following immed[gpr, 0] */
static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr)
{
	__emit_lcsr(nfp_prog, 0, 0, false, addr, false, false);
}

static void emit_nop(struct nfp_prog *nfp_prog)
{
	__emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0);
}

/* --- Wrappers --- */
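/* pack_immed() - try to express a 32-bit immediate as a 16-bit value shifted
 * left by 0, 1 or 2 bytes, so it fits a single immed instruction.
 */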
static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift)
{
	if (!(imm & 0xffff0000)) {
		*val = imm;
		*shift = IMMED_SHIFT_0B;
	} else if (!(imm & 0xff0000ff)) {
		*val = imm >> 8;
		*shift = IMMED_SHIFT_1B;
	} else if (!(imm & 0x0000ffff)) {
		*val = imm >> 16;
		*shift = IMMED_SHIFT_2B;
	} else {
		return false;
	}

	return true;
}

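/* Load a 32-bit immediate into @dst: a single immed instruction when the
 * value (or its bitwise complement) packs, otherwise two instructions
 * (low 16 bits, then the high word).
 */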
static void wrp_immed(struct nfp_prog *nfp_prog, swreg dst, u32 imm)
{
	enum immed_shift shift;
	u16 val;

	if (pack_immed(imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift);
	} else if (pack_immed(~imm, &val, &shift)) {
		emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift);
	} else {
		emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL,
			   false, IMMED_SHIFT_0B);
		emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD,
			   false, IMMED_SHIFT_2B);
	}
}

static void
wrp_immed_relo(struct nfp_prog *nfp_prog, swreg dst, u32 imm,
	       enum nfp_relo_type relo)
{
	if (imm > 0xffff) {
		pr_err("relocation of a large immediate!\n");
		nfp_prog->error = -EFAULT;
		return;
	}
	emit_immed(nfp_prog, dst, imm, IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);

	nfp_prog->prog[nfp_prog->prog_len - 1] |=
		FIELD_PREP(OP_RELO_TYPE, relo);
}

/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted)
 * If @imm is small enough, encode it directly in the operand and return it,
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(UR_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

/* re_load_imm_any() - encode immediate or use tmp register (restricted)
 * If @imm is small enough, encode it directly in the operand and return it,
 * otherwise load @imm into a spare register and return its encoding.
 */
static swreg re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, swreg tmp_reg)
{
	if (FIELD_FIT(RE_REG_IMM_MAX, imm))
		return reg_imm(imm);

	wrp_immed(nfp_prog, tmp_reg, imm);
	return tmp_reg;
}

static void wrp_nops(struct nfp_prog *nfp_prog, unsigned int count)
{
	while (count--)
		emit_nop(nfp_prog);
}

static void wrp_mov(struct nfp_prog *nfp_prog, swreg dst, swreg src)
{
	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, src);
}

static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
{
	wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}

/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
 * result to @dst from low end.
 */
static void
wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
		u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE;
	u8 mask = (1 << field_len) - 1;

	emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
}

/* wrp_reg_or_subpart() - load @field_len bytes from the low end of @src and
 * OR the result into @dst at @offset; the other bits of @dst are unchanged.
 */
static void
wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src,
		   u8 field_len, u8 offset)
{
	enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE;
	u8 mask = ((1 << field_len) - 1) << offset;

	emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8);
}

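/* 40-bit addresses live in a GPR pair (low 32 bits in @src_gpr, upper bits in
 * @src_gpr + 1); add @offset to the pair with carry propagation.
 */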
static void
addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
	      swreg *rega, swreg *regb)
{
	if (offset == reg_imm(0)) {
		*rega = reg_a(src_gpr);
		*regb = reg_b(src_gpr + 1);
		return;
	}

	emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(src_gpr), ALU_OP_ADD, offset);
	emit_alu(nfp_prog, imm_b(nfp_prog), reg_b(src_gpr + 1), ALU_OP_ADD_C,
		 reg_imm(0));
	*rega = imm_a(nfp_prog);
	*regb = imm_b(nfp_prog);
}

/* NFP has a Command Push Pull bus which supports bulk memory operations. */
static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	bool descending_seq = meta->ldst_gather_len < 0;
	s16 len = abs(meta->ldst_gather_len);
	swreg src_base, off;
	bool src_40bit_addr;
	unsigned int i;
	u8 xfer_num;

	off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
	src_40bit_addr = meta->ptr.type == PTR_TO_MAP_VALUE;
	src_base = reg_a(meta->insn.src_reg * 2);
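	/* Number of 32-bit transfer registers needed to cover @len bytes. */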
	xfer_num = round_up(len, 4) / 4;

	if (src_40bit_addr)
		addr40_offset(nfp_prog, meta->insn.src_reg * 2, off, &src_base,
			      &off);

	/* Setup PREV_ALU fields to override memory read length. */
	if (len > 32)
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));

	/* Memory read from source addr into transfer-in registers. */
	emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP,
		     src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0,
		     src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32);

	/* Move from transfer-in to transfer-out. */
	for (i = 0; i < xfer_num; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));

	off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));

	if (len <= 8) {
		/* Use single direct_ref write8. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32 && IS_ALIGNED(len, 4)) {
		/* Use single direct_ref write32. */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
			 CMD_CTX_SWAP);
	} else if (len <= 32) {
		/* Use single indirect_ref write8. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       len - 1, CMD_CTX_SWAP);
	} else if (IS_ALIGNED(len, 4)) {
		/* Use single indirect_ref write32. */
		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 1, CMD_CTX_SWAP);
	} else if (len <= 40) {
		/* Use one direct_ref write32 to write the first 32 bytes, then
		 * another direct_ref write8 to write the remaining bytes.
		 */
		emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			 reg_a(meta->paired_st->dst_reg * 2), off, 7,
			 CMD_CTX_SWAP);

		off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
				      imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
			 reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
			 CMD_CTX_SWAP);
	} else {
		/* Use one indirect_ref write32 to write a 4-byte aligned
		 * length, then another direct_ref write8 to write the
		 * remaining bytes.
		 */
		u8 new_off;

		wrp_immed(nfp_prog, reg_none(),
			  CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
		emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
			       reg_a(meta->paired_st->dst_reg * 2), off,
			       xfer_num - 2, CMD_CTX_SWAP);
		new_off = meta->paired_st->off + (xfer_num - 1) * 4;
		off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
		emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
			 xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
			 (len & 0x3) - 1, CMD_CTX_SWAP);
	}

	/* TODO: The following extra load is to make sure the data flow is
	 * identical before and after we do the memory copy optimization.
	 *
	 * The load destination register is not guaranteed to be dead, so we
	 * need to make sure it is loaded with the same value as before this
	 * transformation.
	 *
	 * These extra loads could be removed once we have accurate register
	 * usage information.
	 */
	if (descending_seq)
		xfer_num = 0;
	else if (BPF_SIZE(meta->insn.code) != BPF_DW)
		xfer_num = xfer_num - 1;
	else
		xfer_num = xfer_num - 2;

	switch (BPF_SIZE(meta->insn.code)) {
	case BPF_B:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 1,
				IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1);
		break;
	case BPF_H:
		wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
				reg_xfer(xfer_num), 2, (len & 3) ^ 2);
		break;
	case BPF_W:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(0));
		break;
	case BPF_DW:
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
			reg_xfer(xfer_num));
		wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
			reg_xfer(xfer_num + 1));
		break;
	}

	if (BPF_SIZE(meta->insn.code) != BPF_DW)
		wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
	unsigned int i;
	u16 shift, sz;

	/* We load the value from the address indicated in @offset and then
	 * shift out the data we don't need.  Note: this is big endian!
	 */
	sz = max(size, 4);
	shift = size < 4 ? 4 - size : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0,
		 pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP);

	i = 0;
	if (shift)
		emit_shf(nfp_prog, reg_both(dst_gpr), reg_none(), SHF_OP_NONE,
			 reg_xfer(0), SHF_SC_R_SHF, shift * 8);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}

static int
data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr,
		   swreg lreg, swreg rreg, int size, enum cmd_mode mode)
{
	unsigned int i;
	u8 mask, sz;

	/* We load the value from the address indicated in rreg + lreg and then
	 * mask out the data we don't need.  Note: this is little endian!
	 */
	sz = max(size, 4);
	mask = size < 4 ? GENMASK(size - 1, 0) : 0;

	emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0,
		 lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP);

	i = 0;
	if (mask)
		emit_ld_field_any(nfp_prog, reg_both(dst_gpr), mask,
				  reg_xfer(0), SHF_SC_NONE, 0, true);
	else
		for (; i * 4 < size; i++)
			wrp_mov(nfp_prog, reg_both(dst_gpr + i), reg_xfer(i));

	if (i < 2)
		wrp_immed(nfp_prog, reg_both(dst_gpr + 1), 0);

	return 0;
}

static int
data_ld_host_order_addr32(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	return data_ld_host_order(nfp_prog, dst_gpr, reg_a(src_gpr), offset,
				  size, CMD_MODE_32b);
}

static int
data_ld_host_order_addr40(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset,
			  u8 dst_gpr, u8 size)
{
	swreg rega, regb;

	addr40_offset(nfp_prog, src_gpr, offset, &rega, &regb);

	return data_ld_host_order(nfp_prog, dst_gpr, rega, regb,
				  size, CMD_MODE_40b_BA);
}

static int
construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, u16 src, u8 size)
{
	swreg tmp_reg;

	/* Calculate the true offset (src_reg + imm) */
	tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	emit_alu(nfp_prog, imm_both(nfp_prog), reg_a(src), ALU_OP_ADD, tmp_reg);

	/* Check packet length (size guaranteed to fit b/c it's u8) */
	emit_alu(nfp_prog, imm_a(nfp_prog),
		 imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size));
	emit_alu(nfp_prog, reg_none(),
		 plen_reg(nfp_prog), ALU_OP_SUB, imm_a(nfp_prog));
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	return data_ld(nfp_prog, imm_b(nfp_prog), 0, size);
}

static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size)
{
	swreg tmp_reg;

	/* Check packet length */
	tmp_reg = ur_load_imm_any(nfp_prog, offset + size, imm_a(nfp_prog));
	emit_alu(nfp_prog, reg_none(), plen_reg(nfp_prog), ALU_OP_SUB, tmp_reg);
	emit_br_relo(nfp_prog, BR_BLO, BR_OFF_RELO, 0, RELO_BR_GO_ABORT);

	/* Load data */
	tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog));
	return data_ld(nfp_prog, tmp_reg, 0, size);
}

static int
data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		    u8 src_gpr, u8 size)
{
	unsigned int i;

	for (i = 0; i * 4 < size; i++)
		wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i));

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

static int
data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset,
		   u64 imm, u8 size)
{
	wrp_immed(nfp_prog, reg_xfer(0), imm);
	if (size == 8)
		wrp_immed(nfp_prog, reg_xfer(1), imm >> 32);

	emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
		 reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP);

	return 0;
}

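/* Callback used by mem_op_stack() to emit the load/store for one slice
 * (at most 4 bytes, never crossing a 4-byte boundary) of a stack access.
 */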
typedef int
(*lmem_step)(struct nfp_prog *nfp_prog, u8 gpr, u8 gpr_byte, s32 off,
	     unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	     bool needs_inc);

static int
wrp_lmem_load(struct nfp_prog *nfp_prog, u8 dst, u8 dst_byte, s32 off,
	      unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	      bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, src_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(dst_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog, reg_both(dst),
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	src_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large, do
	 * RMW.  Because we RMW twice we waste 2 cycles on unaligned 8 byte
	 * writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* If it's not the first part of the load and we start a new
		 * GPR that means we are loading a second part of the LMEM word
		 * into a new GPR.  IOW we've already looked at that LMEM word
		 * and therefore it has been loaded into imm_a().
		 */
		if (first || !new_gpr)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field_any(nfp_prog, reg_both(dst), mask, reg, sc, shf, new_gpr);

	if (should_inc)
		wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));

	return 0;
}

static int
wrp_lmem_store(struct nfp_prog *nfp_prog, u8 src, u8 src_byte, s32 off,
	       unsigned int size, bool first, bool new_gpr, bool last, bool lm3,
	       bool needs_inc)
{
	bool should_inc = needs_inc && new_gpr && !last;
	u32 idx, dst_byte;
	enum shf_sc sc;
	swreg reg;
	int shf;
	u8 mask;

	if (WARN_ON_ONCE(src_byte + size > 4 || off % 4 + size > 4))
		return -EOPNOTSUPP;

	idx = off / 4;

	/* Move the entire word */
	if (size == 4) {
		wrp_mov(nfp_prog,
			should_inc ? reg_lm_inc(3) : reg_lm(lm3 ? 3 : 0, idx),
			reg_b(src));
		return 0;
	}

	if (WARN_ON_ONCE(lm3 && idx > RE_REG_LM_IDX_MAX))
		return -EOPNOTSUPP;

	dst_byte = off % 4;

	mask = (1 << size) - 1;
	mask <<= dst_byte;

	if (WARN_ON_ONCE(mask > 0xf))
		return -EOPNOTSUPP;

	shf = abs(src_byte - dst_byte) * 8;
	if (src_byte == dst_byte) {
		sc = SHF_SC_NONE;
	} else if (src_byte < dst_byte) {
		shf = 32 - shf;
		sc = SHF_SC_L_SHF;
	} else {
		sc = SHF_SC_R_SHF;
	}

	/* ld_field can address fewer indexes; if the offset is too large, do
	 * RMW.  Because we RMW twice we waste 2 cycles on unaligned 8 byte
	 * writes.
	 */
	if (idx <= RE_REG_LM_IDX_MAX) {
		reg = reg_lm(lm3 ? 3 : 0, idx);
	} else {
		reg = imm_a(nfp_prog);
		/* Only first and last LMEM locations are going to need RMW,
		 * the middle location will be overwritten fully.
		 */
		if (first || last)
			wrp_mov(nfp_prog, reg, reg_lm(0, idx));
	}

	emit_ld_field(nfp_prog, reg, mask, reg_b(src), sc, shf);

	if (new_gpr || last) {
		if (idx > RE_REG_LM_IDX_MAX)
			wrp_mov(nfp_prog, reg_lm(0, idx), reg);
		if (should_inc)
			wrp_mov(nfp_prog, reg_none(), reg_lm_inc(3));
	}

	return 0;
}

static int
mem_op_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     unsigned int size, unsigned int ptr_off, u8 gpr, u8 ptr_gpr,
	     bool clr_gpr, lmem_step step)
{
	s32 off = nfp_prog->stack_depth + meta->insn.off + ptr_off;
	bool first = true, last;
	bool needs_inc = false;
	swreg stack_off_reg;
	u8 prev_gpr = 255;
	u32 gpr_byte = 0;
	bool lm3 = true;
	int ret;

	if (meta->ptr_not_const) {
		/* Use of the last encountered ptr_off is OK; they all have
		 * the same alignment.  We depend on the low bits of the value
		 * being discarded when written to the LMaddr register.
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, meta->insn.off,
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 reg_a(ptr_gpr), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	} else if (off + size <= 64) {
		/* We can reach bottom 64B with LMaddr0 */
		lm3 = false;
	} else if (round_down(off, 32) == round_down(off + size - 1, 32)) {
		/* We have to set up a new pointer.  If we know the offset and
		 * the entire access falls into a single 32 byte aligned
		 * window, we won't have to increment the LM pointer.
		 * The 32 byte alignment is important because the offset is
		 * ORed in, not added, when doing *l$indexN[off].
		 */
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 32),
						stack_imm(nfp_prog));
		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		off %= 32;
	} else {
		stack_off_reg = ur_load_imm_any(nfp_prog, round_down(off, 4),
						stack_imm(nfp_prog));

		emit_alu(nfp_prog, imm_b(nfp_prog),
			 stack_reg(nfp_prog), ALU_OP_ADD, stack_off_reg);

		needs_inc = true;
	}
	if (lm3) {
		emit_csr_wr(nfp_prog, imm_b(nfp_prog), NFP_CSR_ACT_LM_ADDR3);
		/* For size < 4 one slot will be filled by zeroing of upper. */
		wrp_nops(nfp_prog, clr_gpr && size < 8 ? 2 : 3);
	}

	if (clr_gpr && size < 8)
		wrp_immed(nfp_prog, reg_both(gpr + 1), 0);

	while (size) {
		u32 slice_end;
		u8 slice_size;

		slice_size = min(size, 4 - gpr_byte);
		slice_end = min(off + slice_size, round_up(off + 1, 4));
		slice_size = slice_end - off;

		last = slice_size == size;

		if (needs_inc)
			off %= 4;

		ret = step(nfp_prog, gpr, gpr_byte, off, slice_size,
			   first, gpr != prev_gpr, last, lm3, needs_inc);
		if (ret)
			return ret;

		prev_gpr = gpr;
		first = false;

		gpr_byte += slice_size;
		if (gpr_byte >= 4) {
			gpr_byte -= 4;
			gpr++;
		}

		size -= slice_size;
		off += slice_size;
	}

	return 0;
}

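/* Emit a 32-bit ALU op with an immediate operand, short-circuiting the
 * trivial AND/OR/XOR cases (all-zeros or all-ones masks) where possible.
 */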
static void
wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm)
{
	swreg tmp_reg;

	if (alu_op == ALU_OP_AND) {
		if (!imm)
			wrp_immed(nfp_prog, reg_both(dst), 0);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_OR) {
		if (!~imm)
			wrp_immed(nfp_prog, reg_both(dst), ~0U);
		if (!imm || !~imm)
			return;
	}
	if (alu_op == ALU_OP_XOR) {
		if (!~imm)
			emit_alu(nfp_prog, reg_both(dst), reg_none(),
				 ALU_OP_NOT, reg_b(dst));
		if (!imm || !~imm)
			return;
	}

	tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg);
}

static int
wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op, bool skip)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */

	if (skip) {
		meta->skip = true;
		return 0;
	}

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U);
	wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32);

	return 0;
}

static int
wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	emit_alu(nfp_prog, reg_both(dst + 1),
		 reg_a(dst + 1), alu_op, reg_b(src + 1));

	return 0;
}

static int
wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm);
	wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);

	return 0;
}

static int
wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	      enum alu_op alu_op)
{
	u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2;

	emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src));
	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);

	return 0;
}

static void
wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src,
		 enum br_mask br_mask, u16 off)
{
	emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src));
	emit_br(nfp_prog, br_mask, off, 0);
}

static int
wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	     enum alu_op alu_op, enum br_mask br_mask)
{
	const struct bpf_insn *insn = &meta->insn;

	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op,
			 insn->src_reg * 2, br_mask, insn->off);
	wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op,
			 insn->src_reg * 2 + 1, br_mask, insn->off);

	return 0;
}

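/* Map BPF conditional jump codes to NFP branch masks; @swap means the
 * operands must be swapped to express the condition with that mask.
 */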
static const struct jmp_code_map {
	enum br_mask br_mask;
	bool swap;
} jmp_code_map[] = {
	[BPF_JGT >> 4]	= { BR_BLO, true },
	[BPF_JGE >> 4]	= { BR_BHS, false },
	[BPF_JLT >> 4]	= { BR_BLO, false },
	[BPF_JLE >> 4]	= { BR_BHS, true },
	[BPF_JSGT >> 4]	= { BR_BLT, true },
	[BPF_JSGE >> 4]	= { BR_BGE, false },
	[BPF_JSLT >> 4]	= { BR_BLT, false },
	[BPF_JSLE >> 4]	= { BR_BGE, true },
};

static const struct jmp_code_map *nfp_jmp_code_get(struct nfp_insn_meta *meta)
{
	unsigned int op;

	op = BPF_OP(meta->insn.code) >> 4;
	/* br_mask of 0 is BR_BEQ which we don't use in jump code table */
	if (WARN_ONCE(op >= ARRAY_SIZE(jmp_code_map) ||
		      !jmp_code_map[op].br_mask,
		      "no code found for jump instruction"))
		return NULL;

	return &jmp_code_map[op];
}

static int cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	u64 imm = insn->imm; /* sign extend */
	const struct jmp_code_map *code;
	enum alu_op alu_op, carry_op;
	u8 reg = insn->dst_reg * 2;
	swreg tmp_reg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	alu_op = meta->jump_neg_op ? ALU_OP_ADD : ALU_OP_SUB;
	carry_op = meta->jump_neg_op ? ALU_OP_ADD_C : ALU_OP_SUB_C;

	tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(), reg_a(reg), alu_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(), tmp_reg, alu_op, reg_a(reg));

	tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
	if (!code->swap)
		emit_alu(nfp_prog, reg_none(),
			 reg_a(reg + 1), carry_op, tmp_reg);
	else
		emit_alu(nfp_prog, reg_none(),
			 tmp_reg, carry_op, reg_a(reg + 1));

	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}

static int cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
{
	const struct bpf_insn *insn = &meta->insn;
	const struct jmp_code_map *code;
	u8 areg, breg;

	code = nfp_jmp_code_get(meta);
	if (!code)
		return -EINVAL;

	areg = insn->dst_reg * 2;
	breg = insn->src_reg * 2;

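	/* Swap the operand registers in place (XOR swap) when the condition
	 * requires them reversed.
	 */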
	if (code->swap) {
		areg ^= breg;
		breg ^= areg;
		areg ^= breg;
	}

	emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg));
	emit_alu(nfp_prog, reg_none(),
		 reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1));
	emit_br(nfp_prog, code->br_mask, insn->off, 0);

	return 0;
}

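/* Byte-swap a 32-bit value into @gpr_out using two masked right rotations
 * (helper for the BPF endianness-conversion instructions).
 */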
static void wrp_end32(struct nfp_prog *nfp_prog, swreg reg_in, u8 gpr_out)
{
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0xf, reg_in,
		      SHF_SC_R_ROT, 8);
	emit_ld_field(nfp_prog, reg_both(gpr_out), 0x5, reg_a(gpr_out),
		      SHF_SC_R_ROT, 16);
}

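/* Full 32x32 multiply on the NFP multiplier: a start step, four 32x32 steps,
 * then MUL_LAST reads back the low 32 bits of the product and (optionally)
 * MUL_LAST_2 the high 32 bits.
 */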
static void
wrp_mul_u32(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg, bool gen_high_half)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_3, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_32x32, MUL_STEP_4, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_32x32, MUL_LAST, reg_none());
	if (gen_high_half)
		emit_mul(nfp_prog, dst_hi, MUL_TYPE_STEP_32x32, MUL_LAST_2,
			 reg_none());
	else
		wrp_immed(nfp_prog, dst_hi, 0);
}

static void
wrp_mul_u16(struct nfp_prog *nfp_prog, swreg dst_hi, swreg dst_lo, swreg lreg,
	    swreg rreg)
{
	emit_mul(nfp_prog, lreg, MUL_TYPE_START, MUL_STEP_NONE, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_1, rreg);
	emit_mul(nfp_prog, lreg, MUL_TYPE_STEP_16x16, MUL_STEP_2, rreg);
	emit_mul(nfp_prog, dst_lo, MUL_TYPE_STEP_16x16, MUL_LAST, reg_none());
}

static int
wrp_mul(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
	bool gen_high_half, bool ropnd_from_reg)
{
	swreg multiplier, multiplicand, dst_hi, dst_lo;
	const struct bpf_insn *insn = &meta->insn;
	u32 lopnd_max, ropnd_max;
	u8 dst_reg;

	dst_reg = insn->dst_reg;
	multiplicand = reg_a(dst_reg * 2);
	dst_hi = reg_both(dst_reg * 2 + 1);
	dst_lo = reg_both(dst_reg * 2);
	lopnd_max = meta->umax_dst;
	if (ropnd_from_reg) {
		multiplier = reg_b(insn->src_reg * 2);
		ropnd_max = meta->umax_src;
	} else {
		u32 imm = insn->imm;

		multiplier = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));
		ropnd_max = imm;
	}
	if (lopnd_max > U16_MAX || ropnd_max > U16_MAX)
		wrp_mul_u32(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier,
			    gen_high_half);
	else
		wrp_mul_u16(nfp_prog, dst_hi, dst_lo, multiplicand, multiplier);

	return 0;
}

static int wrp_div_imm(struct nfp_prog *nfp_prog, u8 dst, u64 imm)
{
	swreg dst_both = reg_both(dst), dst_a = reg_a(dst), dst_b = reg_a(dst);
	struct reciprocal_value_adv rvalue;
	u8 pre_shift, exp;
	swreg magic;

	if (imm > U32_MAX) {
		wrp_immed(nfp_prog, dst_both, 0);
		return 0;
	}

	/* NOTE: because we are using "reciprocal_value_adv" which doesn't
	 * support a divisor > (1u << 31), we need to JIT a separate NFP
	 * sequence to handle that case.  The result then equals the unsigned
	 * comparison "dst >= imm", which can be calculated with the following
	 * NFP sequence:
	 *
	 *  alu[--, dst, -, imm]
	 *  immed[imm, 0]
	 *  alu[dst, imm, +carry, 0]
	 */
	if (imm > 1U << 31) {
		swreg tmp_b = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog));

		emit_alu(nfp_prog, reg_none(), dst_a, ALU_OP_SUB, tmp_b);
		wrp_immed(nfp_prog, imm_a(nfp_prog), 0);
		emit_alu(nfp_prog, dst_both, imm_a(nfp_prog), ALU_OP_ADD_C,
			 reg_imm(0));
		return 0;
	}

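	/* Divide by multiplying with a precomputed reciprocal: a plain shift
	 * when @imm is a power of two, the add/sub fixup sequence when the
	 * magic multiplier is wider than 32 bits, otherwise an optional
	 * pre-shift followed by multiply-high and a final shift.
	 */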
1536 rvalue = reciprocal_value_adv(imm, 32);
1537 exp = rvalue.exp;
1538 if (rvalue.is_wide_m && !(imm & 1)) {
1539 pre_shift = fls(imm & -imm) - 1;
1540 rvalue = reciprocal_value_adv(imm >> pre_shift, 32 - pre_shift);
1541 } else {
1542 pre_shift = 0;
1543 }
1544 magic = ur_load_imm_any(nfp_prog, rvalue.m, imm_b(nfp_prog));
1545 if (imm == 1U << exp) {
1546 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1547 SHF_SC_R_SHF, exp);
1548 } else if (rvalue.is_wide_m) {
1549 wrp_mul_u32(nfp_prog, imm_both(nfp_prog), reg_none(), dst_a,
1550 magic, true);
1551 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_SUB,
1552 imm_b(nfp_prog));
1553 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1554 SHF_SC_R_SHF, 1);
1555 emit_alu(nfp_prog, dst_both, dst_a, ALU_OP_ADD,
1556 imm_b(nfp_prog));
1557 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE, dst_b,
1558 SHF_SC_R_SHF, rvalue.sh - 1);
1559 } else {
1560 if (pre_shift)
1561 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1562 dst_b, SHF_SC_R_SHF, pre_shift);
1563 wrp_mul_u32(nfp_prog, dst_both, reg_none(), dst_a, magic, true);
1564 emit_shf(nfp_prog, dst_both, reg_none(), SHF_OP_NONE,
1565 dst_b, SHF_SC_R_SHF, rvalue.sh);
1566 }
1567
1568 return 0;
1569 }
1570
adjust_head(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1571 static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1572 {
1573 swreg tmp = imm_a(nfp_prog), tmp_len = imm_b(nfp_prog);
1574 struct nfp_bpf_cap_adjust_head *adjust_head;
1575 u32 ret_einval, end;
1576
1577 adjust_head = &nfp_prog->bpf->adjust_head;
1578
1579 /* Optimized version - 5 vs 14 cycles */
1580 if (nfp_prog->adjust_head_location != UINT_MAX) {
1581 if (WARN_ON_ONCE(nfp_prog->adjust_head_location != meta->n))
1582 return -EINVAL;
1583
1584 emit_alu(nfp_prog, pptr_reg(nfp_prog),
1585 reg_a(2 * 2), ALU_OP_ADD, pptr_reg(nfp_prog));
1586 emit_alu(nfp_prog, plen_reg(nfp_prog),
1587 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1588 emit_alu(nfp_prog, pv_len(nfp_prog),
1589 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1590
1591 wrp_immed(nfp_prog, reg_both(0), 0);
1592 wrp_immed(nfp_prog, reg_both(1), 0);
1593
1594 /* TODO: when adjust head is guaranteed to succeed we can
1595 * also eliminate the following if (r0 == 0) branch.
1596 */
1597
1598 return 0;
1599 }
1600
1601 ret_einval = nfp_prog_current_offset(nfp_prog) + 14;
1602 end = ret_einval + 2;
1603
1604 /* We need to use a temp because offset is just a part of the pkt ptr */
1605 emit_alu(nfp_prog, tmp,
1606 reg_a(2 * 2), ALU_OP_ADD_2B, pptr_reg(nfp_prog));
1607
1608 /* Validate result will fit within FW datapath constraints */
1609 emit_alu(nfp_prog, reg_none(),
1610 tmp, ALU_OP_SUB, reg_imm(adjust_head->off_min));
1611 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1612 emit_alu(nfp_prog, reg_none(),
1613 reg_imm(adjust_head->off_max), ALU_OP_SUB, tmp);
1614 emit_br(nfp_prog, BR_BLO, ret_einval, 0);
1615
1616 /* Validate the length is at least ETH_HLEN */
1617 emit_alu(nfp_prog, tmp_len,
1618 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1619 emit_alu(nfp_prog, reg_none(),
1620 tmp_len, ALU_OP_SUB, reg_imm(ETH_HLEN));
1621 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1622
1623 /* Load the ret code */
1624 wrp_immed(nfp_prog, reg_both(0), 0);
1625 wrp_immed(nfp_prog, reg_both(1), 0);
1626
1627 /* Modify the packet metadata */
1628 emit_ld_field(nfp_prog, pptr_reg(nfp_prog), 0x3, tmp, SHF_SC_NONE, 0);
1629
1630 /* Skip over the -EINVAL ret code (defer 2) */
1631 emit_br(nfp_prog, BR_UNC, end, 2);
1632
1633 emit_alu(nfp_prog, plen_reg(nfp_prog),
1634 plen_reg(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1635 emit_alu(nfp_prog, pv_len(nfp_prog),
1636 pv_len(nfp_prog), ALU_OP_SUB, reg_a(2 * 2));
1637
1638 /* return -EINVAL target */
1639 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1640 return -EINVAL;
1641
1642 wrp_immed(nfp_prog, reg_both(0), -22);
1643 wrp_immed(nfp_prog, reg_both(1), ~0);
1644
1645 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1646 return -EINVAL;
1647
1648 return 0;
1649 }
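
/* Rough C model of what the slow path above validates before committing the
 * new packet pointer (illustrative sketch only; off_min/off_max mirror the
 * firmware adjust_head capability fields):
 *
 *	static int adjust_head_model(u32 pkt_off, u32 pkt_len, s32 delta,
 *				     u32 off_min, u32 off_max)
 *	{
 *		u32 new_off = pkt_off + delta;
 *		s32 new_len = pkt_len - delta;
 *
 *		if (new_off < off_min || new_off > off_max)
 *			return -EINVAL;
 *		if (new_len < ETH_HLEN)
 *			return -EINVAL;
 *		// on success pptr, plen and pv_len are adjusted by delta
 *		return 0;
 *	}
 */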
1650
adjust_tail(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1651 static int adjust_tail(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1652 {
1653 u32 ret_einval, end;
1654 swreg plen, delta;
1655
1656 BUILD_BUG_ON(plen_reg(nfp_prog) != reg_b(STATIC_REG_PKT_LEN));
1657
1658 plen = imm_a(nfp_prog);
1659 delta = reg_a(2 * 2);
1660
1661 ret_einval = nfp_prog_current_offset(nfp_prog) + 9;
1662 end = nfp_prog_current_offset(nfp_prog) + 11;
1663
1664 /* Calculate resulting length */
1665 emit_alu(nfp_prog, plen, plen_reg(nfp_prog), ALU_OP_ADD, delta);
1666 	/* delta == 0 is not allowed by the kernel; the add must produce a carry
1667 	 * (i.e. delta is negative) for the length to actually get smaller.
1668 	 */
1669 emit_br(nfp_prog, BR_BCC, ret_einval, 0);
1670
1671 /* if (new_len < 14) then -EINVAL */
1672 emit_alu(nfp_prog, reg_none(), plen, ALU_OP_SUB, reg_imm(ETH_HLEN));
1673 emit_br(nfp_prog, BR_BMI, ret_einval, 0);
1674
1675 emit_alu(nfp_prog, plen_reg(nfp_prog),
1676 plen_reg(nfp_prog), ALU_OP_ADD, delta);
1677 emit_alu(nfp_prog, pv_len(nfp_prog),
1678 pv_len(nfp_prog), ALU_OP_ADD, delta);
1679
1680 emit_br(nfp_prog, BR_UNC, end, 2);
1681 wrp_immed(nfp_prog, reg_both(0), 0);
1682 wrp_immed(nfp_prog, reg_both(1), 0);
1683
1684 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_einval))
1685 return -EINVAL;
1686
1687 wrp_immed(nfp_prog, reg_both(0), -22);
1688 wrp_immed(nfp_prog, reg_both(1), ~0);
1689
1690 if (!nfp_prog_confirm_current_offset(nfp_prog, end))
1691 return -EINVAL;
1692
1693 return 0;
1694 }
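
/* Equivalent model of the two checks above (illustrative sketch).  The carry
 * test works because a valid (negative) delta that shrinks the length always
 * carries out of the 32-bit add:
 *
 *	static int adjust_tail_model(u32 pkt_len, s32 delta)
 *	{
 *		u64 sum = (u64)pkt_len + (u32)delta;
 *
 *		if (!(sum >> 32))		// no carry: length did not shrink
 *			return -EINVAL;
 *		if ((s32)(u32)sum < ETH_HLEN)	// too short for an Ethernet header
 *			return -EINVAL;
 *		return 0;			// plen and pv_len adjusted by delta
 *	}
 */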
1695
1696 static int
map_call_stack_common(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1697 map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1698 {
1699 bool load_lm_ptr;
1700 u32 ret_tgt;
1701 s64 lm_off;
1702
1703 /* We only have to reload LM0 if the key is not at start of stack */
1704 lm_off = nfp_prog->stack_depth;
1705 lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off;
1706 load_lm_ptr = meta->arg2.var_off || lm_off;
1707
1708 /* Set LM0 to start of key */
1709 if (load_lm_ptr)
1710 emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0);
1711 if (meta->func_id == BPF_FUNC_map_update_elem)
1712 emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2);
1713
1714 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1715 2, RELO_BR_HELPER);
1716 ret_tgt = nfp_prog_current_offset(nfp_prog) + 2;
1717
1718 /* Load map ID into A0 */
1719 wrp_mov(nfp_prog, reg_a(0), reg_a(2));
1720
1721 /* Load the return address into B0 */
1722 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1723
1724 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1725 return -EINVAL;
1726
1727 /* Reset the LM0 pointer */
1728 if (!load_lm_ptr)
1729 return 0;
1730
1731 emit_csr_wr(nfp_prog, stack_reg(nfp_prog), NFP_CSR_ACT_LM_ADDR0);
1732 wrp_nops(nfp_prog, 3);
1733
1734 return 0;
1735 }
1736
1737 static int
nfp_get_prandom_u32(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1738 nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1739 {
1740 __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM);
1741 	/* The CSR value is read by the following immed[gpr, 0] instructions */
1742 emit_immed(nfp_prog, reg_both(0), 0,
1743 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1744 emit_immed(nfp_prog, reg_both(1), 0,
1745 IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B);
1746 return 0;
1747 }
1748
1749 static int
nfp_perf_event_output(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1750 nfp_perf_event_output(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1751 {
1752 swreg ptr_type;
1753 u32 ret_tgt;
1754
1755 ptr_type = ur_load_imm_any(nfp_prog, meta->arg1.type, imm_a(nfp_prog));
1756
1757 ret_tgt = nfp_prog_current_offset(nfp_prog) + 3;
1758
1759 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id,
1760 2, RELO_BR_HELPER);
1761
1762 /* Load ptr type into A1 */
1763 wrp_mov(nfp_prog, reg_a(1), ptr_type);
1764
1765 /* Load the return address into B0 */
1766 wrp_immed_relo(nfp_prog, reg_b(0), ret_tgt, RELO_IMMED_REL);
1767
1768 if (!nfp_prog_confirm_current_offset(nfp_prog, ret_tgt))
1769 return -EINVAL;
1770
1771 return 0;
1772 }
1773
1774 static int
nfp_queue_select(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1775 nfp_queue_select(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1776 {
1777 u32 jmp_tgt;
1778
1779 jmp_tgt = nfp_prog_current_offset(nfp_prog) + 5;
1780
1781 /* Make sure the queue id fits into FW field */
1782 emit_alu(nfp_prog, reg_none(), reg_a(meta->insn.src_reg * 2),
1783 ALU_OP_AND_NOT_B, reg_imm(0xff));
1784 emit_br(nfp_prog, BR_BEQ, jmp_tgt, 2);
1785
1786 /* Set the 'queue selected' bit and the queue value */
1787 emit_shf(nfp_prog, pv_qsel_set(nfp_prog),
1788 pv_qsel_set(nfp_prog), SHF_OP_OR, reg_imm(1),
1789 SHF_SC_L_SHF, PKT_VEL_QSEL_SET_BIT);
1790 emit_ld_field(nfp_prog,
1791 pv_qsel_val(nfp_prog), 0x1, reg_b(meta->insn.src_reg * 2),
1792 SHF_SC_NONE, 0);
1793 	/* Delay slots end here; we will jump over the next instruction if the
1794 	 * queue value fits into the field.
1795 	 */
1796 emit_ld_field(nfp_prog,
1797 pv_qsel_val(nfp_prog), 0x1, reg_imm(NFP_NET_RXR_MAX),
1798 SHF_SC_NONE, 0);
1799
1800 if (!nfp_prog_confirm_current_offset(nfp_prog, jmp_tgt))
1801 return -EINVAL;
1802
1803 return 0;
1804 }
1805
1806 /* --- Callbacks --- */
mov_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1807 static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1808 {
1809 const struct bpf_insn *insn = &meta->insn;
1810 u8 dst = insn->dst_reg * 2;
1811 u8 src = insn->src_reg * 2;
1812
1813 if (insn->src_reg == BPF_REG_10) {
1814 swreg stack_depth_reg;
1815
1816 stack_depth_reg = ur_load_imm_any(nfp_prog,
1817 nfp_prog->stack_depth,
1818 stack_imm(nfp_prog));
1819 emit_alu(nfp_prog, reg_both(dst),
1820 stack_reg(nfp_prog), ALU_OP_ADD, stack_depth_reg);
1821 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
1822 } else {
1823 wrp_reg_mov(nfp_prog, dst, src);
1824 wrp_reg_mov(nfp_prog, dst + 1, src + 1);
1825 }
1826
1827 return 0;
1828 }
1829
mov_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1830 static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1831 {
1832 u64 imm = meta->insn.imm; /* sign extend */
1833
1834 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U);
1835 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32);
1836
1837 return 0;
1838 }
1839
xor_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1840 static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1841 {
1842 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR);
1843 }
1844
xor_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1845 static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1846 {
1847 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm);
1848 }
1849
and_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1850 static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1851 {
1852 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND);
1853 }
1854
and_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1855 static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1856 {
1857 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm);
1858 }
1859
or_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1860 static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1861 {
1862 return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR);
1863 }
1864
or_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1865 static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1866 {
1867 return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm);
1868 }
1869
add_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1870 static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1871 {
1872 const struct bpf_insn *insn = &meta->insn;
1873
1874 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1875 reg_a(insn->dst_reg * 2), ALU_OP_ADD,
1876 reg_b(insn->src_reg * 2));
1877 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1878 reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C,
1879 reg_b(insn->src_reg * 2 + 1));
1880
1881 return 0;
1882 }
1883
add_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1884 static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1885 {
1886 const struct bpf_insn *insn = &meta->insn;
1887 u64 imm = insn->imm; /* sign extend */
1888
1889 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U);
1890 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32);
1891
1892 return 0;
1893 }
1894
sub_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1895 static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1896 {
1897 const struct bpf_insn *insn = &meta->insn;
1898
1899 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2),
1900 reg_a(insn->dst_reg * 2), ALU_OP_SUB,
1901 reg_b(insn->src_reg * 2));
1902 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1),
1903 reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C,
1904 reg_b(insn->src_reg * 2 + 1));
1905
1906 return 0;
1907 }
1908
sub_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1909 static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1910 {
1911 const struct bpf_insn *insn = &meta->insn;
1912 u64 imm = insn->imm; /* sign extend */
1913
1914 wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U);
1915 wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32);
1916
1917 return 0;
1918 }
1919
mul_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1920 static int mul_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1921 {
1922 return wrp_mul(nfp_prog, meta, true, true);
1923 }
1924
mul_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1925 static int mul_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1926 {
1927 return wrp_mul(nfp_prog, meta, true, false);
1928 }
1929
div_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1930 static int div_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1931 {
1932 const struct bpf_insn *insn = &meta->insn;
1933
1934 return wrp_div_imm(nfp_prog, insn->dst_reg * 2, insn->imm);
1935 }
1936
div_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1937 static int div_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1938 {
1939 /* NOTE: verifier hook has rejected cases for which verifier doesn't
1940 * know whether the source operand is constant or not.
1941 */
1942 return wrp_div_imm(nfp_prog, meta->insn.dst_reg * 2, meta->umin_src);
1943 }
1944
neg_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1945 static int neg_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1946 {
1947 const struct bpf_insn *insn = &meta->insn;
1948
1949 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), reg_imm(0),
1950 ALU_OP_SUB, reg_b(insn->dst_reg * 2));
1951 emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), reg_imm(0),
1952 ALU_OP_SUB_C, reg_b(insn->dst_reg * 2 + 1));
1953
1954 return 0;
1955 }
1956
1957 /* Pseudo code:
1958 * if shift_amt >= 32
1959 * dst_high = dst_low << shift_amt[4:0]
1960 * dst_low = 0;
1961 * else
1962 * dst_high = (dst_high, dst_low) >> (32 - shift_amt)
1963 * dst_low = dst_low << shift_amt
1964 *
1965 * The indirect shift will use the same logic at runtime.
1966 */
__shl_imm64(struct nfp_prog * nfp_prog,u8 dst,u8 shift_amt)1967 static int __shl_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
1968 {
1969 if (shift_amt < 32) {
1970 emit_shf(nfp_prog, reg_both(dst + 1), reg_a(dst + 1),
1971 SHF_OP_NONE, reg_b(dst), SHF_SC_R_DSHF,
1972 32 - shift_amt);
1973 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
1974 reg_b(dst), SHF_SC_L_SHF, shift_amt);
1975 } else if (shift_amt == 32) {
1976 wrp_reg_mov(nfp_prog, dst + 1, dst);
1977 wrp_immed(nfp_prog, reg_both(dst), 0);
1978 } else if (shift_amt > 32) {
1979 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
1980 reg_b(dst), SHF_SC_L_SHF, shift_amt - 32);
1981 wrp_immed(nfp_prog, reg_both(dst), 0);
1982 }
1983
1984 return 0;
1985 }
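
/* The same split written as plain C on the two 32-bit register halves (an
 * illustrative sketch; hi/lo stand for the odd/even GPRs of the 64-bit BPF
 * register):
 *
 *	static void shl64(u32 *hi, u32 *lo, unsigned int amt)
 *	{
 *		if (!amt)
 *			return;			// 0 is a no-op, as above
 *		if (amt < 32) {
 *			*hi = (*hi << amt) | (*lo >> (32 - amt));
 *			*lo <<= amt;
 *		} else {
 *			*hi = *lo << (amt - 32);	// amt == 32 gives *hi = *lo
 *			*lo = 0;
 *		}
 *	}
 */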
1986
shl_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)1987 static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
1988 {
1989 const struct bpf_insn *insn = &meta->insn;
1990 u8 dst = insn->dst_reg * 2;
1991
1992 return __shl_imm64(nfp_prog, dst, insn->imm);
1993 }
1994
shl_reg64_lt32_high(struct nfp_prog * nfp_prog,u8 dst,u8 src)1995 static void shl_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
1996 {
1997 emit_alu(nfp_prog, imm_both(nfp_prog), reg_imm(32), ALU_OP_SUB,
1998 reg_b(src));
1999 emit_alu(nfp_prog, reg_none(), imm_a(nfp_prog), ALU_OP_OR, reg_imm(0));
2000 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_a(dst + 1), SHF_OP_NONE,
2001 reg_b(dst), SHF_SC_R_DSHF);
2002 }
2003
2004 /* NOTE: for indirect left shift, HIGH part should be calculated first. */
shl_reg64_lt32_low(struct nfp_prog * nfp_prog,u8 dst,u8 src)2005 static void shl_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2006 {
2007 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2008 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2009 reg_b(dst), SHF_SC_L_SHF);
2010 }
2011
shl_reg64_lt32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2012 static void shl_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2013 {
2014 shl_reg64_lt32_high(nfp_prog, dst, src);
2015 shl_reg64_lt32_low(nfp_prog, dst, src);
2016 }
2017
shl_reg64_ge32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2018 static void shl_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2019 {
2020 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2021 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2022 reg_b(dst), SHF_SC_L_SHF);
2023 wrp_immed(nfp_prog, reg_both(dst), 0);
2024 }
2025
shl_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2026 static int shl_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2027 {
2028 const struct bpf_insn *insn = &meta->insn;
2029 u64 umin, umax;
2030 u8 dst, src;
2031
2032 dst = insn->dst_reg * 2;
2033 umin = meta->umin_src;
2034 umax = meta->umax_src;
2035 if (umin == umax)
2036 return __shl_imm64(nfp_prog, dst, umin);
2037
2038 src = insn->src_reg * 2;
2039 if (umax < 32) {
2040 shl_reg64_lt32(nfp_prog, dst, src);
2041 } else if (umin >= 32) {
2042 shl_reg64_ge32(nfp_prog, dst, src);
2043 } else {
2044 /* Generate different instruction sequences depending on runtime
2045 * value of shift amount.
2046 */
2047 u16 label_ge32, label_end;
2048
2049 label_ge32 = nfp_prog_current_offset(nfp_prog) + 7;
2050 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2051
2052 shl_reg64_lt32_high(nfp_prog, dst, src);
2053 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2054 emit_br(nfp_prog, BR_UNC, label_end, 2);
2055 /* shl_reg64_lt32_low packed in delay slot. */
2056 shl_reg64_lt32_low(nfp_prog, dst, src);
2057
2058 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2059 return -EINVAL;
2060 shl_reg64_ge32(nfp_prog, dst, src);
2061
2062 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2063 return -EINVAL;
2064 }
2065
2066 return 0;
2067 }
2068
2069 /* Pseudo code:
2070 * if shift_amt >= 32
2071  *	dst_low = dst_high >> shift_amt[4:0]
2072  *	dst_high = 0;
2073  * else
2074  *	dst_low = (dst_high, dst_low) >> shift_amt
2075  *	dst_high = dst_high >> shift_amt
2076 *
2077 * The indirect shift will use the same logic at runtime.
2078 */
__shr_imm64(struct nfp_prog * nfp_prog,u8 dst,u8 shift_amt)2079 static int __shr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2080 {
2081 if (shift_amt < 32) {
2082 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2083 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2084 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2085 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2086 } else if (shift_amt == 32) {
2087 wrp_reg_mov(nfp_prog, dst, dst + 1);
2088 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2089 } else if (shift_amt > 32) {
2090 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2091 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
2092 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2093 }
2094
2095 return 0;
2096 }
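
/* Plain C equivalent of the logical right shift split above (illustrative
 * sketch, mirroring the shl64 sketch earlier):
 *
 *	static void shr64(u32 *hi, u32 *lo, unsigned int amt)
 *	{
 *		if (!amt)
 *			return;
 *		if (amt < 32) {
 *			*lo = (*lo >> amt) | (*hi << (32 - amt));
 *			*hi >>= amt;
 *		} else {
 *			*lo = *hi >> (amt - 32);	// amt == 32 gives *lo = *hi
 *			*hi = 0;
 *		}
 *	}
 */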
2097
shr_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2098 static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2099 {
2100 const struct bpf_insn *insn = &meta->insn;
2101 u8 dst = insn->dst_reg * 2;
2102
2103 return __shr_imm64(nfp_prog, dst, insn->imm);
2104 }
2105
2106 /* NOTE: for indirect right shift, LOW part should be calculated first. */
shr_reg64_lt32_high(struct nfp_prog * nfp_prog,u8 dst,u8 src)2107 static void shr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2108 {
2109 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2110 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_NONE,
2111 reg_b(dst + 1), SHF_SC_R_SHF);
2112 }
2113
shr_reg64_lt32_low(struct nfp_prog * nfp_prog,u8 dst,u8 src)2114 static void shr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2115 {
2116 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2117 emit_shf_indir(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2118 reg_b(dst), SHF_SC_R_DSHF);
2119 }
2120
shr_reg64_lt32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2121 static void shr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2122 {
2123 shr_reg64_lt32_low(nfp_prog, dst, src);
2124 shr_reg64_lt32_high(nfp_prog, dst, src);
2125 }
2126
shr_reg64_ge32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2127 static void shr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2128 {
2129 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_imm(0));
2130 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_NONE,
2131 reg_b(dst + 1), SHF_SC_R_SHF);
2132 wrp_immed(nfp_prog, reg_both(dst + 1), 0);
2133 }
2134
shr_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2135 static int shr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2136 {
2137 const struct bpf_insn *insn = &meta->insn;
2138 u64 umin, umax;
2139 u8 dst, src;
2140
2141 dst = insn->dst_reg * 2;
2142 umin = meta->umin_src;
2143 umax = meta->umax_src;
2144 if (umin == umax)
2145 return __shr_imm64(nfp_prog, dst, umin);
2146
2147 src = insn->src_reg * 2;
2148 if (umax < 32) {
2149 shr_reg64_lt32(nfp_prog, dst, src);
2150 } else if (umin >= 32) {
2151 shr_reg64_ge32(nfp_prog, dst, src);
2152 } else {
2153 /* Generate different instruction sequences depending on runtime
2154 * value of shift amount.
2155 */
2156 u16 label_ge32, label_end;
2157
2158 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2159 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2160 shr_reg64_lt32_low(nfp_prog, dst, src);
2161 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2162 emit_br(nfp_prog, BR_UNC, label_end, 2);
2163 /* shr_reg64_lt32_high packed in delay slot. */
2164 shr_reg64_lt32_high(nfp_prog, dst, src);
2165
2166 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2167 return -EINVAL;
2168 shr_reg64_ge32(nfp_prog, dst, src);
2169
2170 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2171 return -EINVAL;
2172 }
2173
2174 return 0;
2175 }
2176
2177 /* Code logic is the same as __shr_imm64, except that ashr needs the sign
2178  * bit, which is supplied through the PREV_ALU result.
2179  */
__ashr_imm64(struct nfp_prog * nfp_prog,u8 dst,u8 shift_amt)2180 static int __ashr_imm64(struct nfp_prog *nfp_prog, u8 dst, u8 shift_amt)
2181 {
2182 if (shift_amt < 32) {
2183 emit_shf(nfp_prog, reg_both(dst), reg_a(dst + 1), SHF_OP_NONE,
2184 reg_b(dst), SHF_SC_R_DSHF, shift_amt);
2185 /* Set signedness bit. */
2186 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2187 reg_imm(0));
2188 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2189 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt);
2190 } else if (shift_amt == 32) {
2191 /* NOTE: this also helps setting signedness bit. */
2192 wrp_reg_mov(nfp_prog, dst, dst + 1);
2193 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2194 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2195 } else if (shift_amt > 32) {
2196 emit_alu(nfp_prog, reg_none(), reg_a(dst + 1), ALU_OP_OR,
2197 reg_imm(0));
2198 emit_shf(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2199 reg_b(dst + 1), SHF_SC_R_SHF, shift_amt - 32);
2200 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2201 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2202 }
2203
2204 return 0;
2205 }
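
/* Same split as the shr64 sketch above, but the high half shifts
 * arithmetically so the sign bit fills in (illustrative sketch):
 *
 *	static void ashr64(s32 *hi, u32 *lo, unsigned int amt)
 *	{
 *		if (!amt)
 *			return;
 *		if (amt < 32) {
 *			*lo = (*lo >> amt) | ((u32)*hi << (32 - amt));
 *			*hi >>= amt;			// sign bits shift in
 *		} else {
 *			*lo = *hi >> (amt - 32);	// arithmetic shift of old high
 *			*hi >>= 31;			// all copies of the sign bit
 *		}
 *	}
 */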
2206
ashr_imm64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2207 static int ashr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2208 {
2209 const struct bpf_insn *insn = &meta->insn;
2210 u8 dst = insn->dst_reg * 2;
2211
2212 return __ashr_imm64(nfp_prog, dst, insn->imm);
2213 }
2214
ashr_reg64_lt32_high(struct nfp_prog * nfp_prog,u8 dst,u8 src)2215 static void ashr_reg64_lt32_high(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2216 {
2217 /* NOTE: the first insn will set both indirect shift amount (source A)
2218 * and signedness bit (MSB of result).
2219 */
2220 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2221 emit_shf_indir(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2222 reg_b(dst + 1), SHF_SC_R_SHF);
2223 }
2224
ashr_reg64_lt32_low(struct nfp_prog * nfp_prog,u8 dst,u8 src)2225 static void ashr_reg64_lt32_low(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2226 {
2227 /* NOTE: it is the same as logic shift because we don't need to shift in
2228 * signedness bit when the shift amount is less than 32.
2229 */
2230 return shr_reg64_lt32_low(nfp_prog, dst, src);
2231 }
2232
ashr_reg64_lt32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2233 static void ashr_reg64_lt32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2234 {
2235 ashr_reg64_lt32_low(nfp_prog, dst, src);
2236 ashr_reg64_lt32_high(nfp_prog, dst, src);
2237 }
2238
ashr_reg64_ge32(struct nfp_prog * nfp_prog,u8 dst,u8 src)2239 static void ashr_reg64_ge32(struct nfp_prog *nfp_prog, u8 dst, u8 src)
2240 {
2241 emit_alu(nfp_prog, reg_none(), reg_a(src), ALU_OP_OR, reg_b(dst + 1));
2242 emit_shf_indir(nfp_prog, reg_both(dst), reg_none(), SHF_OP_ASHR,
2243 reg_b(dst + 1), SHF_SC_R_SHF);
2244 emit_shf(nfp_prog, reg_both(dst + 1), reg_none(), SHF_OP_ASHR,
2245 reg_b(dst + 1), SHF_SC_R_SHF, 31);
2246 }
2247
2248 /* Like ashr_imm64, but need to use indirect shift. */
ashr_reg64(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2249 static int ashr_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2250 {
2251 const struct bpf_insn *insn = &meta->insn;
2252 u64 umin, umax;
2253 u8 dst, src;
2254
2255 dst = insn->dst_reg * 2;
2256 umin = meta->umin_src;
2257 umax = meta->umax_src;
2258 if (umin == umax)
2259 return __ashr_imm64(nfp_prog, dst, umin);
2260
2261 src = insn->src_reg * 2;
2262 if (umax < 32) {
2263 ashr_reg64_lt32(nfp_prog, dst, src);
2264 } else if (umin >= 32) {
2265 ashr_reg64_ge32(nfp_prog, dst, src);
2266 } else {
2267 u16 label_ge32, label_end;
2268
2269 label_ge32 = nfp_prog_current_offset(nfp_prog) + 6;
2270 emit_br_bset(nfp_prog, reg_a(src), 5, label_ge32, 0);
2271 ashr_reg64_lt32_low(nfp_prog, dst, src);
2272 label_end = nfp_prog_current_offset(nfp_prog) + 6;
2273 emit_br(nfp_prog, BR_UNC, label_end, 2);
2274 /* ashr_reg64_lt32_high packed in delay slot. */
2275 ashr_reg64_lt32_high(nfp_prog, dst, src);
2276
2277 if (!nfp_prog_confirm_current_offset(nfp_prog, label_ge32))
2278 return -EINVAL;
2279 ashr_reg64_ge32(nfp_prog, dst, src);
2280
2281 if (!nfp_prog_confirm_current_offset(nfp_prog, label_end))
2282 return -EINVAL;
2283 }
2284
2285 return 0;
2286 }
2287
mov_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2288 static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2289 {
2290 const struct bpf_insn *insn = &meta->insn;
2291
2292 wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2);
2293 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2294
2295 return 0;
2296 }
2297
mov_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2298 static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2299 {
2300 const struct bpf_insn *insn = &meta->insn;
2301
2302 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm);
2303 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2304
2305 return 0;
2306 }
2307
xor_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2308 static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2309 {
2310 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR);
2311 }
2312
xor_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2313 static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2314 {
2315 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR);
2316 }
2317
and_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2318 static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2319 {
2320 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND);
2321 }
2322
and_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2323 static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2324 {
2325 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND);
2326 }
2327
or_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2328 static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2329 {
2330 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR);
2331 }
2332
or_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2333 static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2334 {
2335 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR);
2336 }
2337
add_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2338 static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2339 {
2340 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD);
2341 }
2342
add_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2343 static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2344 {
2345 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD);
2346 }
2347
sub_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2348 static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2349 {
2350 return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB);
2351 }
2352
sub_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2353 static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2354 {
2355 return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB);
2356 }
2357
mul_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2358 static int mul_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2359 {
2360 return wrp_mul(nfp_prog, meta, false, true);
2361 }
2362
mul_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2363 static int mul_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2364 {
2365 return wrp_mul(nfp_prog, meta, false, false);
2366 }
2367
div_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2368 static int div_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2369 {
2370 return div_reg64(nfp_prog, meta);
2371 }
2372
div_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2373 static int div_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2374 {
2375 return div_imm64(nfp_prog, meta);
2376 }
2377
neg_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2378 static int neg_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2379 {
2380 u8 dst = meta->insn.dst_reg * 2;
2381
2382 emit_alu(nfp_prog, reg_both(dst), reg_imm(0), ALU_OP_SUB, reg_b(dst));
2383 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2384
2385 return 0;
2386 }
2387
shl_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2388 static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2389 {
2390 const struct bpf_insn *insn = &meta->insn;
2391
2392 if (!insn->imm)
2393 return 1; /* TODO: zero shift means indirect */
2394
2395 emit_shf(nfp_prog, reg_both(insn->dst_reg * 2),
2396 reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2),
2397 SHF_SC_L_SHF, insn->imm);
2398 wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0);
2399
2400 return 0;
2401 }
2402
end_reg32(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2403 static int end_reg32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2404 {
2405 const struct bpf_insn *insn = &meta->insn;
2406 u8 gpr = insn->dst_reg * 2;
2407
2408 switch (insn->imm) {
2409 case 16:
2410 emit_ld_field(nfp_prog, reg_both(gpr), 0x9, reg_b(gpr),
2411 SHF_SC_R_ROT, 8);
2412 emit_ld_field(nfp_prog, reg_both(gpr), 0xe, reg_a(gpr),
2413 SHF_SC_R_SHF, 16);
2414
2415 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2416 break;
2417 case 32:
2418 wrp_end32(nfp_prog, reg_a(gpr), gpr);
2419 wrp_immed(nfp_prog, reg_both(gpr + 1), 0);
2420 break;
2421 case 64:
2422 wrp_mov(nfp_prog, imm_a(nfp_prog), reg_b(gpr + 1));
2423
2424 wrp_end32(nfp_prog, reg_a(gpr), gpr + 1);
2425 wrp_end32(nfp_prog, imm_a(nfp_prog), gpr);
2426 break;
2427 }
2428
2429 return 0;
2430 }
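
/* The three cases above reduce to the following byte swaps (illustrative;
 * bswap16/bswap32 stand for ordinary host byte-swap helpers, lo/hi are the
 * even/odd GPR halves):
 *
 *	case 16: lo = bswap16(lo & 0xffff);  hi = 0;
 *	case 32: lo = bswap32(lo);           hi = 0;
 *	case 64: tmp = lo;  lo = bswap32(hi);  hi = bswap32(tmp);
 */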
2431
imm_ld8_part2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2432 static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2433 {
2434 struct nfp_insn_meta *prev = nfp_meta_prev(meta);
2435 u32 imm_lo, imm_hi;
2436 u8 dst;
2437
2438 dst = prev->insn.dst_reg * 2;
2439 imm_lo = prev->insn.imm;
2440 imm_hi = meta->insn.imm;
2441
2442 wrp_immed(nfp_prog, reg_both(dst), imm_lo);
2443
2444 /* mov is always 1 insn, load imm may be two, so try to use mov */
2445 if (imm_hi == imm_lo)
2446 wrp_mov(nfp_prog, reg_both(dst + 1), reg_a(dst));
2447 else
2448 wrp_immed(nfp_prog, reg_both(dst + 1), imm_hi);
2449
2450 return 0;
2451 }
2452
imm_ld8(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2453 static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2454 {
2455 meta->double_cb = imm_ld8_part2;
2456 return 0;
2457 }
2458
data_ld1(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2459 static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2460 {
2461 return construct_data_ld(nfp_prog, meta->insn.imm, 1);
2462 }
2463
data_ld2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2464 static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2465 {
2466 return construct_data_ld(nfp_prog, meta->insn.imm, 2);
2467 }
2468
data_ld4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2469 static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2470 {
2471 return construct_data_ld(nfp_prog, meta->insn.imm, 4);
2472 }
2473
data_ind_ld1(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2474 static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2475 {
2476 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2477 meta->insn.src_reg * 2, 1);
2478 }
2479
data_ind_ld2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2480 static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2481 {
2482 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2483 meta->insn.src_reg * 2, 2);
2484 }
2485
data_ind_ld4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2486 static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2487 {
2488 return construct_data_ind_ld(nfp_prog, meta->insn.imm,
2489 meta->insn.src_reg * 2, 4);
2490 }
2491
2492 static int
mem_ldx_stack(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size,unsigned int ptr_off)2493 mem_ldx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2494 unsigned int size, unsigned int ptr_off)
2495 {
2496 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2497 meta->insn.dst_reg * 2, meta->insn.src_reg * 2,
2498 true, wrp_lmem_load);
2499 }
2500
mem_ldx_skb(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,u8 size)2501 static int mem_ldx_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2502 u8 size)
2503 {
2504 swreg dst = reg_both(meta->insn.dst_reg * 2);
2505
2506 switch (meta->insn.off) {
2507 case offsetof(struct __sk_buff, len):
2508 if (size != FIELD_SIZEOF(struct __sk_buff, len))
2509 return -EOPNOTSUPP;
2510 wrp_mov(nfp_prog, dst, plen_reg(nfp_prog));
2511 break;
2512 case offsetof(struct __sk_buff, data):
2513 if (size != FIELD_SIZEOF(struct __sk_buff, data))
2514 return -EOPNOTSUPP;
2515 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2516 break;
2517 case offsetof(struct __sk_buff, data_end):
2518 if (size != FIELD_SIZEOF(struct __sk_buff, data_end))
2519 return -EOPNOTSUPP;
2520 emit_alu(nfp_prog, dst,
2521 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2522 break;
2523 default:
2524 return -EOPNOTSUPP;
2525 }
2526
2527 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2528
2529 return 0;
2530 }
2531
mem_ldx_xdp(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,u8 size)2532 static int mem_ldx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2533 u8 size)
2534 {
2535 swreg dst = reg_both(meta->insn.dst_reg * 2);
2536
2537 switch (meta->insn.off) {
2538 case offsetof(struct xdp_md, data):
2539 if (size != FIELD_SIZEOF(struct xdp_md, data))
2540 return -EOPNOTSUPP;
2541 wrp_mov(nfp_prog, dst, pptr_reg(nfp_prog));
2542 break;
2543 case offsetof(struct xdp_md, data_end):
2544 if (size != FIELD_SIZEOF(struct xdp_md, data_end))
2545 return -EOPNOTSUPP;
2546 emit_alu(nfp_prog, dst,
2547 plen_reg(nfp_prog), ALU_OP_ADD, pptr_reg(nfp_prog));
2548 break;
2549 default:
2550 return -EOPNOTSUPP;
2551 }
2552
2553 wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
2554
2555 return 0;
2556 }
2557
2558 static int
mem_ldx_data(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2559 mem_ldx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2560 unsigned int size)
2561 {
2562 swreg tmp_reg;
2563
2564 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2565
2566 return data_ld_host_order_addr32(nfp_prog, meta->insn.src_reg * 2,
2567 tmp_reg, meta->insn.dst_reg * 2, size);
2568 }
2569
2570 static int
mem_ldx_emem(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2571 mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2572 unsigned int size)
2573 {
2574 swreg tmp_reg;
2575
2576 tmp_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2577
2578 return data_ld_host_order_addr40(nfp_prog, meta->insn.src_reg * 2,
2579 tmp_reg, meta->insn.dst_reg * 2, size);
2580 }
2581
2582 static void
mem_ldx_data_init_pktcache(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2583 mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog,
2584 struct nfp_insn_meta *meta)
2585 {
2586 s16 range_start = meta->pkt_cache.range_start;
2587 s16 range_end = meta->pkt_cache.range_end;
2588 swreg src_base, off;
2589 u8 xfer_num, len;
2590 bool indir;
2591
2592 off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog));
2593 src_base = reg_a(meta->insn.src_reg * 2);
2594 len = range_end - range_start;
2595 xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH;
2596
2597 indir = len > 8 * REG_WIDTH;
2598 /* Setup PREV_ALU for indirect mode. */
2599 if (indir)
2600 wrp_immed(nfp_prog, reg_none(),
2601 CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
2602
2603 /* Cache memory into transfer-in registers. */
2604 emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
2605 off, xfer_num - 1, CMD_CTX_SWAP, indir);
2606 }
2607
2608 static int
mem_ldx_data_from_pktcache_unaligned(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2609 mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog,
2610 struct nfp_insn_meta *meta,
2611 unsigned int size)
2612 {
2613 s16 range_start = meta->pkt_cache.range_start;
2614 s16 insn_off = meta->insn.off - range_start;
2615 swreg dst_lo, dst_hi, src_lo, src_mid;
2616 u8 dst_gpr = meta->insn.dst_reg * 2;
2617 u8 len_lo = size, len_mid = 0;
2618 u8 idx = insn_off / REG_WIDTH;
2619 u8 off = insn_off % REG_WIDTH;
2620
2621 dst_hi = reg_both(dst_gpr + 1);
2622 dst_lo = reg_both(dst_gpr);
2623 src_lo = reg_xfer(idx);
2624
2625 /* The read length could involve as many as three registers. */
2626 if (size > REG_WIDTH - off) {
2627 /* Calculate the part in the second register. */
2628 len_lo = REG_WIDTH - off;
2629 len_mid = size - len_lo;
2630
2631 /* Calculate the part in the third register. */
2632 if (size > 2 * REG_WIDTH - off)
2633 len_mid = REG_WIDTH;
2634 }
2635
2636 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off);
2637
2638 if (!len_mid) {
2639 wrp_immed(nfp_prog, dst_hi, 0);
2640 return 0;
2641 }
2642
2643 src_mid = reg_xfer(idx + 1);
2644
2645 if (size <= REG_WIDTH) {
2646 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo);
2647 wrp_immed(nfp_prog, dst_hi, 0);
2648 } else {
2649 swreg src_hi = reg_xfer(idx + 2);
2650
2651 wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid,
2652 REG_WIDTH - len_lo, len_lo);
2653 wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo,
2654 REG_WIDTH - len_lo);
2655 wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo,
2656 len_lo);
2657 }
2658
2659 return 0;
2660 }
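
/* Worked example of the split above (illustrative): with REG_WIDTH == 4, an
 * 8-byte load starting 6 bytes past the start of the cached range gives
 *
 *	idx = 6 / 4 = 1, off = 6 % 4 = 2
 *	len_lo  = 4 - 2 = 2	bytes taken from xfer[1]
 *	len_mid = 8 - 2 = 6	capped to REG_WIDTH, so 4 bytes from xfer[2]
 *	remaining 2 bytes	come from xfer[3] (the third register)
 */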
2661
2662 static int
mem_ldx_data_from_pktcache_aligned(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2663 mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog,
2664 struct nfp_insn_meta *meta,
2665 unsigned int size)
2666 {
2667 swreg dst_lo, dst_hi, src_lo;
2668 u8 dst_gpr, idx;
2669
2670 idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH;
2671 dst_gpr = meta->insn.dst_reg * 2;
2672 dst_hi = reg_both(dst_gpr + 1);
2673 dst_lo = reg_both(dst_gpr);
2674 src_lo = reg_xfer(idx);
2675
2676 if (size < REG_WIDTH) {
2677 wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0);
2678 wrp_immed(nfp_prog, dst_hi, 0);
2679 } else if (size == REG_WIDTH) {
2680 wrp_mov(nfp_prog, dst_lo, src_lo);
2681 wrp_immed(nfp_prog, dst_hi, 0);
2682 } else {
2683 swreg src_hi = reg_xfer(idx + 1);
2684
2685 wrp_mov(nfp_prog, dst_lo, src_lo);
2686 wrp_mov(nfp_prog, dst_hi, src_hi);
2687 }
2688
2689 return 0;
2690 }
2691
2692 static int
mem_ldx_data_from_pktcache(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2693 mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog,
2694 struct nfp_insn_meta *meta, unsigned int size)
2695 {
2696 u8 off = meta->insn.off - meta->pkt_cache.range_start;
2697
2698 if (IS_ALIGNED(off, REG_WIDTH))
2699 return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size);
2700
2701 return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size);
2702 }
2703
2704 static int
mem_ldx(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2705 mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2706 unsigned int size)
2707 {
2708 if (meta->ldst_gather_len)
2709 return nfp_cpp_memcpy(nfp_prog, meta);
2710
2711 if (meta->ptr.type == PTR_TO_CTX) {
2712 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2713 return mem_ldx_xdp(nfp_prog, meta, size);
2714 else
2715 return mem_ldx_skb(nfp_prog, meta, size);
2716 }
2717
2718 if (meta->ptr.type == PTR_TO_PACKET) {
2719 if (meta->pkt_cache.range_end) {
2720 if (meta->pkt_cache.do_init)
2721 mem_ldx_data_init_pktcache(nfp_prog, meta);
2722
2723 return mem_ldx_data_from_pktcache(nfp_prog, meta, size);
2724 } else {
2725 return mem_ldx_data(nfp_prog, meta, size);
2726 }
2727 }
2728
2729 if (meta->ptr.type == PTR_TO_STACK)
2730 return mem_ldx_stack(nfp_prog, meta, size,
2731 meta->ptr.off + meta->ptr.var_off.value);
2732
2733 if (meta->ptr.type == PTR_TO_MAP_VALUE)
2734 return mem_ldx_emem(nfp_prog, meta, size);
2735
2736 return -EOPNOTSUPP;
2737 }
2738
mem_ldx1(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2739 static int mem_ldx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2740 {
2741 return mem_ldx(nfp_prog, meta, 1);
2742 }
2743
mem_ldx2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2744 static int mem_ldx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2745 {
2746 return mem_ldx(nfp_prog, meta, 2);
2747 }
2748
mem_ldx4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2749 static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2750 {
2751 return mem_ldx(nfp_prog, meta, 4);
2752 }
2753
mem_ldx8(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2754 static int mem_ldx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2755 {
2756 return mem_ldx(nfp_prog, meta, 8);
2757 }
2758
2759 static int
mem_st_data(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2760 mem_st_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2761 unsigned int size)
2762 {
2763 u64 imm = meta->insn.imm; /* sign extend */
2764 swreg off_reg;
2765
2766 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2767
2768 return data_st_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2769 imm, size);
2770 }
2771
mem_st(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2772 static int mem_st(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2773 unsigned int size)
2774 {
2775 if (meta->ptr.type == PTR_TO_PACKET)
2776 return mem_st_data(nfp_prog, meta, size);
2777
2778 return -EOPNOTSUPP;
2779 }
2780
mem_st1(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2781 static int mem_st1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2782 {
2783 return mem_st(nfp_prog, meta, 1);
2784 }
2785
mem_st2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2786 static int mem_st2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2787 {
2788 return mem_st(nfp_prog, meta, 2);
2789 }
2790
mem_st4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2791 static int mem_st4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2792 {
2793 return mem_st(nfp_prog, meta, 4);
2794 }
2795
mem_st8(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2796 static int mem_st8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2797 {
2798 return mem_st(nfp_prog, meta, 8);
2799 }
2800
2801 static int
mem_stx_data(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2802 mem_stx_data(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2803 unsigned int size)
2804 {
2805 swreg off_reg;
2806
2807 off_reg = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2808
2809 return data_stx_host_order(nfp_prog, meta->insn.dst_reg * 2, off_reg,
2810 meta->insn.src_reg * 2, size);
2811 }
2812
2813 static int
mem_stx_stack(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size,unsigned int ptr_off)2814 mem_stx_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2815 unsigned int size, unsigned int ptr_off)
2816 {
2817 return mem_op_stack(nfp_prog, meta, size, ptr_off,
2818 meta->insn.src_reg * 2, meta->insn.dst_reg * 2,
2819 false, wrp_lmem_store);
2820 }
2821
mem_stx_xdp(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2822 static int mem_stx_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2823 {
2824 switch (meta->insn.off) {
2825 case offsetof(struct xdp_md, rx_queue_index):
2826 return nfp_queue_select(nfp_prog, meta);
2827 }
2828
2829 WARN_ON_ONCE(1); /* verifier should have rejected bad accesses */
2830 return -EOPNOTSUPP;
2831 }
2832
2833 static int
mem_stx(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,unsigned int size)2834 mem_stx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
2835 unsigned int size)
2836 {
2837 if (meta->ptr.type == PTR_TO_PACKET)
2838 return mem_stx_data(nfp_prog, meta, size);
2839
2840 if (meta->ptr.type == PTR_TO_STACK)
2841 return mem_stx_stack(nfp_prog, meta, size,
2842 meta->ptr.off + meta->ptr.var_off.value);
2843
2844 return -EOPNOTSUPP;
2845 }
2846
mem_stx1(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2847 static int mem_stx1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2848 {
2849 return mem_stx(nfp_prog, meta, 1);
2850 }
2851
mem_stx2(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2852 static int mem_stx2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2853 {
2854 return mem_stx(nfp_prog, meta, 2);
2855 }
2856
mem_stx4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2857 static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2858 {
2859 if (meta->ptr.type == PTR_TO_CTX)
2860 if (nfp_prog->type == BPF_PROG_TYPE_XDP)
2861 return mem_stx_xdp(nfp_prog, meta);
2862 return mem_stx(nfp_prog, meta, 4);
2863 }
2864
mem_stx8(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2865 static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2866 {
2867 return mem_stx(nfp_prog, meta, 8);
2868 }
2869
2870 static int
mem_xadd(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta,bool is64)2871 mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64)
2872 {
2873 u8 dst_gpr = meta->insn.dst_reg * 2;
2874 u8 src_gpr = meta->insn.src_reg * 2;
2875 unsigned int full_add, out;
2876 swreg addra, addrb, off;
2877
2878 off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
2879
2880 	/* We can fit 16 bits into the command immediate.  If we know the value
2881 	 * is guaranteed to either always or never fit into 16 bits, we only
2882 	 * generate code to handle that particular case; otherwise we generate
2883 	 * code for both.
2884 	 */
2885 out = nfp_prog_current_offset(nfp_prog);
2886 full_add = nfp_prog_current_offset(nfp_prog);
2887
2888 if (meta->insn.off) {
2889 out += 2;
2890 full_add += 2;
2891 }
2892 if (meta->xadd_maybe_16bit) {
2893 out += 3;
2894 full_add += 3;
2895 }
2896 if (meta->xadd_over_16bit)
2897 out += 2 + is64;
2898 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2899 out += 5;
2900 full_add += 5;
2901 }
2902
2903 /* Generate the branch for choosing add_imm vs add */
2904 if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) {
2905 swreg max_imm = imm_a(nfp_prog);
2906
2907 wrp_immed(nfp_prog, max_imm, 0xffff);
2908 emit_alu(nfp_prog, reg_none(),
2909 max_imm, ALU_OP_SUB, reg_b(src_gpr));
2910 emit_alu(nfp_prog, reg_none(),
2911 reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1));
2912 emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0);
2913 /* defer for add */
2914 }
2915
2916 	/* If the insn has an offset, add it to the address */
2917 if (!meta->insn.off) {
2918 addra = reg_a(dst_gpr);
2919 addrb = reg_b(dst_gpr + 1);
2920 } else {
2921 emit_alu(nfp_prog, imma_a(nfp_prog),
2922 reg_a(dst_gpr), ALU_OP_ADD, off);
2923 emit_alu(nfp_prog, imma_b(nfp_prog),
2924 reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0));
2925 addra = imma_a(nfp_prog);
2926 addrb = imma_b(nfp_prog);
2927 }
2928
2929 /* Generate the add_imm if 16 bits are possible */
2930 if (meta->xadd_maybe_16bit) {
2931 swreg prev_alu = imm_a(nfp_prog);
2932
2933 wrp_immed(nfp_prog, prev_alu,
2934 FIELD_PREP(CMD_OVE_DATA, 2) |
2935 CMD_OVE_LEN |
2936 FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2));
2937 wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2);
2938 emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0,
2939 addra, addrb, 0, CMD_CTX_NO_SWAP);
2940
2941 if (meta->xadd_over_16bit)
2942 emit_br(nfp_prog, BR_UNC, out, 0);
2943 }
2944
2945 if (!nfp_prog_confirm_current_offset(nfp_prog, full_add))
2946 return -EINVAL;
2947
2948 /* Generate the add if 16 bits are not guaranteed */
2949 if (meta->xadd_over_16bit) {
2950 emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0,
2951 addra, addrb, is64 << 2,
2952 is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1);
2953
2954 wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr));
2955 if (is64)
2956 wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1));
2957 }
2958
2959 if (!nfp_prog_confirm_current_offset(nfp_prog, out))
2960 return -EINVAL;
2961
2962 return 0;
2963 }
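
/* User-space model of the runtime choice made above when the verifier could
 * not prove the operand size either way (illustrative sketch):
 *
 *	static bool xadd_use_add_imm(u32 src_lo, u32 src_hi)
 *	{
 *		// the add_imm command carries the operand in its 16-bit
 *		// immediate field; larger values need the full add path
 *		return src_hi == 0 && src_lo <= 0xffff;
 *	}
 */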
2964
mem_xadd4(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2965 static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2966 {
2967 return mem_xadd(nfp_prog, meta, false);
2968 }
2969
mem_xadd8(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2970 static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2971 {
2972 return mem_xadd(nfp_prog, meta, true);
2973 }
2974
jump(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2975 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2976 {
2977 emit_br(nfp_prog, BR_UNC, meta->insn.off, 0);
2978
2979 return 0;
2980 }
2981
jeq_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)2982 static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
2983 {
2984 const struct bpf_insn *insn = &meta->insn;
2985 u64 imm = insn->imm; /* sign extend */
2986 swreg or1, or2, tmp_reg;
2987
2988 or1 = reg_a(insn->dst_reg * 2);
2989 or2 = reg_b(insn->dst_reg * 2 + 1);
2990
2991 if (imm & ~0U) {
2992 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
2993 emit_alu(nfp_prog, imm_a(nfp_prog),
2994 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
2995 or1 = imm_a(nfp_prog);
2996 }
2997
2998 if (imm >> 32) {
2999 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3000 emit_alu(nfp_prog, imm_b(nfp_prog),
3001 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3002 or2 = imm_b(nfp_prog);
3003 }
3004
3005 emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2);
3006 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3007
3008 return 0;
3009 }
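
/* The 64-bit equality test above in plain C (illustrative sketch): each half
 * is XORed with the matching half of the immediate (the XOR is skipped when
 * that half of the immediate is zero) and the results are ORed, so BR_BEQ
 * fires on a single zero test:
 *
 *	static bool jeq_imm_model(u32 dst_lo, u32 dst_hi, u64 imm)
 *	{
 *		return ((dst_lo ^ (u32)imm) | (dst_hi ^ (u32)(imm >> 32))) == 0;
 *	}
 */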
3010
jset_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3011 static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3012 {
3013 const struct bpf_insn *insn = &meta->insn;
3014 u64 imm = insn->imm; /* sign extend */
3015 swreg tmp_reg;
3016
3017 if (!imm) {
3018 meta->skip = true;
3019 return 0;
3020 }
3021
3022 if (imm & ~0U) {
3023 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3024 emit_alu(nfp_prog, reg_none(),
3025 reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg);
3026 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3027 }
3028
3029 if (imm >> 32) {
3030 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3031 emit_alu(nfp_prog, reg_none(),
3032 reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg);
3033 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3034 }
3035
3036 return 0;
3037 }
3038
jne_imm(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3039 static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3040 {
3041 const struct bpf_insn *insn = &meta->insn;
3042 u64 imm = insn->imm; /* sign extend */
3043 swreg tmp_reg;
3044
3045 if (!imm) {
3046 emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2),
3047 ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1));
3048 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3049 return 0;
3050 }
3051
3052 tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog));
3053 emit_alu(nfp_prog, reg_none(),
3054 reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg);
3055 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3056
3057 tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog));
3058 emit_alu(nfp_prog, reg_none(),
3059 reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg);
3060 emit_br(nfp_prog, BR_BNE, insn->off, 0);
3061
3062 return 0;
3063 }
3064
jeq_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3065 static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3066 {
3067 const struct bpf_insn *insn = &meta->insn;
3068
3069 emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2),
3070 ALU_OP_XOR, reg_b(insn->src_reg * 2));
3071 emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1),
3072 ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1));
3073 emit_alu(nfp_prog, reg_none(),
3074 imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog));
3075 emit_br(nfp_prog, BR_BEQ, insn->off, 0);
3076
3077 return 0;
3078 }
3079
jset_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3080 static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3081 {
3082 return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE);
3083 }
3084
jne_reg(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3085 static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3086 {
3087 return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE);
3088 }
3089
call(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3090 static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3091 {
3092 switch (meta->insn.imm) {
3093 case BPF_FUNC_xdp_adjust_head:
3094 return adjust_head(nfp_prog, meta);
3095 case BPF_FUNC_xdp_adjust_tail:
3096 return adjust_tail(nfp_prog, meta);
3097 case BPF_FUNC_map_lookup_elem:
3098 case BPF_FUNC_map_update_elem:
3099 case BPF_FUNC_map_delete_elem:
3100 return map_call_stack_common(nfp_prog, meta);
3101 case BPF_FUNC_get_prandom_u32:
3102 return nfp_get_prandom_u32(nfp_prog, meta);
3103 case BPF_FUNC_perf_event_output:
3104 return nfp_perf_event_output(nfp_prog, meta);
3105 default:
3106 WARN_ONCE(1, "verifier allowed unsupported function\n");
3107 return -EOPNOTSUPP;
3108 }
3109 }
3110
goto_out(struct nfp_prog * nfp_prog,struct nfp_insn_meta * meta)3111 static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
3112 {
3113 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 0, RELO_BR_GO_OUT);
3114
3115 return 0;
3116 }
3117
3118 static const instr_cb_t instr_cb[256] = {
3119 [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64,
3120 [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64,
3121 [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64,
3122 [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64,
3123 [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64,
3124 [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64,
3125 [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64,
3126 [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64,
3127 [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64,
3128 [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64,
3129 [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64,
3130 [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64,
3131 [BPF_ALU64 | BPF_MUL | BPF_X] = mul_reg64,
3132 [BPF_ALU64 | BPF_MUL | BPF_K] = mul_imm64,
3133 [BPF_ALU64 | BPF_DIV | BPF_X] = div_reg64,
3134 [BPF_ALU64 | BPF_DIV | BPF_K] = div_imm64,
3135 [BPF_ALU64 | BPF_NEG] = neg_reg64,
3136 [BPF_ALU64 | BPF_LSH | BPF_X] = shl_reg64,
3137 [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64,
3138 [BPF_ALU64 | BPF_RSH | BPF_X] = shr_reg64,
3139 [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64,
3140 [BPF_ALU64 | BPF_ARSH | BPF_X] = ashr_reg64,
3141 [BPF_ALU64 | BPF_ARSH | BPF_K] = ashr_imm64,
3142 [BPF_ALU | BPF_MOV | BPF_X] = mov_reg,
3143 [BPF_ALU | BPF_MOV | BPF_K] = mov_imm,
3144 [BPF_ALU | BPF_XOR | BPF_X] = xor_reg,
3145 [BPF_ALU | BPF_XOR | BPF_K] = xor_imm,
3146 [BPF_ALU | BPF_AND | BPF_X] = and_reg,
3147 [BPF_ALU | BPF_AND | BPF_K] = and_imm,
3148 [BPF_ALU | BPF_OR | BPF_X] = or_reg,
3149 [BPF_ALU | BPF_OR | BPF_K] = or_imm,
3150 [BPF_ALU | BPF_ADD | BPF_X] = add_reg,
3151 [BPF_ALU | BPF_ADD | BPF_K] = add_imm,
3152 [BPF_ALU | BPF_SUB | BPF_X] = sub_reg,
3153 [BPF_ALU | BPF_SUB | BPF_K] = sub_imm,
3154 [BPF_ALU | BPF_MUL | BPF_X] = mul_reg,
3155 [BPF_ALU | BPF_MUL | BPF_K] = mul_imm,
3156 [BPF_ALU | BPF_DIV | BPF_X] = div_reg,
3157 [BPF_ALU | BPF_DIV | BPF_K] = div_imm,
3158 [BPF_ALU | BPF_NEG] = neg_reg,
3159 [BPF_ALU | BPF_LSH | BPF_K] = shl_imm,
3160 [BPF_ALU | BPF_END | BPF_X] = end_reg32,
3161 [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8,
3162 [BPF_LD | BPF_ABS | BPF_B] = data_ld1,
3163 [BPF_LD | BPF_ABS | BPF_H] = data_ld2,
3164 [BPF_LD | BPF_ABS | BPF_W] = data_ld4,
3165 [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1,
3166 [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2,
3167 [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4,
3168 [BPF_LDX | BPF_MEM | BPF_B] = mem_ldx1,
3169 [BPF_LDX | BPF_MEM | BPF_H] = mem_ldx2,
3170 [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4,
3171 [BPF_LDX | BPF_MEM | BPF_DW] = mem_ldx8,
3172 [BPF_STX | BPF_MEM | BPF_B] = mem_stx1,
3173 [BPF_STX | BPF_MEM | BPF_H] = mem_stx2,
3174 [BPF_STX | BPF_MEM | BPF_W] = mem_stx4,
3175 [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8,
3176 [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4,
3177 [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8,
3178 [BPF_ST | BPF_MEM | BPF_B] = mem_st1,
3179 [BPF_ST | BPF_MEM | BPF_H] = mem_st2,
3180 [BPF_ST | BPF_MEM | BPF_W] = mem_st4,
3181 [BPF_ST | BPF_MEM | BPF_DW] = mem_st8,
3182 [BPF_JMP | BPF_JA | BPF_K] = jump,
3183 [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm,
3184 [BPF_JMP | BPF_JGT | BPF_K] = cmp_imm,
3185 [BPF_JMP | BPF_JGE | BPF_K] = cmp_imm,
3186 [BPF_JMP | BPF_JLT | BPF_K] = cmp_imm,
3187 [BPF_JMP | BPF_JLE | BPF_K] = cmp_imm,
3188 [BPF_JMP | BPF_JSGT | BPF_K] = cmp_imm,
3189 [BPF_JMP | BPF_JSGE | BPF_K] = cmp_imm,
3190 [BPF_JMP | BPF_JSLT | BPF_K] = cmp_imm,
3191 [BPF_JMP | BPF_JSLE | BPF_K] = cmp_imm,
3192 [BPF_JMP | BPF_JSET | BPF_K] = jset_imm,
3193 [BPF_JMP | BPF_JNE | BPF_K] = jne_imm,
3194 [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg,
3195 [BPF_JMP | BPF_JGT | BPF_X] = cmp_reg,
3196 [BPF_JMP | BPF_JGE | BPF_X] = cmp_reg,
3197 [BPF_JMP | BPF_JLT | BPF_X] = cmp_reg,
3198 [BPF_JMP | BPF_JLE | BPF_X] = cmp_reg,
3199 [BPF_JMP | BPF_JSGT | BPF_X] = cmp_reg,
3200 [BPF_JMP | BPF_JSGE | BPF_X] = cmp_reg,
3201 [BPF_JMP | BPF_JSLT | BPF_X] = cmp_reg,
3202 [BPF_JMP | BPF_JSLE | BPF_X] = cmp_reg,
3203 [BPF_JMP | BPF_JSET | BPF_X] = jset_reg,
3204 [BPF_JMP | BPF_JNE | BPF_X] = jne_reg,
3205 [BPF_JMP | BPF_CALL] = call,
3206 [BPF_JMP | BPF_EXIT] = goto_out,
3207 };
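/* Note on dispatch: translation indexes instr_cb directly with the raw
 * BPF opcode byte.  For example, a 64-bit register move, that is
 * BPF_ALU64 | BPF_MOV | BPF_X == 0xbf, selects mov_reg64 above.
 */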
3208
3209 /* --- Assembler logic --- */
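/* Each translated BPF jump ends its block of NFP code with a branch.
 * Once all NFP offsets are known, walk the jumps again and point every
 * plain relative branch (RELO_BR_REL) at the NFP offset recorded for its
 * BPF destination; the special relocation types are resolved later, in
 * nfp_bpf_relo_for_vnic().
 */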
3210 static int nfp_fixup_branches(struct nfp_prog *nfp_prog)
3211 {
3212 struct nfp_insn_meta *meta, *jmp_dst;
3213 u32 idx, br_idx;
3214
3215 list_for_each_entry(meta, &nfp_prog->insns, l) {
3216 if (meta->skip)
3217 continue;
3218 if (meta->insn.code == (BPF_JMP | BPF_CALL))
3219 continue;
3220 if (BPF_CLASS(meta->insn.code) != BPF_JMP)
3221 continue;
3222
3223 if (list_is_last(&meta->l, &nfp_prog->insns))
3224 br_idx = nfp_prog->last_bpf_off;
3225 else
3226 br_idx = list_next_entry(meta, l)->off - 1;
3227
3228 if (!nfp_is_br(nfp_prog->prog[br_idx])) {
3229 pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n",
3230 br_idx, meta->insn.code, nfp_prog->prog[br_idx]);
3231 return -ELOOP;
3232 }
3233 /* Leave special branches for later */
3234 if (FIELD_GET(OP_RELO_TYPE, nfp_prog->prog[br_idx]) !=
3235 RELO_BR_REL)
3236 continue;
3237
3238 if (!meta->jmp_dst) {
3239 pr_err("Non-exit jump doesn't have destination info recorded!!\n");
3240 return -ELOOP;
3241 }
3242
3243 jmp_dst = meta->jmp_dst;
3244
3245 if (jmp_dst->skip) {
3246 pr_err("Branch landing on removed instruction!!\n");
3247 return -ELOOP;
3248 }
3249
3250 for (idx = meta->off; idx <= br_idx; idx++) {
3251 if (!nfp_is_br(nfp_prog->prog[idx]))
3252 continue;
3253 br_set_offset(&nfp_prog->prog[idx], jmp_dst->off);
3254 }
3255 }
3256
3257 return 0;
3258 }
3259
3260 static void nfp_intro(struct nfp_prog *nfp_prog)
3261 {
3262 wrp_immed(nfp_prog, plen_reg(nfp_prog), GENMASK(13, 0));
3263 emit_alu(nfp_prog, plen_reg(nfp_prog),
3264 plen_reg(nfp_prog), ALU_OP_AND, pv_len(nfp_prog));
3265 }
3266
3267 static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
3268 {
3269 /* TC direct-action mode:
3270 * 0,1 ok NOT SUPPORTED[1]
3271 * 2 drop 0x22 -> drop, count as stat1
3272 * 4,5 nuke 0x02 -> drop
3273 * 7 redir 0x44 -> redir, count as stat2
3274 * * unspec 0x11 -> pass, count as stat0
3275 *
3276 * [1] We can't support OK and RECLASSIFY because we can't tell TC
3277 * the exact decision made. We are forced to support UNSPEC
3278 * to handle aborts so that's the only one we handle for passing
3279 * packets up the stack.
3280 */
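	/* Illustrative walk-through of the lookup below: 0x41221211 and
	 * 0x41001211 act as per-nibble tables indexed by R0.  For R0 == 2
	 * (TC_ACT_SHOT) nibble 2 of each table is 0x2, combining into the
	 * result byte 0x22 (drop, stat1); for R0 == 7 (TC_ACT_REDIRECT)
	 * nibble 7 is 0x4 in both, giving 0x44 (redir, stat2).  Any R0
	 * above 7 branches to the abort target.
	 */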
3281 /* Target for aborts */
3282 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3283
3284 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3285
3286 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3287 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16);
3288
3289 /* Target for normal exits */
3290 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3291
3292 /* if R0 > 7 jump to abort */
3293 emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0));
3294 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3295 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3296
3297 wrp_immed(nfp_prog, reg_b(2), 0x41221211);
3298 wrp_immed(nfp_prog, reg_b(3), 0x41001211);
3299
3300 emit_shf(nfp_prog, reg_a(1),
3301 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2);
3302
3303 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3304 emit_shf(nfp_prog, reg_a(2),
3305 reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3306
3307 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3308 emit_shf(nfp_prog, reg_b(2),
3309 reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0);
3310
3311 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3312
3313 emit_shf(nfp_prog, reg_b(2),
3314 reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4);
3315 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3316 }
3317
3318 static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
3319 {
3320 /* XDP return codes:
3321 * 0 aborted 0x82 -> drop, count as stat3
3322 * 1 drop 0x22 -> drop, count as stat1
3323 * 2 pass 0x11 -> pass, count as stat0
3324 * 3 tx 0x44 -> redir, count as stat2
3325 * * unknown 0x82 -> drop, count as stat3
3326 */
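	/* Illustrative walk-through of the lookup below: 0x44112282 is a
	 * per-byte table indexed by R0.  XDP_DROP (R0 == 1) selects byte 1,
	 * i.e. 0x22 (drop, stat1); XDP_PASS (R0 == 2) selects byte 2, i.e.
	 * 0x11 (pass, stat0); XDP_TX (R0 == 3) selects byte 3, i.e. 0x44
	 * (redir, stat2).  Any R0 above 3 branches to the abort target,
	 * which uses 0x82.
	 */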
3327 /* Target for aborts */
3328 nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
3329
3330 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3331
3332 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3333 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
3334
3335 /* Target for normal exits */
3336 nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
3337
3338 /* if R0 > 3 jump to abort */
3339 emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
3340 emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
3341
3342 wrp_immed(nfp_prog, reg_b(2), 0x44112282);
3343
3344 emit_shf(nfp_prog, reg_a(1),
3345 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
3346
3347 emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
3348 emit_shf(nfp_prog, reg_b(2),
3349 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
3350
3351 emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO, 2, RELO_BR_NEXT_PKT);
3352
3353 wrp_mov(nfp_prog, reg_a(0), NFP_BPF_ABI_FLAGS);
3354 emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
3355 }
3356
3357 static void nfp_outro(struct nfp_prog *nfp_prog)
3358 {
3359 switch (nfp_prog->type) {
3360 case BPF_PROG_TYPE_SCHED_CLS:
3361 nfp_outro_tc_da(nfp_prog);
3362 break;
3363 case BPF_PROG_TYPE_XDP:
3364 nfp_outro_xdp(nfp_prog);
3365 break;
3366 default:
3367 WARN_ON(1);
3368 }
3369 }
3370
3371 static int nfp_translate(struct nfp_prog *nfp_prog)
3372 {
3373 struct nfp_insn_meta *meta;
3374 int err;
3375
3376 nfp_intro(nfp_prog);
3377 if (nfp_prog->error)
3378 return nfp_prog->error;
3379
3380 list_for_each_entry(meta, &nfp_prog->insns, l) {
3381 instr_cb_t cb = instr_cb[meta->insn.code];
3382
3383 meta->off = nfp_prog_current_offset(nfp_prog);
3384
3385 if (meta->skip) {
3386 nfp_prog->n_translated++;
3387 continue;
3388 }
3389
3390 if (nfp_meta_has_prev(nfp_prog, meta) &&
3391 nfp_meta_prev(meta)->double_cb)
3392 cb = nfp_meta_prev(meta)->double_cb;
3393 if (!cb)
3394 return -ENOENT;
3395 err = cb(nfp_prog, meta);
3396 if (err)
3397 return err;
3398 if (nfp_prog->error)
3399 return nfp_prog->error;
3400
3401 nfp_prog->n_translated++;
3402 }
3403
3404 nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1;
3405
3406 nfp_outro(nfp_prog);
3407 if (nfp_prog->error)
3408 return nfp_prog->error;
3409
3410 wrp_nops(nfp_prog, NFP_USTORE_PREFETCH_WINDOW);
3411 if (nfp_prog->error)
3412 return nfp_prog->error;
3413
3414 return nfp_fixup_branches(nfp_prog);
3415 }
3416
3417 /* --- Optimizations --- */
3418 static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog)
3419 {
3420 struct nfp_insn_meta *meta;
3421
3422 list_for_each_entry(meta, &nfp_prog->insns, l) {
3423 struct bpf_insn insn = meta->insn;
3424
3425 /* Programs converted from cBPF start with register xoring */
3426 if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) &&
3427 insn.src_reg == insn.dst_reg)
3428 continue;
3429
3430 /* Programs start with R6 = R1 but we ignore the skb pointer */
3431 if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) &&
3432 insn.src_reg == 1 && insn.dst_reg == 6)
3433 meta->skip = true;
3434
3435 /* Return as soon as something doesn't match */
3436 if (!meta->skip)
3437 return;
3438 }
3439 }
3440
3441 /* abs(insn.imm) will fit better into an unrestricted register immediate -
3442  * convert an add/sub of a negative number into a sub/add of a positive one.
3443 */
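/* For example, "r2 += -16" is rewritten below as "r2 -= 16".  For the
 * conditional jumps listed below only the immediate is negated and
 * jump_neg_op is set, so the immediate-compare emitter can account for
 * the sign change when building the comparison.
 */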
3444 static void nfp_bpf_opt_neg_add_sub(struct nfp_prog *nfp_prog)
3445 {
3446 struct nfp_insn_meta *meta;
3447
3448 list_for_each_entry(meta, &nfp_prog->insns, l) {
3449 struct bpf_insn insn = meta->insn;
3450
3451 if (meta->skip)
3452 continue;
3453
3454 if (BPF_CLASS(insn.code) != BPF_ALU &&
3455 BPF_CLASS(insn.code) != BPF_ALU64 &&
3456 BPF_CLASS(insn.code) != BPF_JMP)
3457 continue;
3458 if (BPF_SRC(insn.code) != BPF_K)
3459 continue;
3460 if (insn.imm >= 0)
3461 continue;
3462
3463 if (BPF_CLASS(insn.code) == BPF_JMP) {
3464 switch (BPF_OP(insn.code)) {
3465 case BPF_JGE:
3466 case BPF_JSGE:
3467 case BPF_JLT:
3468 case BPF_JSLT:
3469 meta->jump_neg_op = true;
3470 break;
3471 default:
3472 continue;
3473 }
3474 } else {
3475 if (BPF_OP(insn.code) == BPF_ADD)
3476 insn.code = BPF_CLASS(insn.code) | BPF_SUB;
3477 else if (BPF_OP(insn.code) == BPF_SUB)
3478 insn.code = BPF_CLASS(insn.code) | BPF_ADD;
3479 else
3480 continue;
3481
3482 meta->insn.code = insn.code | BPF_K;
3483 }
3484
3485 meta->insn.imm = -insn.imm;
3486 }
3487 }
3488
3489 /* Remove masking after a load since our load guarantees this is not needed */
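/* Illustrative pattern removed here (as generated from classic BPF,
 * register and offset are hypothetical):
 *
 *	r0 = *(u8 *)skb[off]	// BPF_LD | BPF_ABS | BPF_B
 *	r0 &= 0xff		// redundant, the load already zero-extends
 *
 * The AND is only skipped when nothing jumps to it.
 */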
3490 static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog)
3491 {
3492 struct nfp_insn_meta *meta1, *meta2;
3493 const s32 exp_mask[] = {
3494 [BPF_B] = 0x000000ffU,
3495 [BPF_H] = 0x0000ffffU,
3496 [BPF_W] = 0xffffffffU,
3497 };
3498
3499 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3500 struct bpf_insn insn, next;
3501
3502 insn = meta1->insn;
3503 next = meta2->insn;
3504
3505 if (BPF_CLASS(insn.code) != BPF_LD)
3506 continue;
3507 if (BPF_MODE(insn.code) != BPF_ABS &&
3508 BPF_MODE(insn.code) != BPF_IND)
3509 continue;
3510
3511 if (next.code != (BPF_ALU64 | BPF_AND | BPF_K))
3512 continue;
3513
3514 if (!exp_mask[BPF_SIZE(insn.code)])
3515 continue;
3516 if (exp_mask[BPF_SIZE(insn.code)] != next.imm)
3517 continue;
3518
3519 if (next.src_reg || next.dst_reg)
3520 continue;
3521
3522 if (meta2->flags & FLAG_INSN_IS_JUMP_DST)
3523 continue;
3524
3525 meta2->skip = true;
3526 }
3527 }
3528
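/* Skip a pair of 32-bit shifts of r0 (shl then shr, or shr then shl,
 * both by 32) that immediately follows a word-sized BPF_ABS/BPF_IND
 * load, as long as neither shift is a jump destination.  The pattern
 * comes from classic BPF conversion and is not needed with our data
 * loads.
 */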
3529 static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog)
3530 {
3531 struct nfp_insn_meta *meta1, *meta2, *meta3;
3532
3533 nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) {
3534 struct bpf_insn insn, next1, next2;
3535
3536 insn = meta1->insn;
3537 next1 = meta2->insn;
3538 next2 = meta3->insn;
3539
3540 if (BPF_CLASS(insn.code) != BPF_LD)
3541 continue;
3542 if (BPF_MODE(insn.code) != BPF_ABS &&
3543 BPF_MODE(insn.code) != BPF_IND)
3544 continue;
3545 if (BPF_SIZE(insn.code) != BPF_W)
3546 continue;
3547
3548 if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) &&
3549 next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) &&
3550 !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) &&
3551 next2.code == (BPF_LSH | BPF_K | BPF_ALU64)))
3552 continue;
3553
3554 if (next1.src_reg || next1.dst_reg ||
3555 next2.src_reg || next2.dst_reg)
3556 continue;
3557
3558 if (next1.imm != 0x20 || next2.imm != 0x20)
3559 continue;
3560
3561 if (meta2->flags & FLAG_INSN_IS_JUMP_DST ||
3562 meta3->flags & FLAG_INSN_IS_JUMP_DST)
3563 continue;
3564
3565 meta2->skip = true;
3566 meta3->skip = true;
3567 }
3568 }
3569
3570 /* A load/store pair that forms a memory copy should look like the following:
3571 *
3572 * ld_width R, [addr_src + offset_src]
3573 * st_width [addr_dest + offset_dest], R
3574 *
3575  * The destination register of the load and the source register of the store
3576  * must be the same, and the load and store must access memory at the same
3577  * width.  If either addr_src or addr_dest is the stack pointer, we don't do
3578  * the CPP optimization, as the stack is modelled by registers on the NFP.
3579 */
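/* Example of a single pair matched here (hypothetical registers):
 *
 *	r3 = *(u16 *)(r1 + 8)
 *	*(u16 *)(r2 + 8) = r3
 */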
3580 static bool
3581 curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta,
3582 struct nfp_insn_meta *st_meta)
3583 {
3584 struct bpf_insn *ld = &ld_meta->insn;
3585 struct bpf_insn *st = &st_meta->insn;
3586
3587 if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta))
3588 return false;
3589
3590 if (ld_meta->ptr.type != PTR_TO_PACKET &&
3591 ld_meta->ptr.type != PTR_TO_MAP_VALUE)
3592 return false;
3593
3594 if (st_meta->ptr.type != PTR_TO_PACKET)
3595 return false;
3596
3597 if (BPF_SIZE(ld->code) != BPF_SIZE(st->code))
3598 return false;
3599
3600 if (ld->dst_reg != st->src_reg)
3601 return false;
3602
3603 	/* There is a jump to the store insn in this pair. */
3604 if (st_meta->flags & FLAG_INSN_IS_JUMP_DST)
3605 return false;
3606
3607 return true;
3608 }
3609
3610 /* Currently, we only support chaining load/store pairs if:
3611 *
3612 * - Their address base registers are the same.
3613 * - Their address offsets are in the same order.
3614 * - They operate at the same memory width.
3615 * - There is no jump into the middle of them.
3616 */
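/* Example of a chain accepted here (register numbers are illustrative,
 * both offsets ascending by the access size):
 *
 *	r4 = *(u32 *)(r1 + 0)
 *	*(u32 *)(r2 + 0) = r4
 *	r4 = *(u32 *)(r1 + 4)
 *	*(u32 *)(r2 + 4) = r4
 */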
3617 static bool
3618 curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta,
3619 struct nfp_insn_meta *st_meta,
3620 struct bpf_insn *prev_ld,
3621 struct bpf_insn *prev_st)
3622 {
3623 u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst;
3624 struct bpf_insn *ld = &ld_meta->insn;
3625 struct bpf_insn *st = &st_meta->insn;
3626 s16 prev_ld_off, prev_st_off;
3627
3628 /* This pair is the start pair. */
3629 if (!prev_ld)
3630 return true;
3631
3632 prev_size = BPF_LDST_BYTES(prev_ld);
3633 curr_size = BPF_LDST_BYTES(ld);
3634 prev_ld_base = prev_ld->src_reg;
3635 prev_st_base = prev_st->dst_reg;
3636 prev_ld_dst = prev_ld->dst_reg;
3637 prev_ld_off = prev_ld->off;
3638 prev_st_off = prev_st->off;
3639
3640 if (ld->dst_reg != prev_ld_dst)
3641 return false;
3642
3643 if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base)
3644 return false;
3645
3646 if (curr_size != prev_size)
3647 return false;
3648
3649 	/* There is a jump to the head of this pair. */
3650 if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST)
3651 return false;
3652
3653 /* Both in ascending order. */
3654 if (prev_ld_off + prev_size == ld->off &&
3655 prev_st_off + prev_size == st->off)
3656 return true;
3657
3658 /* Both in descending order. */
3659 if (ld->off + curr_size == prev_ld_off &&
3660 st->off + curr_size == prev_st_off)
3661 return true;
3662
3663 return false;
3664 }
3665
3666 /* Return true if a cross memory access happens, i.e. the store area
3667  * overlaps the load area such that a later load might read a value written
3668  * by an earlier store; in that case we can't treat the sequence as a
3669  * memory copy.
3670 */
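/* Example: with a head load from [pkt + 0] paired with a store to
 * [pkt + 2], a later load from [pkt + 4] in the same chain could read
 * bytes the earlier store may have written, so the ranges cross and the
 * chain has to be ended.
 */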
3671 static bool
3672 cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta,
3673 struct nfp_insn_meta *head_st_meta)
3674 {
3675 s16 head_ld_off, head_st_off, ld_off;
3676
3677 	/* Different pointer types do not overlap. */
3678 if (head_ld_meta->ptr.type != head_st_meta->ptr.type)
3679 return false;
3680
3681 /* load and store are both PTR_TO_PACKET, check ID info. */
3682 if (head_ld_meta->ptr.id != head_st_meta->ptr.id)
3683 return true;
3684
3685 /* Canonicalize the offsets. Turn all of them against the original
3686 * base register.
3687 */
3688 head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off;
3689 head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off;
3690 ld_off = ld->off + head_ld_meta->ptr.off;
3691
3692 /* Ascending order cross. */
3693 if (ld_off > head_ld_off &&
3694 head_ld_off < head_st_off && ld_off >= head_st_off)
3695 return true;
3696
3697 /* Descending order cross. */
3698 if (ld_off < head_ld_off &&
3699 head_ld_off > head_st_off && ld_off <= head_st_off)
3700 return true;
3701
3702 return false;
3703 }
3704
3705 /* This pass tries to identify the following instruction sequences.
3706 *
3707 * load R, [regA + offA]
3708 * store [regB + offB], R
3709 * load R, [regA + offA + const_imm_A]
3710 * store [regB + offB + const_imm_A], R
3711 * load R, [regA + offA + 2 * const_imm_A]
3712 * store [regB + offB + 2 * const_imm_A], R
3713 * ...
3714 *
3715  * The above sequence is typically generated by the compiler when lowering
3716  * memcpy; the NFP prefers using CPP instructions to accelerate it.
3717 */
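/* For example, four chained 8-byte copies collapse into the head load
 * carrying paired_st and ldst_gather_len == 32 while the other seven
 * instructions are marked for skipping; a negative ldst_gather_len
 * marks a descending chain whose head offsets were rewound to the
 * lowest address.
 */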
3718 static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog)
3719 {
3720 struct nfp_insn_meta *head_ld_meta = NULL;
3721 struct nfp_insn_meta *head_st_meta = NULL;
3722 struct nfp_insn_meta *meta1, *meta2;
3723 struct bpf_insn *prev_ld = NULL;
3724 struct bpf_insn *prev_st = NULL;
3725 u8 count = 0;
3726
3727 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3728 struct bpf_insn *ld = &meta1->insn;
3729 struct bpf_insn *st = &meta2->insn;
3730
3731 		/* Reset the record status if any of the following is true:
3732 		 * - The current insn pair is not a load/store pair.
3733 		 * - The load/store pair doesn't chain with the previous one.
3734 		 * - The chained load/store pair crosses the previous pair.
3735 		 * - The chained load/store pairs have a total memory copy
3736 		 *   size beyond 128 bytes, which is the maximum length a
3737 		 *   single NFP CPP command can transfer.
3738 */
3739 if (!curr_pair_is_memcpy(meta1, meta2) ||
3740 !curr_pair_chain_with_previous(meta1, meta2, prev_ld,
3741 prev_st) ||
3742 (head_ld_meta && (cross_mem_access(ld, head_ld_meta,
3743 head_st_meta) ||
3744 head_ld_meta->ldst_gather_len >= 128))) {
3745 if (!count)
3746 continue;
3747
3748 if (count > 1) {
3749 s16 prev_ld_off = prev_ld->off;
3750 s16 prev_st_off = prev_st->off;
3751 s16 head_ld_off = head_ld_meta->insn.off;
3752
3753 if (prev_ld_off < head_ld_off) {
3754 head_ld_meta->insn.off = prev_ld_off;
3755 head_st_meta->insn.off = prev_st_off;
3756 head_ld_meta->ldst_gather_len =
3757 -head_ld_meta->ldst_gather_len;
3758 }
3759
3760 head_ld_meta->paired_st = &head_st_meta->insn;
3761 head_st_meta->skip = true;
3762 } else {
3763 head_ld_meta->ldst_gather_len = 0;
3764 }
3765
3766 			/* If the chain is ended by a load/store pair then this
3767 			 * pair could serve as the head of the next chain.
3768 */
3769 if (curr_pair_is_memcpy(meta1, meta2)) {
3770 head_ld_meta = meta1;
3771 head_st_meta = meta2;
3772 head_ld_meta->ldst_gather_len =
3773 BPF_LDST_BYTES(ld);
3774 meta1 = nfp_meta_next(meta1);
3775 meta2 = nfp_meta_next(meta2);
3776 prev_ld = ld;
3777 prev_st = st;
3778 count = 1;
3779 } else {
3780 head_ld_meta = NULL;
3781 head_st_meta = NULL;
3782 prev_ld = NULL;
3783 prev_st = NULL;
3784 count = 0;
3785 }
3786
3787 continue;
3788 }
3789
3790 if (!head_ld_meta) {
3791 head_ld_meta = meta1;
3792 head_st_meta = meta2;
3793 } else {
3794 meta1->skip = true;
3795 meta2->skip = true;
3796 }
3797
3798 head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld);
3799 meta1 = nfp_meta_next(meta1);
3800 meta2 = nfp_meta_next(meta2);
3801 prev_ld = ld;
3802 prev_st = st;
3803 count++;
3804 }
3805 }
3806
3807 static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog)
3808 {
3809 struct nfp_insn_meta *meta, *range_node = NULL;
3810 s16 range_start = 0, range_end = 0;
3811 bool cache_avail = false;
3812 struct bpf_insn *insn;
3813 s32 range_ptr_off = 0;
3814 u32 range_ptr_id = 0;
3815
3816 list_for_each_entry(meta, &nfp_prog->insns, l) {
3817 if (meta->flags & FLAG_INSN_IS_JUMP_DST)
3818 cache_avail = false;
3819
3820 if (meta->skip)
3821 continue;
3822
3823 insn = &meta->insn;
3824
3825 if (is_mbpf_store_pkt(meta) ||
3826 insn->code == (BPF_JMP | BPF_CALL) ||
3827 is_mbpf_classic_store_pkt(meta) ||
3828 is_mbpf_classic_load(meta)) {
3829 cache_avail = false;
3830 continue;
3831 }
3832
3833 if (!is_mbpf_load(meta))
3834 continue;
3835
3836 if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) {
3837 cache_avail = false;
3838 continue;
3839 }
3840
3841 if (!cache_avail) {
3842 cache_avail = true;
3843 if (range_node)
3844 goto end_current_then_start_new;
3845 goto start_new;
3846 }
3847
3848 /* Check ID to make sure two reads share the same
3849 * variable offset against PTR_TO_PACKET, and check OFF
3850 * to make sure they also share the same constant
3851 * offset.
3852 *
3853 		 * The OFFs don't strictly need to be the same, because they
3854 		 * are constant offsets against PTR_TO_PACKET, so for
3855 		 * different OFFs we could canonicalize them to offsets
3856 		 * against the original packet pointer. We don't support
3857 		 * this.
3858 */
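		/* Example: a 4 byte load at offset 0 followed by another
		 * at offset 60 through the same pointer can share one
		 * cached read (60 + 4 - 0 <= 64), while a further load at
		 * offset 62 would push the range past 64 bytes and start
		 * a new cache region below instead.
		 */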
3859 if (meta->ptr.id == range_ptr_id &&
3860 meta->ptr.off == range_ptr_off) {
3861 s16 new_start = range_start;
3862 s16 end, off = insn->off;
3863 s16 new_end = range_end;
3864 bool changed = false;
3865
3866 if (off < range_start) {
3867 new_start = off;
3868 changed = true;
3869 }
3870
3871 end = off + BPF_LDST_BYTES(insn);
3872 if (end > range_end) {
3873 new_end = end;
3874 changed = true;
3875 }
3876
3877 if (!changed)
3878 continue;
3879
3880 if (new_end - new_start <= 64) {
3881 /* Install new range. */
3882 range_start = new_start;
3883 range_end = new_end;
3884 continue;
3885 }
3886 }
3887
3888 end_current_then_start_new:
3889 range_node->pkt_cache.range_start = range_start;
3890 range_node->pkt_cache.range_end = range_end;
3891 start_new:
3892 range_node = meta;
3893 range_node->pkt_cache.do_init = true;
3894 range_ptr_id = range_node->ptr.id;
3895 range_ptr_off = range_node->ptr.off;
3896 range_start = insn->off;
3897 range_end = insn->off + BPF_LDST_BYTES(insn);
3898 }
3899
3900 if (range_node) {
3901 range_node->pkt_cache.range_start = range_start;
3902 range_node->pkt_cache.range_end = range_end;
3903 }
3904
3905 list_for_each_entry(meta, &nfp_prog->insns, l) {
3906 if (meta->skip)
3907 continue;
3908
3909 if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) {
3910 if (meta->pkt_cache.do_init) {
3911 range_start = meta->pkt_cache.range_start;
3912 range_end = meta->pkt_cache.range_end;
3913 } else {
3914 meta->pkt_cache.range_start = range_start;
3915 meta->pkt_cache.range_end = range_end;
3916 }
3917 }
3918 }
3919 }
3920
3921 static int nfp_bpf_optimize(struct nfp_prog *nfp_prog)
3922 {
3923 nfp_bpf_opt_reg_init(nfp_prog);
3924
3925 nfp_bpf_opt_neg_add_sub(nfp_prog);
3926 nfp_bpf_opt_ld_mask(nfp_prog);
3927 nfp_bpf_opt_ld_shift(nfp_prog);
3928 nfp_bpf_opt_ldst_gather(nfp_prog);
3929 nfp_bpf_opt_pkt_cache(nfp_prog);
3930
3931 return 0;
3932 }
3933
3934 static int nfp_bpf_replace_map_ptrs(struct nfp_prog *nfp_prog)
3935 {
3936 struct nfp_insn_meta *meta1, *meta2;
3937 struct nfp_bpf_map *nfp_map;
3938 struct bpf_map *map;
3939 u32 id;
3940
3941 nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) {
3942 if (meta1->skip || meta2->skip)
3943 continue;
3944
3945 if (meta1->insn.code != (BPF_LD | BPF_IMM | BPF_DW) ||
3946 meta1->insn.src_reg != BPF_PSEUDO_MAP_FD)
3947 continue;
3948
3949 map = (void *)(unsigned long)((u32)meta1->insn.imm |
3950 (u64)meta2->insn.imm << 32);
3951 if (bpf_map_offload_neutral(map)) {
3952 id = map->id;
3953 } else {
3954 nfp_map = map_to_offmap(map)->dev_priv;
3955 id = nfp_map->tid;
3956 }
3957
3958 meta1->insn.imm = id;
3959 meta2->insn.imm = 0;
3960 }
3961
3962 return 0;
3963 }
3964
3965 static int nfp_bpf_ustore_calc(u64 *prog, unsigned int len)
3966 {
3967 __le64 *ustore = (__force __le64 *)prog;
3968 int i;
3969
3970 for (i = 0; i < len; i++) {
3971 int err;
3972
3973 err = nfp_ustore_check_valid_no_ecc(prog[i]);
3974 if (err)
3975 return err;
3976
3977 ustore[i] = cpu_to_le64(nfp_ustore_calc_ecc_insn(prog[i]));
3978 }
3979
3980 return 0;
3981 }
3982
3983 static void nfp_bpf_prog_trim(struct nfp_prog *nfp_prog)
3984 {
3985 void *prog;
3986
3987 prog = kvmalloc_array(nfp_prog->prog_len, sizeof(u64), GFP_KERNEL);
3988 if (!prog)
3989 return;
3990
3991 nfp_prog->__prog_alloc_len = nfp_prog->prog_len * sizeof(u64);
3992 memcpy(prog, nfp_prog->prog, nfp_prog->__prog_alloc_len);
3993 kvfree(nfp_prog->prog);
3994 nfp_prog->prog = prog;
3995 }
3996
3997 int nfp_bpf_jit(struct nfp_prog *nfp_prog)
3998 {
3999 int ret;
4000
4001 ret = nfp_bpf_replace_map_ptrs(nfp_prog);
4002 if (ret)
4003 return ret;
4004
4005 ret = nfp_bpf_optimize(nfp_prog);
4006 if (ret)
4007 return ret;
4008
4009 ret = nfp_translate(nfp_prog);
4010 if (ret) {
4011 pr_err("Translation failed with error %d (translated: %u)\n",
4012 ret, nfp_prog->n_translated);
4013 return -EINVAL;
4014 }
4015
4016 nfp_bpf_prog_trim(nfp_prog);
4017
4018 return ret;
4019 }
4020
4021 void nfp_bpf_jit_prepare(struct nfp_prog *nfp_prog, unsigned int cnt)
4022 {
4023 struct nfp_insn_meta *meta;
4024
4025 /* Another pass to record jump information. */
4026 list_for_each_entry(meta, &nfp_prog->insns, l) {
4027 u64 code = meta->insn.code;
4028
4029 if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT &&
4030 BPF_OP(code) != BPF_CALL) {
4031 struct nfp_insn_meta *dst_meta;
4032 unsigned short dst_indx;
4033
4034 dst_indx = meta->n + 1 + meta->insn.off;
4035 dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx,
4036 cnt);
4037
4038 meta->jmp_dst = dst_meta;
4039 dst_meta->flags |= FLAG_INSN_IS_JUMP_DST;
4040 }
4041 }
4042 }
4043
4044 bool nfp_bpf_supported_opcode(u8 code)
4045 {
4046 return !!instr_cb[code];
4047 }
4048
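/* Make a per-vNIC copy of the program with relocations resolved: plain
 * relative branches are rebased by the vNIC's code start offset, exit
 * and abort branches are pointed at the epilogue targets, next-packet
 * branches at the vNIC's done target, helper calls at the
 * firmware-provided helper addresses, and relocated immediates are
 * adjusted by the start offset.
 */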
4049 void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv)
4050 {
4051 unsigned int i;
4052 u64 *prog;
4053 int err;
4054
4055 prog = kmemdup(nfp_prog->prog, nfp_prog->prog_len * sizeof(u64),
4056 GFP_KERNEL);
4057 if (!prog)
4058 return ERR_PTR(-ENOMEM);
4059
4060 for (i = 0; i < nfp_prog->prog_len; i++) {
4061 enum nfp_relo_type special;
4062 u32 val;
4063
4064 special = FIELD_GET(OP_RELO_TYPE, prog[i]);
4065 switch (special) {
4066 case RELO_NONE:
4067 continue;
4068 case RELO_BR_REL:
4069 br_add_offset(&prog[i], bv->start_off);
4070 break;
4071 case RELO_BR_GO_OUT:
4072 br_set_offset(&prog[i],
4073 nfp_prog->tgt_out + bv->start_off);
4074 break;
4075 case RELO_BR_GO_ABORT:
4076 br_set_offset(&prog[i],
4077 nfp_prog->tgt_abort + bv->start_off);
4078 break;
4079 case RELO_BR_NEXT_PKT:
4080 br_set_offset(&prog[i], bv->tgt_done);
4081 break;
4082 case RELO_BR_HELPER:
4083 val = br_get_offset(prog[i]);
4084 val -= BR_OFF_RELO;
4085 switch (val) {
4086 case BPF_FUNC_map_lookup_elem:
4087 val = nfp_prog->bpf->helpers.map_lookup;
4088 break;
4089 case BPF_FUNC_map_update_elem:
4090 val = nfp_prog->bpf->helpers.map_update;
4091 break;
4092 case BPF_FUNC_map_delete_elem:
4093 val = nfp_prog->bpf->helpers.map_delete;
4094 break;
4095 case BPF_FUNC_perf_event_output:
4096 val = nfp_prog->bpf->helpers.perf_event_output;
4097 break;
4098 default:
4099 pr_err("relocation of unknown helper %d\n",
4100 val);
4101 err = -EINVAL;
4102 goto err_free_prog;
4103 }
4104 br_set_offset(&prog[i], val);
4105 break;
4106 case RELO_IMMED_REL:
4107 immed_add_value(&prog[i], bv->start_off);
4108 break;
4109 }
4110
4111 prog[i] &= ~OP_RELO_TYPE;
4112 }
4113
4114 err = nfp_bpf_ustore_calc(prog, nfp_prog->prog_len);
4115 if (err)
4116 goto err_free_prog;
4117
4118 return prog;
4119
4120 err_free_prog:
4121 kfree(prog);
4122 return ERR_PTR(err);
4123 }
4124