/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"
#include "util/bitscan.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */

#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_RADDR_C_SHIFT               18
#define V3D_QPU_RADDR_C_MASK                QPU_MASK(23, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_D_SHIFT               12
#define V3D_QPU_RADDR_D_MASK                QPU_MASK(17, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

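/* Editor's sketch (not part of the original file): a minimal round-trip of
 * the field helpers above. QPU_UPDATE_FIELD clears the field's bits and
 * installs a new value; QPU_SET_FIELD asserts the value fits the field.
 * The helper name is hypothetical.
 */
static inline UNUSED uint64_t
example_field_roundtrip(uint64_t inst, uint32_t packed_sig)
{
        /* packed_sig must fit the 5-bit SIG field or the assert in
         * QPU_SET_FIELD fires.
         */
        inst = QPU_UPDATE_FIELD(inst, packed_sig, V3D_QPU_SIG);
        assert(QPU_GET_FIELD(inst, V3D_QPU_SIG) == packed_sig);
        return inst;
}
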
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
#define SMIMM_A .small_imm_a = true
#define SMIMM_B .small_imm_b = true
#define SMIMM_C .small_imm_c = true
#define SMIMM_D .small_imm_d = true

static const struct v3d_qpu_sig v3d42_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_B,  LDVARY         },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA },
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM_B,          LDTMU, },
};

static const struct v3d_qpu_sig v3d71_sig_map[] = {
        /*      MISC       phys    RF0 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_A,                 },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        /* 23 reserved */
        [24] = {                   LDUNIFA },
        [25] = { LDUNIFARF                },
        [26] = {           LDTMU,         WRTMUC },
        [27] = { THRSW,    LDTMU,         WRTMUC },
        /* 28-29 reserved */
        [30] = { SMIMM_C,                 },
        [31] = { SMIMM_D,                 },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v3d42_sig_map))
                return false;

        if (devinfo->ver >= 71)
                *sig = v3d71_sig_map[packed_sig];
        else
                *sig = v3d42_sig_map[packed_sig];

        /* Signals with zeroed unpacked contents after element 0 are reserved. */
        return (packed_sig == 0 ||
                memcmp(sig, &v3d42_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        static const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 71)
                map = v3d71_sig_map;
        else
                map = v3d42_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v3d42_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}
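
/* Editor's sketch (not part of the original file): the pack/unpack pair
 * above round-trips. For example, index 3 unpacks to "thrsw + ldunif" in
 * both the v4.2 and v7.1 maps, and packs back to 3. The helper name is
 * hypothetical.
 */
static inline UNUSED bool
example_sig_roundtrip(const struct v3d_device_info *devinfo)
{
        struct v3d_qpu_sig sig;
        uint32_t packed;

        if (!v3d_qpu_sig_unpack(devinfo, 3, &sig))
                return false;

        /* sig.thrsw and sig.ldunif are now set. */
        return v3d_qpu_sig_pack(devinfo, &sig, &packed) && packed == 3;
}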

static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};

bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                         uint32_t packed_small_immediate,
                         uint32_t *small_immediate)
{
        if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
                return false;

        *small_immediate = small_immediates[packed_small_immediate];
        return true;
}

bool
v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                       uint32_t value,
                       uint32_t *packed_small_immediate)
{
        STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);

        for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
                if (small_immediates[i] == value) {
                        *packed_small_immediate = i;
                        return true;
                }
        }

        return false;
}
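
/* Editor's sketch (not part of the original file): indices 0-31 of the
 * table encode the integers 0..15 and -16..-1, and indices 32-47 encode
 * the float bit patterns for 2^-8 through 2^7, so 1.0f (2.0^0) packs to
 * index 40. The helper name is hypothetical.
 */
static inline UNUSED bool
example_small_imm(const struct v3d_device_info *devinfo)
{
        uint32_t packed;

        /* 0x3f800000 is the IEEE-754 bit pattern of 1.0f. */
        return v3d_qpu_small_imm_pack(devinfo, 0x3f800000, &packed) &&
               packed == 40;
}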

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
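
/* Editor's sketch (not part of the original file): packing a condition
 * that predicates the add ALU on flag A. Per the table above, the AC-only
 * case tags the encoding with (1 << 5) and stores (ac - IFA) in bits 3:2,
 * so IFA alone packs to 0x20. The helper name is hypothetical.
 */
static inline UNUSED bool
example_flags_pack(const struct v3d_device_info *devinfo)
{
        struct v3d_qpu_flags cond = {
                .ac = V3D_QPU_COND_IFA,
                .mc = V3D_QPU_COND_NONE,
                .apf = V3D_QPU_PF_NONE,
                .mpf = V3D_QPU_PF_NONE,
                .auf = V3D_QPU_UF_NONE,
                .muf = V3D_QPU_UF_NONE,
        };
        uint32_t packed;

        return v3d_qpu_flags_pack(devinfo, &cond, &packed) &&
               packed == (1 << 5); /* IFA - IFA == 0, so only the tag bit */
}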

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field and, in the case of 0- or
 * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
 * well.
 */
#define OP_MASK(val) BITFIELD64_BIT(val)
#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, (top) - (bot) + 1)
#define ANYMUX OP_RANGE(0, 7)
#define ANYOPMASK OP_RANGE(0, 63)
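
/* Editor's sketch (not part of the original file): OP_MASK/OP_RANGE build
 * 64-bit sets of accepted mux/raddr encodings; ANYMUX accepts all eight
 * mux values and ANYOPMASK all 64 raddr values. The helper name is
 * hypothetical.
 */
static inline UNUSED void
example_op_masks(void)
{
        assert(ANYMUX == 0xff);
        assert(OP_MASK(5) == (uint64_t)1 << 5);
        assert(OP_RANGE(4, 6) == 0x70); /* bits 4..6 */
}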

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;

        union {
                struct {
                        uint8_t b_mask;
                        uint8_t a_mask;
                } mux;
                uint64_t raddr_mask;
        };

        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   in the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
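
/* Editor's note (sketch, not in the original file): for example, an entry
 * with first_ver = 33, last_ver = 42 matches V3D 3.3 through 4.2
 * inclusive, and first_ver = 71, last_ver = 0 matches V3D 7.1 and
 * anything newer; see opcode_invalid_in_version() below.
 */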

static const struct opcode_desc v3d42_add_ops[] = {
        /* FADD becomes FADDNF depending on the order of mux_a/mux_b. */
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
        /* FMIN becomes FMAX depending on the order of mux_a/mux_b. */
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
        { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
        { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },

        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },

        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc v3d42_mul_ops[] = {
        { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
        { 15, 15, .mux.b_mask = OP_RANGE(0, 3), .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
        { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
        { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },

        { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
};

/* Note that it would have been possible to define all the add/mul opcodes
 * in a single table, using first_ver/last_ver. But since v3d71 brought a
 * lot of changes, separate tables are tidier. Also, we currently do a
 * linear search over these tables, so splitting them keeps each table
 * smaller.
 *
 * Just in case we ever merge the tables, we define first_ver as 71 for
 * those opcodes that changed in v3d71.
 */
static const struct opcode_desc v3d71_add_ops[] = {
        /* FADD becomes FADDNF depending on the order of raddr_a/raddr_b. */
        { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
        { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
        { 53,  55,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 56,  56,  .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
        { 57,  59,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 60,  60,  .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
        { 61,  63,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
        { 64,  111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
        { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
        { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
        { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
        { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
        { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
        { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
        { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
        { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
        /* FMIN becomes FMAX depending on the order of raddr_a/raddr_b. */
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
        { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
        { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },

        { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
        { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
        { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
        { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
        { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },

        { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
        { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
        { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
        { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
        { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
        { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
        { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
        { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },

        { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
        { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
        { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
        { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
        { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
        { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
        { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
        { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
        { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
        { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
        { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
        { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },

        { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
        { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },

        { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },

        { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
        { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
        { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
        { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
        { 188, 188, .raddr_mask = OP_MASK(38), V3D_QPU_A_BALLOT, 71 },
        { 188, 188, .raddr_mask = OP_MASK(39), V3D_QPU_A_BCASTF, 71 },
        { 188, 188, .raddr_mask = OP_MASK(40), V3D_QPU_A_ALLEQ, 71 },
        { 188, 188, .raddr_mask = OP_MASK(41), V3D_QPU_A_ALLFEQ, 71 },

        { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },

        /* The stvpms are distinguished by the waddr field. */
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71 },
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71 },
        { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71 },

        { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FROUND, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },

        { 245, 245, .raddr_mask = OP_MASK(3),  V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(7),  V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
        { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },

        { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },

        { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
        { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },

        { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
        { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },

        { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
        { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },

        { 246, 246, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },

        { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
        { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },

        { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
        { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },

        { 249, 249, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
        { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },

        { 249, 249, .raddr_mask = OP_MASK(3),  V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(7),  V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
        { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },

        { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
        { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },

        { 252, 252, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROTQ, 71 },
        { 253, 253, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROT, 71 },
        { 254, 254, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHUFFLE, 71 },
};

static const struct opcode_desc v3d71_mul_ops[] = {
        /* For V3D 7.1, the mux masks are ignored; only raddr_mask is used. */
        { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
        { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
        { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
        { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
        { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
        { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },

        { 14, 14, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_M_FMOV, 71 },
        { 14, 14, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_M_FMOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(3),  V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(7),  V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
        { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },

        { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
        { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
        { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
        { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },

        { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },

        { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
};

/* Returns true if op_desc should be filtered out based on devinfo->ver
 * against op_desc->first_ver and op_desc->last_ver. See the notes about
 * first_ver/last_ver in the struct opcode_desc comments.
 */
static bool
opcode_invalid_in_version(const struct v3d_device_info *devinfo,
                          const uint8_t first_ver,
                          const uint8_t last_ver)
{
        return (first_ver != 0 && devinfo->ver < first_ver) ||
                (last_ver != 0  && devinfo->ver > last_ver);
}

/* Note that we pass mux_a, mux_b and raddr as parameters even though,
 * depending on devinfo->ver, some of them are ignored. We do it this way
 * to avoid having two nearly identical lookup_opcode methods.
 */
static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
                          const struct opcode_desc *opcodes,
                          size_t num_opcodes, uint32_t opcode,
                          uint32_t mux_a, uint32_t mux_b,
                          uint32_t raddr)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
                        continue;

                if (devinfo->ver < 71) {
                        if (!(op_desc->mux.b_mask & (1 << mux_b)))
                                continue;

                        if (!(op_desc->mux.a_mask & (1 << mux_a)))
                                continue;
                } else {
                        if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr)))
                                continue;
                }

                return op_desc;
        }

        return NULL;
}
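
/* Editor's sketch (not part of the original file): how the unpack paths
 * below drive this lookup, decoding the add-op fields of a packed
 * instruction word for the appropriate hardware generation. The helper
 * name is hypothetical.
 */
static inline UNUSED const struct opcode_desc *
example_lookup_add_op(const struct v3d_device_info *devinfo, uint64_t inst)
{
        uint32_t op = QPU_GET_FIELD(inst, V3D_QPU_OP_ADD);

        if (devinfo->ver < 71) {
                /* Pre-7.1: the mux fields disambiguate 0/1-arg opcodes. */
                return lookup_opcode_from_packed(devinfo, v3d42_add_ops,
                                                 ARRAY_SIZE(v3d42_add_ops), op,
                                                 QPU_GET_FIELD(inst, V3D_QPU_ADD_A),
                                                 QPU_GET_FIELD(inst, V3D_QPU_ADD_B),
                                                 0);
        } else {
                /* 7.1+: raddr_b disambiguates instead. */
                return lookup_opcode_from_packed(devinfo, v3d71_add_ops,
                                                 ARRAY_SIZE(v3d71_add_ops), op,
                                                 0, 0,
                                                 QPU_GET_FIELD(inst, V3D_QPU_RADDR_B));
        }
}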

static bool
v3d_qpu_float32_unpack_unpack(const struct v3d_device_info *devinfo,
                              uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        case 4:
                *unpacked = V3D71_QPU_UNPACK_SAT;
                return devinfo->ver >= 71;
        case 5:
                *unpacked = V3D71_QPU_UNPACK_NSAT;
                return devinfo->ver >= 71;
        case 6:
                *unpacked = V3D71_QPU_UNPACK_MAX0;
                return devinfo->ver >= 71;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(const struct v3d_device_info *devinfo,
                            enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        case V3D71_QPU_UNPACK_SAT:
                *packed = 4;
                return devinfo->ver >= 71;
        case V3D71_QPU_UNPACK_NSAT:
                *packed = 5;
                return devinfo->ver >= 71;
        case V3D71_QPU_UNPACK_MAX0:
                *packed = 6;
                return devinfo->ver >= 71;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_unpack(uint32_t packed,
                            enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_UL;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_UH;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_IL;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_IH;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                          uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_UL:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_UH:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_IL:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_IH:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags.
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, v3d42_add_ops,
                                          ARRAY_SIZE(v3d42_add_ops),
                                          map_op, mux_a, mux_b, 0);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
         * operands.
         */
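        /* Editor's note (sketch, not in the original file): op bits [3:2]
         * and [1:0] carry the unpack codes of operands A and B, and each
         * combines with its mux value into a key; if A's key is the larger,
         * the encoder emitted the NF/MAX variant of the opcode.
         */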
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.add.b.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a.mux = mux_a;
        instr->alu.add.b.mux = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
1173 
1174 static bool
v3d71_qpu_add_unpack(const struct v3d_device_info * devinfo,uint64_t packed_inst,struct v3d_qpu_instr * instr)1175 v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1176                      struct v3d_qpu_instr *instr)
1177 {
1178         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
1179         uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
1180         uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
1181         uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1182         uint32_t map_op = op;
1183 
1184         const struct opcode_desc *desc =
1185                 lookup_opcode_from_packed(devinfo,
1186                                           v3d71_add_ops,
1187                                           ARRAY_SIZE(v3d71_add_ops),
1188                                           map_op, 0, 0,
1189                                           raddr_b);
1190         if (!desc)
1191                 return false;
1192 
1193         instr->alu.add.op = desc->op;
1194 
1195         /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
1196          * operands.
1197          */
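        /* The composite keys below mirror the ones built on the pack side:
         * the small-immediate flag is the most significant component, then
         * the operand's two unpack bits from the opcode, then the raddr
         * itself.  Whichever operand compares greater decides which op of
         * the shared encoding was meant.
         */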
1198         if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
1199             instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
1200                 if (instr->alu.add.op == V3D_QPU_A_FMIN)
1201                         instr->alu.add.op = V3D_QPU_A_FMAX;
1202                 if (instr->alu.add.op == V3D_QPU_A_FADD)
1203                         instr->alu.add.op = V3D_QPU_A_FADDNF;
1204         }
1205 
1206         /* Some QPU ops require a bit more than just basic opcode and
1207          * operand field comparisons to distinguish them.
1208          */
1209         switch (instr->alu.add.op) {
1210         case V3D_QPU_A_STVPMV:
1211         case V3D_QPU_A_STVPMD:
1212         case V3D_QPU_A_STVPMP:
1213                 switch (waddr) {
1214                 case 0:
1215                         instr->alu.add.op = V3D_QPU_A_STVPMV;
1216                         break;
1217                 case 1:
1218                         instr->alu.add.op = V3D_QPU_A_STVPMD;
1219                         break;
1220                 case 2:
1221                         instr->alu.add.op = V3D_QPU_A_STVPMP;
1222                         break;
1223                 default:
1224                         return false;
1225                 }
1226                 break;
1227         default:
1228                 break;
1229         }
1230 
1231         switch (instr->alu.add.op) {
1232         case V3D_QPU_A_FADD:
1233         case V3D_QPU_A_FADDNF:
1234         case V3D_QPU_A_FSUB:
1235         case V3D_QPU_A_FMIN:
1236         case V3D_QPU_A_FMAX:
1237         case V3D_QPU_A_FCMP:
1238         case V3D_QPU_A_VFPACK:
1239                 if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
1240                     instr->alu.add.op != V3D_QPU_A_FCMP) {
1241                         instr->alu.add.output_pack = (op >> 4) & 0x3;
1242                 } else {
1243                         instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1244                 }
1245 
1246                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
1247                                                    &instr->alu.add.a.unpack)) {
1248                         return false;
1249                 }
1250 
1251                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
1252                                                    &instr->alu.add.b.unpack)) {
1253                         return false;
1254                 }
1255                 break;
1256 
1257         case V3D_QPU_A_FFLOOR:
1258         case V3D_QPU_A_FROUND:
1259         case V3D_QPU_A_FTRUNC:
1260         case V3D_QPU_A_FCEIL:
1261         case V3D_QPU_A_FDX:
1262         case V3D_QPU_A_FDY:
1263                 instr->alu.add.output_pack = raddr_b & 0x3;
1264 
1265                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
1266                                                    &instr->alu.add.a.unpack)) {
1267                         return false;
1268                 }
1269                 break;
1270 
1271         case V3D_QPU_A_FTOIN:
1272         case V3D_QPU_A_FTOIZ:
1273         case V3D_QPU_A_FTOUZ:
1274         case V3D_QPU_A_FTOC:
1275                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1276 
1277                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_b >> 2) & 0x3,
1278                                                    &instr->alu.add.a.unpack)) {
1279                         return false;
1280                 }
1281                 break;
1282 
1283         case V3D_QPU_A_VFMIN:
1284         case V3D_QPU_A_VFMAX:
1285                 unreachable("pending v3d71 update");
1286                 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
1287                                                    &instr->alu.add.a.unpack)) {
1288                         return false;
1289                 }
1290 
1291                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1292                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1293                 break;
1294 
1295         case V3D_QPU_A_MOV:
1296                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1297 
1298                 if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
1299                                                  &instr->alu.add.a.unpack)) {
1300                         return false;
1301                 }
1302                 break;
1303 
1304         case V3D_QPU_A_FMOV:
1305                 instr->alu.add.output_pack = raddr_b & 0x3;
1306 
1307                 /* Mul alu FMOV has one additional variant */
1308                 int32_t unpack = (raddr_b >> 2) & 0x7;
1309                 if (unpack == 7)
1310                         return false;
1311 
1312                 if (!v3d_qpu_float32_unpack_unpack(devinfo, unpack,
1313                                                    &instr->alu.add.a.unpack)) {
1314                         return false;
1315                 }
1316                 break;
1317 
1318         default:
1319                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
1320                 instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
1321                 instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
1322                 break;
1323         }
1324 
1325         instr->alu.add.a.raddr = raddr_a;
1326         instr->alu.add.b.raddr = raddr_b;
1327         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
1328 
1329         instr->alu.add.magic_write = false;
1330         if (packed_inst & V3D_QPU_MA) {
1331                 switch (instr->alu.add.op) {
1332                 case V3D_QPU_A_LDVPMV_IN:
1333                         instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
1334                         break;
1335                 case V3D_QPU_A_LDVPMD_IN:
1336                         instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
1337                         break;
1338                 case V3D_QPU_A_LDVPMG_IN:
1339                         instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
1340                         break;
1341                 default:
1342                         instr->alu.add.magic_write = true;
1343                         break;
1344                 }
1345         }
1346 
1347         return true;
1348 }
1349 
1350 static bool
1351 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1352                    struct v3d_qpu_instr *instr)
1353 {
1354         if (devinfo->ver >= 71)
1355                 return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
1356         else
1357                 return v3d42_qpu_add_unpack(devinfo, packed_inst, instr);
1358 }
1359 
1360 static bool
1361 v3d42_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1362                      struct v3d_qpu_instr *instr)
1363 {
1364         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1365         uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
1366         uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);
1367 
1368         {
1369                 const struct opcode_desc *desc =
1370                         lookup_opcode_from_packed(devinfo,
1371                                                   v3d42_mul_ops,
1372                                                   ARRAY_SIZE(v3d42_mul_ops),
1373                                                   op, mux_a, mux_b, 0);
1374                 if (!desc)
1375                         return false;
1376 
1377                 instr->alu.mul.op = desc->op;
1378         }
1379 
1380         switch (instr->alu.mul.op) {
1381         case V3D_QPU_M_FMUL:
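                /* The pack field in the opcode is biased by one (the pack
                 * side relies on desc->opcode_first already having a 1
                 * here), so subtract it back out.
                 */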
1382                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1383 
1384                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
1385                                                    &instr->alu.mul.a.unpack)) {
1386                         return false;
1387                 }
1388 
1389                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
1390                                                    &instr->alu.mul.b.unpack)) {
1391                         return false;
1392                 }
1393 
1394                 break;
1395 
1396         case V3D_QPU_M_FMOV:
1397                 instr->alu.mul.output_pack = (((op & 1) << 1) +
1398                                               ((mux_b >> 2) & 1));
1399 
1400                 if (!v3d_qpu_float32_unpack_unpack(devinfo, mux_b & 0x3,
1401                                                    &instr->alu.mul.a.unpack)) {
1402                         return false;
1403                 }
1404 
1405                 break;
1406 
1407         case V3D_QPU_M_VFMUL:
1408                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1409 
1410                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1411                                                    &instr->alu.mul.a.unpack)) {
1412                         return false;
1413                 }
1414 
1415                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1416 
1417                 break;
1418 
1419         default:
1420                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1421                 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1422                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1423                 break;
1424         }
1425 
1426         instr->alu.mul.a.mux = mux_a;
1427         instr->alu.mul.b.mux = mux_b;
1428         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1429         instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1430 
1431         return true;
1432 }
1433 
1434 static bool
1435 v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1436                      struct v3d_qpu_instr *instr)
1437 {
1438         uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
1439         uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
1440         uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);
1441 
1442         {
1443                 const struct opcode_desc *desc =
1444                         lookup_opcode_from_packed(devinfo,
1445                                                   v3d71_mul_ops,
1446                                                   ARRAY_SIZE(v3d71_mul_ops),
1447                                                   op, 0, 0,
1448                                                   raddr_d);
1449                 if (!desc)
1450                         return false;
1451 
1452                 instr->alu.mul.op = desc->op;
1453         }
1454 
1455         switch (instr->alu.mul.op) {
1456         case V3D_QPU_M_FMUL:
1457                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
1458 
1459                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
1460                                                    &instr->alu.mul.a.unpack)) {
1461                         return false;
1462                 }
1463 
1464                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
1465                                                    &instr->alu.mul.b.unpack)) {
1466                         return false;
1467                 }
1468 
1469                 break;
1470 
1471         case V3D_QPU_M_FMOV:
1472                 instr->alu.mul.output_pack = raddr_d & 0x3;
1473 
1474                 if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_d >> 2) & 0x3,
1475                                                    &instr->alu.mul.a.unpack)) {
1476                         return false;
1477                 }
1478 
1479                 break;
1480 
1481         case V3D_QPU_M_VFMUL:
1482                 unreachable("pending v3d71 update");
1483                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1484 
1485                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
1486                                                    &instr->alu.mul.a.unpack)) {
1487                         return false;
1488                 }
1489 
1490                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1491 
1492                 break;
1493 
1494         case V3D_QPU_M_MOV:
1495                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1496 
1497                 if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
1498                                                  &instr->alu.mul.a.unpack)) {
1499                         return false;
1500                 }
1501                 break;
1502 
1503         default:
1504                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
1505                 instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
1506                 instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
1507                 break;
1508         }
1509 
1510         instr->alu.mul.a.raddr = raddr_c;
1511         instr->alu.mul.b.raddr = raddr_d;
1512         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
1513         instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;
1514 
1515         return true;
1516 }
1517 
1518 static bool
1519 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1520                    struct v3d_qpu_instr *instr)
1521 {
1522         if (devinfo->ver >= 71)
1523                 return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
1524         else
1525                 return v3d42_qpu_mul_unpack(devinfo, packed_inst, instr);
1526 }
1527 
1528 static const struct opcode_desc *
1529 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
1530                          const struct opcode_desc *opcodes, size_t num_opcodes,
1531                          uint8_t op)
1532 {
1533         for (int i = 0; i < num_opcodes; i++) {
1534                 const struct opcode_desc *op_desc = &opcodes[i];
1535 
1536                 if (op_desc->op != op)
1537                         continue;
1538 
1539                 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
1540                         continue;
1541 
1542                 return op_desc;
1543         }
1544 
1545         return NULL;
1546 }
1547 
1548 static bool
1549 v3d42_qpu_add_pack(const struct v3d_device_info *devinfo,
1550                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1551 {
1552         uint32_t waddr = instr->alu.add.waddr;
1553         uint32_t mux_a = instr->alu.add.a.mux;
1554         uint32_t mux_b = instr->alu.add.b.mux;
1555         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1556         const struct opcode_desc *desc =
1557                 lookup_opcode_from_instr(devinfo, v3d42_add_ops,
1558                                          ARRAY_SIZE(v3d42_add_ops),
1559                                          instr->alu.add.op);
1560 
1561         if (!desc)
1562                 return false;
1563 
1564         uint32_t opcode = desc->opcode_first;
1565 
1566         /* If an operation doesn't use an arg, its mux values may be used to
1567          * identify the operation type.
1568          */
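        /* ffs() is 1-based, so this selects the lowest mux value permitted
         * by the opcode descriptor's mask.
         */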
1569         if (nsrc < 2)
1570                 mux_b = ffs(desc->mux.b_mask) - 1;
1571 
1572         if (nsrc < 1)
1573                 mux_a = ffs(desc->mux.a_mask) - 1;
1574 
1575         bool no_magic_write = false;
1576 
1577         switch (instr->alu.add.op) {
1578         case V3D_QPU_A_STVPMV:
1579                 waddr = 0;
1580                 no_magic_write = true;
1581                 break;
1582         case V3D_QPU_A_STVPMD:
1583                 waddr = 1;
1584                 no_magic_write = true;
1585                 break;
1586         case V3D_QPU_A_STVPMP:
1587                 waddr = 2;
1588                 no_magic_write = true;
1589                 break;
1590 
1591         case V3D_QPU_A_LDVPMV_IN:
1592         case V3D_QPU_A_LDVPMD_IN:
1593         case V3D_QPU_A_LDVPMP:
1594         case V3D_QPU_A_LDVPMG_IN:
1595                 assert(!instr->alu.add.magic_write);
1596                 break;
1597 
1598         case V3D_QPU_A_LDVPMV_OUT:
1599         case V3D_QPU_A_LDVPMD_OUT:
1600         case V3D_QPU_A_LDVPMG_OUT:
1601                 assert(!instr->alu.add.magic_write);
1602                 *packed_instr |= V3D_QPU_MA;
1603                 break;
1604 
1605         default:
1606                 break;
1607         }
1608 
1609         switch (instr->alu.add.op) {
1610         case V3D_QPU_A_FADD:
1611         case V3D_QPU_A_FADDNF:
1612         case V3D_QPU_A_FSUB:
1613         case V3D_QPU_A_FMIN:
1614         case V3D_QPU_A_FMAX:
1615         case V3D_QPU_A_FCMP: {
1616                 uint32_t output_pack;
1617                 uint32_t a_unpack;
1618                 uint32_t b_unpack;
1619 
1620                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1621                                                &output_pack)) {
1622                         return false;
1623                 }
1624                 opcode |= output_pack << 4;
1625 
1626                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1627                                                  instr->alu.add.a.unpack,
1628                                                  &a_unpack)) {
1629                         return false;
1630                 }
1631 
1632                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1633                                                  instr->alu.add.b.unpack,
1634                                                  &b_unpack)) {
1635                         return false;
1636                 }
1637 
1638                 /* These operations with commutative operands are
1639                  * distinguished by which order their operands come in.
1640                  */
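                /* For example, FMIN and FMAX share an opcode: packing FMAX
                 * with its operands in ascending order would read back as
                 * FMIN, so the operands are swapped here to produce the
                 * ordering the unpacker expects.
                 */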
1641                 bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
1642                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1643                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1644                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1645                       instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1646                         uint32_t temp;
1647 
1648                         temp = a_unpack;
1649                         a_unpack = b_unpack;
1650                         b_unpack = temp;
1651 
1652                         temp = mux_a;
1653                         mux_a = mux_b;
1654                         mux_b = temp;
1655                 }
1656 
1657                 opcode |= a_unpack << 2;
1658                 opcode |= b_unpack << 0;
1659 
1660                 break;
1661         }
1662 
1663         case V3D_QPU_A_VFPACK: {
1664                 uint32_t a_unpack;
1665                 uint32_t b_unpack;
1666 
1667                 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1668                     instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1669                         return false;
1670                 }
1671 
1672                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1673                                                  instr->alu.add.a.unpack,
1674                                                  &a_unpack)) {
1675                         return false;
1676                 }
1677 
1678                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1679                                                  instr->alu.add.b.unpack,
1680                                                  &b_unpack)) {
1681                         return false;
1682                 }
1683 
1684                 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1685                 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1686 
1687                 break;
1688         }
1689 
1690         case V3D_QPU_A_FFLOOR:
1691         case V3D_QPU_A_FROUND:
1692         case V3D_QPU_A_FTRUNC:
1693         case V3D_QPU_A_FCEIL:
1694         case V3D_QPU_A_FDX:
1695         case V3D_QPU_A_FDY: {
1696                 uint32_t packed;
1697 
1698                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1699                                                &packed)) {
1700                         return false;
1701                 }
1702                 mux_b |= packed;
1703 
1704                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1705                                                  instr->alu.add.a.unpack,
1706                                                  &packed)) {
1707                         return false;
1708                 }
1709                 if (packed == 0)
1710                         return false;
1711                 opcode = (opcode & ~(0x3 << 2)) | packed << 2;
1712                 break;
1713         }
1714 
1715         case V3D_QPU_A_FTOIN:
1716         case V3D_QPU_A_FTOIZ:
1717         case V3D_QPU_A_FTOUZ:
1718         case V3D_QPU_A_FTOC:
1719                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1720                         return false;
1721 
1722                 uint32_t packed;
1723                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1724                                                  instr->alu.add.a.unpack,
1725                                                  &packed)) {
1726                         return false;
1727                 }
1728                 if (packed == 0)
1729                         return false;
1730                 opcode |= packed << 2;
1731 
1732                 break;
1733 
1734         case V3D_QPU_A_VFMIN:
1735         case V3D_QPU_A_VFMAX:
1736                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1737                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
1738                         return false;
1739                 }
1740 
1741                 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
1742                                                  &packed)) {
1743                         return false;
1744                 }
1745                 opcode |= packed;
1746                 break;
1747 
1748         default:
1749                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
1750                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1751                      instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
1752                      instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
1753                         return false;
1754                 }
1755                 break;
1756         }
1757 
1758         *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
1759         *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
1760         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
1761         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
1762         if (instr->alu.add.magic_write && !no_magic_write)
1763                 *packed_instr |= V3D_QPU_MA;
1764 
1765         return true;
1766 }
1767 
1768 static bool
1769 v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
1770                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1771 {
1772         uint32_t waddr = instr->alu.add.waddr;
1773         uint32_t raddr_a = instr->alu.add.a.raddr;
1774         uint32_t raddr_b = instr->alu.add.b.raddr;
1775 
1776         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
1777         const struct opcode_desc *desc =
1778                 lookup_opcode_from_instr(devinfo, v3d71_add_ops,
1779                                          ARRAY_SIZE(v3d71_add_ops),
1780                                          instr->alu.add.op);
1781         if (!desc)
1782                 return false;
1783 
1784         uint32_t opcode = desc->opcode_first;
1785 
1786         /* If an operation doesn't use an arg, its raddr values may be used to
1787          * identify the operation type.
1788          */
1789         if (nsrc < 2)
1790                 raddr_b = ffsll(desc->raddr_mask) - 1;
1791 
1792         bool no_magic_write = false;
1793 
1794         switch (instr->alu.add.op) {
1795         case V3D_QPU_A_STVPMV:
1796                 waddr = 0;
1797                 no_magic_write = true;
1798                 break;
1799         case V3D_QPU_A_STVPMD:
1800                 waddr = 1;
1801                 no_magic_write = true;
1802                 break;
1803         case V3D_QPU_A_STVPMP:
1804                 waddr = 2;
1805                 no_magic_write = true;
1806                 break;
1807 
1808         case V3D_QPU_A_LDVPMV_IN:
1809         case V3D_QPU_A_LDVPMD_IN:
1810         case V3D_QPU_A_LDVPMP:
1811         case V3D_QPU_A_LDVPMG_IN:
1812                 assert(!instr->alu.add.magic_write);
1813                 break;
1814 
1815         case V3D_QPU_A_LDVPMV_OUT:
1816         case V3D_QPU_A_LDVPMD_OUT:
1817         case V3D_QPU_A_LDVPMG_OUT:
1818                 assert(!instr->alu.add.magic_write);
1819                 *packed_instr |= V3D_QPU_MA;
1820                 break;
1821 
1822         default:
1823                 break;
1824         }
1825 
1826         switch (instr->alu.add.op) {
1827         case V3D_QPU_A_FADD:
1828         case V3D_QPU_A_FADDNF:
1829         case V3D_QPU_A_FSUB:
1830         case V3D_QPU_A_FMIN:
1831         case V3D_QPU_A_FMAX:
1832         case V3D_QPU_A_FCMP: {
1833                 uint32_t output_pack;
1834                 uint32_t a_unpack;
1835                 uint32_t b_unpack;
1836 
1837                 if (instr->alu.add.op != V3D_QPU_A_FCMP) {
1838                         if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1839                                                        &output_pack)) {
1840                                 return false;
1841                         }
1842                         opcode |= output_pack << 4;
1843                 }
1844 
1845                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1846                                                  instr->alu.add.a.unpack,
1847                                                  &a_unpack)) {
1848                         return false;
1849                 }
1850 
1851                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1852                                                  instr->alu.add.b.unpack,
1853                                                  &b_unpack)) {
1854                         return false;
1855                 }
1856 
1857                 /* These operations with commutative operands are
1858                  * distinguished by which order their operands come in.
1859                  */
1860                 bool ordering =
1861                         instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
1862                         instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
1863                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1864                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1865                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1866                       instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1867                         uint32_t temp;
1868 
1869                         temp = a_unpack;
1870                         a_unpack = b_unpack;
1871                         b_unpack = temp;
1872 
1873                         temp = raddr_a;
1874                         raddr_a = raddr_b;
1875                         raddr_b = temp;
1876 
1877                         /* If we are swapping raddr_a/b we also need to swap
1878                          * small_imm_a/b.
1879                          */
1880                         if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
1881                                 assert(instr->sig.small_imm_a !=
1882                                        instr->sig.small_imm_b);
1883                                 struct v3d_qpu_sig new_sig = instr->sig;
1884                                 new_sig.small_imm_a = !instr->sig.small_imm_a;
1885                                 new_sig.small_imm_b = !instr->sig.small_imm_b;
1886                                 uint32_t sig;
1887                                 if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
1888                                     return false;
1889                                         return false;
1890                                 *packed_instr &= ~V3D_QPU_SIG_MASK;
1891                                 *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
1892                 }
1893 
1894                 opcode |= a_unpack << 2;
1895                 opcode |= b_unpack << 0;
1896 
1897                 break;
1898         }
1899 
1900         case V3D_QPU_A_VFPACK: {
1901                 uint32_t a_unpack;
1902                 uint32_t b_unpack;
1903 
1904                 if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
1905                     instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
1906                         return false;
1907                 }
1908 
1909                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1910                                                  instr->alu.add.a.unpack,
1911                                                  &a_unpack)) {
1912                         return false;
1913                 }
1914 
1915                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1916                                                  instr->alu.add.b.unpack,
1917                                                  &b_unpack)) {
1918                         return false;
1919                 }
1920 
1921                 opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
1922                 opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);
1923 
1924                 break;
1925         }
1926 
1927         case V3D_QPU_A_FFLOOR:
1928         case V3D_QPU_A_FROUND:
1929         case V3D_QPU_A_FTRUNC:
1930         case V3D_QPU_A_FCEIL:
1931         case V3D_QPU_A_FDX:
1932         case V3D_QPU_A_FDY: {
1933                 uint32_t packed;
1934 
1935                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1936                                                &packed)) {
1937                         return false;
1938                 }
1939                 raddr_b |= packed;
1940 
1941                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1942                                                  instr->alu.add.a.unpack,
1943                                                  &packed)) {
1944                         return false;
1945                 }
1946                 if (packed == 0)
1947                         return false;
1948                 raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
1949                 break;
1950         }
1951 
1952         case V3D_QPU_A_FTOIN:
1953         case V3D_QPU_A_FTOIZ:
1954         case V3D_QPU_A_FTOUZ:
1955         case V3D_QPU_A_FTOC:
1956                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1957                         return false;
1958 
1959                 uint32_t packed;
1960                 if (!v3d_qpu_float32_unpack_pack(devinfo,
1961                                                  instr->alu.add.a.unpack,
1962                                                  &packed)) {
1963                         return false;
1964                 }
1965                 if (packed == 0)
1966                         return false;
1967 
1968                 raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
1969 
1970                 break;
1971 
1972         case V3D_QPU_A_VFMIN:
1973         case V3D_QPU_A_VFMAX:
1974                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1975                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
1976                         return false;
1977                 }
1978 
1979                 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
1980                                                  &packed)) {
1981                         return false;
1982                 }
1983                 opcode |= packed;
1984                 break;
1985 
1986         case V3D_QPU_A_MOV: {
1987                 uint32_t packed;
1988 
1989                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1990                         return false;
1991 
1992                 if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
1993                                                &packed)) {
1994                         return false;
1995                 }
1996 
1997                 raddr_b |= packed << 2;
1998                 break;
1999         }
2000 
2001         case V3D_QPU_A_FMOV: {
2002                 uint32_t packed;
2003 
2004                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
2005                                                &packed)) {
2006                         return false;
2007                 }
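                /* raddr_b is wholly repurposed for FMOV: bits 1:0 carry the
                 * output pack and bits 4:2 the input unpack, matching the
                 * decode in v3d71_qpu_add_unpack().
                 */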
2008                 raddr_b = packed;
2009 
2010                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2011                                                  instr->alu.add.a.unpack,
2012                                                  &packed)) {
2013                         return false;
2014                 }
2015                 raddr_b |= packed << 2;
2016                 break;
2017         }
2018 
2019         default:
2020                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
2021                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
2022                      instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
2023                      instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
2024                         return false;
2025                 }
2026                 break;
2027         }
2028 
2029         *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
2030         *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
2031         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
2032         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
2033         if (instr->alu.add.magic_write && !no_magic_write)
2034                 *packed_instr |= V3D_QPU_MA;
2035 
2036         return true;
2037 }
2038 
2039 static bool
2040 v3d42_qpu_mul_pack(const struct v3d_device_info *devinfo,
2041                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2042 {
2043         uint32_t mux_a = instr->alu.mul.a.mux;
2044         uint32_t mux_b = instr->alu.mul.b.mux;
2045         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2046 
2047         const struct opcode_desc *desc =
2048                 lookup_opcode_from_instr(devinfo, v3d42_mul_ops,
2049                                          ARRAY_SIZE(v3d42_mul_ops),
2050                                          instr->alu.mul.op);
2051 
2052         if (!desc)
2053                 return false;
2054 
2055         uint32_t opcode = desc->opcode_first;
2056 
2057         /* Some opcodes have a single valid value for their mux a/b, so set
2058          * that here.  If mux a/b determine packing, it will be set below.
2059          */
2060         if (nsrc < 2)
2061                 mux_b = ffs(desc->mux.b_mask) - 1;
2062 
2063         if (nsrc < 1)
2064                 mux_a = ffs(desc->mux.a_mask) - 1;
2065 
2066         switch (instr->alu.mul.op) {
2067         case V3D_QPU_M_FMUL: {
2068                 uint32_t packed;
2069 
2070                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2071                                                &packed)) {
2072                         return false;
2073                 }
2074                 /* No need for a +1 because desc->opcode_first has a 1 in this
2075                  * field.
2076                  */
2077                 opcode += packed << 4;
2078 
2079                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2080                                                  instr->alu.mul.a.unpack,
2081                                                  &packed)) {
2082                         return false;
2083                 }
2084                 opcode |= packed << 2;
2085 
2086                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2087                                                  instr->alu.mul.b.unpack,
2088                                                  &packed)) {
2089                         return false;
2090                 }
2091                 opcode |= packed << 0;
2092                 break;
2093         }
2094 
2095         case V3D_QPU_M_FMOV: {
2096                 uint32_t packed;
2097 
2098                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2099                                                &packed)) {
2100                         return false;
2101                 }
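                /* The two output-pack bits are split across fields: the
                 * high bit lands in the opcode's LSB and the low bit in
                 * mux_b bit 2, the inverse of the decode in
                 * v3d42_qpu_mul_unpack().
                 */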
2102                 opcode |= (packed >> 1) & 1;
2103                 mux_b = (packed & 1) << 2;
2104 
2105                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2106                                                  instr->alu.mul.a.unpack,
2107                                                  &packed)) {
2108                         return false;
2109                 }
2110                 mux_b |= packed;
2111                 break;
2112         }
2113 
2114         case V3D_QPU_M_VFMUL: {
2115                 uint32_t packed;
2116 
2117                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2118                         return false;
2119 
2120                 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2121                                                  &packed)) {
2122                         return false;
2123                 }
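                /* The +4 bias here is undone by the -4 in
                 * v3d42_qpu_mul_unpack().  SWAP_16 doesn't fit that scheme
                 * and gets its own opcode.
                 */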
2124                 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2125                         opcode = 8;
2126                 else
2127                         opcode |= (packed + 4) & 7;
2128 
2129                 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2130                         return false;
2131 
2132                 break;
2133         }
2134 
2135         default:
2136                 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2137                     (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2138                      instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2139                      instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2140                         return false;
2141                 }
2142                 break;
2143         }
2144 
2145         *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
2146         *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);
2147 
2148         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2149         *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2150         if (instr->alu.mul.magic_write)
2151                 *packed_instr |= V3D_QPU_MM;
2152 
2153         return true;
2154 }
2155 
2156 static bool
2157 v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
2158                    const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2159 {
2160         uint32_t raddr_c = instr->alu.mul.a.raddr;
2161         uint32_t raddr_d = instr->alu.mul.b.raddr;
2162         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
2163 
2164         const struct opcode_desc *desc =
2165                 lookup_opcode_from_instr(devinfo, v3d71_mul_ops,
2166                                          ARRAY_SIZE(v3d71_mul_ops),
2167                                          instr->alu.mul.op);
2168         if (!desc)
2169                 return false;
2170 
2171         uint32_t opcode = desc->opcode_first;
2172 
2173         /* Some opcodes have a single valid value for their raddr_d, so set
2174          * that here.  If raddr_d determines packing, it will be set below.
2175          */
2176         if (nsrc < 2)
2177                 raddr_d = ffsll(desc->raddr_mask) - 1;
2178 
2179         switch (instr->alu.mul.op) {
2180         case V3D_QPU_M_FMUL: {
2181                 uint32_t packed;
2182 
2183                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2184                                                &packed)) {
2185                         return false;
2186                 }
2187                 /* No need for a +1 because desc->opcode_first has a 1 in this
2188                  * field.
2189                  */
2190                 opcode += packed << 4;
2191 
2192                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2193                                                  instr->alu.mul.a.unpack,
2194                                                  &packed)) {
2195                         return false;
2196                 }
2197                 opcode |= packed << 2;
2198 
2199                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2200                                                  instr->alu.mul.b.unpack,
2201                                                  &packed)) {
2202                         return false;
2203                 }
2204                 opcode |= packed << 0;
2205                 break;
2206         }
2207 
2208         case V3D_QPU_M_FMOV: {
2209                 uint32_t packed;
2210 
2211                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
2212                                                &packed)) {
2213                         return false;
2214                 }
2215                 raddr_d |= packed;
2216 
2217                 if (!v3d_qpu_float32_unpack_pack(devinfo,
2218                                                  instr->alu.mul.a.unpack,
2219                                                  &packed)) {
2220                         return false;
2221                 }
2222                 raddr_d |= packed << 2;
2223                 break;
2224         }
2225 
2226         case V3D_QPU_M_VFMUL: {
2227                 unreachable("pending v3d71 update");
2228                 uint32_t packed;
2229 
2230                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2231                         return false;
2232 
2233                 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
2234                                                  &packed)) {
2235                         return false;
2236                 }
2237                 if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
2238                         opcode = 8;
2239                 else
2240                         opcode |= (packed + 4) & 7;
2241 
2242                 if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
2243                         return false;
2244 
2245                 break;
2246         }
2247 
2248         case V3D_QPU_M_MOV: {
2249                 uint32_t packed;
2250 
2251                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
2252                         return false;
2253 
2254                 if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
2255                                                &packed)) {
2256                         return false;
2257                 }
2258 
2259                 raddr_d |= packed << 2;
2260                 break;
2261         }
2262 
2263         default:
2264                 if (instr->alu.mul.op != V3D_QPU_M_NOP &&
2265                     (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
2266                      instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
2267                      instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
2268                         return false;
2269                 }
2270                 break;
2271         }
2272 
2273         *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
2274         *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
2275         *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
2276         *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
2277         if (instr->alu.mul.magic_write)
2278                 *packed_instr |= V3D_QPU_MM;
2279 
2280         return true;
2281 }
2282 
2283 static bool
2284 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
2285                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2286 {
2287         if (devinfo->ver >= 71)
2288                 return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
2289         else
2290                 return v3d42_qpu_add_pack(devinfo, instr, packed_instr);
2291 }
2292 
2293 static bool
2294 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
2295                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2296 {
2297         if (devinfo->ver >= 71)
2298                 return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
2299         else
2300                 return v3d42_qpu_mul_pack(devinfo, instr, packed_instr);
2301 }
2302 
2303 static bool
2304 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
2305                          uint64_t packed_instr,
2306                          struct v3d_qpu_instr *instr)
2307 {
2308         instr->type = V3D_QPU_INSTR_TYPE_ALU;
2309 
2310         if (!v3d_qpu_sig_unpack(devinfo,
2311                                 QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
2312                                 &instr->sig))
2313                 return false;
2314 
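        /* When a signal writes a register, the condition field is reused
         * to hold the signal's write address (plus a magic flag), so no
         * ALU conditions or flag updates can be encoded alongside it.
         */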
2315         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
2316         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2317                 instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
2318                 instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
2319 
2320                 instr->flags.ac = V3D_QPU_COND_NONE;
2321                 instr->flags.mc = V3D_QPU_COND_NONE;
2322                 instr->flags.apf = V3D_QPU_PF_NONE;
2323                 instr->flags.mpf = V3D_QPU_PF_NONE;
2324                 instr->flags.auf = V3D_QPU_UF_NONE;
2325                 instr->flags.muf = V3D_QPU_UF_NONE;
2326         } else {
2327                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
2328                         return false;
2329         }
2330 
2331         if (devinfo->ver < 71) {
2332                 /*
2333                  * For v3d71 the raddrs are set in the add/mul unpack
2334                  * instead, as they are now part of v3d_qpu_input.
2335                  */
2336                 instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
2337                 instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
2338         }
2339 
2340         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
2341                 return false;
2342 
2343         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
2344                 return false;
2345 
2346         return true;
2347 }
2348 
2349 static bool
2350 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
2351                             uint64_t packed_instr,
2352                             struct v3d_qpu_instr *instr)
2353 {
2354         instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
2355 
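        /* Branch condition 0 means "always"; encodings from 2 upward map
         * onto the flag-based conditions starting at A0, and anything past
         * the last valid condition is rejected.
         */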
2356         uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
2357         if (cond == 0)
2358                 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
2359         else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
2360                  V3D_QPU_BRANCH_COND_ALLNA)
2361                 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
2362         else
2363                 return false;
2364 
2365         uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
2366         if (msfign == 3)
2367                 return false;
2368         instr->branch.msfign = msfign;
2369 
2370         instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
2371 
2372         instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
2373         if (instr->branch.ub) {
2374                 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
2375                                                   V3D_QPU_BRANCH_BDU);
2376         }
2377 
2378         instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
2379                                               V3D_QPU_RADDR_A);
2380 
2381         instr->branch.offset = 0;
2382 
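        /* The 32-bit branch offset is stored in two pieces: a 21-bit field
         * holding bits 23:3 (instructions are 64 bits wide, so the low
         * three bits are implied zero) and an 8-bit field holding bits
         * 31:24.
         */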
2383         instr->branch.offset +=
2384                 QPU_GET_FIELD(packed_instr,
2385                               V3D_QPU_BRANCH_ADDR_LOW) << 3;
2386 
2387         instr->branch.offset +=
2388                 QPU_GET_FIELD(packed_instr,
2389                               V3D_QPU_BRANCH_ADDR_HIGH) << 24;
2390 
2391         return true;
2392 }
2393 
2394 bool
2395 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
2396                      uint64_t packed_instr,
2397                      struct v3d_qpu_instr *instr)
2398 {
2399         if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
2400                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
2401         } else {
2402                 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
2403 
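                /* Branch instructions are encoded with a signal in the
                 * 16..23 range (the pack side writes 16), which is what
                 * masking against 24 detects here.
                 */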
2404                 if ((sig & 24) == 16) {
2405                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
2406                                                            instr);
2407                 } else {
2408                         return false;
2409                 }
2410         }
2411 }
2412 
2413 static bool
2414 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
2415                        const struct v3d_qpu_instr *instr,
2416                        uint64_t *packed_instr)
2417 {
2418         uint32_t sig;
2419         if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
2420                 return false;
2421         *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
2422 
2423         if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
2424                 if (devinfo->ver < 71) {
2425                         /*
2426                          * For v3d71 the raddrs are set in the add/mul pack
2427                          * instead, as they are now part of v3d_qpu_input.
2428                          */
2429                         *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
2430                         *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
2431                 }
2432 
2433                 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
2434                         return false;
2435                 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
2436                         return false;
2437 
2438                 uint32_t flags;
2439                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2440                         if (instr->flags.ac != V3D_QPU_COND_NONE ||
2441                             instr->flags.mc != V3D_QPU_COND_NONE ||
2442                             instr->flags.apf != V3D_QPU_PF_NONE ||
2443                             instr->flags.mpf != V3D_QPU_PF_NONE ||
2444                             instr->flags.auf != V3D_QPU_UF_NONE ||
2445                             instr->flags.muf != V3D_QPU_UF_NONE) {
2446                                 return false;
2447                         }
2448 
2449                         flags = instr->sig_addr;
2450                         if (instr->sig_magic)
2451                                 flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
2452                 } else {
2453                         if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
2454                                 return false;
2455                 }
2456 
2457                 *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
2458         } else {
2459                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
2460                         return false;
2461         }
2462 
2463         return true;
2464 }
2465 
2466 static bool
2467 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
2468                           const struct v3d_qpu_instr *instr,
2469                           uint64_t *packed_instr)
2470 {
2471         *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
2472 
2473         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2474                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
2475                                                     V3D_QPU_BRANCH_COND_A0),
2476                                                V3D_QPU_BRANCH_COND);
2477         }
2478 
2479         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2480                                        V3D_QPU_BRANCH_MSFIGN);
2481 
2482         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
2483                                        V3D_QPU_BRANCH_BDI);
2484 
2485         if (instr->branch.ub) {
2486                 *packed_instr |= V3D_QPU_BRANCH_UB;
2487                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
2488                                                V3D_QPU_BRANCH_BDU);
2489         }
2490 
2491         switch (instr->branch.bdi) {
2492         case V3D_QPU_BRANCH_DEST_ABS:
2493         case V3D_QPU_BRANCH_DEST_REL:
2494                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2495                                                V3D_QPU_BRANCH_MSFIGN);
2496 
2497                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
2498                                                 ~0xff000000) >> 3,
2499                                                V3D_QPU_BRANCH_ADDR_LOW);
2500 
2501                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
2502                                                V3D_QPU_BRANCH_ADDR_HIGH);
2503                 break;
2504         default:
2505                 break;
2506         }
2507 
2508         if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
2509             instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
2510                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
2511                                                V3D_QPU_RADDR_A);
2512         }
2513 
2514         return true;
2515 }
2516 
2517 bool
2518 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
2519                    const struct v3d_qpu_instr *instr,
2520                    uint64_t *packed_instr)
2521 {
2522         *packed_instr = 0;
2523 
2524         switch (instr->type) {
2525         case V3D_QPU_INSTR_TYPE_ALU:
2526                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
2527         case V3D_QPU_INSTR_TYPE_BRANCH:
2528                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
2529         default:
2530                 return false;
2531         }
2532 }
2533